opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""SimulatedUser class for multi-turn conversation simulation."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Dict, Optional
|
|
4
|
+
from opik.evaluation.models.models_factory import get as get_model
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SimulatedUser:
    """
    Stand-in for a human participant in a multi-turn conversation simulation.

    Every call to ``generate_response`` yields one user message as a plain
    string. The message is either the next entry of ``fixed_responses``
    (cycled endlessly) or, when no fixed responses were given, text produced
    by the LLM backend. Adding the message to the conversation is left to the
    caller.
    """

    def __init__(
        self,
        persona: str,
        model: str = "gpt-4o-mini",
        fixed_responses: Optional[List[str]] = None,
    ):
        """
        Create a simulated user.

        Args:
            persona: Free-text description of how the user behaves; it is
                embedded in the system prompt sent to the LLM.
            model: Model name handed to the models factory (default: gpt-4o-mini).
            fixed_responses: Optional scripted replies. When non-empty they are
                returned in order, wrapping around, and the LLM is never asked.
        """
        self.persona = persona
        self.model = model
        self.fixed_responses = fixed_responses or []
        self._response_index = 0

        # The backend is always created through the shared models factory,
        # even when scripted replies are supplied.
        self._llm = get_model(model_name=model)

    def generate_response(self, conversation_history: List[Dict[str, str]]) -> str:
        """
        Produce the next user message.

        Args:
            conversation_history: Messages so far, each a dict with 'role'
                and 'content' keys. Ignored when scripted replies are in use.

        Returns:
            The user's next message as a string.
        """
        scripted = self.fixed_responses
        if not scripted:
            # No script available: delegate to the LLM backend.
            return self._generate_llm_response(conversation_history)

        # Scripted replies take priority and wrap around once exhausted.
        position = self._response_index % len(scripted)
        self._response_index += 1
        return scripted[position]

    def _generate_llm_response(self, conversation_history: List[Dict[str, str]]) -> str:
        """Ask the LLM backend for the next user message, given the history."""
        persona_instructions = f"""You are a simulated user with the following persona: {self.persona}

Your task is to generate realistic user messages that this persona would send in a conversation.
Respond as if you are the user, not as an assistant describing the user.
Generate a single user message that fits your persona and the conversation context."""

        # The backend takes a single string, so the system prompt and the
        # whole history are flattened into one transcript.
        transcript = self._format_messages_as_text(
            [{"role": "system", "content": persona_instructions}, *conversation_history]
        )

        try:
            return self._llm.generate_string(input=transcript)
        except Exception as error:
            # Best-effort fallback: a backend failure becomes a message
            # instead of aborting the simulation.
            return f"I'm having trouble responding right now. ({str(error)})"

    def _format_messages_as_text(self, messages: List[Dict[str, str]]) -> str:
        """Render messages as newline-separated ``"Role: content"`` lines."""
        # Both keys are read before the label is built. Title-casing the role
        # gives "System", "User" and "Assistant" for the standard roles and
        # handles any other role name the same way.
        labelled = ((entry["role"], entry["content"]) for entry in messages)
        return "\n".join(f"{speaker.title()}: {text}" for speaker, text in labelled)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Multi-turn simulation functionality."""
|
|
2
|
+
|
|
3
|
+
from typing import Callable, Optional, Dict, Any, List
|
|
4
|
+
from opik import id_helpers, track
|
|
5
|
+
from .simulated_user import SimulatedUser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def run_simulation(
    app: Callable,
    user_simulator: SimulatedUser,
    initial_message: Optional[str] = None,
    max_turns: int = 5,
    thread_id: Optional[str] = None,
    project_name: Optional[str] = None,
    **app_kwargs: Any,
) -> Dict[str, Any]:
    """
    Drive a multi-turn conversation between a simulated user and an app.

    How the pieces interact:

    1. Each turn the app receives only the latest user message as a string.
    2. The app is expected to keep the full history itself, keyed by thread_id.
    3. Every app call carries opik_args so its trace is tagged with the thread_id.

    Args:
        app: Callable that handles one message per call.
            Signature: app(message: str, *, thread_id: str, **kwargs) -> Dict[str, str]
            If it has no ``opik_tracked`` attribute it is wrapped with @track first.
        user_simulator: SimulatedUser instance that produces the user messages.
        initial_message: First user message. When None, the simulator generates it.
        max_turns: Number of user/assistant exchanges to run (default: 5).
        thread_id: Thread ID grouping the traces. Generated when not provided.
        project_name: Optional project name; recorded in the trace metadata and
            echoed in the result.
        **app_kwargs: Extra keyword arguments forwarded to every app call.

    Returns:
        Dict containing:
            - thread_id: The thread ID used for this simulation
            - conversation_history: List of message dicts from the simulation
            - project_name: Project name if provided
    """
    thread_id = id_helpers.generate_id() if thread_id is None else thread_id

    # Wrap the app for tracing unless it already carries the tracking marker.
    if not hasattr(app, "opik_tracked"):
        app = track(name=getattr(app, "__name__", "simulation_app"))(app)

    # Transcript kept for the simulator; the app manages its own history.
    conversation_history: List[Dict[str, str]] = []

    if initial_message is None:
        initial_message = user_simulator.generate_response(conversation_history)

    for turn_number in range(1, max_turns + 1):
        # The first turn uses the opening message; later turns ask the simulator.
        if turn_number == 1:
            user_text = initial_message
        else:
            user_text = user_simulator.generate_response(conversation_history)
        conversation_history.append({"role": "user", "content": user_text})

        try:
            reply = app(
                user_text,
                thread_id=thread_id,
                **app_kwargs,
                opik_args={
                    "trace": {
                        "thread_id": thread_id,
                        "metadata": {
                            "turn": turn_number,
                            "project_name": project_name,
                        },
                    }
                },
            )
        except Exception as error:
            # An app failure is recorded as the assistant's reply so the
            # simulation can carry on with the remaining turns.
            reply = {
                "role": "assistant",
                "content": f"Error processing message: {str(error)}",
            }

        # Anything that is not a dict with both 'role' and 'content' is
        # coerced into an assistant message.
        well_formed = (
            isinstance(reply, dict) and "role" in reply and "content" in reply
        )
        if not well_formed:
            reply = {
                "role": "assistant",
                "content": str(reply) if reply else "No response",
            }

        conversation_history.append(reply)

    return {
        "thread_id": thread_id,
        "conversation_history": conversation_history,
        "project_name": project_name,
    }
|
opik/synchronization.py
CHANGED
|
@@ -40,15 +40,14 @@ def until(
|
|
|
40
40
|
while True:
|
|
41
41
|
try:
|
|
42
42
|
if function():
|
|
43
|
-
|
|
43
|
+
return True
|
|
44
44
|
except Exception:
|
|
45
45
|
LOGGER.debug(
|
|
46
46
|
f"{function.__name__} raised error in 'until' function.", exc_info=True
|
|
47
47
|
)
|
|
48
48
|
if not allow_errors:
|
|
49
49
|
raise
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
return True
|
|
50
|
+
|
|
51
|
+
if (time.time() - start_time) > max_try_seconds:
|
|
52
|
+
return False
|
|
53
|
+
time.sleep(sleep)
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import threading
|
|
4
4
|
from typing import Optional
|
|
5
|
+
from . import config
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class TracingRuntimeConfig:
|
|
@@ -23,9 +24,7 @@ class TracingRuntimeConfig:
|
|
|
23
24
|
return self._tracing_active
|
|
24
25
|
|
|
25
26
|
try:
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
enabled = not _config_module.OpikConfig().track_disable
|
|
27
|
+
enabled = not config.OpikConfig().track_disable
|
|
29
28
|
self._tracing_active = enabled
|
|
30
29
|
|
|
31
30
|
except Exception:
|
|
@@ -34,16 +33,16 @@ class TracingRuntimeConfig:
|
|
|
34
33
|
return enabled
|
|
35
34
|
|
|
36
35
|
|
|
37
|
-
|
|
36
|
+
runtime_config = TracingRuntimeConfig()
|
|
38
37
|
|
|
39
38
|
|
|
40
39
|
def set_tracing_active(active: bool) -> None:
|
|
41
|
-
|
|
40
|
+
runtime_config.set_tracing_active(active)
|
|
42
41
|
|
|
43
42
|
|
|
44
43
|
def is_tracing_active() -> bool:
|
|
45
|
-
return
|
|
44
|
+
return runtime_config.is_tracing_active()
|
|
46
45
|
|
|
47
46
|
|
|
48
47
|
def reset_tracing_to_config_default() -> None:
|
|
49
|
-
|
|
48
|
+
runtime_config.reset_to_config_default()
|
opik/types.py
CHANGED
|
@@ -2,6 +2,7 @@ import enum
|
|
|
2
2
|
import sys
|
|
3
3
|
from typing import Literal, Optional
|
|
4
4
|
|
|
5
|
+
from pydantic import StrictStr
|
|
5
6
|
from typing_extensions import TypedDict
|
|
6
7
|
|
|
7
8
|
if sys.version_info < (3, 11):
|
|
@@ -79,6 +80,41 @@ class FeedbackScoreDict(TypedDict):
|
|
|
79
80
|
"""An optional explanation or justification for the given score."""
|
|
80
81
|
|
|
81
82
|
|
|
83
|
+
class BatchFeedbackScoreDict(TypedDict):
    """
    A TypedDict representing a feedback score for batch operations.

    This class defines the structure for feedback scores used in batch logging
    operations, with a required id field and optional per-score project_name.

    Required keys: ``id``, ``name``, ``value``.
    Optional keys (may be omitted or set to None): ``project_name``,
    ``category_name``, ``reason``.
    """

    # --- required keys ---

    id: Required[str]
    """
    A unique identifier for the object this score should be assigned to.
    Refers to either the trace_id, span_id or thread_id depending on how the score is logged.
    Required for batch operations.
    """

    name: Required[str]
    """The name of the feedback metric or criterion."""

    value: Required[float]
    """The numerical value of the feedback score."""

    # --- optional keys ---

    # NOTE(review): this key is annotated with pydantic's StrictStr while the
    # other optional string keys use plain str — confirm this is intentional.
    project_name: NotRequired[Optional[StrictStr]]
    """
    The name of the project for this specific score.
    If not provided, falls back to the project_name parameter in the method call,
    or the default project name configured in the Opik instance.
    """

    category_name: NotRequired[Optional[str]]
    """An optional category name for the given score."""

    reason: NotRequired[Optional[str]]
    """An optional explanation or justification for the given score."""
|
|
116
|
+
|
|
117
|
+
|
|
82
118
|
class ErrorInfoDict(TypedDict):
|
|
83
119
|
"""
|
|
84
120
|
A TypedDict representing the information about the error occurred.
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from typing import Any, List, Optional
|
|
2
|
+
|
|
3
|
+
import opik.exceptions as exceptions
|
|
4
|
+
from . import validator, result
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ChatPromptMessagesValidator(validator.RaisableValidator):
    """
    Validator for ChatPrompt messages list.

    Validates that messages is a list of dicts, each with exactly these keys:
    - "role" key with value "system", "user", or "assistant"
    - "content" key with value either string or list of dicts
    - If content is list of dicts, each dict must have "type" key

    Content parts get extra checks for the known types:
    - type "text" requires a string under the "text" key
    - types "image_url", "video_url" and "audio_url" require a dict under the
      key named after the type, holding a string "url"

    Parts with any other type are accepted as-is. Every problem found is
    collected as a human-readable reason, so one pass reports all failures.
    """

    VALID_ROLES = {"system", "user", "assistant"}
    URL_BASED_CONTENT_TYPES = {"image_url", "video_url", "audio_url"}

    def __init__(self, messages: Any):
        """Store the object to validate; no validation happens until validate()."""
        self.messages = messages
        self.validation_result: Optional[result.ValidationResult] = None

    def validate(self) -> result.ValidationResult:
        """
        Run all checks, then store and return the validation result.

        Returns:
            A ValidationResult with failed=True and the collected reasons when
            anything is wrong, otherwise one with failed=False.
        """
        failure_reasons: List[str] = []

        # Messages are only walked when the container is a list; otherwise the
        # container-level failure is the whole report.
        if self._validate_messages_is_list(failure_reasons):
            for idx, message in enumerate(self.messages):
                self._validate_message(f"messages[{idx}]", message, failure_reasons)

        if failure_reasons:
            self.validation_result = result.ValidationResult(
                failed=True, failure_reasons=failure_reasons
            )
        else:
            self.validation_result = result.ValidationResult(failed=False)

        return self.validation_result

    def _validate_messages_is_list(self, failure_reasons: List[str]) -> bool:
        """Validate that messages is a list. Returns False if validation fails."""
        if not isinstance(self.messages, list):
            msg = (
                f"messages must be a list but {type(self.messages).__name__} was given"
            )
            failure_reasons.append(msg)
            return False
        return True

    def _validate_message(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single message structure, role, and content."""
        # Role and content are only inspected once the structure is known to
        # be a dict with exactly the expected keys.
        if not self._validate_message_structure(prefix, message, failure_reasons):
            return

        self._validate_role(prefix, message, failure_reasons)
        self._validate_content(prefix, message, failure_reasons)

    def _validate_message_structure(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> bool:
        """
        Validate that message is a dict with exactly 'role' and 'content' keys.

        Missing and unexpected keys are reported separately, so a message that
        has both problems yields two reasons. Returns False if validation fails.
        """
        if not isinstance(message, dict):
            msg = f"{prefix}: must be a dict but {type(message).__name__} was given"
            failure_reasons.append(msg)
            return False

        message_keys = set(message.keys())
        expected_keys = {"role", "content"}

        # Each message is built from the set difference it describes. The
        # earlier subset checks were swapped, so extra keys were reported as
        # "missing required keys: []" and missing keys as "unexpected keys: []".
        missing_keys = expected_keys - message_keys
        extra_keys = message_keys - expected_keys

        if missing_keys:
            msg = f"{prefix}: missing required keys: {sorted(missing_keys)}"
            failure_reasons.append(msg)

        if extra_keys:
            # Sort by string form so non-string keys cannot break the report.
            msg = (
                f"{prefix}: unexpected keys: {sorted(extra_keys, key=str)}. "
                f"Expected only: {sorted(expected_keys)}"
            )
            failure_reasons.append(msg)

        return not (missing_keys or extra_keys)

    def _validate_role(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the role field of a message."""
        role = message.get("role")
        # The type check comes first so an unhashable role (e.g. a list) is
        # reported as invalid instead of raising TypeError on the set lookup.
        if not isinstance(role, str) or role not in self.VALID_ROLES:
            valid_roles_str = ", ".join([f"'{r}'" for r in sorted(self.VALID_ROLES)])
            msg = (
                f"{prefix}.role: must be one of [{valid_roles_str}] "
                f"but {repr(role)} was given"
            )
            failure_reasons.append(msg)

    def _validate_content(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the content field of a message."""
        content = message.get("content")
        if content is None:
            msg = f"{prefix}.content: must not be None"
            failure_reasons.append(msg)
        elif isinstance(content, list):
            self._validate_content_list(prefix, content, failure_reasons)
        elif not isinstance(content, str):
            msg = (
                f"{prefix}.content: must be either str or list of dicts "
                f"but {type(content).__name__} was given"
            )
            failure_reasons.append(msg)

    def _validate_content_list(
        self, prefix: str, content: list, failure_reasons: List[str]
    ) -> None:
        """Validate content when it is a list of content parts."""
        for content_idx, content_part in enumerate(content):
            content_prefix = f"{prefix}.content[{content_idx}]"
            self._validate_content_part(content_prefix, content_part, failure_reasons)

    def _validate_content_part(
        self, content_prefix: str, content_part: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single content part in the content list."""
        if not isinstance(content_part, dict):
            msg = (
                f"{content_prefix}: must be a dict "
                f"but {type(content_part).__name__} was given"
            )
            failure_reasons.append(msg)
            return

        if "type" not in content_part:
            msg = f"{content_prefix}: must have 'type' key"
            failure_reasons.append(msg)
            return

        # Validate type-specific requirements
        content_type = content_part.get("type")
        self._validate_content_type_specific(
            content_prefix, content_type, content_part, failure_reasons
        )

    def _validate_content_type_specific(
        self,
        content_prefix: str,
        content_type: Any,
        content_part: dict,
        failure_reasons: List[str],
    ) -> None:
        """Validate type-specific requirements for content parts."""
        # A non-string type cannot match any known content type. It is skipped
        # like any other unknown type, which also keeps an unhashable value
        # from raising TypeError on the set lookup below.
        if not isinstance(content_type, str):
            return

        if content_type in self.URL_BASED_CONTENT_TYPES:
            # For URL-based parts the payload key has the same name as the type.
            self._validate_required_url_object(
                content_prefix,
                content_part,
                content_type,
                content_type,
                failure_reasons,
            )
        elif content_type == "text":
            self._validate_required_string_key(
                content_prefix, content_part, "text", "text", failure_reasons
            )

    def _validate_required_string_key(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a string."""
        if key_name not in content_part:
            msg = f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            failure_reasons.append(msg)
        elif not isinstance(content_part.get(key_name), str):
            msg = (
                f"{prefix}.{key_name}: must be a string "
                f"but {type(content_part.get(key_name)).__name__} was given"
            )
            failure_reasons.append(msg)

    def _validate_required_url_object(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a dict with a 'url' key that is a string."""
        if key_name not in content_part:
            msg = f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            failure_reasons.append(msg)
            return

        url_object = content_part.get(key_name)
        if not isinstance(url_object, dict):
            msg = (
                f"{prefix}.{key_name}: must be a dict "
                f"but {type(url_object).__name__} was given"
            )
            failure_reasons.append(msg)
            return

        if "url" not in url_object:
            msg = f"{prefix}.{key_name}: must have 'url' key"
            failure_reasons.append(msg)
        elif not isinstance(url_object.get("url"), str):
            msg = (
                f"{prefix}.{key_name}.url: must be a string "
                f"but {type(url_object.get('url')).__name__} was given"
            )
            failure_reasons.append(msg)

    def raise_if_validation_failed(self) -> None:
        """
        Raise ValidationError if the last validate() call collected failures.

        Does nothing when validate() has not been called yet or found no problems.

        Raises:
            exceptions.ValidationError: carrying every collected failure reason.
        """
        # Truthiness covers both an empty and an absent reasons collection.
        if (
            self.validation_result is not None
            and self.validation_result.failure_reasons
        ):
            raise exceptions.ValidationError(
                prefix="ChatPrompt.__init__",
                failure_reasons=self.validation_result.failure_reasons,
            )
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import pydantic
|
|
2
2
|
|
|
3
3
|
from typing import Any
|
|
4
|
-
from ..types import
|
|
4
|
+
from ..types import BatchFeedbackScoreDict
|
|
5
5
|
from . import validator, result
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class PydanticWrapper(pydantic.BaseModel):
|
|
9
9
|
model_config = pydantic.ConfigDict(extra="forbid")
|
|
10
|
-
feedback_score:
|
|
10
|
+
feedback_score: BatchFeedbackScoreDict
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str]}"
|
|
13
|
+
EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str], 'project_name': NotRequired[str]}"
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class FeedbackScoreValidator(validator.Validator):
|
opik/validation/validator.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import abc
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
from . import result
|
|
4
5
|
|
|
@@ -7,3 +8,30 @@ class Validator(abc.ABC):
|
|
|
7
8
|
@abc.abstractmethod
|
|
8
9
|
def validate(self) -> result.ValidationResult:
|
|
9
10
|
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RaisableValidator(Validator):
    """
    Abstract Validator that can also turn a failed validation into an exception.

    On top of the Validator interface it declares raise_if_validation_failed,
    for validators that must raise ValidationError when validation fails,
    typically during class initialization.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Accept and ignore any arguments.

        Subclasses are free to override this with their own signature.
        """

    @abc.abstractmethod
    def raise_if_validation_failed(self) -> None:
        """
        Raise a ValidationError if validation failed.

        Implementations should inspect the validation result and raise an
        appropriate ValidationError when it reports a failure.
        """
|