opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import collections
|
|
3
|
+
import datetime
|
|
4
|
+
import logging
|
|
5
|
+
import threading
|
|
6
|
+
from typing import List, Dict, Union, Optional, Any, Type, Callable
|
|
7
|
+
|
|
8
|
+
from opik import dict_utils
|
|
9
|
+
from opik.rest_api.types import span_write, trace_write
|
|
10
|
+
from opik.types import ErrorInfoDict, SpanType
|
|
11
|
+
from . import models
|
|
12
|
+
from .. import messages
|
|
13
|
+
from ..processors import message_processors
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
LOGGER = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC):
    """
    This class acts as a stand-in for the actual backend.

    The real message processor uses data from messages passed
    to the process method to send information to the backend.
    In contrast, the emulator does not send any requests — instead,
    it collects the data from incoming messages and stores it
    in its attributes.

    Beyond simple data storage, it also constructs complete trace
    and span trees from the received messages. These trees are defined
    by model classes and their subclasses located in models. Concrete
    model instances are created in the abstract create_* methods,
    which must be implemented by subclasses.

    Note: If a new message type is added to the Opik SDK, this
    class must be updated accordingly.

    Args:
        active: Flag indicating whether the emulator is active.
        merge_duplicates: Flag indicating whether duplicates (traces or spans) should
            be merged to retain only unique and updated ones.
    """

    def __init__(self, active: bool, merge_duplicates: bool) -> None:
        self.merge_duplicates = merge_duplicates
        self._active = active

        self._register_handlers()

        # Re-entrant on purpose: locked methods call other locked methods
        # (e.g. process -> is_active, trace_trees -> _build_spans_tree).
        self._rlock = threading.RLock()

        self.reset()

    def reset(self) -> None:
        """
        Resets the internal state of the instance to its initial configuration.

        This method clears and re-initializes all internal attributes related to
        processed messages, traces, spans, and feedback scores. It ensures that
        the instance can start fresh, as if it was just created, without retaining
        any previous data.
        """
        with self._rlock:
            self._trace_trees: List[models.TraceModel] = []

            self._traces_to_spans_mapping: Dict[str, List[str]] = (
                collections.defaultdict(list)
            )
            # the same as _trace_trees but without a trace. Useful for distributed tracing.
            self._span_trees: List[models.SpanModel] = []
            self._trace_observations: Dict[str, models.TraceModel] = {}
            self._span_observations: Dict[str, models.SpanModel] = {}

            self._span_to_parent_span: Dict[str, Optional[str]] = {}
            self._span_to_trace: Dict[str, Optional[str]] = {}
            self._trace_to_feedback_scores: Dict[
                str, List[models.FeedbackScoreModel]
            ] = collections.defaultdict(list)
            self._span_to_feedback_scores: Dict[
                str, List[models.FeedbackScoreModel]
            ] = collections.defaultdict(list)
            self._experiment_items: List[models.ExperimentItemModel] = []

    def is_active(self) -> bool:
        """Returns the current value of the active flag."""
        with self._rlock:
            return self._active

    def set_active(self, active: bool) -> None:
        """Sets the active flag; while it is False, process() ignores messages."""
        with self._rlock:
            self._active = active

    @property
    def trace_trees(self) -> List[models.TraceModel]:
        """
        Builds a list of trace trees based on the data from the processed messages.
        Before processing traces, builds span_trees

        Root spans (spans without a parent span) are attached to their trace and
        the trace's spans are sorted by start time. Spans that reference a trace
        which has not been received are skipped instead of raising KeyError.

        Returns:
            The internal list of trace models, sorted by start time.
        """
        with self._rlock:
            # call to connect all spans
            self._build_spans_tree()

            for span_id, trace_id in self._span_to_trace.items():
                if trace_id is None:
                    continue

                trace = self._trace_observations.get(trace_id)
                if trace is None:
                    # The span points to a trace this emulator never received;
                    # such spans remain reachable only through span_trees.
                    LOGGER.debug(
                        "Span %s references unknown trace %s, skipping",
                        span_id,
                        trace_id,
                    )
                    continue

                is_root_span = self._span_to_parent_span[span_id] is None
                if is_root_span and not _observation_already_stored(
                    span_id, trace.spans
                ):
                    span = self._span_observations[span_id]
                    trace.spans.append(span)
                    trace.spans.sort(key=lambda x: x.start_time)

            for trace in self._trace_trees:
                trace.feedback_scores = self._trace_to_feedback_scores[trace.id]

            self._trace_trees.sort(key=lambda x: x.start_time)
            return self._trace_trees

    def _save_trace(self, trace: models.TraceModel) -> None:
        """
        Stores the trace in the list of traces and in the id -> trace lookup.

        With merge_duplicates enabled, an already stored trace with the same id
        is removed from the list when the incoming trace has an end time and the
        stored one has none or an earlier one.
        """
        if self.merge_duplicates:
            # merge traces with the same id to keep only the latest one
            if trace.id in self._trace_observations:
                existing_trace: models.TraceModel = self._trace_observations[trace.id]
                if trace.end_time is not None:
                    if (
                        existing_trace.end_time is None
                        or trace.end_time > existing_trace.end_time
                    ):
                        # remove the current trace from the list
                        self._trace_trees.remove(existing_trace)
                # NOTE(review): when the incoming duplicate is not newer, both
                # copies stay in _trace_trees - confirm this is intended.

        self._trace_trees.append(trace)
        self._trace_observations[trace.id] = trace

    def _save_span(
        self, span: models.SpanModel, trace_id: str, parent_span_id: Optional[str]
    ) -> None:
        """
        Stores the span and records its parent span and trace.

        Only spans without a parent span are kept in the list of root spans.
        With merge_duplicates enabled, an already stored root span with the same
        id is removed from that list when the incoming span has an end time and
        the stored one has none or an earlier one.
        """
        if self.merge_duplicates:
            # merge spans with the same id to keep only the latest one
            if span.id in self._span_observations:
                existing_span = self._span_observations[span.id]
                if span.end_time is not None:
                    if (
                        existing_span.end_time is None
                        or span.end_time > existing_span.end_time
                    ):
                        try:
                            self._span_trees.remove(existing_span)
                        except ValueError:
                            # The existing span has a parent, so it was never
                            # added to the root list; nothing to remove there.
                            pass
                # NOTE(review): when the incoming duplicate is not newer, a root
                # span is appended a second time below - confirm this is intended.

        self._span_to_parent_span[span.id] = parent_span_id
        if parent_span_id is None:
            self._span_trees.append(span)

        self._span_to_trace[span.id] = trace_id
        self._span_observations[span.id] = span

    @property
    def span_trees(self) -> List[models.SpanModel]:
        """Connects all spans into trees and returns the list of root spans."""
        with self._rlock:
            self._build_spans_tree()
            return self._span_trees

    def _build_spans_tree(self) -> None:
        """
        Builds a list of span trees based on the data from the processed messages.
        Children's spans are sorted by creation time

        Spans whose parent span has not been received are skipped instead of
        raising KeyError. Feedback scores are attached to every stored span.
        """
        with self._rlock:
            for span_id, parent_span_id in self._span_to_parent_span.items():
                if parent_span_id is None:
                    continue

                parent_span = self._span_observations.get(parent_span_id)
                if parent_span is None:
                    # The parent was never received by this emulator, so there
                    # is nothing to attach the child to.
                    LOGGER.debug(
                        "Span %s references unknown parent span %s, skipping",
                        span_id,
                        parent_span_id,
                    )
                    continue

                if not _observation_already_stored(span_id, parent_span.spans):
                    parent_span.spans.append(self._span_observations[span_id])
                    parent_span.spans.sort(key=lambda x: x.start_time)

            all_span_ids = self._span_to_trace
            for span_id in all_span_ids:
                span = self._span_observations[span_id]
                span.feedback_scores = self._span_to_feedback_scores[span_id]

            self._span_trees.sort(key=lambda x: x.start_time)

    def _dispatch_message(self, message: messages.BaseMessage) -> None:
        """
        Calls the handler registered for the exact type of the message.

        Messages without a registered handler are reported at debug level
        and otherwise ignored.
        """
        message_type = type(message)
        handler = self._handlers.get(message_type)
        if handler is None:
            LOGGER.debug("Unknown type of message - %s", message_type.__name__)
            return

        handler(message)

    def process(self, message: messages.BaseMessage) -> None:
        """
        Processes a message if the emulator is active.

        Any exception raised by a handler is logged and not propagated.
        """
        with self._rlock:
            if not self.is_active():
                return

            try:
                self._dispatch_message(message)
            except Exception as exception:
                LOGGER.error(
                    "Failed to process message by emulator message processor, reason: %s",
                    exception,
                    exc_info=True,
                )

    @abc.abstractmethod
    def create_trace_model(
        self,
        trace_id: str,
        start_time: datetime.datetime,
        name: Optional[str],
        project_name: str,
        input: Any,
        output: Any,
        tags: Optional[List[str]],
        metadata: Optional[Dict[str, Any]],
        end_time: Optional[datetime.datetime],
        spans: Optional[List[models.SpanModel]],
        feedback_scores: Optional[List[models.FeedbackScoreModel]],
        error_info: Optional[ErrorInfoDict],
        thread_id: Optional[str],
        last_updated_at: Optional[datetime.datetime] = None,
    ) -> models.TraceModel:
        """
        Creates a trace model with the specified attributes. The method is abstract and must be
        implemented in a subclass to define how a trace model is created. It involves parameters
        such as timing details, trace-specific metadata, associated tags, input/output data,
        and other relevant information.

        Args:
            trace_id: A unique identifier for the trace.
            start_time: The starting time of the trace.
            name: An optional name representing the trace's purpose or identifier.
            project_name: The name of the project associated with the trace.
            input: Input data associated with the trace.
            output: Output data generated from the trace execution.
            tags: Optional list of tags for categorizing or labeling the trace.
            metadata: Optional dictionary containing additional metadata related
                to the trace.
            end_time: An optional datetime indicating when the trace ended.
            spans: A list of SpanModel instances representing spans of the trace.
            feedback_scores: Optional list of FeedbackScoreModel instances detailing
                collected feedback or evaluation metrics.
            error_info: Optional dictionary providing details on any errors
                encountered during the trace.
            thread_id: An optional identifier for the thread executing the trace.
            last_updated_at: Optional datetime to specify when the trace was
                last updated.

        Returns:
            An instance of TraceModel with the specified attributes.

        Raises:
            NotImplementedError: This method must be implemented in a subclass.
        """
        raise NotImplementedError("This method must be implemented in a subclass.")

    @abc.abstractmethod
    def create_span_model(
        self,
        span_id: str,
        start_time: datetime.datetime,
        name: Optional[str],
        input: Any,
        output: Any,
        tags: Optional[List[str]],
        metadata: Optional[Dict[str, Any]],
        type: SpanType,
        usage: Optional[Dict[str, Any]],
        end_time: Optional[datetime.datetime],
        project_name: str,
        spans: Optional[List[models.SpanModel]],
        feedback_scores: Optional[List[models.FeedbackScoreModel]],
        model: Optional[str],
        provider: Optional[str],
        error_info: Optional[ErrorInfoDict],
        total_cost: Optional[float],
        last_updated_at: Optional[datetime.datetime],
    ) -> models.SpanModel:
        """
        Abstract method to create a span model representing a span of a trace.
        This method is intended to facilitate the creation and organization of
        span-related data for tracing or debugging purposes.

        Args:
            span_id: Unique identifier for the span.
            start_time: The start timestamp of the span.
            name: Name of the span (e.g., an operation or process name).
            input: Input data or parameters related to the span.
            output: Output data or result associated with the span.
            tags: List of tags or labels to categorize the span.
            metadata: Additional metadata about the span.
            type: The type or category of the span.
            usage: Information about resource usage within the span.
            end_time: The end timestamp of the span.
            project_name: Name of the project to which the span belongs.
            spans: List of child or nested spans within the current span context.
            feedback_scores: Feedback scores related to this span, if applicable.
            model: The model identifier relevant to the span's processing.
            provider: The provider associated with the span or model.
            error_info: Information regarding errors encountered during the span's execution.
            total_cost: Total cost incurred during the span.
            last_updated_at: Timestamp marking the last update of the span's information.

        Returns:
            models.SpanModel: A fully initialized span model.

        Raises:
            NotImplementedError: This method must be implemented in a subclass.
        """
        raise NotImplementedError("This method must be implemented in a subclass.")

    @abc.abstractmethod
    def create_feedback_score_model(
        self,
        score_id: str,
        name: str,
        value: float,
        category_name: Optional[str],
        reason: Optional[str],
    ) -> models.FeedbackScoreModel:
        """
        Creates a feedback score model with the specified parameters.

        The method is abstract and must be implemented by a subclass. It defines
        the structure for creating a feedback score model by combining the given
        data into the appropriate model object.

        Args:
            score_id: The unique identifier for the feedback score.
            name: The name associated with the feedback score.
            value: The numeric value representing the feedback score.
            category_name: An optional category name for classifying the score.
            reason: An optional explanation or reason related to the feedback
                score.

        Returns:
            An instance of FeedbackScoreModel containing the created feedback
            score details.

        Raises:
            NotImplementedError: This method must be implemented in a subclass.
        """
        raise NotImplementedError("This method must be implemented in a subclass.")

    def _register_handlers(self) -> None:
        """Builds the mapping from message type to the method that handles it."""
        self._handlers: Dict[Type, Callable[[messages.BaseMessage], None]] = {
            messages.CreateSpanMessage: self._handle_create_span_message,  # type: ignore
            messages.CreateTraceMessage: self._handle_create_trace_message,  # type: ignore
            messages.UpdateSpanMessage: self._handle_update_span_message,  # type: ignore
            messages.UpdateTraceMessage: self._handle_update_trace_message,  # type: ignore
            messages.AddTraceFeedbackScoresBatchMessage: self._handle_add_trace_feedback_scores_batch_message,  # type: ignore
            messages.AddSpanFeedbackScoresBatchMessage: self._handle_add_span_feedback_scores_batch_message,  # type: ignore
            messages.CreateSpansBatchMessage: self._handle_create_spans_batch_message,  # type: ignore
            messages.CreateTraceBatchMessage: self._handle_create_traces_batch_message,  # type: ignore
            messages.CreateExperimentItemsBatchMessage: self._handle_create_experiment_items_batch_message,  # type: ignore
            messages.AttachmentSupportingMessage: self._noop_handler,  # type: ignore
        }

    def _handle_create_trace_message(
        self, message: messages.CreateTraceMessage
    ) -> None:
        """Creates a trace model from the message fields and stores it."""
        trace = self.create_trace_model(
            trace_id=message.trace_id,
            name=message.name,
            input=message.input,
            output=message.output,
            tags=message.tags,
            metadata=message.metadata,
            start_time=message.start_time,
            end_time=message.end_time,
            project_name=message.project_name,
            error_info=message.error_info,
            thread_id=message.thread_id,
            last_updated_at=message.last_updated_at,
            feedback_scores=None,
            spans=None,
        )

        self._save_trace(trace)

    def _handle_create_span_message(self, message: messages.CreateSpanMessage) -> None:
        """Creates a span model from the message fields and stores it."""
        span = self.create_span_model(
            span_id=message.span_id,
            name=message.name,
            input=message.input,
            output=message.output,
            tags=message.tags,
            metadata=message.metadata,
            type=message.type,
            start_time=message.start_time,
            end_time=message.end_time,
            usage=message.usage,
            project_name=message.project_name,
            model=message.model,
            provider=message.provider,
            error_info=message.error_info,
            total_cost=message.total_cost,
            last_updated_at=message.last_updated_at,
            spans=None,
            feedback_scores=None,
        )

        self._save_span(
            span, trace_id=message.trace_id, parent_span_id=message.parent_span_id
        )

    def _handle_update_span_message(self, message: messages.UpdateSpanMessage) -> None:
        """
        Applies the fields of the update message to the already stored span.

        Raises:
            KeyError: If no span with the message's span_id has been stored.
        """
        span = self._span_observations[message.span_id]
        update_payload = {
            "output": message.output,
            "usage": message.usage,
            "provider": message.provider,
            "model": message.model,
            "end_time": message.end_time,
            "metadata": message.metadata,
            "error_info": message.error_info,
            "tags": message.tags,
            "input": message.input,
            "total_cost": message.total_cost,
        }
        # Presumably drops the None-valued entries so that fields missing from
        # the update do not overwrite stored values - confirm in dict_utils.
        cleaned_update_payload = dict_utils.remove_none_from_dict(update_payload)
        span.__dict__.update(cleaned_update_payload)

    def _handle_update_trace_message(
        self, message: messages.UpdateTraceMessage
    ) -> None:
        """
        Applies the fields of the update message to the already stored trace.

        Raises:
            KeyError: If no trace with the message's trace_id has been stored.
        """
        current_trace = self._trace_observations[message.trace_id]
        update_payload = {
            "output": message.output,
            "end_time": message.end_time,
            "metadata": message.metadata,
            "error_info": message.error_info,
            "tags": message.tags,
            "input": message.input,
            "thread_id": message.thread_id,
        }
        # Presumably drops the None-valued entries so that fields missing from
        # the update do not overwrite stored values - confirm in dict_utils.
        cleaned_update_payload = dict_utils.remove_none_from_dict(update_payload)
        current_trace.__dict__.update(cleaned_update_payload)

    def _handle_add_span_feedback_scores_batch_message(
        self, message: messages.AddSpanFeedbackScoresBatchMessage
    ) -> None:
        """Creates a feedback score model for every batch item and stores it."""
        for feedback_score_message in message.batch:
            feedback_model = self.create_feedback_score_model(
                score_id=feedback_score_message.id,
                name=feedback_score_message.name,
                value=feedback_score_message.value,
                category_name=feedback_score_message.category_name,
                reason=feedback_score_message.reason,
            )
            # Scores are stored under the message id; _build_spans_tree later
            # looks them up by span id.
            self._span_to_feedback_scores[feedback_score_message.id].append(
                feedback_model
            )

    def _handle_add_trace_feedback_scores_batch_message(
        self, message: messages.AddTraceFeedbackScoresBatchMessage
    ) -> None:
        """Creates a feedback score model for every batch item and stores it."""
        for feedback_score_message in message.batch:
            feedback_model = self.create_feedback_score_model(
                score_id=feedback_score_message.id,
                name=feedback_score_message.name,
                value=feedback_score_message.value,
                category_name=feedback_score_message.category_name,
                reason=feedback_score_message.reason,
            )
            # Scores are stored under the message id; trace_trees later looks
            # them up by trace id.
            self._trace_to_feedback_scores[feedback_score_message.id].append(
                feedback_model
            )

    def _handle_create_spans_batch_message(
        self, message: messages.CreateSpansBatchMessage
    ) -> None:
        """Stores every span contained in the batch."""
        for item in message.batch:
            self._handle_span_write(item)

    def _handle_span_write(self, message: span_write.SpanWrite) -> None:
        """Creates a span model from a SpanWrite item and stores it."""
        if message.error_info is not None:
            error_info = ErrorInfoDict(
                exception_type=message.error_info.exception_type,
                message=message.error_info.message,
                traceback=message.error_info.traceback,
            )
        else:
            error_info = None

        span = self.create_span_model(
            span_id=message.id,
            name=message.name,
            input=message.input,
            output=message.output,
            tags=message.tags,
            metadata=message.metadata,
            type=message.type,
            start_time=message.start_time,
            end_time=message.end_time,
            usage=message.usage,
            project_name=message.project_name,
            model=message.model,
            provider=message.provider,
            total_cost=message.total_estimated_cost,
            last_updated_at=message.last_updated_at,
            spans=None,
            feedback_scores=None,
            error_info=error_info,
        )

        self._save_span(
            span, trace_id=message.trace_id, parent_span_id=message.parent_span_id
        )

    def _handle_create_traces_batch_message(
        self, message: messages.CreateTraceBatchMessage
    ) -> None:
        """Stores every trace contained in the batch."""
        for item in message.batch:
            self._handle_trace_write(item)

    def _handle_trace_write(self, message: trace_write.TraceWrite) -> None:
        """Creates a trace model from a TraceWrite item and stores it."""
        if message.error_info is not None:
            error_info = ErrorInfoDict(
                exception_type=message.error_info.exception_type,
                message=message.error_info.message,
                traceback=message.error_info.traceback,
            )
        else:
            error_info = None

        trace = self.create_trace_model(
            trace_id=message.id,
            name=message.name,
            input=message.input,
            output=message.output,
            tags=message.tags,
            metadata=message.metadata,
            start_time=message.start_time,
            end_time=message.end_time,
            project_name=message.project_name,
            thread_id=message.thread_id,
            last_updated_at=message.last_updated_at,
            spans=None,
            feedback_scores=None,
            error_info=error_info,
        )
        self._save_trace(trace)

    def _handle_create_experiment_items_batch_message(
        self, message: messages.CreateExperimentItemsBatchMessage
    ) -> None:
        """Converts every batch item into an ExperimentItemModel and stores it."""
        for experiment_item_message in message.batch:
            experiment_item = models.ExperimentItemModel(
                id=experiment_item_message.id,
                experiment_id=experiment_item_message.experiment_id,
                trace_id=experiment_item_message.trace_id,
                dataset_item_id=experiment_item_message.dataset_item_id,
            )
            self._experiment_items.append(experiment_item)

    def _noop_handler(self, message: messages.BaseMessage) -> None:
        """Handler for message types that are deliberately ignored."""
        # just ignore the message
        pass

    @property
    def experiment_items(self) -> List[models.ExperimentItemModel]:
        """Returns the list of experiment items collected."""
        with self._rlock:
            return self._experiment_items
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def _observation_already_stored(
    observation_id: str,
    observations: Union[List[models.SpanModel], List[models.TraceModel]],
) -> bool:
    """
    Tells whether the given list already holds an observation with the given id.

    Args:
        observation_id: The id to look for.
        observations: The spans or traces to search through.

    Returns:
        True if at least one observation's id equals observation_id, False otherwise.
    """
    return any(candidate.id == observation_id for candidate in observations)
|