PyPI - opik - Versions diffs - 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl - Mend

opik 1.8.39-py3-none-any.whl → 1.9.71-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (592)

opik/__init__.py +19 -3
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/data_helpers.py +79 -0
opik/api_objects/dataset/dataset.py +64 -4
opik/api_objects/dataset/rest_operations.py +11 -2
opik/api_objects/experiment/experiment.py +57 -57
opik/api_objects/experiment/experiment_item.py +2 -1
opik/api_objects/experiment/experiments_client.py +64 -0
opik/api_objects/experiment/helpers.py +35 -11
opik/api_objects/experiment/rest_operations.py +65 -5
opik/api_objects/helpers.py +8 -5
opik/api_objects/local_recording.py +81 -0
opik/api_objects/opik_client.py +600 -108
opik/api_objects/opik_query_language.py +39 -5
opik/api_objects/prompt/__init__.py +12 -2
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +210 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
opik/api_objects/prompt/client.py +189 -47
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
opik/api_objects/prompt/types.py +23 -0
opik/api_objects/search_helpers.py +89 -0
opik/api_objects/span/span_data.py +35 -25
opik/api_objects/threads/threads_client.py +39 -5
opik/api_objects/trace/trace_client.py +52 -2
opik/api_objects/trace/trace_data.py +15 -24
opik/api_objects/validation_helpers.py +3 -3
opik/cli/__init__.py +5 -0
opik/cli/__main__.py +6 -0
opik/cli/configure.py +66 -0
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/healthcheck.py +21 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +49 -0
opik/cli/proxy.py +93 -0
opik/cli/usage_report/__init__.py +16 -0
opik/cli/usage_report/charts.py +783 -0
opik/cli/usage_report/cli.py +274 -0
opik/cli/usage_report/constants.py +9 -0
opik/cli/usage_report/extraction.py +749 -0
opik/cli/usage_report/pdf.py +244 -0
opik/cli/usage_report/statistics.py +78 -0
opik/cli/usage_report/utils.py +235 -0
opik/config.py +13 -7
opik/configurator/configure.py +17 -0
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +9 -1
opik/decorator/base_track_decorator.py +205 -133
opik/decorator/context_manager/span_context_manager.py +123 -0
opik/decorator/context_manager/trace_context_manager.py +84 -0
opik/decorator/opik_args/__init__.py +13 -0
opik/decorator/opik_args/api_classes.py +71 -0
opik/decorator/opik_args/helpers.py +120 -0
opik/decorator/span_creation_handler.py +25 -6
opik/dict_utils.py +3 -3
opik/evaluation/__init__.py +13 -2
opik/evaluation/engine/engine.py +272 -75
opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
opik/evaluation/engine/helpers.py +31 -6
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/evaluation_result.py +168 -2
opik/evaluation/evaluator.py +533 -62
opik/evaluation/metrics/__init__.py +103 -4
opik/evaluation/metrics/aggregated_metric.py +35 -6
opik/evaluation/metrics/base_metric.py +1 -1
opik/evaluation/metrics/conversation/__init__.py +48 -0
opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
opik/evaluation/metrics/conversation/helpers.py +14 -15
opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
opik/evaluation/metrics/conversation/types.py +4 -5
opik/evaluation/metrics/conversation_types.py +9 -0
opik/evaluation/metrics/heuristics/bertscore.py +107 -0
opik/evaluation/metrics/heuristics/bleu.py +35 -15
opik/evaluation/metrics/heuristics/chrf.py +127 -0
opik/evaluation/metrics/heuristics/contains.py +47 -11
opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
opik/evaluation/metrics/heuristics/gleu.py +113 -0
opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
opik/evaluation/metrics/heuristics/meteor.py +119 -0
opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
opik/evaluation/metrics/heuristics/readability.py +129 -0
opik/evaluation/metrics/heuristics/rouge.py +26 -9
opik/evaluation/metrics/heuristics/spearman.py +88 -0
opik/evaluation/metrics/heuristics/tone.py +155 -0
opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/__init__.py +8 -0
opik/evaluation/models/base_model.py +107 -1
opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
opik/evaluation/models/langchain/message_converters.py +97 -15
opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
opik/evaluation/models/litellm/util.py +125 -0
opik/evaluation/models/litellm/warning_filters.py +16 -4
opik/evaluation/models/model_capabilities.py +187 -0
opik/evaluation/models/models_factory.py +25 -3
opik/evaluation/preprocessing.py +92 -0
opik/evaluation/report.py +70 -12
opik/evaluation/rest_operations.py +49 -45
opik/evaluation/samplers/__init__.py +4 -0
opik/evaluation/samplers/base_dataset_sampler.py +40 -0
opik/evaluation/samplers/random_dataset_sampler.py +48 -0
opik/evaluation/score_statistics.py +66 -0
opik/evaluation/scorers/__init__.py +4 -0
opik/evaluation/scorers/scorer_function.py +55 -0
opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
opik/evaluation/test_case.py +3 -2
opik/evaluation/test_result.py +1 -0
opik/evaluation/threads/evaluator.py +31 -3
opik/evaluation/threads/helpers.py +3 -2
opik/evaluation/types.py +9 -1
opik/exceptions.py +33 -0
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/hooks/__init__.py +23 -0
opik/hooks/anonymizer_hook.py +36 -0
opik/hooks/httpx_client_hook.py +112 -0
opik/httpx_client.py +12 -9
opik/id_helpers.py +18 -0
opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
opik/integrations/adk/helpers.py +16 -7
opik/integrations/adk/legacy_opik_tracer.py +7 -4
opik/integrations/adk/opik_tracer.py +14 -1
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
opik/integrations/adk/recursive_callback_injector.py +4 -7
opik/integrations/bedrock/converse/__init__.py +0 -0
opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
opik/integrations/bedrock/invoke_model/__init__.py +0 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
opik/integrations/bedrock/invoke_model/response_types.py +34 -0
opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
opik/integrations/bedrock/opik_tracker.py +42 -4
opik/integrations/bedrock/types.py +19 -0
opik/integrations/crewai/crewai_decorator.py +8 -51
opik/integrations/crewai/opik_tracker.py +31 -10
opik/integrations/crewai/patchers/__init__.py +5 -0
opik/integrations/crewai/patchers/flow.py +118 -0
opik/integrations/crewai/patchers/litellm_completion.py +30 -0
opik/integrations/crewai/patchers/llm_client.py +207 -0
opik/integrations/dspy/callback.py +80 -17
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_connector.py +2 -2
opik/integrations/haystack/opik_tracer.py +3 -7
opik/integrations/langchain/__init__.py +3 -1
opik/integrations/langchain/helpers.py +96 -0
opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_encoder_extension.py +1 -1
opik/integrations/langchain/opik_tracer.py +474 -229
opik/integrations/litellm/__init__.py +5 -0
opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
opik/integrations/litellm/litellm_completion_decorator.py +242 -0
opik/integrations/litellm/opik_tracker.py +43 -0
opik/integrations/litellm/stream_patchers.py +151 -0
opik/integrations/llama_index/callback.py +146 -107
opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
opik/integrations/openai/opik_tracker.py +1 -1
opik/integrations/sagemaker/auth.py +5 -1
opik/llm_usage/google_usage.py +3 -1
opik/llm_usage/opik_usage.py +7 -8
opik/llm_usage/opik_usage_factory.py +4 -2
opik/logging_messages.py +6 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batch_manager_constuctors.py +10 -0
opik/message_processing/batching/batchers.py +59 -27
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/__init__.py +0 -0
opik/message_processing/emulation/emulator_message_processor.py +578 -0
opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
opik/message_processing/emulation/models.py +162 -0
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/messages.py +56 -1
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/processors/message_processors.py +92 -0
opik/message_processing/processors/message_processors_chain.py +96 -0
opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
opik/message_processing/queue_consumer.py +9 -3
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +43 -10
opik/opik_context.py +16 -4
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +346 -15
opik/rest_api/alerts/__init__.py +7 -0
opik/rest_api/alerts/client.py +667 -0
opik/rest_api/alerts/raw_client.py +1015 -0
opik/rest_api/alerts/types/__init__.py +7 -0
opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
opik/rest_api/annotation_queues/__init__.py +4 -0
opik/rest_api/annotation_queues/client.py +668 -0
opik/rest_api/annotation_queues/raw_client.py +1019 -0
opik/rest_api/automation_rule_evaluators/client.py +34 -2
opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
opik/rest_api/client.py +15 -0
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/client.py +1310 -44
opik/rest_api/datasets/raw_client.py +2269 -358
opik/rest_api/experiments/__init__.py +2 -2
opik/rest_api/experiments/client.py +191 -5
opik/rest_api/experiments/raw_client.py +301 -7
opik/rest_api/experiments/types/__init__.py +4 -1
opik/rest_api/experiments/types/experiment_update_status.py +5 -0
opik/rest_api/experiments/types/experiment_update_type.py +5 -0
opik/rest_api/experiments/types/experiment_write_status.py +5 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
opik/rest_api/llm_provider_key/client.py +20 -0
opik/rest_api/llm_provider_key/raw_client.py +20 -0
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
opik/rest_api/manual_evaluation/__init__.py +4 -0
opik/rest_api/manual_evaluation/client.py +347 -0
opik/rest_api/manual_evaluation/raw_client.py +543 -0
opik/rest_api/optimizations/client.py +145 -9
opik/rest_api/optimizations/raw_client.py +237 -13
opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
opik/rest_api/prompts/__init__.py +2 -2
opik/rest_api/prompts/client.py +227 -6
opik/rest_api/prompts/raw_client.py +331 -2
opik/rest_api/prompts/types/__init__.py +3 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/spans/__init__.py +0 -2
opik/rest_api/spans/client.py +238 -76
opik/rest_api/spans/raw_client.py +307 -95
opik/rest_api/spans/types/__init__.py +0 -2
opik/rest_api/traces/client.py +572 -161
opik/rest_api/traces/raw_client.py +736 -229
opik/rest_api/types/__init__.py +352 -17
opik/rest_api/types/aggregation_data.py +1 -0
opik/rest_api/types/alert.py +33 -0
opik/rest_api/types/alert_alert_type.py +5 -0
opik/rest_api/types/alert_page_public.py +24 -0
opik/rest_api/types/alert_public.py +33 -0
opik/rest_api/types/alert_public_alert_type.py +5 -0
opik/rest_api/types/alert_trigger.py +27 -0
opik/rest_api/types/alert_trigger_config.py +28 -0
opik/rest_api/types/alert_trigger_config_public.py +28 -0
opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
opik/rest_api/types/alert_trigger_config_type.py +10 -0
opik/rest_api/types/alert_trigger_config_write.py +22 -0
opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
opik/rest_api/types/alert_trigger_event_type.py +19 -0
opik/rest_api/types/alert_trigger_public.py +27 -0
opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
opik/rest_api/types/alert_trigger_write.py +23 -0
opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
opik/rest_api/types/alert_write.py +28 -0
opik/rest_api/types/alert_write_alert_type.py +5 -0
opik/rest_api/types/annotation_queue.py +42 -0
opik/rest_api/types/annotation_queue_batch.py +27 -0
opik/rest_api/types/annotation_queue_item_ids.py +19 -0
opik/rest_api/types/annotation_queue_page_public.py +28 -0
opik/rest_api/types/annotation_queue_public.py +38 -0
opik/rest_api/types/annotation_queue_public_scope.py +5 -0
opik/rest_api/types/annotation_queue_reviewer.py +20 -0
opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
opik/rest_api/types/annotation_queue_scope.py +5 -0
opik/rest_api/types/annotation_queue_write.py +31 -0
opik/rest_api/types/annotation_queue_write_scope.py +5 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +62 -2
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/dataset.py +4 -0
opik/rest_api/types/dataset_expansion.py +42 -0
opik/rest_api/types/dataset_expansion_response.py +39 -0
opik/rest_api/types/dataset_item.py +2 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +2 -0
opik/rest_api/types/dataset_item_filter.py +27 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +5 -0
opik/rest_api/types/dataset_item_page_public.py +5 -0
opik/rest_api/types/dataset_item_public.py +2 -0
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +1 -0
opik/rest_api/types/dataset_public.py +4 -0
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +59 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +7 -2
opik/rest_api/types/experiment_group_response.py +2 -0
opik/rest_api/types/experiment_public.py +7 -2
opik/rest_api/types/experiment_public_status.py +5 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/experiment_status.py +5 -0
opik/rest_api/types/feedback.py +25 -1
opik/rest_api/types/feedback_create.py +20 -1
opik/rest_api/types/feedback_object_public.py +27 -1
opik/rest_api/types/feedback_public.py +25 -1
opik/rest_api/types/feedback_score_batch_item.py +2 -1
opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
opik/rest_api/types/feedback_score_public.py +4 -0
opik/rest_api/types/feedback_update.py +20 -1
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/group_detail.py +19 -0
opik/rest_api/types/group_details.py +20 -0
opik/rest_api/types/guardrail.py +1 -0
opik/rest_api/types/guardrail_write.py +1 -0
opik/rest_api/types/ids_holder.py +19 -0
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/llm_as_judge_message.py +5 -1
opik/rest_api/types/llm_as_judge_message_content.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
opik/rest_api/types/llm_as_judge_message_public.py +5 -1
opik/rest_api/types/llm_as_judge_message_write.py +5 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
opik/rest_api/types/manual_evaluation_request.py +38 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
opik/rest_api/types/manual_evaluation_response.py +27 -0
opik/rest_api/types/optimization.py +4 -2
opik/rest_api/types/optimization_public.py +4 -2
opik/rest_api/types/optimization_public_status.py +3 -1
opik/rest_api/types/optimization_status.py +3 -1
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +4 -2
opik/rest_api/types/optimization_write_status.py +3 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt.py +6 -0
opik/rest_api/types/prompt_detail.py +6 -0
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_public.py +6 -0
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_version.py +3 -0
opik/rest_api/types/prompt_version_detail.py +3 -0
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_link.py +1 -0
opik/rest_api/types/prompt_version_link_public.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +3 -0
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +9 -0
opik/rest_api/types/provider_api_key_provider.py +1 -1
opik/rest_api/types/provider_api_key_public.py +9 -0
opik/rest_api/types/provider_api_key_public_provider.py +1 -1
opik/rest_api/types/score_name.py +1 -0
opik/rest_api/types/service_toggles_config.py +18 -0
opik/rest_api/types/span.py +1 -2
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_public.py +1 -2
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/rest_api/types/span_write.py +1 -2
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/trace.py +11 -2
opik/rest_api/types/trace_enrichment_options.py +32 -0
opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/trace_filter.py +23 -0
opik/rest_api/types/trace_filter_operator.py +21 -0
opik/rest_api/types/trace_filter_write.py +23 -0
opik/rest_api/types/trace_filter_write_operator.py +21 -0
opik/rest_api/types/trace_public.py +11 -2
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_identifier.py +1 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/trace_write.py +1 -2
opik/rest_api/types/value_entry.py +2 -0
opik/rest_api/types/value_entry_compare.py +2 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
opik/rest_api/types/value_entry_public.py +2 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/rest_api/types/webhook.py +28 -0
opik/rest_api/types/webhook_examples.py +19 -0
opik/rest_api/types/webhook_public.py +28 -0
opik/rest_api/types/webhook_test_result.py +23 -0
opik/rest_api/types/webhook_test_result_status.py +5 -0
opik/rest_api/types/webhook_write.py +23 -0
opik/rest_api/types/welcome_wizard_tracking.py +22 -0
opik/rest_api/types/workspace_configuration.py +5 -0
opik/rest_api/welcome_wizard/__init__.py +4 -0
opik/rest_api/welcome_wizard/client.py +195 -0
opik/rest_api/welcome_wizard/raw_client.py +208 -0
opik/rest_api/workspaces/client.py +14 -2
opik/rest_api/workspaces/raw_client.py +10 -0
opik/s3_httpx_client.py +14 -1
opik/simulation/__init__.py +6 -0
opik/simulation/simulated_user.py +99 -0
opik/simulation/simulator.py +108 -0
opik/synchronization.py +5 -6
opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
opik-1.9.71.dist-info/METADATA +370 -0
opik-1.9.71.dist-info/RECORD +1110 -0
opik/api_objects/prompt/prompt.py +0 -112
opik/cli.py +0 -193
opik/hooks.py +0 -13
opik/integrations/bedrock/chunks_aggregator.py +0 -55
opik/integrations/bedrock/helpers.py +0 -8
opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
opik-1.8.39.dist-info/METADATA +0 -339
opik-1.8.39.dist-info/RECORD +0 -790
/opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
/opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
/opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0

opik/integrations/harbor/opik_tracker.py ADDED

@@ -0,0 +1,528 @@
+"""
+Opik tracking integration for Harbor benchmark evaluation framework.
+This module provides the `track_harbor` function to add Opik tracing to Harbor Jobs,
+enabling real-time streaming of trial results to Opik for visualization and analysis.
+Example:
+    >>> from opik.integrations.harbor import track_harbor
+    >>> from harbor.job import Job
+    >>> import os
+    >>>
+    >>> os.environ["OPIK_PROJECT_NAME"] = "swebench-evaluation"
+    >>>
+    >>> job = Job(config)
+    >>> tracked_job = track_harbor(job)
+    >>> result = await tracked_job.run()
+Or enable tracking globally (for CLI usage):
+    >>> from opik.integrations.harbor import track_harbor
+    >>> track_harbor()
+    >>> # Now run Harbor code - it will be traced
+"""
+import functools
+import logging
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing_extensions import override
+from harbor.job import Job
+from harbor.models.trajectories.step import Step
+from harbor.models.trial.result import TrialResult
+from harbor.models.verifier.result import VerifierResult
+from harbor.trial.trial import Trial
+from harbor.verifier.verifier import Verifier
+from opik import datetime_helpers, id_helpers, opik_context, track
+from opik.api_objects import opik_client, span
+from opik.decorator import arguments_helpers, base_track_decorator
+from opik.types import FeedbackScoreDict, SpanType
+from . import experiment_service
+LOGGER = logging.getLogger(__name__)
+class HarborTrialRunDecorator(base_track_decorator.BaseTrackDecorator):
+    """
+    Decorator for tracking Harbor Trial.run method.
+    Sets the trace name based on trial configuration before the span/trace
+    is sent to the backend.
+    """
+    @override
+    def _start_span_inputs_preprocessor(
+        self,
+        func: Callable,
+        track_options: arguments_helpers.TrackOptions,
+        args: Tuple,
+        kwargs: Dict[str, Any],
+    ) -> arguments_helpers.StartSpanParameters:
+        """Extract trial config and set trace name, input, metadata, and tags."""
+        # Extract Trial instance from args (Trial.run is an instance method)
+        if not args:
+            # Fallback if no args (shouldn't happen for instance methods)
+            name = (
+                track_options.name if track_options.name is not None else func.__name__
+            )
+            return arguments_helpers.StartSpanParameters(
+                name=name,
+                input=None,
+                type=track_options.type,
+                tags=track_options.tags,
+                metadata=track_options.metadata,
+                project_name=track_options.project_name,
+            )
+        trial: Trial = args[0]
+        config = trial.config
+        # Build trace name from config
+        trace_name = f"{config.agent.name}/{config.trial_name}"
+        # Build input dict
+        input_dict: Dict[str, Any] = {
+            "trial_name": config.trial_name,
+            "task": {
+                "name": config.task.name
+                if hasattr(config.task, "name")
+                else str(config.task.path),
+                "source": getattr(config.task, "source", None),
+            },
+            "agent": {
+                "name": config.agent.name,
+                "model": getattr(config.agent, "model_name", None),
+            },
+        }
+        # Build metadata
+        metadata = (
+            track_options.metadata.copy() if track_options.metadata is not None else {}
+        )
+        metadata["created_from"] = "harbor"
+        # Build tags
+        tags = track_options.tags if track_options.tags is not None else []
+        tags = list(tags)  # Make a copy to avoid mutating the original
+        if "harbor" not in tags:
+            tags.append("harbor")
+        if config.agent.name not in tags:
+            tags.append(config.agent.name)
+        return arguments_helpers.StartSpanParameters(
+            name=trace_name,
+            input=input_dict,
+            type=track_options.type,
+            tags=tags,
+            metadata=metadata,
+            project_name=track_options.project_name,
+        )
+    @override
+    def _end_span_inputs_preprocessor(
+        self,
+        output: Any,
+        capture_output: bool,
+        current_span_data: span.SpanData,
+    ) -> arguments_helpers.EndSpanParameters:
+        """Process output - minimal implementation since output is handled in _wrap_trial_run."""
+        # Output is handled separately in _wrap_trial_run via opik_context.update_current_trace
+        # So we don't need to process it here
+        return arguments_helpers.EndSpanParameters(output=None)
+    @override
+    def _streams_handler(
+        self,
+        output: Any,
+        capture_output: bool,
+        generations_aggregator: Optional[Callable[[List[Any]], Any]],
+    ) -> Optional[Any]:
+        """No stream handling needed for Trial.run."""
+        return None
+def _rewards_to_feedback_scores(
+    rewards: Optional[Dict[str, Any]],
+    error: Optional[str] = None,
+) -> List[FeedbackScoreDict]:
+    """Convert Harbor verifier rewards to Opik feedback scores."""
+    if rewards is None:
+        return []
+    feedback_scores: List[FeedbackScoreDict] = []
+    for name, value in rewards.items():
+        try:
+            float_value = float(value)
+            score = FeedbackScoreDict(name=name, value=float_value, reason=error)
+            feedback_scores.append(score)
+        except (ValueError, TypeError):
+            LOGGER.warning(
+                "Could not convert reward value to float: %s=%s", name, value
+            )
+    return feedback_scores
+def _source_to_span_type(source: str) -> SpanType:
+    """Convert ATIF step source to Opik span type."""
+    if source == "agent":
+        return "llm"
+    return "general"
+def _patch_step_class() -> None:
+    """Patch the Harbor Step class to create Opik spans on instantiation."""
+    # Check if already patched
+    if hasattr(_patch_step_class, "_patched"):
+        return
+    original_init = Step.__init__
+    @functools.wraps(original_init)
+    def patched_init(self: Step, *args: Any, **kwargs: Any) -> None:
+        original_init(self, *args, **kwargs)
+        trace_data = opik_context.get_current_trace_data()
+        if trace_data is None:
+            return
+        parent_span = opik_context.get_current_span_data()
+        parent_span_id = parent_span.id if parent_span else None
+        try:
+            client = opik_client.get_client_cached()
+            input_dict: Dict[str, Any] = {}
+            if self.message:
+                input_dict["message"] = self.message
+            if self.tool_calls:
+                input_dict["tool_calls"] = [
+                    {
+                        "tool_call_id": tc.tool_call_id,
+                        "function_name": tc.function_name,
+                        "arguments": tc.arguments,
+                    }
+                    for tc in self.tool_calls
+                ]
+            output_dict: Optional[Dict[str, Any]] = None
+            if self.observation and self.observation.results:
+                output_dict = {
+                    "results": [
+                        {"content": r.content} for r in self.observation.results
+                    ]
+                }
+            metadata: Dict[str, Any] = {
+                "source": self.source,
+                "created_from": "harbor",
+            }
+            if self.reasoning_content:
+                metadata["reasoning"] = self.reasoning_content
+            usage: Optional[Dict[str, Any]] = None
+            total_cost: Optional[float] = None
+            if self.metrics:
+                usage = {}
+                if self.metrics.prompt_tokens is not None:
+                    usage["prompt_tokens"] = self.metrics.prompt_tokens
+                if self.metrics.completion_tokens is not None:
+                    usage["completion_tokens"] = self.metrics.completion_tokens
+                if self.metrics.prompt_tokens and self.metrics.completion_tokens:
+                    usage["total_tokens"] = (
+                        self.metrics.prompt_tokens + self.metrics.completion_tokens
+                    )
+                if not usage:
+                    usage = None
+                total_cost = getattr(self.metrics, "cost_usd", None)
+            client.span(
+                id=id_helpers.generate_id(),
+                trace_id=trace_data.id,
+                parent_span_id=parent_span_id,
+                name=f"step_{self.step_id}",
+                type=_source_to_span_type(self.source),
+                start_time=datetime_helpers.parse_iso_timestamp(self.timestamp),
+                input=input_dict if input_dict else None,
+                output=output_dict,
+                metadata=metadata,
+                usage=usage,
+                total_cost=total_cost,
+                model=self.model_name if self.source == "agent" else None,
+                tags=["harbor", self.source],
+            )
+        except Exception as e:
+            LOGGER.debug("Failed to create span for step: %s", e)
+    Step.__init__ = patched_init  # type: ignore
+    setattr(_patch_step_class, "_patched", True)
+def _enable_harbor_tracking(project_name: Optional[str] = None) -> None:
+    """Internal: Enable Opik tracking for Harbor by patching classes.
+    This patches Harbor's Trial and Verifier classes to add tracing.
+    Args:
+        project_name: Opik project name. If None, uses OPIK_PROJECT_NAME env var.
+    """
+    # Patch Trial methods (only if not already patched)
+    if not hasattr(Trial.run, "opik_tracked"):
+        Trial.run = _wrap_trial_run(Trial.run, project_name)
+    if not hasattr(Trial._setup_environment, "opik_tracked"):
+        Trial._setup_environment = _wrap_setup_environment(
+            Trial._setup_environment, project_name
+        )
+    if not hasattr(Trial._setup_agent, "opik_tracked"):
+        Trial._setup_agent = _wrap_setup_agent(Trial._setup_agent, project_name)
+    if not hasattr(Trial._execute_agent, "opik_tracked"):
+        Trial._execute_agent = _wrap_execute_agent(Trial._execute_agent, project_name)
+    if not hasattr(Trial._run_verification, "opik_tracked"):
+        Trial._run_verification = _wrap_run_verification(
+            Trial._run_verification, project_name
+        )
+    # Patch Verifier (only if not already patched)
+    if not hasattr(Verifier.verify, "opik_tracked"):
+        Verifier.verify = _wrap_verify(Verifier.verify, project_name)
+    # Patch Step class for real-time step tracking
+    _patch_step_class()
+    LOGGER.info("Opik tracking enabled for Harbor")
+def track_harbor(
+    job: Optional["Job"] = None,
+    project_name: Optional[str] = None,
+) -> Optional["Job"]:
+    """Enable Opik tracking for Harbor.
+    Can be called two ways:
+    - track_harbor() - enables global tracking (for CLI usage)
+    - track_harbor(job) - wraps a job and enables tracking (for SDK usage)
+    Args:
+        job: Optional Harbor Job instance. If provided, returns the same job.
+        project_name: Opik project name. If None, uses OPIK_PROJECT_NAME env var.
+    Returns:
+        The job instance if provided, None otherwise.
+    Example:
+        >>> from opik.integrations.harbor import track_harbor
+        >>> job = Job(config)
+        >>> tracked_job = track_harbor(job)
+        >>> result = await tracked_job.run()
+    """
+    _enable_harbor_tracking(project_name=project_name)
+    return job
+def _wrap_trial_run(original: Callable, project_name: Optional[str]) -> Callable:
+    """Wrap Trial.run with tracing, feedback scores, and experiment linking."""
+    decorator = HarborTrialRunDecorator()
+    @decorator.track(
+        tags=["harbor"],
+        project_name=project_name,
+        capture_output=False,
+    )
+    @functools.wraps(original)
+    async def wrapped(self: Trial) -> TrialResult:
+        config = self.config
+        # Lazily setup experiment service if not already done
+        # This ensures experiment tracking works for both SDK and CLI modes
+        if experiment_service.get_service() is None:
+            try:
+                # Use job_id for consistent experiment naming
+                experiment_name = (
+                    f"harbor-job-{str(config.job_id)[:8]}" if config.job_id else None
+                )
+                # Build experiment config with agent/model info
+                experiment_config: Dict[str, Any] = {
+                    "agent_name": config.agent.name,
+                }
+                model_name = getattr(config.agent, "model_name", None)
+                if model_name:
+                    experiment_config["model_name"] = model_name
+                LOGGER.debug(
+                    "Lazily setting up experiment service: experiment_name=%s",
+                    experiment_name,
+                )
+                experiment_service.setup_lazy(
+                    experiment_name=experiment_name,
+                    experiment_config=experiment_config,
+                )
+            except Exception as e:
+                LOGGER.debug("Failed to lazily setup experiment service: %s", e)
+        result: TrialResult = await original(self)
+        # Update trace with output and feedback scores
+        output_dict: Dict[str, Any] = {
+            "trial_name": result.trial_name,
+            "task_name": result.task_name,
+        }
+        if result.verifier_result and result.verifier_result.rewards:
+            output_dict["rewards"] = result.verifier_result.rewards
+        feedback_scores = None
+        if result.verifier_result and result.verifier_result.rewards:
+            # Get error message if available
+            error_msg = getattr(result.verifier_result, "error", None) or getattr(
+                result, "error", None
+            )
+            feedback_scores = _rewards_to_feedback_scores(
+                result.verifier_result.rewards, error=error_msg
+            )
+        opik_context.update_current_trace(
+            output=output_dict,
+            feedback_scores=feedback_scores,
+        )
+        # Link to experiment
+        trace_data = opik_context.get_current_trace_data()
+        if trace_data is not None:
+            service = experiment_service.get_service()
+            LOGGER.debug(
+                "Linking trial to experiment: trial=%s, trace_id=%s, service=%s",
+                config.trial_name,
+                trace_data.id,
+                service,
+            )
+            if service is not None:
+                source = getattr(config.task, "source", None)
+                task_name = (
+                    config.task.name
+                    if hasattr(config.task, "name")
+                    else str(config.task.path)
+                )
+                service.link_trial_to_experiment(
+                    trial_name=config.trial_name,
+                    trace_id=trace_data.id,
+                    source=source,
+                    task_name=task_name,
+                )
+            else:
+                LOGGER.debug(
+                    "No experiment service available, skipping experiment linking"
+                )
+        return result
+    return wrapped
+def _wrap_setup_environment(
+    original: Callable, project_name: Optional[str]
+) -> Callable:
+    """Wrap Trial._setup_environment with tracing."""
+    @track(name="setup_environment", tags=["harbor"], project_name=project_name)
+    @functools.wraps(original)
+    async def wrapped(self: Trial) -> None:
+        opik_context.update_current_span(
+            input={"phase": "environment_setup"},
+            metadata={"created_from": "harbor"},
+        )
+        await original(self)
+        opik_context.update_current_span(output={"status": "completed"})
+    return wrapped
+def _wrap_setup_agent(original: Callable, project_name: Optional[str]) -> Callable:
+    """Wrap Trial._setup_agent with tracing."""
+    @track(name="setup_agent", tags=["harbor"], project_name=project_name)
+    @functools.wraps(original)
+    async def wrapped(self: Trial) -> None:
+        opik_context.update_current_span(
+            input={"phase": "agent_setup"},
+            metadata={"created_from": "harbor"},
+        )
+        await original(self)
+        opik_context.update_current_span(output={"status": "completed"})
+    return wrapped
+def _wrap_execute_agent(original: Callable, project_name: Optional[str]) -> Callable:
+    """Wrap Trial._execute_agent with tracing."""
+    @track(name="execute_agent", tags=["harbor"], project_name=project_name)
+    @functools.wraps(original)
+    async def wrapped(self: Trial) -> None:
+        input_dict = {}
+        if hasattr(self, "_task") and self._task:
+            input_dict["instruction"] = self._task.instruction
+        opik_context.update_current_span(
+            input=input_dict,
+            metadata={"created_from": "harbor"},
+        )
+        await original(self)
+        opik_context.update_current_span(output={"status": "completed"})
+    return wrapped
+def _wrap_run_verification(original: Callable, project_name: Optional[str]) -> Callable:
+    """Wrap Trial._run_verification with tracing."""
+    @track(name="run_verification", tags=["harbor"], project_name=project_name)
+    @functools.wraps(original)
+    async def wrapped(self: Trial) -> None:
+        opik_context.update_current_span(
+            input={"phase": "verification"},
+            metadata={"created_from": "harbor"},
+        )
+        await original(self)
+        opik_context.update_current_span(output={"status": "completed"})
+    return wrapped
+def _wrap_verify(original: Callable, project_name: Optional[str]) -> Callable:
+    """Wrap Verifier.verify with tracing."""
+    @track(name="verify", tags=["harbor"], project_name=project_name)
+    @functools.wraps(original)
+    async def wrapped(self: Verifier) -> VerifierResult:
+        opik_context.update_current_span(
+            input={"phase": "verify"},
+            metadata={"created_from": "harbor"},
+        )
+        result: VerifierResult = await original(self)
+        output_dict: Dict[str, Any] = {}
+        if result.rewards:
+            output_dict["rewards"] = result.rewards
+        opik_context.update_current_span(
+            output=output_dict if output_dict else {"status": "completed"}
+        )
+        return result
+    return wrapped
+def reset_harbor_tracking() -> None:
+    """Reset Harbor tracking state for testing purposes.
+    Resets the experiment service. Method patches remain active
+    (they use `opik_tracked` to prevent double-patching).
+    """
+    experiment_service.reset()

opik/integrations/haystack/opik_connector.py CHANGED Viewed

@@ -4,8 +4,8 @@ from typing import Any, Dict, Optional
 import haystack
 from haystack import tracing
-import opik.api_objects.opik_client as opik_client
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
+from opik import tracing_runtime_config
+from opik.api_objects import opik_client
 from . import opik_tracer
 LOGGER = logging.getLogger(__name__)

opik/integrations/haystack/opik_tracer.py CHANGED Viewed

@@ -5,10 +5,8 @@ from typing import Any, Dict, Iterator, List, Optional, Union
 from haystack import tracing
-import opik.url_helpers as url_helpers
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
-import opik.decorator.span_creation_handler as span_creation_handler
-import opik.decorator.arguments_helpers as arguments_helpers
+from opik import tracing_runtime_config, url_helpers
+from opik.decorator import arguments_helpers, span_creation_handler
 from opik.api_objects import opik_client
 from opik.api_objects import span as opik_span
 from opik.api_objects import trace as opik_trace
@@ -82,9 +80,7 @@ class OpikTracer(tracing.Tracer):
     ) -> opik_span_bridge.OpikSpanBridge:
         """Create a span or trace based on existing context using span_creation_handler."""
         # For pipeline operations, use the pipeline name, otherwise use component name
-        final_name = (
-            self._name if operation_name == constants.PIPELINE_RUN_KEY else span_name
-        )
+        final_name = self._name if "pipeline.run" in operation_name else span_name
         metadata = {"created_from": "haystack", "operation": operation_name}
         # Always use span_creation_handler - it handles existing context properly

opik/integrations/langchain/__init__.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from .opik_tracer import OpikTracer
+from .langgraph_async_context_bridge import extract_current_langgraph_span_data
+from .langgraph_tracer_injector import track_langgraph
-__all__ = ["OpikTracer"]
+__all__ = ["OpikTracer", "extract_current_langgraph_span_data", "track_langgraph"]

opik/integrations/langchain/helpers.py ADDED Viewed

@@ -0,0 +1,96 @@
+import logging
+from typing import Dict, Any, Tuple
+from ... import _logging
+LOGGER = logging.getLogger(__name__)
+# Length of str(outputs), in characters, above which split_big_langgraph_outputs filters an output.
+LANGGRAPH_OUTPUT_SIZE_THRESHOLD = 5000
+def _extract_command_update(outputs: Dict[str, Any]) -> Dict[str, Any]:
+    """Extract state updates from LangGraph Command objects.
+    When a LangGraph node returns a Command, LangChain wraps it in {"output": Command(...)}.
+    This function detects Command objects and extracts the update dict to properly log state changes.
+    Args:
+        outputs: The outputs dict from a LangChain Run.
+    Returns:
+        The extracted update dict if a Command is found, otherwise the original outputs.
+    """
+    if "output" in outputs and len(outputs) == 1:
+        output_value = outputs["output"]
+        # Duck-type check for Command object
+        if hasattr(output_value, "update") and hasattr(output_value, "goto"):
+            try:
+                update_dict = output_value.update
+                if isinstance(update_dict, dict):
+                    _logging.log_once_at_level(
+                        logging.DEBUG,
+                        "Extracted state update from LangGraph Command object",
+                        LOGGER,
+                    )
+                    return update_dict
+            except Exception as e:
+                LOGGER.warning(
+                    f"Failed to extract update from Command-like object: {e}",
+                    exc_info=True,
+                )
+    return outputs
+def split_big_langgraph_outputs(
+    outputs: Dict[str, Any],
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    Split large LangGraph outputs to extract messages for thread display.
+    Returns:
+        tuple: (filtered_output_for_display, additional_metadata_for_span)
+    LangGraph agents often produce complex outputs with large internal state
+    that breaks thread display. This extracts conversational messages for
+    clean thread display while preserving the full state in metadata.
+    """
+    if not isinstance(outputs, dict):
+        return outputs, {}
+    outputs = _extract_command_update(outputs)
+    langgraph_like_output = "messages" in outputs and len(outputs) > 1
+    if langgraph_like_output:
+        output_str = str(outputs)
+        output_size = len(output_str)
+        if output_size > LANGGRAPH_OUTPUT_SIZE_THRESHOLD:
+            _logging.log_once_at_level(
+                logging.WARNING,
+                f"Filtering large LangGraph output ({output_size} chars) for thread display",
+                LOGGER,
+            )
+            filtered_output = {
+                "messages": outputs["messages"],
+            }
+            if "thread_id" in outputs:
+                filtered_output["thread_id"] = outputs["thread_id"]
+            additional_metadata = {
+                "_opik_langgraph_full_output": outputs,
+                "_opik_output_filtering": {
+                    "filtered": True,
+                    "original_size_chars": output_size,
+                    "filtered_keys": [
+                        k for k in outputs.keys() if k not in ["messages", "thread_id"]
+                    ],
+                    "reason": "Large LangGraph output filtered for better thread display",
+                },
+            }
+            return filtered_output, additional_metadata
+    return outputs, {}

opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

opik 1.8.39py3-none-any.whl → 1.9.71py3-none-any.whl