PyPI - opik - Versions diffs - 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl - Mend

opik-1.8.39-py3-none-any.whl → opik-1.9.71-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (592)

opik/__init__.py +19 -3
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/data_helpers.py +79 -0
opik/api_objects/dataset/dataset.py +64 -4
opik/api_objects/dataset/rest_operations.py +11 -2
opik/api_objects/experiment/experiment.py +57 -57
opik/api_objects/experiment/experiment_item.py +2 -1
opik/api_objects/experiment/experiments_client.py +64 -0
opik/api_objects/experiment/helpers.py +35 -11
opik/api_objects/experiment/rest_operations.py +65 -5
opik/api_objects/helpers.py +8 -5
opik/api_objects/local_recording.py +81 -0
opik/api_objects/opik_client.py +600 -108
opik/api_objects/opik_query_language.py +39 -5
opik/api_objects/prompt/__init__.py +12 -2
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +210 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
opik/api_objects/prompt/client.py +189 -47
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
opik/api_objects/prompt/types.py +23 -0
opik/api_objects/search_helpers.py +89 -0
opik/api_objects/span/span_data.py +35 -25
opik/api_objects/threads/threads_client.py +39 -5
opik/api_objects/trace/trace_client.py +52 -2
opik/api_objects/trace/trace_data.py +15 -24
opik/api_objects/validation_helpers.py +3 -3
opik/cli/__init__.py +5 -0
opik/cli/__main__.py +6 -0
opik/cli/configure.py +66 -0
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/healthcheck.py +21 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +49 -0
opik/cli/proxy.py +93 -0
opik/cli/usage_report/__init__.py +16 -0
opik/cli/usage_report/charts.py +783 -0
opik/cli/usage_report/cli.py +274 -0
opik/cli/usage_report/constants.py +9 -0
opik/cli/usage_report/extraction.py +749 -0
opik/cli/usage_report/pdf.py +244 -0
opik/cli/usage_report/statistics.py +78 -0
opik/cli/usage_report/utils.py +235 -0
opik/config.py +13 -7
opik/configurator/configure.py +17 -0
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +9 -1
opik/decorator/base_track_decorator.py +205 -133
opik/decorator/context_manager/span_context_manager.py +123 -0
opik/decorator/context_manager/trace_context_manager.py +84 -0
opik/decorator/opik_args/__init__.py +13 -0
opik/decorator/opik_args/api_classes.py +71 -0
opik/decorator/opik_args/helpers.py +120 -0
opik/decorator/span_creation_handler.py +25 -6
opik/dict_utils.py +3 -3
opik/evaluation/__init__.py +13 -2
opik/evaluation/engine/engine.py +272 -75
opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
opik/evaluation/engine/helpers.py +31 -6
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/evaluation_result.py +168 -2
opik/evaluation/evaluator.py +533 -62
opik/evaluation/metrics/__init__.py +103 -4
opik/evaluation/metrics/aggregated_metric.py +35 -6
opik/evaluation/metrics/base_metric.py +1 -1
opik/evaluation/metrics/conversation/__init__.py +48 -0
opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
opik/evaluation/metrics/conversation/helpers.py +14 -15
opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
opik/evaluation/metrics/conversation/types.py +4 -5
opik/evaluation/metrics/conversation_types.py +9 -0
opik/evaluation/metrics/heuristics/bertscore.py +107 -0
opik/evaluation/metrics/heuristics/bleu.py +35 -15
opik/evaluation/metrics/heuristics/chrf.py +127 -0
opik/evaluation/metrics/heuristics/contains.py +47 -11
opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
opik/evaluation/metrics/heuristics/gleu.py +113 -0
opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
opik/evaluation/metrics/heuristics/meteor.py +119 -0
opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
opik/evaluation/metrics/heuristics/readability.py +129 -0
opik/evaluation/metrics/heuristics/rouge.py +26 -9
opik/evaluation/metrics/heuristics/spearman.py +88 -0
opik/evaluation/metrics/heuristics/tone.py +155 -0
opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/__init__.py +8 -0
opik/evaluation/models/base_model.py +107 -1
opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
opik/evaluation/models/langchain/message_converters.py +97 -15
opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
opik/evaluation/models/litellm/util.py +125 -0
opik/evaluation/models/litellm/warning_filters.py +16 -4
opik/evaluation/models/model_capabilities.py +187 -0
opik/evaluation/models/models_factory.py +25 -3
opik/evaluation/preprocessing.py +92 -0
opik/evaluation/report.py +70 -12
opik/evaluation/rest_operations.py +49 -45
opik/evaluation/samplers/__init__.py +4 -0
opik/evaluation/samplers/base_dataset_sampler.py +40 -0
opik/evaluation/samplers/random_dataset_sampler.py +48 -0
opik/evaluation/score_statistics.py +66 -0
opik/evaluation/scorers/__init__.py +4 -0
opik/evaluation/scorers/scorer_function.py +55 -0
opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
opik/evaluation/test_case.py +3 -2
opik/evaluation/test_result.py +1 -0
opik/evaluation/threads/evaluator.py +31 -3
opik/evaluation/threads/helpers.py +3 -2
opik/evaluation/types.py +9 -1
opik/exceptions.py +33 -0
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/hooks/__init__.py +23 -0
opik/hooks/anonymizer_hook.py +36 -0
opik/hooks/httpx_client_hook.py +112 -0
opik/httpx_client.py +12 -9
opik/id_helpers.py +18 -0
opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
opik/integrations/adk/helpers.py +16 -7
opik/integrations/adk/legacy_opik_tracer.py +7 -4
opik/integrations/adk/opik_tracer.py +14 -1
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
opik/integrations/adk/recursive_callback_injector.py +4 -7
opik/integrations/bedrock/converse/__init__.py +0 -0
opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
opik/integrations/bedrock/invoke_model/__init__.py +0 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
opik/integrations/bedrock/invoke_model/response_types.py +34 -0
opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
opik/integrations/bedrock/opik_tracker.py +42 -4
opik/integrations/bedrock/types.py +19 -0
opik/integrations/crewai/crewai_decorator.py +8 -51
opik/integrations/crewai/opik_tracker.py +31 -10
opik/integrations/crewai/patchers/__init__.py +5 -0
opik/integrations/crewai/patchers/flow.py +118 -0
opik/integrations/crewai/patchers/litellm_completion.py +30 -0
opik/integrations/crewai/patchers/llm_client.py +207 -0
opik/integrations/dspy/callback.py +80 -17
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_connector.py +2 -2
opik/integrations/haystack/opik_tracer.py +3 -7
opik/integrations/langchain/__init__.py +3 -1
opik/integrations/langchain/helpers.py +96 -0
opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_encoder_extension.py +1 -1
opik/integrations/langchain/opik_tracer.py +474 -229
opik/integrations/litellm/__init__.py +5 -0
opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
opik/integrations/litellm/litellm_completion_decorator.py +242 -0
opik/integrations/litellm/opik_tracker.py +43 -0
opik/integrations/litellm/stream_patchers.py +151 -0
opik/integrations/llama_index/callback.py +146 -107
opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
opik/integrations/openai/opik_tracker.py +1 -1
opik/integrations/sagemaker/auth.py +5 -1
opik/llm_usage/google_usage.py +3 -1
opik/llm_usage/opik_usage.py +7 -8
opik/llm_usage/opik_usage_factory.py +4 -2
opik/logging_messages.py +6 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batch_manager_constuctors.py +10 -0
opik/message_processing/batching/batchers.py +59 -27
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/__init__.py +0 -0
opik/message_processing/emulation/emulator_message_processor.py +578 -0
opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
opik/message_processing/emulation/models.py +162 -0
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/messages.py +56 -1
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/processors/message_processors.py +92 -0
opik/message_processing/processors/message_processors_chain.py +96 -0
opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
opik/message_processing/queue_consumer.py +9 -3
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +43 -10
opik/opik_context.py +16 -4
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +346 -15
opik/rest_api/alerts/__init__.py +7 -0
opik/rest_api/alerts/client.py +667 -0
opik/rest_api/alerts/raw_client.py +1015 -0
opik/rest_api/alerts/types/__init__.py +7 -0
opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
opik/rest_api/annotation_queues/__init__.py +4 -0
opik/rest_api/annotation_queues/client.py +668 -0
opik/rest_api/annotation_queues/raw_client.py +1019 -0
opik/rest_api/automation_rule_evaluators/client.py +34 -2
opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
opik/rest_api/client.py +15 -0
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/client.py +1310 -44
opik/rest_api/datasets/raw_client.py +2269 -358
opik/rest_api/experiments/__init__.py +2 -2
opik/rest_api/experiments/client.py +191 -5
opik/rest_api/experiments/raw_client.py +301 -7
opik/rest_api/experiments/types/__init__.py +4 -1
opik/rest_api/experiments/types/experiment_update_status.py +5 -0
opik/rest_api/experiments/types/experiment_update_type.py +5 -0
opik/rest_api/experiments/types/experiment_write_status.py +5 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
opik/rest_api/llm_provider_key/client.py +20 -0
opik/rest_api/llm_provider_key/raw_client.py +20 -0
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
opik/rest_api/manual_evaluation/__init__.py +4 -0
opik/rest_api/manual_evaluation/client.py +347 -0
opik/rest_api/manual_evaluation/raw_client.py +543 -0
opik/rest_api/optimizations/client.py +145 -9
opik/rest_api/optimizations/raw_client.py +237 -13
opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
opik/rest_api/prompts/__init__.py +2 -2
opik/rest_api/prompts/client.py +227 -6
opik/rest_api/prompts/raw_client.py +331 -2
opik/rest_api/prompts/types/__init__.py +3 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/spans/__init__.py +0 -2
opik/rest_api/spans/client.py +238 -76
opik/rest_api/spans/raw_client.py +307 -95
opik/rest_api/spans/types/__init__.py +0 -2
opik/rest_api/traces/client.py +572 -161
opik/rest_api/traces/raw_client.py +736 -229
opik/rest_api/types/__init__.py +352 -17
opik/rest_api/types/aggregation_data.py +1 -0
opik/rest_api/types/alert.py +33 -0
opik/rest_api/types/alert_alert_type.py +5 -0
opik/rest_api/types/alert_page_public.py +24 -0
opik/rest_api/types/alert_public.py +33 -0
opik/rest_api/types/alert_public_alert_type.py +5 -0
opik/rest_api/types/alert_trigger.py +27 -0
opik/rest_api/types/alert_trigger_config.py +28 -0
opik/rest_api/types/alert_trigger_config_public.py +28 -0
opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
opik/rest_api/types/alert_trigger_config_type.py +10 -0
opik/rest_api/types/alert_trigger_config_write.py +22 -0
opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
opik/rest_api/types/alert_trigger_event_type.py +19 -0
opik/rest_api/types/alert_trigger_public.py +27 -0
opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
opik/rest_api/types/alert_trigger_write.py +23 -0
opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
opik/rest_api/types/alert_write.py +28 -0
opik/rest_api/types/alert_write_alert_type.py +5 -0
opik/rest_api/types/annotation_queue.py +42 -0
opik/rest_api/types/annotation_queue_batch.py +27 -0
opik/rest_api/types/annotation_queue_item_ids.py +19 -0
opik/rest_api/types/annotation_queue_page_public.py +28 -0
opik/rest_api/types/annotation_queue_public.py +38 -0
opik/rest_api/types/annotation_queue_public_scope.py +5 -0
opik/rest_api/types/annotation_queue_reviewer.py +20 -0
opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
opik/rest_api/types/annotation_queue_scope.py +5 -0
opik/rest_api/types/annotation_queue_write.py +31 -0
opik/rest_api/types/annotation_queue_write_scope.py +5 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +62 -2
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/dataset.py +4 -0
opik/rest_api/types/dataset_expansion.py +42 -0
opik/rest_api/types/dataset_expansion_response.py +39 -0
opik/rest_api/types/dataset_item.py +2 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +2 -0
opik/rest_api/types/dataset_item_filter.py +27 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +5 -0
opik/rest_api/types/dataset_item_page_public.py +5 -0
opik/rest_api/types/dataset_item_public.py +2 -0
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +1 -0
opik/rest_api/types/dataset_public.py +4 -0
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +59 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +7 -2
opik/rest_api/types/experiment_group_response.py +2 -0
opik/rest_api/types/experiment_public.py +7 -2
opik/rest_api/types/experiment_public_status.py +5 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/experiment_status.py +5 -0
opik/rest_api/types/feedback.py +25 -1
opik/rest_api/types/feedback_create.py +20 -1
opik/rest_api/types/feedback_object_public.py +27 -1
opik/rest_api/types/feedback_public.py +25 -1
opik/rest_api/types/feedback_score_batch_item.py +2 -1
opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
opik/rest_api/types/feedback_score_public.py +4 -0
opik/rest_api/types/feedback_update.py +20 -1
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/group_detail.py +19 -0
opik/rest_api/types/group_details.py +20 -0
opik/rest_api/types/guardrail.py +1 -0
opik/rest_api/types/guardrail_write.py +1 -0
opik/rest_api/types/ids_holder.py +19 -0
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/llm_as_judge_message.py +5 -1
opik/rest_api/types/llm_as_judge_message_content.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
opik/rest_api/types/llm_as_judge_message_public.py +5 -1
opik/rest_api/types/llm_as_judge_message_write.py +5 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
opik/rest_api/types/manual_evaluation_request.py +38 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
opik/rest_api/types/manual_evaluation_response.py +27 -0
opik/rest_api/types/optimization.py +4 -2
opik/rest_api/types/optimization_public.py +4 -2
opik/rest_api/types/optimization_public_status.py +3 -1
opik/rest_api/types/optimization_status.py +3 -1
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +4 -2
opik/rest_api/types/optimization_write_status.py +3 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt.py +6 -0
opik/rest_api/types/prompt_detail.py +6 -0
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_public.py +6 -0
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_version.py +3 -0
opik/rest_api/types/prompt_version_detail.py +3 -0
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_link.py +1 -0
opik/rest_api/types/prompt_version_link_public.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +3 -0
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +9 -0
opik/rest_api/types/provider_api_key_provider.py +1 -1
opik/rest_api/types/provider_api_key_public.py +9 -0
opik/rest_api/types/provider_api_key_public_provider.py +1 -1
opik/rest_api/types/score_name.py +1 -0
opik/rest_api/types/service_toggles_config.py +18 -0
opik/rest_api/types/span.py +1 -2
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_public.py +1 -2
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/rest_api/types/span_write.py +1 -2
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/trace.py +11 -2
opik/rest_api/types/trace_enrichment_options.py +32 -0
opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/trace_filter.py +23 -0
opik/rest_api/types/trace_filter_operator.py +21 -0
opik/rest_api/types/trace_filter_write.py +23 -0
opik/rest_api/types/trace_filter_write_operator.py +21 -0
opik/rest_api/types/trace_public.py +11 -2
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_identifier.py +1 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/trace_write.py +1 -2
opik/rest_api/types/value_entry.py +2 -0
opik/rest_api/types/value_entry_compare.py +2 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
opik/rest_api/types/value_entry_public.py +2 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/rest_api/types/webhook.py +28 -0
opik/rest_api/types/webhook_examples.py +19 -0
opik/rest_api/types/webhook_public.py +28 -0
opik/rest_api/types/webhook_test_result.py +23 -0
opik/rest_api/types/webhook_test_result_status.py +5 -0
opik/rest_api/types/webhook_write.py +23 -0
opik/rest_api/types/welcome_wizard_tracking.py +22 -0
opik/rest_api/types/workspace_configuration.py +5 -0
opik/rest_api/welcome_wizard/__init__.py +4 -0
opik/rest_api/welcome_wizard/client.py +195 -0
opik/rest_api/welcome_wizard/raw_client.py +208 -0
opik/rest_api/workspaces/client.py +14 -2
opik/rest_api/workspaces/raw_client.py +10 -0
opik/s3_httpx_client.py +14 -1
opik/simulation/__init__.py +6 -0
opik/simulation/simulated_user.py +99 -0
opik/simulation/simulator.py +108 -0
opik/synchronization.py +5 -6
opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
opik-1.9.71.dist-info/METADATA +370 -0
opik-1.9.71.dist-info/RECORD +1110 -0
opik/api_objects/prompt/prompt.py +0 -112
opik/cli.py +0 -193
opik/hooks.py +0 -13
opik/integrations/bedrock/chunks_aggregator.py +0 -55
opik/integrations/bedrock/helpers.py +0 -8
opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
opik-1.8.39.dist-info/METADATA +0 -339
opik-1.8.39.dist-info/RECORD +0 -790
/opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
/opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
/opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0

opik/integrations/crewai/patchers/llm_client.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""
+Client patchers for CrewAI LLM providers (v1.0.0+).
+This module handles patching of LLM clients used by CrewAI agents with Opik tracking.
+Each provider has its own patching function that handles missing dependencies gracefully.
+"""
+import logging
+from typing import Any, Optional, TYPE_CHECKING
+import crewai
+if TYPE_CHECKING:
+    import crewai.llms.providers.openai.completion as openai_completion
+    import crewai.llms.providers.anthropic.completion as anthropic_completion
+    import crewai.llms.providers.gemini.completion as gemini_completion
+    import crewai.llms.providers.bedrock.completion as bedrock_completion
+LOGGER = logging.getLogger(__name__)
+def patch_llm_client(crew: crewai.Crew, project_name: Optional[str]) -> None:
+    """
+    Patches LLM clients used by CrewAI agents with Opik tracking.
+    Handles missing provider libraries gracefully by logging warnings instead of failing.
+    Args:
+        crew: The Crew instance containing agents to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    for agent in crew.agents:
+        _patch_single_llm_client(agent.llm, project_name)
+def _patch_single_llm_client(llm: Any, project_name: Optional[str]) -> None:
+    """
+    Patches an LLM client based on its provider type.
+    Args:
+        llm: The CrewAI LLM instance to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    if _is_openai_llm(llm):
+        _patch_openai_client(llm, project_name)
+    elif _is_anthropic_llm(llm):
+        _patch_anthropic_client(llm, project_name)
+    elif _is_gemini_llm(llm):
+        _patch_gemini_client(llm, project_name)
+    elif _is_bedrock_llm(llm):
+        _patch_bedrock_client(llm, project_name)
+def _is_openai_llm(llm: Any) -> bool:
+    """
+    Checks if LLM is an OpenAI provider.
+    Args:
+        llm: The CrewAI LLM to check.
+    Returns:
+        True if LLM is OpenAI provider, False otherwise.
+    """
+    try:
+        import crewai.llms.providers.openai.completion
+        return isinstance(llm, crewai.llms.providers.openai.completion.OpenAICompletion)
+    except ImportError:
+        return False
+def _is_anthropic_llm(llm: Any) -> bool:
+    """
+    Checks if LLM is an Anthropic provider.
+    Args:
+        llm: The CrewAI LLM to check.
+    Returns:
+        True if LLM is Anthropic provider, False otherwise.
+    """
+    try:
+        import crewai.llms.providers.anthropic.completion
+        return isinstance(
+            llm, crewai.llms.providers.anthropic.completion.AnthropicCompletion
+        )
+    except ImportError:
+        return False
+def _is_gemini_llm(llm: Any) -> bool:
+    """
+    Checks if LLM is a Gemini provider.
+    Args:
+        llm: The CrewAI LLM to check.
+    Returns:
+        True if LLM is Gemini provider, False otherwise.
+    """
+    try:
+        import crewai.llms.providers.gemini.completion
+        return isinstance(llm, crewai.llms.providers.gemini.completion.GeminiCompletion)
+    except ImportError:
+        return False
+def _is_bedrock_llm(llm: Any) -> bool:
+    """
+    Checks if LLM is a Bedrock provider.
+    Args:
+        llm: The CrewAI LLM to check.
+    Returns:
+        True if LLM is Bedrock provider, False otherwise.
+    """
+    try:
+        import crewai.llms.providers.bedrock.completion
+        return isinstance(
+            llm, crewai.llms.providers.bedrock.completion.BedrockCompletion
+        )
+    except ImportError:
+        return False
+def _patch_openai_client(
+    llm: "openai_completion.OpenAICompletion", project_name: Optional[str]
+) -> None:
+    """
+    Patches OpenAI client for the given LLM.
+    Args:
+        llm: The CrewAI LLM instance with OpenAI client to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    try:
+        import opik.integrations.openai
+        llm.client = opik.integrations.openai.track_openai(
+            llm.client, project_name=project_name
+        )
+    except Exception:
+        LOGGER.warning("Failed to track OpenAI client for LLM", exc_info=True)
+def _patch_anthropic_client(
+    llm: "anthropic_completion.AnthropicCompletion", project_name: Optional[str]
+) -> None:
+    """
+    Patches Anthropic client for the given LLM.
+    Args:
+        llm: The CrewAI LLM instance with Anthropic client to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    try:
+        import opik.integrations.anthropic
+        llm.client = opik.integrations.anthropic.track_anthropic(
+            llm.client, project_name=project_name
+        )
+    except Exception:
+        LOGGER.warning("Failed to track Anthropic client for LLM", exc_info=True)
+def _patch_gemini_client(
+    llm: "gemini_completion.GeminiCompletion", project_name: Optional[str]
+) -> None:
+    """
+    Patches Gemini client for the given LLM.
+    Args:
+        llm: The CrewAI LLM instance with Gemini client to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    try:
+        import opik.integrations.genai
+        llm.client = opik.integrations.genai.track_genai(
+            llm.client, project_name=project_name
+        )
+    except Exception:
+        LOGGER.warning("Failed to track Gemini client for LLM", exc_info=True)
+def _patch_bedrock_client(
+    llm: "bedrock_completion.BedrockCompletion", project_name: Optional[str]
+) -> None:
+    """
+    Patches Bedrock client for the given LLM.
+    Args:
+        llm: The CrewAI LLM instance with Bedrock client to patch.
+        project_name: The name of the project to associate with tracking.
+    """
+    try:
+        import opik.integrations.bedrock
+        llm.client = opik.integrations.bedrock.track_bedrock(
+            llm.client, project_name=project_name
+        )
+    except Exception:
+        LOGGER.warning("Failed to track Bedrock client for LLM", exc_info=True)

opik/integrations/dspy/callback.py CHANGED Viewed

@@ -1,17 +1,16 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Tuple, Union
 import logging
 import dspy
 from dspy.utils import callback as dspy_callback
-import opik.types as types
-import opik.opik_context as opik_context
-import opik.context_storage as context_storage
+from opik import context_storage, opik_context, tracing_runtime_config
+from opik import llm_usage
 from opik.api_objects import helpers, span, trace, opik_client
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
 from opik.decorator import error_info_collector
 from .graph import build_mermaid_graph_from_module
+from .parsers import LMHistoryInfo, extract_lm_info_from_history, get_span_type
 LOGGER = logging.getLogger(__name__)
@@ -35,6 +34,8 @@ class OpikCallback(dspy_callback.BaseCallback):
     ):
         self._map_call_id_to_span_data: Dict[str, span.SpanData] = {}
         self._map_call_id_to_trace_data: Dict[str, trace.TraceData] = {}
+        # Store (lm_instance, expected_messages) for extracting usage and verifying correct history entry
+        self._map_call_id_to_lm_info: Dict[str, Tuple[Any, Optional[Any]]] = {}
         self._origins_metadata: Dict[str, Any] = {"created_from": "dspy"}
@@ -106,7 +107,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             parent_project_name=current_span_data.project_name,
             child_project_name=self._project_name,
         )
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         span_data = span.SpanData(
             trace_id=current_span_data.trace_id,
@@ -130,7 +131,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             current_trace_data.project_name,
             self._project_name,
         )
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         span_data = span.SpanData(
             trace_id=current_trace_data.id,
@@ -201,13 +202,39 @@ class OpikCallback(dspy_callback.BaseCallback):
         call_id: str,
         outputs: Optional[Any],
         exception: Optional[Exception] = None,
+        usage: Optional[llm_usage.OpikUsage] = None,
+        extra_metadata: Optional[Dict[str, Any]] = None,
+        actual_provider: Optional[str] = None,
+        total_cost: Optional[float] = None,
     ) -> None:
         if span_data := self._map_call_id_to_span_data.pop(call_id, None):
             if exception:
                 error_info = error_info_collector.collect(exception)
                 span_data.update(error_info=error_info)
-            span_data.update(output={"output": outputs}).init_end_time()
+            # Prepare the update dict
+            update_kwargs: Dict[str, Any] = {
+                "output": {"output": outputs},
+                "usage": usage,
+                "total_cost": total_cost,
+            }
+            # Handle LLM routers like OpenRouter that return the actual serving provider
+            if (
+                actual_provider is not None
+                and span_data.provider is not None
+                and span_data.provider.lower() != actual_provider.lower()
+            ):
+                # Store the original provider (e.g., "openrouter") in metadata
+                if extra_metadata is None:
+                    extra_metadata = {}
+                extra_metadata["llm_router"] = span_data.provider
+                # Update to the actual provider for accurate cost tracking
+                update_kwargs["provider"] = actual_provider
+            update_kwargs["metadata"] = extra_metadata
+            span_data.update(**update_kwargs).init_end_time()
             if tracing_runtime_config.is_tracing_active():
                 self._opik_client.span(**span_data.as_parameters)
@@ -234,7 +261,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             trace_id = current_callback_context_data.id
             parent_span_id = None
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         return span.SpanData(
             trace_id=trace_id,
@@ -266,6 +293,13 @@ class OpikCallback(dspy_callback.BaseCallback):
             name=f"{span_data.name}: {provider} - {model}",
         )
         self._map_call_id_to_span_data[call_id] = span_data
+        # Store LM instance and expected messages for extracting usage
+        self._map_call_id_to_lm_info[call_id] = (
+            instance,
+            inputs.get("messages"),
+        )
         self._set_current_context_data(span_data)
     def on_lm_end(
@@ -274,10 +308,21 @@ class OpikCallback(dspy_callback.BaseCallback):
         outputs: Optional[Dict[str, Any]],
         exception: Optional[Exception] = None,
     ) -> None:
+        lm_info = self._extract_lm_info_from_history(call_id)
+        # Add cache_hit to span metadata only when we have a definitive value
+        extra_metadata = (
+            {"cache_hit": lm_info.cache_hit} if lm_info.cache_hit is not None else None
+        )
         self._end_span(
             call_id=call_id,
             exception=exception,
             outputs=outputs,
+            usage=lm_info.usage,
+            extra_metadata=extra_metadata,
+            actual_provider=lm_info.actual_provider,
+            total_cost=lm_info.total_cost,
         )
     def on_tool_start(
@@ -319,14 +364,32 @@ class OpikCallback(dspy_callback.BaseCallback):
             return span_data
         return self._context_storage.get_trace_data()
-    def _get_span_type(self, instance: Any) -> types.SpanType:
-        if isinstance(instance, dspy.Predict):
-            return "llm"
-        elif isinstance(instance, dspy.LM):
-            return "llm"
-        elif isinstance(instance, dspy.Tool):
-            return "tool"
-        return "general"
+    def _extract_lm_info_from_history(self, call_id: str) -> LMHistoryInfo:
+        """
+        Extract token usage, cache status, actual provider, and cost from the LM's history.
+        DSPy stores usage information in the LM's history after each call.
+        We verify the history entry matches our expected messages to handle
+        potential race conditions with concurrent LM calls.
+        For routers like OpenRouter, the response contains the actual provider
+        that served the request (e.g., "Novita", "Together"), which differs from
+        the router name used in the model string (e.g., "openrouter").
+        The cost field is provided by providers like OpenRouter and includes
+        accurate pricing for all token types (reasoning, cache, multimodal).
+        Returns:
+            LMHistoryInfo containing usage, cache_hit, actual_provider, and total_cost.
+        """
+        lm_info = self._map_call_id_to_lm_info.pop(call_id, None)
+        if lm_info is None:
+            return LMHistoryInfo(
+                usage=None, cache_hit=None, actual_provider=None, total_cost=None
+            )
+        lm_instance, expected_messages = lm_info
+        return extract_lm_info_from_history(lm_instance, expected_messages)
     def _get_opik_metadata(self, instance: Any) -> Dict[str, Any]:
         graph = None

opik/integrations/dspy/parsers.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Parsers and data structures for extracting information from DSPy LM responses.
+This module contains utilities for parsing DSPy LM history entries and
+extracting relevant information like usage, provider, and cost data.
+"""
+from dataclasses import dataclass
+from typing import Any, Optional, Tuple
+import logging
+import dspy
+from opik import llm_usage, types
+LOGGER = logging.getLogger(__name__)
+@dataclass
+class LMHistoryInfo:
+    """
+    Information extracted from a DSPy LM history entry.
+    This dataclass holds the parsed information from an LM call's history,
+    including usage statistics, cache status, provider information, and cost.
+    Attributes:
+        usage: Token usage information (prompt, completion, total tokens)
+        cache_hit: Whether the response was served from cache.
+            True if cached, False if not, None if unknown.
+        actual_provider: The actual provider that served the request.
+            This is useful for LLM routers like OpenRouter that may route
+            to different underlying providers (e.g., "Novita", "Together").
+        total_cost: The total cost of the request from the provider.
+            This includes accurate pricing for all token types.
+    """
+    usage: Optional[llm_usage.OpikUsage]
+    cache_hit: Optional[bool]
+    actual_provider: Optional[str]
+    total_cost: Optional[float]
+    def as_tuple(
+        self,
+    ) -> Tuple[
+        Optional[llm_usage.OpikUsage],
+        Optional[bool],
+        Optional[str],
+        Optional[float],
+    ]:
+        """Return the info as a tuple for backwards compatibility."""
+        return (self.usage, self.cache_hit, self.actual_provider, self.total_cost)
+def get_span_type(instance: Any) -> types.SpanType:
+    """
+    Determine the span type based on the DSPy instance type.
+    Args:
+        instance: A DSPy module, LM, or tool instance.
+    Returns:
+        The appropriate span type: "llm" for Predict/LM, "tool" for Tool,
+        or "general" for other types.
+    """
+    if isinstance(instance, dspy.Predict):
+        return "llm"
+    elif isinstance(instance, dspy.LM):
+        return "llm"
+    elif isinstance(instance, dspy.Tool):
+        return "tool"
+    return "general"
+def extract_lm_info_from_history(
+    lm_instance: Any,
+    expected_messages: Optional[Any],
+) -> LMHistoryInfo:
+    """
+    Extract token usage, cache status, actual provider, and cost from the LM's history.
+    DSPy stores usage information in the LM's history after each call.
+    We verify the history entry matches our expected messages to handle
+    potential race conditions with concurrent LM calls.
+    For routers like OpenRouter, the response contains the actual provider
+    that served the request (e.g., "Novita", "Together"), which differs from
+    the router name used in the model string (e.g., "openrouter").
+    The cost field is provided by providers like OpenRouter and includes
+    accurate pricing for all token types (reasoning, cache, multimodal).
+    Args:
+        lm_instance: The DSPy LM instance that has the history.
+        expected_messages: The expected messages to match in the history entry.
+    Returns:
+        LMHistoryInfo containing usage, cache_hit, actual_provider, and total_cost.
+    """
+    empty_result = LMHistoryInfo(
+        usage=None,
+        cache_hit=None,
+        actual_provider=None,
+        total_cost=None,
+    )
+    if not hasattr(lm_instance, "history") or not lm_instance.history:
+        return empty_result
+    try:
+        last_entry = lm_instance.history[-1]
+        # Verify we have the correct history entry by checking messages match
+        if last_entry.get("messages") != expected_messages:
+            LOGGER.debug(
+                "History entry messages don't match expected messages, "
+                "skipping usage extraction (possibly due to concurrent LM calls)"
+            )
+            return empty_result
+        response = last_entry.get("response")
+        usage_dict = last_entry.get("usage")
+        # Extract actual provider from response (useful for routers like OpenRouter)
+        # The response is a LiteLLM ModelResponse object with a 'provider' attribute
+        # when using routers like OpenRouter
+        actual_provider: Optional[str] = None
+        if response is not None and hasattr(response, "provider"):
+            actual_provider = response.provider
+        # Extract cost from history entry or usage dict
+        # OpenRouter and other providers return accurate cost including all token types
+        total_cost: Optional[float] = None
+        if last_entry.get("cost") is not None:
+            total_cost = last_entry.get("cost")
+        elif usage_dict and usage_dict.get("cost") is not None:
+            total_cost = usage_dict.get("cost")
+        # Get explicit cache_hit if set, otherwise infer from usage (empty = cached)
+        if response is None:
+            cache_hit = not usage_dict
+        elif hasattr(response, "cache_hit") and response.cache_hit is not None:
+            cache_hit = response.cache_hit
+        else:
+            # Fallback: infer from usage (empty = cached)
+            cache_hit = not usage_dict
+        if usage_dict:
+            usage = llm_usage.build_opik_usage_from_unknown_provider(usage_dict)
+            return LMHistoryInfo(
+                usage=usage,
+                cache_hit=cache_hit,
+                actual_provider=actual_provider,
+                total_cost=total_cost,
+            )
+        else:
+            return LMHistoryInfo(
+                usage=None,
+                cache_hit=cache_hit,
+                actual_provider=actual_provider,
+                total_cost=total_cost,
+            )
+    except Exception:
+        LOGGER.debug(
+            "Failed to extract info from DSPy LM history",
+            exc_info=True,
+        )
+        return empty_result

opik/integrations/harbor/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+Opik integration for Harbor benchmark evaluation framework.
+Example:
+    >>> from opik.integrations.harbor import track_harbor
+    >>> job = Job(config)
+    >>> tracked_job = track_harbor(job)
+    >>> result = await tracked_job.run()
+Or enable tracking globally (for CLI usage):
+    >>> from opik.integrations.harbor import track_harbor
+    >>> track_harbor()
+"""
+from .opik_tracker import track_harbor, reset_harbor_tracking
+__all__ = ["track_harbor", "reset_harbor_tracking"]

opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl