opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Client patchers for CrewAI LLM providers (v1.0.0+).
|
|
3
|
+
|
|
4
|
+
This module handles patching of LLM clients used by CrewAI agents with Opik tracking.
|
|
5
|
+
Each provider has its own patching function that handles missing dependencies gracefully.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Optional, TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
import crewai
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
import crewai.llms.providers.openai.completion as openai_completion
|
|
15
|
+
import crewai.llms.providers.anthropic.completion as anthropic_completion
|
|
16
|
+
import crewai.llms.providers.gemini.completion as gemini_completion
|
|
17
|
+
import crewai.llms.providers.bedrock.completion as bedrock_completion
|
|
18
|
+
|
|
19
|
+
LOGGER = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def patch_llm_client(crew: crewai.Crew, project_name: Optional[str]) -> None:
    """
    Attach Opik tracking to the LLM client of every agent in a crew.

    Each agent's ``llm`` attribute is handed to ``_patch_single_llm_client``,
    which decides whether and how the LLM gets patched.

    Args:
        crew: The Crew instance whose agents should be patched.
        project_name: The Opik project name forwarded to the tracking wrappers.
    """
    for crew_agent in crew.agents:
        agent_llm = crew_agent.llm
        _patch_single_llm_client(agent_llm, project_name)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _patch_single_llm_client(llm: Any, project_name: Optional[str]) -> None:
    """
    Dispatch an LLM to the patcher that matches its provider.

    The provider checks run in a fixed order (OpenAI, Anthropic, Gemini,
    Bedrock); the first check that succeeds selects the patcher, and the
    remaining checks are not evaluated. An LLM that matches no provider is
    left untouched.

    Args:
        llm: The CrewAI LLM instance to patch.
        project_name: The Opik project name forwarded to the patcher.
    """
    # (provider check, patcher) pairs, in the order they must be tried.
    provider_handlers = (
        (_is_openai_llm, _patch_openai_client),
        (_is_anthropic_llm, _patch_anthropic_client),
        (_is_gemini_llm, _patch_gemini_client),
        (_is_bedrock_llm, _patch_bedrock_client),
    )

    for matches_provider, apply_patch in provider_handlers:
        if matches_provider(llm):
            apply_patch(llm, project_name)
            return
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _is_openai_llm(llm: Any) -> bool:
|
|
55
|
+
"""
|
|
56
|
+
Checks if LLM is an OpenAI provider.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
llm: The CrewAI LLM to check.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
True if LLM is OpenAI provider, False otherwise.
|
|
63
|
+
"""
|
|
64
|
+
try:
|
|
65
|
+
import crewai.llms.providers.openai.completion
|
|
66
|
+
|
|
67
|
+
return isinstance(llm, crewai.llms.providers.openai.completion.OpenAICompletion)
|
|
68
|
+
except ImportError:
|
|
69
|
+
return False
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _is_anthropic_llm(llm: Any) -> bool:
|
|
73
|
+
"""
|
|
74
|
+
Checks if LLM is an Anthropic provider.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
llm: The CrewAI LLM to check.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
True if LLM is Anthropic provider, False otherwise.
|
|
81
|
+
"""
|
|
82
|
+
try:
|
|
83
|
+
import crewai.llms.providers.anthropic.completion
|
|
84
|
+
|
|
85
|
+
return isinstance(
|
|
86
|
+
llm, crewai.llms.providers.anthropic.completion.AnthropicCompletion
|
|
87
|
+
)
|
|
88
|
+
except ImportError:
|
|
89
|
+
return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _is_gemini_llm(llm: Any) -> bool:
|
|
93
|
+
"""
|
|
94
|
+
Checks if LLM is a Gemini provider.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
llm: The CrewAI LLM to check.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
True if LLM is Gemini provider, False otherwise.
|
|
101
|
+
"""
|
|
102
|
+
try:
|
|
103
|
+
import crewai.llms.providers.gemini.completion
|
|
104
|
+
|
|
105
|
+
return isinstance(llm, crewai.llms.providers.gemini.completion.GeminiCompletion)
|
|
106
|
+
except ImportError:
|
|
107
|
+
return False
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _is_bedrock_llm(llm: Any) -> bool:
|
|
111
|
+
"""
|
|
112
|
+
Checks if LLM is a Bedrock provider.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
llm: The CrewAI LLM to check.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
True if LLM is Bedrock provider, False otherwise.
|
|
119
|
+
"""
|
|
120
|
+
try:
|
|
121
|
+
import crewai.llms.providers.bedrock.completion
|
|
122
|
+
|
|
123
|
+
return isinstance(
|
|
124
|
+
llm, crewai.llms.providers.bedrock.completion.BedrockCompletion
|
|
125
|
+
)
|
|
126
|
+
except ImportError:
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _patch_openai_client(
    llm: "openai_completion.OpenAICompletion", project_name: Optional[str]
) -> None:
    """
    Swap the LLM's OpenAI client for an Opik-tracked one.

    ``llm.client`` is passed through ``opik.integrations.openai.track_openai``
    and the result is stored back on ``llm.client``. This is best-effort: any
    exception, including a failure to import the integration, is logged as a
    warning and not re-raised.

    Args:
        llm: The CrewAI LLM instance with OpenAI client to patch.
        project_name: The Opik project name passed to ``track_openai``.
    """
    try:
        import opik.integrations.openai as opik_openai

        tracked_client = opik_openai.track_openai(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        LOGGER.warning("Failed to track OpenAI client for LLM", exc_info=True)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _patch_anthropic_client(
    llm: "anthropic_completion.AnthropicCompletion", project_name: Optional[str]
) -> None:
    """
    Swap the LLM's Anthropic client for an Opik-tracked one.

    ``llm.client`` is passed through
    ``opik.integrations.anthropic.track_anthropic`` and the result is stored
    back on ``llm.client``. This is best-effort: any exception, including a
    failure to import the integration, is logged as a warning and not
    re-raised.

    Args:
        llm: The CrewAI LLM instance with Anthropic client to patch.
        project_name: The Opik project name passed to ``track_anthropic``.
    """
    try:
        import opik.integrations.anthropic as opik_anthropic

        tracked_client = opik_anthropic.track_anthropic(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        LOGGER.warning("Failed to track Anthropic client for LLM", exc_info=True)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _patch_gemini_client(
    llm: "gemini_completion.GeminiCompletion", project_name: Optional[str]
) -> None:
    """
    Swap the LLM's Gemini client for an Opik-tracked one.

    ``llm.client`` is passed through ``opik.integrations.genai.track_genai``
    and the result is stored back on ``llm.client``. This is best-effort: any
    exception, including a failure to import the integration, is logged as a
    warning and not re-raised.

    Args:
        llm: The CrewAI LLM instance with Gemini client to patch.
        project_name: The Opik project name passed to ``track_genai``.
    """
    try:
        import opik.integrations.genai as opik_genai

        tracked_client = opik_genai.track_genai(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        LOGGER.warning("Failed to track Gemini client for LLM", exc_info=True)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _patch_bedrock_client(
    llm: "bedrock_completion.BedrockCompletion", project_name: Optional[str]
) -> None:
    """
    Swap the LLM's Bedrock client for an Opik-tracked one.

    ``llm.client`` is passed through
    ``opik.integrations.bedrock.track_bedrock`` and the result is stored back
    on ``llm.client``. This is best-effort: any exception, including a failure
    to import the integration, is logged as a warning and not re-raised.

    Args:
        llm: The CrewAI LLM instance with Bedrock client to patch.
        project_name: The Opik project name passed to ``track_bedrock``.
    """
    try:
        import opik.integrations.bedrock as opik_bedrock

        tracked_client = opik_bedrock.track_bedrock(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        LOGGER.warning("Failed to track Bedrock client for LLM", exc_info=True)
|
|
@@ -1,17 +1,16 @@
|
|
|
1
|
-
from typing import Any, Dict, Optional, Union
|
|
1
|
+
from typing import Any, Dict, Optional, Tuple, Union
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
import dspy
|
|
5
5
|
from dspy.utils import callback as dspy_callback
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import opik.context_storage as context_storage
|
|
7
|
+
from opik import context_storage, opik_context, tracing_runtime_config
|
|
8
|
+
from opik import llm_usage
|
|
10
9
|
from opik.api_objects import helpers, span, trace, opik_client
|
|
11
|
-
import opik.decorator.tracing_runtime_config as tracing_runtime_config
|
|
12
10
|
from opik.decorator import error_info_collector
|
|
13
11
|
|
|
14
12
|
from .graph import build_mermaid_graph_from_module
|
|
13
|
+
from .parsers import LMHistoryInfo, extract_lm_info_from_history, get_span_type
|
|
15
14
|
|
|
16
15
|
LOGGER = logging.getLogger(__name__)
|
|
17
16
|
|
|
@@ -35,6 +34,8 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
35
34
|
):
|
|
36
35
|
self._map_call_id_to_span_data: Dict[str, span.SpanData] = {}
|
|
37
36
|
self._map_call_id_to_trace_data: Dict[str, trace.TraceData] = {}
|
|
37
|
+
# Store (lm_instance, expected_messages) for extracting usage and verifying correct history entry
|
|
38
|
+
self._map_call_id_to_lm_info: Dict[str, Tuple[Any, Optional[Any]]] = {}
|
|
38
39
|
|
|
39
40
|
self._origins_metadata: Dict[str, Any] = {"created_from": "dspy"}
|
|
40
41
|
|
|
@@ -106,7 +107,7 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
106
107
|
parent_project_name=current_span_data.project_name,
|
|
107
108
|
child_project_name=self._project_name,
|
|
108
109
|
)
|
|
109
|
-
span_type =
|
|
110
|
+
span_type = get_span_type(instance)
|
|
110
111
|
|
|
111
112
|
span_data = span.SpanData(
|
|
112
113
|
trace_id=current_span_data.trace_id,
|
|
@@ -130,7 +131,7 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
130
131
|
current_trace_data.project_name,
|
|
131
132
|
self._project_name,
|
|
132
133
|
)
|
|
133
|
-
span_type =
|
|
134
|
+
span_type = get_span_type(instance)
|
|
134
135
|
|
|
135
136
|
span_data = span.SpanData(
|
|
136
137
|
trace_id=current_trace_data.id,
|
|
@@ -201,13 +202,39 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
201
202
|
call_id: str,
|
|
202
203
|
outputs: Optional[Any],
|
|
203
204
|
exception: Optional[Exception] = None,
|
|
205
|
+
usage: Optional[llm_usage.OpikUsage] = None,
|
|
206
|
+
extra_metadata: Optional[Dict[str, Any]] = None,
|
|
207
|
+
actual_provider: Optional[str] = None,
|
|
208
|
+
total_cost: Optional[float] = None,
|
|
204
209
|
) -> None:
|
|
205
210
|
if span_data := self._map_call_id_to_span_data.pop(call_id, None):
|
|
206
211
|
if exception:
|
|
207
212
|
error_info = error_info_collector.collect(exception)
|
|
208
213
|
span_data.update(error_info=error_info)
|
|
209
214
|
|
|
210
|
-
|
|
215
|
+
# Prepare the update dict
|
|
216
|
+
update_kwargs: Dict[str, Any] = {
|
|
217
|
+
"output": {"output": outputs},
|
|
218
|
+
"usage": usage,
|
|
219
|
+
"total_cost": total_cost,
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
# Handle LLM routers like OpenRouter that return the actual serving provider
|
|
223
|
+
if (
|
|
224
|
+
actual_provider is not None
|
|
225
|
+
and span_data.provider is not None
|
|
226
|
+
and span_data.provider.lower() != actual_provider.lower()
|
|
227
|
+
):
|
|
228
|
+
# Store the original provider (e.g., "openrouter") in metadata
|
|
229
|
+
if extra_metadata is None:
|
|
230
|
+
extra_metadata = {}
|
|
231
|
+
extra_metadata["llm_router"] = span_data.provider
|
|
232
|
+
# Update to the actual provider for accurate cost tracking
|
|
233
|
+
update_kwargs["provider"] = actual_provider
|
|
234
|
+
|
|
235
|
+
update_kwargs["metadata"] = extra_metadata
|
|
236
|
+
|
|
237
|
+
span_data.update(**update_kwargs).init_end_time()
|
|
211
238
|
if tracing_runtime_config.is_tracing_active():
|
|
212
239
|
self._opik_client.span(**span_data.as_parameters)
|
|
213
240
|
|
|
@@ -234,7 +261,7 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
234
261
|
trace_id = current_callback_context_data.id
|
|
235
262
|
parent_span_id = None
|
|
236
263
|
|
|
237
|
-
span_type =
|
|
264
|
+
span_type = get_span_type(instance)
|
|
238
265
|
|
|
239
266
|
return span.SpanData(
|
|
240
267
|
trace_id=trace_id,
|
|
@@ -266,6 +293,13 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
266
293
|
name=f"{span_data.name}: {provider} - {model}",
|
|
267
294
|
)
|
|
268
295
|
self._map_call_id_to_span_data[call_id] = span_data
|
|
296
|
+
|
|
297
|
+
# Store LM instance and expected messages for extracting usage
|
|
298
|
+
self._map_call_id_to_lm_info[call_id] = (
|
|
299
|
+
instance,
|
|
300
|
+
inputs.get("messages"),
|
|
301
|
+
)
|
|
302
|
+
|
|
269
303
|
self._set_current_context_data(span_data)
|
|
270
304
|
|
|
271
305
|
def on_lm_end(
|
|
@@ -274,10 +308,21 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
274
308
|
outputs: Optional[Dict[str, Any]],
|
|
275
309
|
exception: Optional[Exception] = None,
|
|
276
310
|
) -> None:
|
|
311
|
+
lm_info = self._extract_lm_info_from_history(call_id)
|
|
312
|
+
|
|
313
|
+
# Add cache_hit to span metadata only when we have a definitive value
|
|
314
|
+
extra_metadata = (
|
|
315
|
+
{"cache_hit": lm_info.cache_hit} if lm_info.cache_hit is not None else None
|
|
316
|
+
)
|
|
317
|
+
|
|
277
318
|
self._end_span(
|
|
278
319
|
call_id=call_id,
|
|
279
320
|
exception=exception,
|
|
280
321
|
outputs=outputs,
|
|
322
|
+
usage=lm_info.usage,
|
|
323
|
+
extra_metadata=extra_metadata,
|
|
324
|
+
actual_provider=lm_info.actual_provider,
|
|
325
|
+
total_cost=lm_info.total_cost,
|
|
281
326
|
)
|
|
282
327
|
|
|
283
328
|
def on_tool_start(
|
|
@@ -319,14 +364,32 @@ class OpikCallback(dspy_callback.BaseCallback):
|
|
|
319
364
|
return span_data
|
|
320
365
|
return self._context_storage.get_trace_data()
|
|
321
366
|
|
|
322
|
-
def
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
367
|
+
def _extract_lm_info_from_history(self, call_id: str) -> LMHistoryInfo:
    """
    Look up the LM recorded for a call and parse its latest history entry.

    The ``(lm_instance, expected_messages)`` pair stored under ``call_id`` is
    removed from ``self._map_call_id_to_lm_info`` and handed to
    ``extract_lm_info_from_history``, which does the actual parsing of usage,
    cache status, serving provider and cost.

    Args:
        call_id: Identifier of the LM call whose info should be extracted.

    Returns:
        The LMHistoryInfo produced by ``extract_lm_info_from_history``, or an
        LMHistoryInfo with every field set to None when nothing was stored
        for ``call_id``.
    """
    stored_entry = self._map_call_id_to_lm_info.pop(call_id, None)

    if stored_entry is not None:
        lm_instance, expected_messages = stored_entry
        return extract_lm_info_from_history(lm_instance, expected_messages)

    # Nothing was recorded for this call, so there is no history to inspect.
    return LMHistoryInfo(
        usage=None, cache_hit=None, actual_provider=None, total_cost=None
    )
|
|
330
393
|
|
|
331
394
|
def _get_opik_metadata(self, instance: Any) -> Dict[str, Any]:
|
|
332
395
|
graph = None
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parsers and data structures for extracting information from DSPy LM responses.
|
|
3
|
+
|
|
4
|
+
This module contains utilities for parsing DSPy LM history entries and
|
|
5
|
+
extracting relevant information like usage, provider, and cost data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Any, Optional, Tuple
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
import dspy
|
|
13
|
+
|
|
14
|
+
from opik import llm_usage, types
|
|
15
|
+
|
|
16
|
+
LOGGER = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class LMHistoryInfo:
    """
    Parsed result of inspecting one DSPy LM history entry.

    Every field is optional; None means the value could not be determined.

    Attributes:
        usage: Token usage for the call.
        cache_hit: True if the response was served from cache, False if it
            was not, None if unknown.
        actual_provider: Provider that actually served the request, which
            can differ from the router named in the model string
            (e.g. "openrouter").
        total_cost: Total cost of the request as reported with the response.
    """

    # Field order is part of the interface: it defines the positional
    # constructor signature and the order returned by ``as_tuple``.
    usage: Optional[llm_usage.OpikUsage]
    cache_hit: Optional[bool]
    actual_provider: Optional[str]
    total_cost: Optional[float]

    def as_tuple(
        self,
    ) -> Tuple[
        Optional[llm_usage.OpikUsage],
        Optional[bool],
        Optional[str],
        Optional[float],
    ]:
        """Return ``(usage, cache_hit, actual_provider, total_cost)`` for backwards compatibility."""
        usage, cache_hit = self.usage, self.cache_hit
        provider, cost = self.actual_provider, self.total_cost
        return (usage, cache_hit, provider, cost)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_span_type(instance: Any) -> types.SpanType:
    """
    Map a DSPy object to the Opik span type used to record it.

    Args:
        instance: A DSPy module, LM, or tool instance.

    Returns:
        "llm" for ``dspy.Predict`` and ``dspy.LM`` instances, "tool" for
        ``dspy.Tool`` instances, and "general" for everything else.
    """
    if isinstance(instance, (dspy.Predict, dspy.LM)):
        return "llm"

    if isinstance(instance, dspy.Tool):
        return "tool"

    return "general"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def extract_lm_info_from_history(
    lm_instance: Any,
    expected_messages: Optional[Any],
) -> LMHistoryInfo:
    """
    Read usage, cache status, serving provider and cost from an LM's history.

    Only the most recent history entry is inspected. It is used only if its
    "messages" value equals ``expected_messages``; otherwise the entry is
    assumed to belong to a different (possibly concurrent) call and nothing
    is extracted.

    Extraction rules:
        * actual_provider: the response's ``provider`` attribute, if present.
        * total_cost: the entry's "cost", falling back to the usage dict's
          "cost".
        * cache_hit: the response's ``cache_hit`` when it is set; otherwise
          inferred as "usage is empty".
        * usage: built from the usage dict when it is non-empty.

    Args:
        lm_instance: The DSPy LM instance that has the history.
        expected_messages: The messages the matching history entry must hold.

    Returns:
        LMHistoryInfo with the extracted values. All fields are None when the
        LM has no history, the last entry does not match, or extraction
        raises.
    """
    no_info = LMHistoryInfo(
        usage=None,
        cache_hit=None,
        actual_provider=None,
        total_cost=None,
    )

    if not getattr(lm_instance, "history", None):
        return no_info

    try:
        entry = lm_instance.history[-1]

        # Guard against reading an entry written by another LM call.
        if entry.get("messages") != expected_messages:
            LOGGER.debug(
                "History entry messages don't match expected messages, "
                "skipping usage extraction (possibly due to concurrent LM calls)"
            )
            return no_info

        response = entry.get("response")
        usage_dict = entry.get("usage")

        # Routers such as OpenRouter expose the serving provider on the response.
        actual_provider: Optional[str] = (
            response.provider
            if response is not None and hasattr(response, "provider")
            else None
        )

        # Prefer the entry-level cost; fall back to the cost inside usage.
        total_cost: Optional[float] = entry.get("cost")
        if total_cost is None and usage_dict:
            total_cost = usage_dict.get("cost")

        # An explicit cache_hit on the response wins; otherwise empty usage
        # is taken to mean the response came from cache.
        explicit_cache_hit = (
            getattr(response, "cache_hit", None) if response is not None else None
        )
        cache_hit = (
            explicit_cache_hit if explicit_cache_hit is not None else not usage_dict
        )

        usage = (
            llm_usage.build_opik_usage_from_unknown_provider(usage_dict)
            if usage_dict
            else None
        )

        return LMHistoryInfo(
            usage=usage,
            cache_hit=cache_hit,
            actual_provider=actual_provider,
            total_cost=total_cost,
        )
    except Exception:
        LOGGER.debug(
            "Failed to extract info from DSPy LM history",
            exc_info=True,
        )
        return no_info
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Opik integration for Harbor benchmark evaluation framework.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
>>> from opik.integrations.harbor import track_harbor
|
|
6
|
+
>>> job = Job(config)
|
|
7
|
+
>>> tracked_job = track_harbor(job)
|
|
8
|
+
>>> result = await tracked_job.run()
|
|
9
|
+
|
|
10
|
+
Or enable tracking globally (for CLI usage):
|
|
11
|
+
>>> from opik.integrations.harbor import track_harbor
|
|
12
|
+
>>> track_harbor()
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .opik_tracker import track_harbor, reset_harbor_tracking
|
|
16
|
+
|
|
17
|
+
__all__ = ["track_harbor", "reset_harbor_tracking"]
|