opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Shared usage conversion utilities for Bedrock models."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def anthropic_to_bedrock_usage(anthropic_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Translate an Anthropic usage dict (snake_case) into Bedrock's camelCase layout.

    Key mapping:
        input_tokens                 -> inputTokens
        output_tokens                -> outputTokens
        cache_creation_input_tokens  -> cacheWriteInputTokens
        cache_read_input_tokens      -> cacheReadInputTokens

    ``totalTokens`` is computed as ``inputTokens + outputTokens``; the cache
    counters are not added to it. Any missing source key defaults to 0.

    Args:
        anthropic_usage: Usage mapping in Anthropic's format.

    Returns:
        A new dict with the five Bedrock-style keys listed above.
    """
    prompt_count = anthropic_usage.get("input_tokens", 0)
    completion_count = anthropic_usage.get("output_tokens", 0)

    bedrock_usage: Dict[str, Any] = {
        "inputTokens": prompt_count,
        "outputTokens": completion_count,
        "cacheWriteInputTokens": anthropic_usage.get("cache_creation_input_tokens", 0),
        "cacheReadInputTokens": anthropic_usage.get("cache_read_input_tokens", 0),
    }
    bedrock_usage["totalTokens"] = prompt_count + completion_count
    return bedrock_usage
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def llama_to_bedrock_usage(llama_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Translate Llama token counters into Bedrock's usage layout.

    Reads ``prompt_token_count`` and ``generation_token_count`` (each
    defaulting to 0 when absent) and reports them as ``inputTokens`` and
    ``outputTokens``, plus their sum as ``totalTokens``.

    Args:
        llama_usage: Mapping that carries the Llama counters; other keys are ignored.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``.
    """
    prompt_count = llama_usage.get("prompt_token_count", 0)
    generated_count = llama_usage.get("generation_token_count", 0)

    return dict(
        inputTokens=prompt_count,
        outputTokens=generated_count,
        totalTokens=prompt_count + generated_count,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def openai_to_bedrock_usage(openai_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Translate an OpenAI-style usage dict into Bedrock's usage layout.

    Used for Mistral/Pixtral models, which report usage in the OpenAI format.

    Only ``prompt_tokens`` and ``completion_tokens`` are read (each defaulting
    to 0 when absent). ``totalTokens`` is recomputed from those two values; a
    ``total_tokens`` entry in the input is not consulted.

    Args:
        openai_usage: Usage mapping in OpenAI's format.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``.
    """
    bedrock_usage: Dict[str, Any] = {}
    bedrock_usage["inputTokens"] = openai_usage.get("prompt_tokens", 0)
    bedrock_usage["outputTokens"] = openai_usage.get("completion_tokens", 0)
    bedrock_usage["totalTokens"] = (
        bedrock_usage["inputTokens"] + bedrock_usage["outputTokens"]
    )
    return bedrock_usage
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def nova_to_bedrock_usage(nova_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize Nova usage, which already uses Bedrock's key names.

    Copies ``inputTokens`` and ``outputTokens`` (each defaulting to 0 when
    absent) and recomputes ``totalTokens`` as their sum. Any other keys in the
    input are dropped.

    Args:
        nova_usage: Usage mapping as reported by a Nova model.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``.
    """
    input_count, output_count = (
        nova_usage.get(key, 0) for key in ("inputTokens", "outputTokens")
    )

    return {
        "inputTokens": input_count,
        "outputTokens": output_count,
        "totalTokens": input_count + output_count,
    }
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from opik import llm_usage
|
|
2
|
+
from typing import Dict, Any, Optional
|
|
3
|
+
import logging
|
|
4
|
+
import opik._logging
|
|
5
|
+
|
|
6
|
+
from . import usage_converters
|
|
7
|
+
|
|
8
|
+
LOGGER = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def extract_subprovider_from_model_id(model_id: str) -> str:
    """
    Extracts the provider name from a Bedrock modelId.

    A modelId is dot-separated with the provider as the first segment.
    Cross-region inference profile IDs put a geography segment in front of
    the provider; that segment is skipped when it is recognized.

    Examples:
        ai21.j2-mid-v1 -> ai21
        amazon.nova-lite-v1:0 -> amazon
        anthropic.claude-v2:1 -> anthropic
        us.meta.llama3-1-70b-instruct -> meta
        global.anthropic.claude-sonnet-4-20250514-v1:0 -> anthropic

    Args:
        model_id: The Bedrock model or inference profile identifier.

    Returns:
        The provider segment. If the ID has no provider segment after a
        geography prefix (or contains no dot at all), the first segment is
        returned unchanged.
    """
    # Geography prefixes used by Bedrock cross-region inference profiles.
    geography_prefixes = {"us", "eu", "apac", "us-gov", "global", "jp", "au"}

    parts = model_id.split(".")

    # Require a second segment so a bare prefix such as "us" does not raise IndexError.
    if len(parts) > 1 and parts[0] in geography_prefixes:
        return parts[1]

    return parts[0]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def try_extract_usage_from_bedrock_response(
    subprovider: str, response: Dict[str, Any]
) -> Optional[llm_usage.OpikUsage]:
    """
    Best-effort extraction of token usage from an ``invoke_model`` response.

    Bedrock's invoke_model response format is not standardized, so the usage
    is looked up per subprovider and may be in that subprovider's own format
    rather than Bedrock's:

    - ``anthropic``: ``response["body"]["usage"]`` (required), Anthropic format.
    - ``meta``: counters stored directly in ``response["body"]``.
    - ``mistral``: ``response["body"]["usage"]``, OpenAI-like format.
    - ``amazon``: ``response["body"]["usage"]``, already Bedrock format.

    When the subprovider is unknown, or its branch found nothing, the generic
    ``response["body"]["usage"]`` is handed to the unknown-provider builder.

    Args:
        subprovider: Provider name, e.g. as extracted from the Bedrock modelId.
        response: The invoke_model response with an already-parsed ``body``.

    Returns:
        The extracted usage, or ``None`` when no usage was found or any error
        occurred (errors are logged, never raised).
    """
    try:
        LOGGER.debug("Extracting usage for subprovider: %s", subprovider)

        converted_usage: Optional[Dict[str, Any]] = None
        success_message = ""

        if subprovider == "anthropic":
            # Mandatory lookup: a missing "usage" raises here and is reported
            # by the handler at the bottom.
            converted_usage = usage_converters.anthropic_to_bedrock_usage(
                response["body"]["usage"]
            )
            success_message = "Anthropic usage extracted: %s"

        elif subprovider == "meta":
            # Llama models have usage fields directly in body (not in body.usage)
            payload = response.get("body", {})
            has_llama_counters = (
                "prompt_token_count" in payload or "generation_token_count" in payload
            )
            if has_llama_counters:
                converted_usage = usage_converters.llama_to_bedrock_usage(payload)
                success_message = "Llama usage extracted: %s"

        elif subprovider == "mistral":
            # Mistral/Pixtral models use OpenAI-like usage format
            raw_usage = response["body"].get("usage", {})
            if raw_usage:
                converted_usage = usage_converters.openai_to_bedrock_usage(raw_usage)
                success_message = "Mistral usage extracted: %s"

        elif subprovider == "amazon":
            # Nova models already use Bedrock format
            raw_usage = response["body"].get("usage", {})
            if raw_usage:
                converted_usage = usage_converters.nova_to_bedrock_usage(raw_usage)
                success_message = "Nova usage extracted: %s"

        if converted_usage is not None:
            extracted = llm_usage.OpikUsage.from_bedrock_dict(converted_usage)
            LOGGER.debug(success_message, converted_usage)
            return extracted

        # Fallback: not guaranteed to find the usage here for every subprovider.
        # If it is already in Bedrock's format we are good; if it is in another
        # format Opik supports, at least the prompt and completion token counts
        # are extracted so the backend can calculate cost from them.
        fallback_usage = response["body"].get("usage", {})
        if not fallback_usage:
            LOGGER.debug("No usage found in response body")
            return None

        extracted = llm_usage.build_opik_usage_from_unknown_provider(fallback_usage)
        LOGGER.debug("Fallback usage extracted: %s", fallback_usage)
        return extracted

    except Exception as error:
        LOGGER.debug("Exception during usage extraction: %s", error)
        opik._logging.log_once_at_level(
            logging.WARNING,
            f"Failed to extract usage from Bedrock's invoke_model response: {response}. It may be because this model response format is currently not supported: please create an issue at https://github.com/opik-ai/opik/issues and we will add support for it.",
            LOGGER,
        )
        return None
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
from typing import Optional, TYPE_CHECKING
|
|
2
2
|
|
|
3
|
-
from . import
|
|
3
|
+
from . import invoke_agent_decorator
|
|
4
|
+
from .converse import chunks_aggregator as converse_chunks_aggregator
|
|
5
|
+
from .converse import converse_decorator
|
|
6
|
+
|
|
7
|
+
from .invoke_model import invoke_model_decorator
|
|
8
|
+
from .invoke_model import chunks_aggregator as invoke_model_chunks_aggregator
|
|
9
|
+
|
|
4
10
|
|
|
5
11
|
if TYPE_CHECKING:
|
|
6
12
|
from mypy_boto3_bedrock_runtime.client import BedrockRuntimeClient
|
|
@@ -12,9 +18,15 @@ def track_bedrock(
|
|
|
12
18
|
) -> "BedrockRuntimeClient":
|
|
13
19
|
"""Adds Opik tracking to an AWS Bedrock client.
|
|
14
20
|
|
|
15
|
-
Tracks calls to `converse()` and `
|
|
21
|
+
Tracks calls to `converse()`, `converse_stream()`, `invoke_model()`, and `invoke_model_with_response_stream()` methods.
|
|
16
22
|
Can be used within other Opik-tracked functions.
|
|
17
23
|
|
|
24
|
+
Supported Model subproviders for InvokeModel API (both streaming and non-streaming):
|
|
25
|
+
- **Anthropic** (Claude)
|
|
26
|
+
- **Amazon** (Nova)
|
|
27
|
+
- **Meta** (Llama)
|
|
28
|
+
- **Mistral** (Pixtral)
|
|
29
|
+
|
|
18
30
|
Args:
|
|
19
31
|
client: An instance of an AWS Bedrock client (botocore.client.BedrockRuntime or botocore.client.AgentsforBedrockRuntime).
|
|
20
32
|
project_name: The name of the project to log data.
|
|
@@ -25,6 +37,7 @@ def track_bedrock(
|
|
|
25
37
|
|
|
26
38
|
decorator_for_converse = converse_decorator.BedrockConverseDecorator()
|
|
27
39
|
decorator_for_invoke_agent = invoke_agent_decorator.BedrockInvokeAgentDecorator()
|
|
40
|
+
decorator_for_invoke_model = invoke_model_decorator.BedrockInvokeModelDecorator()
|
|
28
41
|
|
|
29
42
|
if hasattr(client, "invoke_agent") and not hasattr(
|
|
30
43
|
client.invoke_agent, "opik_tracked"
|
|
@@ -33,7 +46,7 @@ def track_bedrock(
|
|
|
33
46
|
type="llm",
|
|
34
47
|
name="bedrock_invoke_agent",
|
|
35
48
|
project_name=project_name,
|
|
36
|
-
generations_aggregator=
|
|
49
|
+
generations_aggregator=converse_chunks_aggregator.aggregate_invoke_agent_chunks,
|
|
37
50
|
)
|
|
38
51
|
tracked_invoke_agent = wrapper(client.invoke_agent)
|
|
39
52
|
client.invoke_agent = tracked_invoke_agent
|
|
@@ -54,9 +67,34 @@ def track_bedrock(
|
|
|
54
67
|
type="llm",
|
|
55
68
|
name="bedrock_converse_stream",
|
|
56
69
|
project_name=project_name,
|
|
57
|
-
generations_aggregator=
|
|
70
|
+
generations_aggregator=converse_chunks_aggregator.aggregate_converse_stream_chunks,
|
|
58
71
|
)
|
|
59
72
|
tracked_converse_stream = stream_wrapper(client.converse_stream)
|
|
60
73
|
client.converse_stream = tracked_converse_stream
|
|
61
74
|
|
|
75
|
+
if hasattr(client, "invoke_model") and not hasattr(
|
|
76
|
+
client.invoke_model, "opik_tracked"
|
|
77
|
+
):
|
|
78
|
+
wrapper = decorator_for_invoke_model.track(
|
|
79
|
+
type="llm",
|
|
80
|
+
name="bedrock_invoke_model",
|
|
81
|
+
project_name=project_name,
|
|
82
|
+
)
|
|
83
|
+
tracked_invoke_model = wrapper(client.invoke_model)
|
|
84
|
+
client.invoke_model = tracked_invoke_model
|
|
85
|
+
|
|
86
|
+
if hasattr(client, "invoke_model_with_response_stream") and not hasattr(
|
|
87
|
+
client.invoke_model_with_response_stream, "opik_tracked"
|
|
88
|
+
):
|
|
89
|
+
stream_wrapper = decorator_for_invoke_model.track(
|
|
90
|
+
type="llm",
|
|
91
|
+
name="bedrock_invoke_model_stream",
|
|
92
|
+
project_name=project_name,
|
|
93
|
+
generations_aggregator=invoke_model_chunks_aggregator.aggregate_chunks_to_dataclass,
|
|
94
|
+
)
|
|
95
|
+
tracked_invoke_model_stream = stream_wrapper(
|
|
96
|
+
client.invoke_model_with_response_stream
|
|
97
|
+
)
|
|
98
|
+
client.invoke_model_with_response_stream = tracked_invoke_model_stream
|
|
99
|
+
|
|
62
100
|
return client
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from typing import Any, Dict, TypedDict
|
|
2
|
+
|
|
3
|
+
from botocore import eventstream
|
|
4
|
+
import botocore.response
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ConverseStreamOutput(TypedDict):
    """Typed shape of a streaming converse response.

    NOTE(review): presumably mirrors the dict returned by the Bedrock runtime
    client's ``converse_stream()`` call — confirm against the decorator that
    consumes it.
    """

    # Event stream object carrying the streamed response events.
    stream: eventstream.EventStream
    # Free-form metadata mapping attached to the response.
    ResponseMetadata: Dict[str, Any]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class InvokeModelOutput(TypedDict):
    """Typed shape of a non-streaming invoke-model response.

    NOTE(review): presumably mirrors the dict returned by the Bedrock runtime
    client's ``invoke_model()`` call — confirm against the decorator that
    consumes it.
    """

    # Response payload exposed as a botocore streaming body.
    body: botocore.response.StreamingBody
    # Free-form metadata mapping attached to the response.
    ResponseMetadata: Dict[str, Any]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class InvokeModelWithResponseStreamOutput(TypedDict):
    """Typed shape of a streaming invoke-model response.

    NOTE(review): presumably mirrors the dict returned by the Bedrock runtime
    client's ``invoke_model_with_response_stream()`` call — confirm against
    the decorator that consumes it.
    """

    # Unlike InvokeModelOutput, the body here is an event stream.
    body: eventstream.EventStream
    # Free-form metadata mapping attached to the response.
    ResponseMetadata: Dict[str, Any]
|
|
@@ -13,10 +13,9 @@ from typing import (
|
|
|
13
13
|
from typing_extensions import override
|
|
14
14
|
|
|
15
15
|
from opik.decorator import arguments_helpers, base_track_decorator
|
|
16
|
-
from opik.types import SpanType
|
|
16
|
+
from opik.types import SpanType
|
|
17
17
|
from opik.api_objects import span
|
|
18
18
|
import opik.jsonable_encoder as jsonable_encoder
|
|
19
|
-
import opik.llm_usage as llm_usage
|
|
20
19
|
import opik.dict_utils as dict_utils
|
|
21
20
|
|
|
22
21
|
LOGGER = logging.getLogger(__name__)
|
|
@@ -144,12 +143,6 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
144
143
|
input_dict["task"] = task_dict
|
|
145
144
|
name = f"Task: {task.name}"
|
|
146
145
|
|
|
147
|
-
elif name == "completion":
|
|
148
|
-
metadata["object_type"] = "completion"
|
|
149
|
-
input_dict = {"messages": kwargs.get("messages")}
|
|
150
|
-
span_type = "llm"
|
|
151
|
-
name = "llm call"
|
|
152
|
-
|
|
153
146
|
return input_dict, name, span_type
|
|
154
147
|
|
|
155
148
|
@override
|
|
@@ -161,38 +154,12 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
161
154
|
) -> arguments_helpers.EndSpanParameters:
|
|
162
155
|
object_type = None
|
|
163
156
|
metadata = {}
|
|
157
|
+
output_dict = {}
|
|
164
158
|
|
|
165
159
|
if current_span_data and current_span_data.metadata:
|
|
166
160
|
metadata = current_span_data.metadata
|
|
167
161
|
object_type = metadata.pop("object_type")
|
|
168
162
|
|
|
169
|
-
model, provider, output_dict, usage = self._parse_outputs(object_type, output)
|
|
170
|
-
|
|
171
|
-
result = arguments_helpers.EndSpanParameters(
|
|
172
|
-
output=output_dict,
|
|
173
|
-
usage=usage,
|
|
174
|
-
metadata=metadata,
|
|
175
|
-
model=model,
|
|
176
|
-
provider=provider,
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
return result
|
|
180
|
-
|
|
181
|
-
def _parse_outputs(
|
|
182
|
-
self,
|
|
183
|
-
object_type: Optional[str],
|
|
184
|
-
output: Any,
|
|
185
|
-
) -> Tuple[
|
|
186
|
-
Optional[str],
|
|
187
|
-
Optional[str],
|
|
188
|
-
Dict[str, Any],
|
|
189
|
-
Optional[llm_usage.OpikUsage],
|
|
190
|
-
]:
|
|
191
|
-
model = None
|
|
192
|
-
provider = None
|
|
193
|
-
usage = None
|
|
194
|
-
output_dict = {}
|
|
195
|
-
|
|
196
163
|
if object_type == "crew":
|
|
197
164
|
output_dict = jsonable_encoder.encode(output)
|
|
198
165
|
output_dict.pop("token_usage", None)
|
|
@@ -202,23 +169,13 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
202
169
|
output_dict = _encode_dict_and_keep_keys(
|
|
203
170
|
output, TASK_KWARGS_KEYS_TO_LOG_AS_OUTPUT
|
|
204
171
|
)
|
|
205
|
-
elif object_type == "completion":
|
|
206
|
-
output_dict = jsonable_encoder.encode(output)
|
|
207
|
-
if output_dict.get("usage", None) is not None:
|
|
208
|
-
usage = llm_usage.try_build_opik_usage_or_log_error(
|
|
209
|
-
provider=LLMProvider.OPENAI, # even if it's not openai, we know the format is openai-like
|
|
210
|
-
usage=output_dict["usage"],
|
|
211
|
-
logger=LOGGER,
|
|
212
|
-
error_message="Failed to log token usage from CrewAI LLM call",
|
|
213
|
-
)
|
|
214
|
-
else:
|
|
215
|
-
usage = None
|
|
216
|
-
model = output_dict.pop("model", None)
|
|
217
|
-
provider = (
|
|
218
|
-
"openai" if output_dict.get("object") == "chat.completion" else None
|
|
219
|
-
)
|
|
220
172
|
|
|
221
|
-
|
|
173
|
+
result = arguments_helpers.EndSpanParameters(
|
|
174
|
+
output=output_dict,
|
|
175
|
+
metadata=metadata,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
return result
|
|
222
179
|
|
|
223
180
|
@override
|
|
224
181
|
def _streams_handler(
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
+
import importlib.metadata
|
|
2
|
+
import logging
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
import crewai
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
import opik.semantic_version
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
from . import crewai_decorator, patchers
|
|
10
|
+
|
|
11
|
+
LOGGER = logging.getLogger(__name__)
|
|
8
12
|
|
|
9
13
|
|
|
10
14
|
def track_crewai(
|
|
11
15
|
project_name: Optional[str] = None,
|
|
16
|
+
crew: Optional[crewai.Crew] = None,
|
|
12
17
|
) -> None:
|
|
13
18
|
"""
|
|
14
19
|
Tracks CrewAI activities by enabling tracking decorators for various critical methods.
|
|
@@ -21,11 +26,8 @@ def track_crewai(
|
|
|
21
26
|
|
|
22
27
|
Parameters:
|
|
23
28
|
project_name: The name of the project to associate with the tracking.
|
|
29
|
+
crew: The Crew instance to track. Required for CrewAI v1.0.0+ to properly track LLM calls.
|
|
24
30
|
"""
|
|
25
|
-
global __IS_TRACKING_ENABLED
|
|
26
|
-
if __IS_TRACKING_ENABLED:
|
|
27
|
-
return
|
|
28
|
-
__IS_TRACKING_ENABLED = True
|
|
29
31
|
|
|
30
32
|
decorator_factory = crewai_decorator.CrewAITrackDecorator()
|
|
31
33
|
|
|
@@ -33,12 +35,31 @@ def track_crewai(
|
|
|
33
35
|
project_name=project_name,
|
|
34
36
|
)
|
|
35
37
|
|
|
36
|
-
import litellm
|
|
37
|
-
|
|
38
38
|
crewai.Crew.kickoff = crewai_wrapper(crewai.Crew.kickoff)
|
|
39
39
|
crewai.Crew.kickoff_for_each = crewai_wrapper(crewai.Crew.kickoff_for_each)
|
|
40
40
|
crewai.Agent.execute_task = crewai_wrapper(crewai.Agent.execute_task)
|
|
41
41
|
crewai.Task.execute_sync = crewai_wrapper(crewai.Task.execute_sync)
|
|
42
|
-
litellm.completion = crewai_wrapper(litellm.completion)
|
|
43
42
|
|
|
44
|
-
|
|
43
|
+
# Patch LiteLLM functions used by CrewAI
|
|
44
|
+
patchers.patch_litellm_completion(project_name=project_name)
|
|
45
|
+
|
|
46
|
+
# Patch Flow class (v1.0.0+)
|
|
47
|
+
patchers.patch_flow(project_name=project_name)
|
|
48
|
+
|
|
49
|
+
# Patch LLM clients used by CrewAI agents (v1.0.0+)
|
|
50
|
+
if crew is not None and is_crewai_v1():
|
|
51
|
+
patchers.patch_llm_client(crew, project_name)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def is_crewai_v1() -> bool:
    """
    Report whether the installed CrewAI distribution is version 1.0.0 or newer.

    The version is read from the installed package metadata for ``crewai``
    and compared against ``"1.0.0"`` using Opik's semantic-version helper.

    Returns:
        True if CrewAI v1.0.0+ is detected. False otherwise, including when
        the version cannot be determined or parsed for any reason.
    """
    try:
        installed_version = importlib.metadata.version("crewai")
        parsed_version = opik.semantic_version.SemanticVersion.parse(installed_version)
        return parsed_version >= "1.0.0"  # type: ignore
    except Exception:
        # Any failure (package missing, unparsable version, ...) means "not v1".
        return False
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patcher for CrewAI Flow class (v1.0.0+).
|
|
3
|
+
|
|
4
|
+
This module patches the Flow class to automatically track flow methods and execution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import functools
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import opik.decorator.tracker as opik_tracker
|
|
12
|
+
|
|
13
|
+
LOGGER = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def patch_flow(project_name: Optional[str] = None) -> None:
    """
    Install Opik tracking on the CrewAI Flow class.

    Applies the ``Flow.__init__`` patch first and the ``Flow.kickoff_async``
    patch second. Each of them does nothing when the Flow class is not
    available (CrewAI < v1.0.0).

    Args:
        project_name: The name of the project to associate with tracking.
    """
    for apply_patch in (_patch_flow_init, _patch_flow_kickoff_async):
        apply_patch(project_name)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _patch_flow_init(project_name: Optional[str] = None) -> None:
    """
    Wrap ``crewai.Flow.__init__`` so every new flow gets its methods tracked.

    After the original constructor runs, each entry of the instance's
    ``_methods`` mapping that is not already marked ``opik_tracked`` is
    replaced by an Opik-tracked version. Instrumentation errors are logged
    and never propagate out of the constructor.

    The patch is installed at most once: a ``_patched`` attribute on this
    function makes later calls return early, so the ``project_name`` from the
    first successful call is the one that stays in effect.

    If ``crewai`` or its Flow class is not available (CrewAI < v1.0.0), this
    function does nothing apart from a debug log.
    """
    try:
        import crewai

        if not hasattr(crewai, "Flow"):
            LOGGER.debug("CrewAI Flow class not available, skipping Flow patching")
            return

        if hasattr(_patch_flow_init, "_patched"):
            return

        unpatched_init = crewai.Flow.__init__

        @functools.wraps(unpatched_init)
        def _init_wrapper(self, *args, **kwargs) -> None:  # type: ignore
            unpatched_init(self, *args, **kwargs)

            try:
                registry = getattr(self, "_methods", {})
                # Iterate over a snapshot: entries are reassigned during the walk.
                for name, candidate in list(registry.items()):
                    if not getattr(candidate, "opik_tracked", False):
                        registry[name] = opik_tracker.track(
                            project_name=project_name,
                            tags=["crewai"],
                            metadata={"created_from": "crewai"},
                        )(candidate)
            except Exception:
                LOGGER.error(
                    "An error occurred during Opik instrumentation of CrewAI Flow",
                    exc_info=True,
                )

        crewai.Flow.__init__ = _init_wrapper  # type: ignore[assignment]

        _patch_flow_init._patched = True  # type: ignore[attr-defined]
    except (ImportError, AttributeError):
        LOGGER.debug(
            "CrewAI Flow class not available, skipping Flow patching", exc_info=True
        )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _patch_flow_kickoff_async(project_name: Optional[str] = None) -> None:
    """
    Wrap ``crewai.Flow.kickoff_async`` so each flow execution is tracked.

    On every call the replacement coroutine builds an Opik-tracked version of
    the original ``kickoff_async`` (named ``"Flow.kickoff_async"``) and awaits
    it with the caller's arguments.

    The patch is installed at most once: a ``_patched`` attribute on this
    function makes later calls return early.

    If ``crewai`` or its Flow class is not available (CrewAI < v1.0.0), this
    function does nothing apart from a debug log.
    """
    try:
        import crewai

        if not hasattr(crewai, "Flow"):
            LOGGER.debug(
                "CrewAI Flow class not available, skipping Flow.kickoff_async patching"
            )
            return

        already_patched = hasattr(_patch_flow_kickoff_async, "_patched")
        if already_patched:
            return

        # Only the async kickoff is patched; per the original author's note,
        # the sync version calls it internally.
        unpatched_kickoff_async = crewai.Flow.kickoff_async

        @functools.wraps(unpatched_kickoff_async)
        async def _kickoff_async_wrapper(self, *args, **kwargs):  # type: ignore
            track_kickoff = opik_tracker.track(
                project_name=project_name,
                tags=["crewai"],
                name="Flow.kickoff_async",
                metadata={"created_from": "crewai"},
            )
            tracked_kickoff = track_kickoff(unpatched_kickoff_async)
            return await tracked_kickoff(self, *args, **kwargs)

        crewai.Flow.kickoff_async = _kickoff_async_wrapper  # type: ignore[assignment]

        _patch_flow_kickoff_async._patched = True  # type: ignore[attr-defined]
    except (ImportError, AttributeError):
        LOGGER.debug(
            "CrewAI Flow class not available, skipping Flow.kickoff_async patching",
            exc_info=True,
        )
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patcher for LiteLLM completion functions used by CrewAI.
|
|
3
|
+
|
|
4
|
+
This module patches litellm.completion and litellm.acompletion with Opik tracking.
|
|
5
|
+
CrewAI v0.x uses LiteLLM internally for LLM calls.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import litellm
|
|
12
|
+
|
|
13
|
+
import opik.integrations.litellm
|
|
14
|
+
|
|
15
|
+
LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def patch_litellm_completion(project_name: Optional[str] = None) -> None:
    """
    Replace LiteLLM's completion entry points with Opik-tracked versions.

    Both ``litellm.completion`` and ``litellm.acompletion`` are rebound, in
    that order, each wrapped by its own ``track_completion`` decorator.

    Args:
        project_name: The name of the project to associate with tracking.
    """
    for function_name in ("completion", "acompletion"):
        track = opik.integrations.litellm.track_completion(project_name=project_name)
        untracked = getattr(litellm, function_name)
        setattr(litellm, function_name, track(untracked))
|