opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/__init__.py +19 -3
- opik/anonymizer/__init__.py +5 -0
- opik/anonymizer/anonymizer.py +12 -0
- opik/anonymizer/factory.py +80 -0
- opik/anonymizer/recursive_anonymizer.py +64 -0
- opik/anonymizer/rules.py +56 -0
- opik/anonymizer/rules_anonymizer.py +35 -0
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/data_helpers.py +79 -0
- opik/api_objects/dataset/dataset.py +64 -4
- opik/api_objects/dataset/rest_operations.py +11 -2
- opik/api_objects/experiment/experiment.py +57 -57
- opik/api_objects/experiment/experiment_item.py +2 -1
- opik/api_objects/experiment/experiments_client.py +64 -0
- opik/api_objects/experiment/helpers.py +35 -11
- opik/api_objects/experiment/rest_operations.py +65 -5
- opik/api_objects/helpers.py +8 -5
- opik/api_objects/local_recording.py +81 -0
- opik/api_objects/opik_client.py +600 -108
- opik/api_objects/opik_query_language.py +39 -5
- opik/api_objects/prompt/__init__.py +12 -2
- opik/api_objects/prompt/base_prompt.py +69 -0
- opik/api_objects/prompt/base_prompt_template.py +29 -0
- opik/api_objects/prompt/chat/__init__.py +1 -0
- opik/api_objects/prompt/chat/chat_prompt.py +210 -0
- opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
- opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
- opik/api_objects/prompt/client.py +189 -47
- opik/api_objects/prompt/text/__init__.py +1 -0
- opik/api_objects/prompt/text/prompt.py +174 -0
- opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
- opik/api_objects/prompt/types.py +23 -0
- opik/api_objects/search_helpers.py +89 -0
- opik/api_objects/span/span_data.py +35 -25
- opik/api_objects/threads/threads_client.py +39 -5
- opik/api_objects/trace/trace_client.py +52 -2
- opik/api_objects/trace/trace_data.py +15 -24
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/__init__.py +5 -0
- opik/cli/__main__.py +6 -0
- opik/cli/configure.py +66 -0
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/healthcheck.py +21 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +49 -0
- opik/cli/proxy.py +93 -0
- opik/cli/usage_report/__init__.py +16 -0
- opik/cli/usage_report/charts.py +783 -0
- opik/cli/usage_report/cli.py +274 -0
- opik/cli/usage_report/constants.py +9 -0
- opik/cli/usage_report/extraction.py +749 -0
- opik/cli/usage_report/pdf.py +244 -0
- opik/cli/usage_report/statistics.py +78 -0
- opik/cli/usage_report/utils.py +235 -0
- opik/config.py +13 -7
- opik/configurator/configure.py +17 -0
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +9 -1
- opik/decorator/base_track_decorator.py +205 -133
- opik/decorator/context_manager/span_context_manager.py +123 -0
- opik/decorator/context_manager/trace_context_manager.py +84 -0
- opik/decorator/opik_args/__init__.py +13 -0
- opik/decorator/opik_args/api_classes.py +71 -0
- opik/decorator/opik_args/helpers.py +120 -0
- opik/decorator/span_creation_handler.py +25 -6
- opik/dict_utils.py +3 -3
- opik/evaluation/__init__.py +13 -2
- opik/evaluation/engine/engine.py +272 -75
- opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
- opik/evaluation/engine/helpers.py +31 -6
- opik/evaluation/engine/metrics_evaluator.py +237 -0
- opik/evaluation/evaluation_result.py +168 -2
- opik/evaluation/evaluator.py +533 -62
- opik/evaluation/metrics/__init__.py +103 -4
- opik/evaluation/metrics/aggregated_metric.py +35 -6
- opik/evaluation/metrics/base_metric.py +1 -1
- opik/evaluation/metrics/conversation/__init__.py +48 -0
- opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
- opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
- opik/evaluation/metrics/conversation/helpers.py +14 -15
- opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
- opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
- opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
- opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
- opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
- opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
- opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
- opik/evaluation/metrics/conversation/types.py +4 -5
- opik/evaluation/metrics/conversation_types.py +9 -0
- opik/evaluation/metrics/heuristics/bertscore.py +107 -0
- opik/evaluation/metrics/heuristics/bleu.py +35 -15
- opik/evaluation/metrics/heuristics/chrf.py +127 -0
- opik/evaluation/metrics/heuristics/contains.py +47 -11
- opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
- opik/evaluation/metrics/heuristics/gleu.py +113 -0
- opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
- opik/evaluation/metrics/heuristics/meteor.py +119 -0
- opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
- opik/evaluation/metrics/heuristics/readability.py +129 -0
- opik/evaluation/metrics/heuristics/rouge.py +26 -9
- opik/evaluation/metrics/heuristics/spearman.py +88 -0
- opik/evaluation/metrics/heuristics/tone.py +155 -0
- opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
- opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
- opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
- opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
- opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
- opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
- opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
- opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
- opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
- opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
- opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
- opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
- opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/__init__.py +8 -0
- opik/evaluation/models/base_model.py +107 -1
- opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
- opik/evaluation/models/langchain/message_converters.py +97 -15
- opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
- opik/evaluation/models/litellm/util.py +125 -0
- opik/evaluation/models/litellm/warning_filters.py +16 -4
- opik/evaluation/models/model_capabilities.py +187 -0
- opik/evaluation/models/models_factory.py +25 -3
- opik/evaluation/preprocessing.py +92 -0
- opik/evaluation/report.py +70 -12
- opik/evaluation/rest_operations.py +49 -45
- opik/evaluation/samplers/__init__.py +4 -0
- opik/evaluation/samplers/base_dataset_sampler.py +40 -0
- opik/evaluation/samplers/random_dataset_sampler.py +48 -0
- opik/evaluation/score_statistics.py +66 -0
- opik/evaluation/scorers/__init__.py +4 -0
- opik/evaluation/scorers/scorer_function.py +55 -0
- opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
- opik/evaluation/test_case.py +3 -2
- opik/evaluation/test_result.py +1 -0
- opik/evaluation/threads/evaluator.py +31 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/evaluation/types.py +9 -1
- opik/exceptions.py +33 -0
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/hooks/__init__.py +23 -0
- opik/hooks/anonymizer_hook.py +36 -0
- opik/hooks/httpx_client_hook.py +112 -0
- opik/httpx_client.py +12 -9
- opik/id_helpers.py +18 -0
- opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
- opik/integrations/adk/helpers.py +16 -7
- opik/integrations/adk/legacy_opik_tracer.py +7 -4
- opik/integrations/adk/opik_tracer.py +14 -1
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
- opik/integrations/adk/recursive_callback_injector.py +4 -7
- opik/integrations/bedrock/converse/__init__.py +0 -0
- opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
- opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
- opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
- opik/integrations/bedrock/invoke_model/__init__.py +0 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
- opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
- opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
- opik/integrations/bedrock/invoke_model/response_types.py +34 -0
- opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
- opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
- opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
- opik/integrations/bedrock/opik_tracker.py +42 -4
- opik/integrations/bedrock/types.py +19 -0
- opik/integrations/crewai/crewai_decorator.py +8 -51
- opik/integrations/crewai/opik_tracker.py +31 -10
- opik/integrations/crewai/patchers/__init__.py +5 -0
- opik/integrations/crewai/patchers/flow.py +118 -0
- opik/integrations/crewai/patchers/litellm_completion.py +30 -0
- opik/integrations/crewai/patchers/llm_client.py +207 -0
- opik/integrations/dspy/callback.py +80 -17
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_connector.py +2 -2
- opik/integrations/haystack/opik_tracer.py +3 -7
- opik/integrations/langchain/__init__.py +3 -1
- opik/integrations/langchain/helpers.py +96 -0
- opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_encoder_extension.py +1 -1
- opik/integrations/langchain/opik_tracer.py +474 -229
- opik/integrations/litellm/__init__.py +5 -0
- opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
- opik/integrations/litellm/litellm_completion_decorator.py +242 -0
- opik/integrations/litellm/opik_tracker.py +43 -0
- opik/integrations/litellm/stream_patchers.py +151 -0
- opik/integrations/llama_index/callback.py +146 -107
- opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
- opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
- opik/integrations/openai/opik_tracker.py +1 -1
- opik/integrations/sagemaker/auth.py +5 -1
- opik/llm_usage/google_usage.py +3 -1
- opik/llm_usage/opik_usage.py +7 -8
- opik/llm_usage/opik_usage_factory.py +4 -2
- opik/logging_messages.py +6 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batch_manager_constuctors.py +10 -0
- opik/message_processing/batching/batchers.py +59 -27
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/__init__.py +0 -0
- opik/message_processing/emulation/emulator_message_processor.py +578 -0
- opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
- opik/message_processing/emulation/models.py +162 -0
- opik/message_processing/encoder_helpers.py +79 -0
- opik/message_processing/messages.py +56 -1
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/processors/message_processors.py +92 -0
- opik/message_processing/processors/message_processors_chain.py +96 -0
- opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
- opik/message_processing/queue_consumer.py +9 -3
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +43 -10
- opik/opik_context.py +16 -4
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +346 -15
- opik/rest_api/alerts/__init__.py +7 -0
- opik/rest_api/alerts/client.py +667 -0
- opik/rest_api/alerts/raw_client.py +1015 -0
- opik/rest_api/alerts/types/__init__.py +7 -0
- opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
- opik/rest_api/annotation_queues/__init__.py +4 -0
- opik/rest_api/annotation_queues/client.py +668 -0
- opik/rest_api/annotation_queues/raw_client.py +1019 -0
- opik/rest_api/automation_rule_evaluators/client.py +34 -2
- opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
- opik/rest_api/client.py +15 -0
- opik/rest_api/dashboards/__init__.py +4 -0
- opik/rest_api/dashboards/client.py +462 -0
- opik/rest_api/dashboards/raw_client.py +648 -0
- opik/rest_api/datasets/client.py +1310 -44
- opik/rest_api/datasets/raw_client.py +2269 -358
- opik/rest_api/experiments/__init__.py +2 -2
- opik/rest_api/experiments/client.py +191 -5
- opik/rest_api/experiments/raw_client.py +301 -7
- opik/rest_api/experiments/types/__init__.py +4 -1
- opik/rest_api/experiments/types/experiment_update_status.py +5 -0
- opik/rest_api/experiments/types/experiment_update_type.py +5 -0
- opik/rest_api/experiments/types/experiment_write_status.py +5 -0
- opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
- opik/rest_api/llm_provider_key/client.py +20 -0
- opik/rest_api/llm_provider_key/raw_client.py +20 -0
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
- opik/rest_api/manual_evaluation/__init__.py +4 -0
- opik/rest_api/manual_evaluation/client.py +347 -0
- opik/rest_api/manual_evaluation/raw_client.py +543 -0
- opik/rest_api/optimizations/client.py +145 -9
- opik/rest_api/optimizations/raw_client.py +237 -13
- opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
- opik/rest_api/prompts/__init__.py +2 -2
- opik/rest_api/prompts/client.py +227 -6
- opik/rest_api/prompts/raw_client.py +331 -2
- opik/rest_api/prompts/types/__init__.py +3 -1
- opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
- opik/rest_api/spans/__init__.py +0 -2
- opik/rest_api/spans/client.py +238 -76
- opik/rest_api/spans/raw_client.py +307 -95
- opik/rest_api/spans/types/__init__.py +0 -2
- opik/rest_api/traces/client.py +572 -161
- opik/rest_api/traces/raw_client.py +736 -229
- opik/rest_api/types/__init__.py +352 -17
- opik/rest_api/types/aggregation_data.py +1 -0
- opik/rest_api/types/alert.py +33 -0
- opik/rest_api/types/alert_alert_type.py +5 -0
- opik/rest_api/types/alert_page_public.py +24 -0
- opik/rest_api/types/alert_public.py +33 -0
- opik/rest_api/types/alert_public_alert_type.py +5 -0
- opik/rest_api/types/alert_trigger.py +27 -0
- opik/rest_api/types/alert_trigger_config.py +28 -0
- opik/rest_api/types/alert_trigger_config_public.py +28 -0
- opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_type.py +10 -0
- opik/rest_api/types/alert_trigger_config_write.py +22 -0
- opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
- opik/rest_api/types/alert_trigger_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_public.py +27 -0
- opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
- opik/rest_api/types/alert_trigger_write.py +23 -0
- opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
- opik/rest_api/types/alert_write.py +28 -0
- opik/rest_api/types/alert_write_alert_type.py +5 -0
- opik/rest_api/types/annotation_queue.py +42 -0
- opik/rest_api/types/annotation_queue_batch.py +27 -0
- opik/rest_api/types/annotation_queue_item_ids.py +19 -0
- opik/rest_api/types/annotation_queue_page_public.py +28 -0
- opik/rest_api/types/annotation_queue_public.py +38 -0
- opik/rest_api/types/annotation_queue_public_scope.py +5 -0
- opik/rest_api/types/annotation_queue_reviewer.py +20 -0
- opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
- opik/rest_api/types/annotation_queue_scope.py +5 -0
- opik/rest_api/types/annotation_queue_write.py +31 -0
- opik/rest_api/types/annotation_queue_write_scope.py +5 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +62 -2
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
- opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
- opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
- opik/rest_api/types/boolean_feedback_definition.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
- opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
- opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
- opik/rest_api/types/boolean_feedback_detail.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
- opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
- opik/rest_api/types/dashboard_page_public.py +24 -0
- opik/rest_api/types/dashboard_public.py +30 -0
- opik/rest_api/types/dataset.py +4 -0
- opik/rest_api/types/dataset_expansion.py +42 -0
- opik/rest_api/types/dataset_expansion_response.py +39 -0
- opik/rest_api/types/dataset_item.py +2 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +2 -0
- opik/rest_api/types/dataset_item_filter.py +27 -0
- opik/rest_api/types/dataset_item_filter_operator.py +21 -0
- opik/rest_api/types/dataset_item_page_compare.py +5 -0
- opik/rest_api/types/dataset_item_page_public.py +5 -0
- opik/rest_api/types/dataset_item_public.py +2 -0
- opik/rest_api/types/dataset_item_update.py +39 -0
- opik/rest_api/types/dataset_item_write.py +1 -0
- opik/rest_api/types/dataset_public.py +4 -0
- opik/rest_api/types/dataset_public_status.py +5 -0
- opik/rest_api/types/dataset_status.py +5 -0
- opik/rest_api/types/dataset_version_diff.py +22 -0
- opik/rest_api/types/dataset_version_diff_stats.py +24 -0
- opik/rest_api/types/dataset_version_page_public.py +23 -0
- opik/rest_api/types/dataset_version_public.py +59 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +7 -2
- opik/rest_api/types/experiment_group_response.py +2 -0
- opik/rest_api/types/experiment_public.py +7 -2
- opik/rest_api/types/experiment_public_status.py +5 -0
- opik/rest_api/types/experiment_score.py +20 -0
- opik/rest_api/types/experiment_score_public.py +20 -0
- opik/rest_api/types/experiment_score_write.py +20 -0
- opik/rest_api/types/experiment_status.py +5 -0
- opik/rest_api/types/feedback.py +25 -1
- opik/rest_api/types/feedback_create.py +20 -1
- opik/rest_api/types/feedback_object_public.py +27 -1
- opik/rest_api/types/feedback_public.py +25 -1
- opik/rest_api/types/feedback_score_batch_item.py +2 -1
- opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
- opik/rest_api/types/feedback_score_public.py +4 -0
- opik/rest_api/types/feedback_update.py +20 -1
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/group_detail.py +19 -0
- opik/rest_api/types/group_details.py +20 -0
- opik/rest_api/types/guardrail.py +1 -0
- opik/rest_api/types/guardrail_write.py +1 -0
- opik/rest_api/types/ids_holder.py +19 -0
- opik/rest_api/types/image_url.py +20 -0
- opik/rest_api/types/image_url_public.py +20 -0
- opik/rest_api/types/image_url_write.py +20 -0
- opik/rest_api/types/llm_as_judge_message.py +5 -1
- opik/rest_api/types/llm_as_judge_message_content.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
- opik/rest_api/types/llm_as_judge_message_public.py +5 -1
- opik/rest_api/types/llm_as_judge_message_write.py +5 -1
- opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
- opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
- opik/rest_api/types/manual_evaluation_request.py +38 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
- opik/rest_api/types/manual_evaluation_response.py +27 -0
- opik/rest_api/types/optimization.py +4 -2
- opik/rest_api/types/optimization_public.py +4 -2
- opik/rest_api/types/optimization_public_status.py +3 -1
- opik/rest_api/types/optimization_status.py +3 -1
- opik/rest_api/types/optimization_studio_config.py +27 -0
- opik/rest_api/types/optimization_studio_config_public.py +27 -0
- opik/rest_api/types/optimization_studio_config_write.py +27 -0
- opik/rest_api/types/optimization_studio_log.py +22 -0
- opik/rest_api/types/optimization_write.py +4 -2
- opik/rest_api/types/optimization_write_status.py +3 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt.py +6 -0
- opik/rest_api/types/prompt_detail.py +6 -0
- opik/rest_api/types/prompt_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_public.py +6 -0
- opik/rest_api/types/prompt_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_template_structure.py +5 -0
- opik/rest_api/types/prompt_version.py +3 -0
- opik/rest_api/types/prompt_version_detail.py +3 -0
- opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_link.py +1 -0
- opik/rest_api/types/prompt_version_link_public.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +3 -0
- opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_template_structure.py +5 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +9 -0
- opik/rest_api/types/provider_api_key_provider.py +1 -1
- opik/rest_api/types/provider_api_key_public.py +9 -0
- opik/rest_api/types/provider_api_key_public_provider.py +1 -1
- opik/rest_api/types/score_name.py +1 -0
- opik/rest_api/types/service_toggles_config.py +18 -0
- opik/rest_api/types/span.py +1 -2
- opik/rest_api/types/span_enrichment_options.py +31 -0
- opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/span_filter.py +23 -0
- opik/rest_api/types/span_filter_operator.py +21 -0
- opik/rest_api/types/span_filter_write.py +23 -0
- opik/rest_api/types/span_filter_write_operator.py +21 -0
- opik/rest_api/types/span_llm_as_judge_code.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
- opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
- opik/rest_api/types/span_public.py +1 -2
- opik/rest_api/types/span_update.py +46 -0
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/rest_api/types/span_write.py +1 -2
- opik/rest_api/types/studio_evaluation.py +20 -0
- opik/rest_api/types/studio_evaluation_public.py +20 -0
- opik/rest_api/types/studio_evaluation_write.py +20 -0
- opik/rest_api/types/studio_llm_model.py +21 -0
- opik/rest_api/types/studio_llm_model_public.py +21 -0
- opik/rest_api/types/studio_llm_model_write.py +21 -0
- opik/rest_api/types/studio_message.py +20 -0
- opik/rest_api/types/studio_message_public.py +20 -0
- opik/rest_api/types/studio_message_write.py +20 -0
- opik/rest_api/types/studio_metric.py +21 -0
- opik/rest_api/types/studio_metric_public.py +21 -0
- opik/rest_api/types/studio_metric_write.py +21 -0
- opik/rest_api/types/studio_optimizer.py +21 -0
- opik/rest_api/types/studio_optimizer_public.py +21 -0
- opik/rest_api/types/studio_optimizer_write.py +21 -0
- opik/rest_api/types/studio_prompt.py +20 -0
- opik/rest_api/types/studio_prompt_public.py +20 -0
- opik/rest_api/types/studio_prompt_write.py +20 -0
- opik/rest_api/types/trace.py +11 -2
- opik/rest_api/types/trace_enrichment_options.py +32 -0
- opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
- opik/rest_api/types/trace_filter.py +23 -0
- opik/rest_api/types/trace_filter_operator.py +21 -0
- opik/rest_api/types/trace_filter_write.py +23 -0
- opik/rest_api/types/trace_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_public.py +11 -2
- opik/rest_api/types/trace_thread_filter_write.py +23 -0
- opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
- opik/rest_api/types/trace_thread_identifier.py +1 -0
- opik/rest_api/types/trace_update.py +39 -0
- opik/rest_api/types/trace_write.py +1 -2
- opik/rest_api/types/value_entry.py +2 -0
- opik/rest_api/types/value_entry_compare.py +2 -0
- opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
- opik/rest_api/types/value_entry_public.py +2 -0
- opik/rest_api/types/video_url.py +19 -0
- opik/rest_api/types/video_url_public.py +19 -0
- opik/rest_api/types/video_url_write.py +19 -0
- opik/rest_api/types/webhook.py +28 -0
- opik/rest_api/types/webhook_examples.py +19 -0
- opik/rest_api/types/webhook_public.py +28 -0
- opik/rest_api/types/webhook_test_result.py +23 -0
- opik/rest_api/types/webhook_test_result_status.py +5 -0
- opik/rest_api/types/webhook_write.py +23 -0
- opik/rest_api/types/welcome_wizard_tracking.py +22 -0
- opik/rest_api/types/workspace_configuration.py +5 -0
- opik/rest_api/welcome_wizard/__init__.py +4 -0
- opik/rest_api/welcome_wizard/client.py +195 -0
- opik/rest_api/welcome_wizard/raw_client.py +208 -0
- opik/rest_api/workspaces/client.py +14 -2
- opik/rest_api/workspaces/raw_client.py +10 -0
- opik/s3_httpx_client.py +14 -1
- opik/simulation/__init__.py +6 -0
- opik/simulation/simulated_user.py +99 -0
- opik/simulation/simulator.py +108 -0
- opik/synchronization.py +5 -6
- opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- opik-1.9.71.dist-info/METADATA +370 -0
- opik-1.9.71.dist-info/RECORD +1110 -0
- opik/api_objects/prompt/prompt.py +0 -112
- opik/cli.py +0 -193
- opik/hooks.py +0 -13
- opik/integrations/bedrock/chunks_aggregator.py +0 -55
- opik/integrations/bedrock/helpers.py +0 -8
- opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
- opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
- opik-1.8.39.dist-info/METADATA +0 -339
- opik-1.8.39.dist-info/RECORD +0 -790
- /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
- /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
- /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
- /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
- /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
- {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict, List
|
|
3
|
+
|
|
4
|
+
LOGGER = logging.getLogger(__name__)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _handle_message_start(event: Dict[str, Any], result: Dict[str, Any]) -> None:
|
|
8
|
+
"""Extract role from messageStart event."""
|
|
9
|
+
message_start = event.get("messageStart")
|
|
10
|
+
if isinstance(message_start, dict):
|
|
11
|
+
role = message_start.get("role")
|
|
12
|
+
if role:
|
|
13
|
+
result["output"]["message"]["role"] = role
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _handle_content_block_delta(event: Dict[str, Any], result: Dict[str, Any]) -> None:
|
|
17
|
+
"""
|
|
18
|
+
Extract content from contentBlockDelta event.
|
|
19
|
+
|
|
20
|
+
Handles multiple delta types:
|
|
21
|
+
- delta.text: Regular text streaming
|
|
22
|
+
- delta.toolUse: Structured output / tool calls (Issue #3829)
|
|
23
|
+
"""
|
|
24
|
+
content_block_delta = event.get("contentBlockDelta")
|
|
25
|
+
if not isinstance(content_block_delta, dict):
|
|
26
|
+
return
|
|
27
|
+
|
|
28
|
+
delta = content_block_delta.get("delta")
|
|
29
|
+
if not isinstance(delta, dict):
|
|
30
|
+
return
|
|
31
|
+
|
|
32
|
+
content = result["output"]["message"]["content"][0]
|
|
33
|
+
|
|
34
|
+
# Handle regular text streaming
|
|
35
|
+
if "text" in delta:
|
|
36
|
+
content["text"] += delta["text"]
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
# Handle structured output / tool use (Issue #3829)
|
|
40
|
+
# Ref: https://github.com/comet-ml/opik/issues/3829
|
|
41
|
+
if "toolUse" in delta:
|
|
42
|
+
if "toolUse" not in content:
|
|
43
|
+
content["toolUse"] = {}
|
|
44
|
+
content["toolUse"].update(delta["toolUse"])
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
# Log other delta types for future compatibility
|
|
48
|
+
LOGGER.debug("Unknown delta type in contentBlockDelta: %s", list(delta.keys()))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _handle_message_stop(event: Dict[str, Any], result: Dict[str, Any]) -> None:
|
|
52
|
+
"""Extract stopReason from messageStop event."""
|
|
53
|
+
message_stop = event.get("messageStop")
|
|
54
|
+
if isinstance(message_stop, dict):
|
|
55
|
+
stop_reason = message_stop.get("stopReason")
|
|
56
|
+
if stop_reason:
|
|
57
|
+
result["stopReason"] = stop_reason
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _handle_metadata(event: Dict[str, Any], result: Dict[str, Any]) -> None:
|
|
61
|
+
"""Extract usage and metrics from metadata event."""
|
|
62
|
+
metadata = event.get("metadata")
|
|
63
|
+
if not isinstance(metadata, dict):
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
# Extract usage information
|
|
67
|
+
if "usage" in metadata:
|
|
68
|
+
result["usage"] = metadata["usage"]
|
|
69
|
+
|
|
70
|
+
# Extract metrics information
|
|
71
|
+
if "metrics" in metadata:
|
|
72
|
+
metrics = metadata["metrics"]
|
|
73
|
+
if isinstance(metrics, dict) and "latencyMs" in metrics:
|
|
74
|
+
result["metrics"] = {"latencyMs": metrics["latencyMs"]}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def aggregate_converse_stream_chunks(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Collapse the events of a Bedrock converse_stream call into one response.

    Each dictionary event is inspected for the keys below and routed to the
    matching private handler; events without any of these keys (for example
    contentBlockStart / contentBlockStop) leave the result unchanged:

    - messageStart: role of the message
    - contentBlockDelta: incremental content (delta.text, or delta.toolUse for
      tool use / structured output, Issue #3829)
    - messageStop: stopReason (e.g., "end_turn", "stop_sequence")
    - metadata: usage stats and latency metric

    Non-dictionary items are skipped with a debug log. An exception raised
    while handling one event is logged as a warning and aggregation continues
    with the next event, so provider-specific variations (e.g. DeepSeek R1,
    OPIK-2910) do not abort the whole aggregation.

    References:
    - AWS Bedrock Converse API: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html
    - Streaming Events: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ConverseStreamOutput.html
    - ContentBlockDelta: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ContentBlockDelta.html
    - Tool Use Guide: https://docs.aws.amazon.com/bedrock/latest/userguide/tool-use.html
    - DeepSeek R1 Issue: https://comet-ml.atlassian.net/browse/OPIK-2910
    - Tool Use Issue: https://github.com/comet-ml/opik/issues/3829

    Args:
        items: List of streaming event dictionaries from Bedrock converse_stream

    Returns:
        Aggregated response dictionary with structure:
        {
            "output": {
                "message": {
                    "role": "assistant",
                    "content": [{"text": "aggregated text"}]
                }
            },
            "stopReason": "end_turn",  # Optional
            "usage": {...},  # Optional
            "metrics": {"latencyMs": ...}  # Optional
        }
    """
    # Start from an empty assistant message; handlers fill it in place.
    message: Dict[str, Any] = {"role": "assistant", "content": [{"text": ""}]}
    result: Dict[str, Any] = {"output": {"message": message}}

    for event in items:
        if isinstance(event, dict):
            try:
                # One event may carry several of these keys, so every key is
                # checked independently rather than with elif.
                if "messageStart" in event:
                    _handle_message_start(event, result)
                if "contentBlockDelta" in event:
                    _handle_content_block_delta(event, result)
                if "messageStop" in event:
                    _handle_message_stop(event, result)
                if "metadata" in event:
                    _handle_metadata(event, result)
            except Exception as error:
                LOGGER.warning(
                    "Unexpected error processing event: %s. Event: %s",
                    str(error),
                    event,
                    exc_info=True,
                )
        else:
            LOGGER.debug("Skipping non-dictionary event: %s", type(event))

    return result
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def aggregate_invoke_agent_chunks(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Aggregate streaming chunks from AWS Bedrock invoke_agent API.

    Note: The implementation uses a simplified approach as the completion payload
    only contains chunks without additional metadata (as of implementation date).

    Items that are not dictionaries, or that lack a dictionary "chunk" entry
    with a "bytes" value, are ignored. All byte payloads are joined first and
    decoded once, so a multi-byte UTF-8 character split across two chunks is
    decoded correctly.

    Reference:
    - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/invoke_agent.html

    Args:
        items: List of chunk event dictionaries from invoke_agent stream

    Returns:
        Aggregated response dictionary with decoded text output

    Raises:
        UnicodeDecodeError: If the joined payload is not valid UTF-8.
    """
    payloads = [
        item["chunk"]["bytes"]
        for item in items
        if isinstance(item, dict)
        and isinstance(item.get("chunk"), dict)
        and "bytes" in item["chunk"]
    ]

    # Single join instead of repeated "+=": avoids re-copying the accumulated
    # buffer for every chunk.
    return {"output": b"".join(payloads).decode("utf-8")}
|
|
@@ -8,7 +8,8 @@ import opik.llm_usage as llm_usage
|
|
|
8
8
|
from opik.api_objects import span
|
|
9
9
|
from opik.decorator import arguments_helpers, base_track_decorator
|
|
10
10
|
|
|
11
|
-
from . import
|
|
11
|
+
from . import stream_wrappers
|
|
12
|
+
from .. import types
|
|
12
13
|
|
|
13
14
|
LOGGER = logging.getLogger(__name__)
|
|
14
15
|
|
|
@@ -91,7 +92,7 @@ class BedrockConverseDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
91
92
|
capture_output: bool,
|
|
92
93
|
generations_aggregator: Optional[Callable[[List[Any]], Any]],
|
|
93
94
|
) -> Union[
|
|
94
|
-
|
|
95
|
+
types.ConverseStreamOutput,
|
|
95
96
|
None,
|
|
96
97
|
]:
|
|
97
98
|
DECORATED_FUNCTION_IS_NOT_EXPECTED_TO_RETURN_GENERATOR = (
|
|
@@ -117,7 +118,7 @@ class BedrockConverseDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
117
118
|
)
|
|
118
119
|
|
|
119
120
|
output["stream"] = wrapped_stream
|
|
120
|
-
return cast(
|
|
121
|
+
return cast(types.ConverseStreamOutput, output)
|
|
121
122
|
|
|
122
123
|
STREAM_NOT_FOUND = None
|
|
123
124
|
|
|
@@ -6,7 +6,8 @@ import opik.dict_utils as dict_utils
|
|
|
6
6
|
from opik.api_objects import span
|
|
7
7
|
from opik.decorator import arguments_helpers, base_track_decorator
|
|
8
8
|
|
|
9
|
-
from . import
|
|
9
|
+
from . import types
|
|
10
|
+
from .converse import stream_wrappers as converse_stream_wrappers
|
|
10
11
|
|
|
11
12
|
LOGGER = logging.getLogger(__name__)
|
|
12
13
|
|
|
@@ -77,7 +78,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
77
78
|
capture_output: bool,
|
|
78
79
|
generations_aggregator: Optional[Callable[[List[Any]], Any]],
|
|
79
80
|
) -> Union[
|
|
80
|
-
|
|
81
|
+
types.ConverseStreamOutput,
|
|
81
82
|
None,
|
|
82
83
|
]:
|
|
83
84
|
DECORATED_FUNCTION_IS_NOT_EXPECTED_TO_RETURN_GENERATOR = (
|
|
@@ -92,7 +93,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
92
93
|
if isinstance(output, dict) and "completion" in output:
|
|
93
94
|
span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()
|
|
94
95
|
|
|
95
|
-
wrapped_stream =
|
|
96
|
+
wrapped_stream = converse_stream_wrappers.wrap_stream(
|
|
96
97
|
stream=output["completion"],
|
|
97
98
|
capture_output=capture_output,
|
|
98
99
|
span_to_end=span_to_end,
|
|
@@ -103,7 +104,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
|
|
|
103
104
|
)
|
|
104
105
|
|
|
105
106
|
output["completion"] = wrapped_stream
|
|
106
|
-
return cast(
|
|
107
|
+
return cast(types.ConverseStreamOutput, output)
|
|
107
108
|
|
|
108
109
|
STREAM_NOT_FOUND = None
|
|
109
110
|
|
|
File without changes
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bedrock invoke_model_with_response_stream chunk aggregator.
|
|
3
|
+
|
|
4
|
+
DISCLAIMER: This package was generated with AI assistance.
|
|
5
|
+
|
|
6
|
+
This package provides a modular, extensible architecture for aggregating
|
|
7
|
+
streaming chunks from different Bedrock model formats (Claude, Nova, etc.).
|
|
8
|
+
|
|
9
|
+
## Public API
|
|
10
|
+
|
|
11
|
+
The main public function is `aggregate_chunks_to_dataclass()` which returns a
|
|
12
|
+
structured `BedrockAggregatedResponse` dataclass for type safety.
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from opik.integrations.bedrock.invoke_model.chunks_aggregator import aggregate_chunks_to_dataclass
|
|
16
|
+
|
|
17
|
+
# Create typed aggregated response
|
|
18
|
+
response = aggregate_chunks_to_dataclass(chunks)
|
|
19
|
+
|
|
20
|
+
# Access native format and usage
|
|
21
|
+
native_output = response.native_response # Provider-specific format
|
|
22
|
+
usage = response.usage # Bedrock format: {inputTokens, outputTokens, totalTokens}
|
|
23
|
+
|
|
24
|
+
# Use in span logging
|
|
25
|
+
output = response.to_output_format() # {"body": native_response}
|
|
26
|
+
metadata = response.to_metadata_format() # {"created_from": "bedrock", ...}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Each format aggregator returns the response in its native structure,
|
|
30
|
+
with usage standardized to Bedrock format (camelCase: inputTokens, outputTokens, totalTokens).
|
|
31
|
+
|
|
32
|
+
## Architecture
|
|
33
|
+
|
|
34
|
+
- `base`: Base types and protocols
|
|
35
|
+
- `claude`: Claude/Anthropic format aggregator
|
|
36
|
+
- `llama`: Meta Llama format aggregator
|
|
37
|
+
- `mistral`: Mistral/Pixtral format aggregator
|
|
38
|
+
- `nova`: Amazon Nova format aggregator
|
|
39
|
+
- `format_detector`: Format detection and aggregator registry
|
|
40
|
+
- `response_types`: Dataclass definitions for structured responses
|
|
41
|
+
|
|
42
|
+
## Adding New Formats
|
|
43
|
+
|
|
44
|
+
To add support for a new model format:
|
|
45
|
+
|
|
46
|
+
1. Create a new aggregator module (e.g., `titan.py`):
|
|
47
|
+
```python
|
|
48
|
+
class TitanAggregator:
|
|
49
|
+
def aggregate(self, items) -> Dict[str, Any]:
|
|
50
|
+
# Return Titan's native structure with Bedrock usage
|
|
51
|
+
return {
|
|
52
|
+
"outputText": "...",
|
|
53
|
+
"usage": {"inputTokens": 10, "outputTokens": 20, "totalTokens": 30}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
2. Add detection and registration in `format_detector.py`:
|
|
58
|
+
```python
|
|
59
|
+
from . import titan
|
|
60
|
+
|
|
61
|
+
def _is_titan_format(chunk_data):
|
|
62
|
+
return "titan_specific_field" in chunk_data
|
|
63
|
+
|
|
64
|
+
# Add to _DETECTORS registry (detectors run in insertion order, so this entry is checked after the generic "claude" detector)
|
|
65
|
+
_DETECTORS["titan"] = _is_titan_format
|
|
66
|
+
|
|
67
|
+
# Add to _AGGREGATORS registry
|
|
68
|
+
_AGGREGATORS["titan"] = titan.TitanAggregator()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The new format will be automatically detected and used.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
from .api import aggregate_chunks_to_dataclass
|
|
75
|
+
|
|
76
|
+
__all__ = [
|
|
77
|
+
"aggregate_chunks_to_dataclass",
|
|
78
|
+
]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""
|
|
2
|
+
API functions for Bedrock chunk aggregation.
|
|
3
|
+
|
|
4
|
+
This module contains the main public functions for aggregating streaming chunks
|
|
5
|
+
from different Bedrock model formats into structured responses.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List
|
|
9
|
+
|
|
10
|
+
from . import format_detector
|
|
11
|
+
from .. import response_types
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def aggregate_chunks_to_dataclass(
    items: List[Dict[str, Any]],
) -> response_types.BedrockAggregatedResponse:
    """
    Aggregate raw stream chunks into a BedrockAggregatedResponse.

    The chunk format is detected, the matching aggregator is run, and its
    output dictionary is split into the "usage" entry and everything else
    (the provider-native response).

    Args:
        items: List of chunk items from the event stream

    Returns:
        BedrockAggregatedResponse whose usage defaults to an empty dict when
        the aggregator reports none, and whose response_metadata is empty
        (response_metadata will be set by stream wrapper).
    """
    aggregator = format_detector.get_aggregator(format_detector.detect_format(items))

    # Work on a shallow copy so removing "usage" never touches the
    # aggregator's own dictionary.
    native_response = dict(aggregator.aggregate(items))
    usage = native_response.pop("usage", {})

    return response_types.BedrockAggregatedResponse(
        native_response=native_response, usage=usage, response_metadata={}
    )
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Base types and protocols for chunk aggregators."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ChunkAggregator(Protocol):
    """
    Protocol for chunk aggregators.

    Declares the single ``aggregate`` method that every format-specific
    aggregator is expected to provide.
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Aggregate streaming chunks in the format native to the provider.

        The returned dictionary contains:
        - Provider-specific response structure (e.g., Claude's content blocks, Nova's output)
        - "usage" field with Bedrock format: {inputTokens, outputTokens, totalTokens}

        Args:
            items: List of chunk items from the event stream

        Returns:
            Dict with provider-native structure and Bedrock-standardized usage
        """
        # Protocol stub: no behavior here, implementations supply the body.
        ...
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Claude/Anthropic chunk aggregator."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Any, Dict, List
|
|
6
|
+
|
|
7
|
+
from .. import usage_converters
|
|
8
|
+
from .base import ChunkAggregator
|
|
9
|
+
|
|
10
|
+
LOGGER = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ClaudeAggregator(ChunkAggregator):
    """
    Aggregator for Claude/Anthropic streaming format.

    Returns Claude's native message format with content blocks,
    with usage standardized to Bedrock format (camelCase).

    Claude chunk structure uses snake_case:
    - message_start: Contains role and initial usage
    - content_block_start: Content block metadata
    - content_block_delta: Text chunks in delta.text
    - content_block_stop: End of content block
    - message_delta: Stop reason and updated usage
    - message_stop: amazon-bedrock-invocationMetrics
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Aggregate Claude chunks into native Claude message structure with Bedrock usage.

        Only text deltas are accumulated; a text block is closed on each
        content_block_stop. Token counts come from message_start, are updated
        by message_delta, and are finally overridden by the
        amazon-bedrock-invocationMetrics of message_stop when present.
        Chunks that cannot be decoded or do not have the expected shape are
        skipped with a debug log instead of aborting the aggregation.

        Args:
            items: List of chunk items from the event stream; items without a
                "chunk" entry are ignored.

        Returns:
            Dict with "role", "content" (list of text blocks), "stop_reason"
            and "usage" (converted by usage_converters.anthropic_to_bedrock_usage).
        """
        LOGGER.debug("Claude aggregator processing %d items", len(items))

        content_blocks = []
        current_text = ""
        role = "assistant"
        stop_reason = None
        input_tokens = 0
        output_tokens = 0

        for item in items:
            if "chunk" not in item:
                continue

            try:
                chunk_data = json.loads(item["chunk"]["bytes"])
                chunk_type = chunk_data.get("type", "")

                if chunk_type == "message_start":
                    message = chunk_data.get("message", {})
                    role = message.get("role", "assistant")
                    usage = message.get("usage", {})
                    input_tokens = usage.get("input_tokens", 0)
                    output_tokens = usage.get("output_tokens", 0)
                    LOGGER.debug(
                        "Claude message_start: input_tokens=%d, output_tokens=%d",
                        input_tokens,
                        output_tokens,
                    )

                elif chunk_type == "content_block_delta":
                    delta = chunk_data.get("delta", {})
                    if "text" in delta:
                        current_text += delta["text"]

                elif chunk_type == "content_block_stop":
                    # Close the current text block, if any text was collected.
                    if current_text:
                        content_blocks.append({"type": "text", "text": current_text})
                        current_text = ""

                elif chunk_type == "message_delta":
                    delta = chunk_data.get("delta", {})
                    if "stop_reason" in delta:
                        stop_reason = delta["stop_reason"]
                    usage = chunk_data.get("usage", {})
                    if "output_tokens" in usage:
                        output_tokens = usage["output_tokens"]
                        LOGGER.debug(
                            "Claude message_delta: output_tokens=%d", output_tokens
                        )

                elif chunk_type == "message_stop":
                    metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
                    if metrics:
                        # Bedrock's own counters take precedence when provided.
                        input_tokens = metrics.get("inputTokenCount", input_tokens)
                        output_tokens = metrics.get("outputTokenCount", output_tokens)
                        LOGGER.debug(
                            "Claude bedrock metrics: input=%d, output=%d",
                            input_tokens,
                            output_tokens,
                        )

            except (ValueError, KeyError, TypeError, AttributeError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError
                # (undecodable bytes); AttributeError covers payloads that are
                # valid JSON but not objects, or whose nested fields are null.
                LOGGER.debug("Claude aggregator error processing chunk: %s", e)
                continue

        # Add any remaining text
        if current_text:
            content_blocks.append({"type": "text", "text": current_text})

        total_text = "".join(
            block["text"] for block in content_blocks if block["type"] == "text"
        )
        LOGGER.debug(
            "Claude aggregated: %d chars, input_tokens=%d, output_tokens=%d",
            len(total_text),
            input_tokens,
            output_tokens,
        )

        # Convert to Bedrock usage format using shared converter
        bedrock_usage = usage_converters.anthropic_to_bedrock_usage(
            {"input_tokens": input_tokens, "output_tokens": output_tokens}
        )

        # Return Claude's native format with Bedrock usage
        return {
            "role": role,
            "content": content_blocks,
            "stop_reason": stop_reason,
            "usage": bedrock_usage,
        }
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Format detection and aggregator registry."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import Any, Callable, Dict, List
|
|
5
|
+
|
|
6
|
+
from .base import ChunkAggregator
|
|
7
|
+
from . import claude
|
|
8
|
+
from . import llama
|
|
9
|
+
from . import mistral
|
|
10
|
+
from . import nova
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Format detection functions
# A detector receives one JSON-decoded chunk payload and reports whether the
# payload matches its format.
FormatDetector = Callable[[Dict[str, Any]], bool]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _is_nova_format(chunk_data: Dict[str, Any]) -> bool:
|
|
18
|
+
"""Check if chunk is Nova format (camelCase fields)."""
|
|
19
|
+
return "contentBlockDelta" in chunk_data or "messageStart" in chunk_data
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _is_claude_format(chunk_data: Dict[str, Any]) -> bool:
|
|
23
|
+
"""Check if chunk is Claude format (snake_case fields with type)."""
|
|
24
|
+
return "type" in chunk_data
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _is_llama_format(chunk_data: Dict[str, Any]) -> bool:
|
|
28
|
+
"""Check if chunk is Llama format (generation field)."""
|
|
29
|
+
return "generation" in chunk_data
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _is_mistral_format(chunk_data: Dict[str, Any]) -> bool:
|
|
33
|
+
"""Check if chunk is Mistral/Pixtral format (OpenAI-like with choices and object)."""
|
|
34
|
+
return (
|
|
35
|
+
"object" in chunk_data
|
|
36
|
+
and chunk_data["object"] == "chat.completion.chunk"
|
|
37
|
+
and "choices" in chunk_data
|
|
38
|
+
and chunk_data["choices"]
|
|
39
|
+
and "message" in chunk_data["choices"][0]
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Format detectors registry (ordered by specificity - most specific first)
# detect_format() walks this dict in insertion order and returns the first
# matching name, so the generic "claude" detector must stay last.
_DETECTORS: Dict[str, FormatDetector] = {
    "mistral": _is_mistral_format,  # Specific (has object field)
    "llama": _is_llama_format,  # Specific (has generation field)
    "nova": _is_nova_format,  # Specific (has contentBlockDelta)
    "claude": _is_claude_format,  # Generic (has type field)
}

# Aggregators registry
# Maps each format name to the aggregator instance returned by get_aggregator().
_AGGREGATORS: Dict[str, ChunkAggregator] = {
    "claude": claude.ClaudeAggregator(),
    "llama": llama.LlamaAggregator(),
    "mistral": mistral.MistralAggregator(),
    "nova": nova.NovaAggregator(),
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def detect_format(items: List[Dict[str, Any]]) -> str:
    """
    Detect streaming format from the first chunk that a detector recognizes.

    Items without a "chunk" entry are ignored. Each remaining chunk payload is
    JSON-decoded and offered to the registered detectors in registry order;
    the first match wins. Chunks that cannot be decoded, or that make a
    detector fail, are skipped and the next item is tried.

    Args:
        items: List of chunk items from the event stream

    Returns:
        Format name (e.g., "claude", "nova") or "claude" as default
    """
    for item in items:
        if "chunk" not in item:
            continue

        try:
            chunk_data = json.loads(item["chunk"]["bytes"])

            # Try each registered detector
            for format_name, detector in _DETECTORS.items():
                if detector(chunk_data):
                    return format_name

        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for undecodable bytes, so a single
            # corrupt chunk cannot abort detection.
            continue

    # Default to Claude format
    return "claude"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_aggregator(format_name: str) -> ChunkAggregator:
    """
    Look up the aggregator registered under a format name.

    Args:
        format_name: Name of the format

    Returns:
        ChunkAggregator instance

    Raises:
        ValueError: If format is not registered
    """
    if format_name in _AGGREGATORS:
        return _AGGREGATORS[format_name]

    # Unknown name: report it together with the names that are available.
    raise ValueError(
        f"Unknown format: {format_name}. Registered formats: {list(_AGGREGATORS.keys())}"
    )
|