PyPI - opik - Versions diffs - 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl - Mend

opik-1.8.39-py3-none-any.whl → opik-1.9.71-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (592)

opik/__init__.py +19 -3
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/data_helpers.py +79 -0
opik/api_objects/dataset/dataset.py +64 -4
opik/api_objects/dataset/rest_operations.py +11 -2
opik/api_objects/experiment/experiment.py +57 -57
opik/api_objects/experiment/experiment_item.py +2 -1
opik/api_objects/experiment/experiments_client.py +64 -0
opik/api_objects/experiment/helpers.py +35 -11
opik/api_objects/experiment/rest_operations.py +65 -5
opik/api_objects/helpers.py +8 -5
opik/api_objects/local_recording.py +81 -0
opik/api_objects/opik_client.py +600 -108
opik/api_objects/opik_query_language.py +39 -5
opik/api_objects/prompt/__init__.py +12 -2
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +210 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
opik/api_objects/prompt/client.py +189 -47
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
opik/api_objects/prompt/types.py +23 -0
opik/api_objects/search_helpers.py +89 -0
opik/api_objects/span/span_data.py +35 -25
opik/api_objects/threads/threads_client.py +39 -5
opik/api_objects/trace/trace_client.py +52 -2
opik/api_objects/trace/trace_data.py +15 -24
opik/api_objects/validation_helpers.py +3 -3
opik/cli/__init__.py +5 -0
opik/cli/__main__.py +6 -0
opik/cli/configure.py +66 -0
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/healthcheck.py +21 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +49 -0
opik/cli/proxy.py +93 -0
opik/cli/usage_report/__init__.py +16 -0
opik/cli/usage_report/charts.py +783 -0
opik/cli/usage_report/cli.py +274 -0
opik/cli/usage_report/constants.py +9 -0
opik/cli/usage_report/extraction.py +749 -0
opik/cli/usage_report/pdf.py +244 -0
opik/cli/usage_report/statistics.py +78 -0
opik/cli/usage_report/utils.py +235 -0
opik/config.py +13 -7
opik/configurator/configure.py +17 -0
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +9 -1
opik/decorator/base_track_decorator.py +205 -133
opik/decorator/context_manager/span_context_manager.py +123 -0
opik/decorator/context_manager/trace_context_manager.py +84 -0
opik/decorator/opik_args/__init__.py +13 -0
opik/decorator/opik_args/api_classes.py +71 -0
opik/decorator/opik_args/helpers.py +120 -0
opik/decorator/span_creation_handler.py +25 -6
opik/dict_utils.py +3 -3
opik/evaluation/__init__.py +13 -2
opik/evaluation/engine/engine.py +272 -75
opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
opik/evaluation/engine/helpers.py +31 -6
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/evaluation_result.py +168 -2
opik/evaluation/evaluator.py +533 -62
opik/evaluation/metrics/__init__.py +103 -4
opik/evaluation/metrics/aggregated_metric.py +35 -6
opik/evaluation/metrics/base_metric.py +1 -1
opik/evaluation/metrics/conversation/__init__.py +48 -0
opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
opik/evaluation/metrics/conversation/helpers.py +14 -15
opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
opik/evaluation/metrics/conversation/types.py +4 -5
opik/evaluation/metrics/conversation_types.py +9 -0
opik/evaluation/metrics/heuristics/bertscore.py +107 -0
opik/evaluation/metrics/heuristics/bleu.py +35 -15
opik/evaluation/metrics/heuristics/chrf.py +127 -0
opik/evaluation/metrics/heuristics/contains.py +47 -11
opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
opik/evaluation/metrics/heuristics/gleu.py +113 -0
opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
opik/evaluation/metrics/heuristics/meteor.py +119 -0
opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
opik/evaluation/metrics/heuristics/readability.py +129 -0
opik/evaluation/metrics/heuristics/rouge.py +26 -9
opik/evaluation/metrics/heuristics/spearman.py +88 -0
opik/evaluation/metrics/heuristics/tone.py +155 -0
opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/__init__.py +8 -0
opik/evaluation/models/base_model.py +107 -1
opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
opik/evaluation/models/langchain/message_converters.py +97 -15
opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
opik/evaluation/models/litellm/util.py +125 -0
opik/evaluation/models/litellm/warning_filters.py +16 -4
opik/evaluation/models/model_capabilities.py +187 -0
opik/evaluation/models/models_factory.py +25 -3
opik/evaluation/preprocessing.py +92 -0
opik/evaluation/report.py +70 -12
opik/evaluation/rest_operations.py +49 -45
opik/evaluation/samplers/__init__.py +4 -0
opik/evaluation/samplers/base_dataset_sampler.py +40 -0
opik/evaluation/samplers/random_dataset_sampler.py +48 -0
opik/evaluation/score_statistics.py +66 -0
opik/evaluation/scorers/__init__.py +4 -0
opik/evaluation/scorers/scorer_function.py +55 -0
opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
opik/evaluation/test_case.py +3 -2
opik/evaluation/test_result.py +1 -0
opik/evaluation/threads/evaluator.py +31 -3
opik/evaluation/threads/helpers.py +3 -2
opik/evaluation/types.py +9 -1
opik/exceptions.py +33 -0
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/hooks/__init__.py +23 -0
opik/hooks/anonymizer_hook.py +36 -0
opik/hooks/httpx_client_hook.py +112 -0
opik/httpx_client.py +12 -9
opik/id_helpers.py +18 -0
opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
opik/integrations/adk/helpers.py +16 -7
opik/integrations/adk/legacy_opik_tracer.py +7 -4
opik/integrations/adk/opik_tracer.py +14 -1
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
opik/integrations/adk/recursive_callback_injector.py +4 -7
opik/integrations/bedrock/converse/__init__.py +0 -0
opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
opik/integrations/bedrock/invoke_model/__init__.py +0 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
opik/integrations/bedrock/invoke_model/response_types.py +34 -0
opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
opik/integrations/bedrock/opik_tracker.py +42 -4
opik/integrations/bedrock/types.py +19 -0
opik/integrations/crewai/crewai_decorator.py +8 -51
opik/integrations/crewai/opik_tracker.py +31 -10
opik/integrations/crewai/patchers/__init__.py +5 -0
opik/integrations/crewai/patchers/flow.py +118 -0
opik/integrations/crewai/patchers/litellm_completion.py +30 -0
opik/integrations/crewai/patchers/llm_client.py +207 -0
opik/integrations/dspy/callback.py +80 -17
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_connector.py +2 -2
opik/integrations/haystack/opik_tracer.py +3 -7
opik/integrations/langchain/__init__.py +3 -1
opik/integrations/langchain/helpers.py +96 -0
opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_encoder_extension.py +1 -1
opik/integrations/langchain/opik_tracer.py +474 -229
opik/integrations/litellm/__init__.py +5 -0
opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
opik/integrations/litellm/litellm_completion_decorator.py +242 -0
opik/integrations/litellm/opik_tracker.py +43 -0
opik/integrations/litellm/stream_patchers.py +151 -0
opik/integrations/llama_index/callback.py +146 -107
opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
opik/integrations/openai/opik_tracker.py +1 -1
opik/integrations/sagemaker/auth.py +5 -1
opik/llm_usage/google_usage.py +3 -1
opik/llm_usage/opik_usage.py +7 -8
opik/llm_usage/opik_usage_factory.py +4 -2
opik/logging_messages.py +6 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batch_manager_constuctors.py +10 -0
opik/message_processing/batching/batchers.py +59 -27
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/__init__.py +0 -0
opik/message_processing/emulation/emulator_message_processor.py +578 -0
opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
opik/message_processing/emulation/models.py +162 -0
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/messages.py +56 -1
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/processors/message_processors.py +92 -0
opik/message_processing/processors/message_processors_chain.py +96 -0
opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
opik/message_processing/queue_consumer.py +9 -3
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +43 -10
opik/opik_context.py +16 -4
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +346 -15
opik/rest_api/alerts/__init__.py +7 -0
opik/rest_api/alerts/client.py +667 -0
opik/rest_api/alerts/raw_client.py +1015 -0
opik/rest_api/alerts/types/__init__.py +7 -0
opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
opik/rest_api/annotation_queues/__init__.py +4 -0
opik/rest_api/annotation_queues/client.py +668 -0
opik/rest_api/annotation_queues/raw_client.py +1019 -0
opik/rest_api/automation_rule_evaluators/client.py +34 -2
opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
opik/rest_api/client.py +15 -0
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/client.py +1310 -44
opik/rest_api/datasets/raw_client.py +2269 -358
opik/rest_api/experiments/__init__.py +2 -2
opik/rest_api/experiments/client.py +191 -5
opik/rest_api/experiments/raw_client.py +301 -7
opik/rest_api/experiments/types/__init__.py +4 -1
opik/rest_api/experiments/types/experiment_update_status.py +5 -0
opik/rest_api/experiments/types/experiment_update_type.py +5 -0
opik/rest_api/experiments/types/experiment_write_status.py +5 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
opik/rest_api/llm_provider_key/client.py +20 -0
opik/rest_api/llm_provider_key/raw_client.py +20 -0
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
opik/rest_api/manual_evaluation/__init__.py +4 -0
opik/rest_api/manual_evaluation/client.py +347 -0
opik/rest_api/manual_evaluation/raw_client.py +543 -0
opik/rest_api/optimizations/client.py +145 -9
opik/rest_api/optimizations/raw_client.py +237 -13
opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
opik/rest_api/prompts/__init__.py +2 -2
opik/rest_api/prompts/client.py +227 -6
opik/rest_api/prompts/raw_client.py +331 -2
opik/rest_api/prompts/types/__init__.py +3 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/spans/__init__.py +0 -2
opik/rest_api/spans/client.py +238 -76
opik/rest_api/spans/raw_client.py +307 -95
opik/rest_api/spans/types/__init__.py +0 -2
opik/rest_api/traces/client.py +572 -161
opik/rest_api/traces/raw_client.py +736 -229
opik/rest_api/types/__init__.py +352 -17
opik/rest_api/types/aggregation_data.py +1 -0
opik/rest_api/types/alert.py +33 -0
opik/rest_api/types/alert_alert_type.py +5 -0
opik/rest_api/types/alert_page_public.py +24 -0
opik/rest_api/types/alert_public.py +33 -0
opik/rest_api/types/alert_public_alert_type.py +5 -0
opik/rest_api/types/alert_trigger.py +27 -0
opik/rest_api/types/alert_trigger_config.py +28 -0
opik/rest_api/types/alert_trigger_config_public.py +28 -0
opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
opik/rest_api/types/alert_trigger_config_type.py +10 -0
opik/rest_api/types/alert_trigger_config_write.py +22 -0
opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
opik/rest_api/types/alert_trigger_event_type.py +19 -0
opik/rest_api/types/alert_trigger_public.py +27 -0
opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
opik/rest_api/types/alert_trigger_write.py +23 -0
opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
opik/rest_api/types/alert_write.py +28 -0
opik/rest_api/types/alert_write_alert_type.py +5 -0
opik/rest_api/types/annotation_queue.py +42 -0
opik/rest_api/types/annotation_queue_batch.py +27 -0
opik/rest_api/types/annotation_queue_item_ids.py +19 -0
opik/rest_api/types/annotation_queue_page_public.py +28 -0
opik/rest_api/types/annotation_queue_public.py +38 -0
opik/rest_api/types/annotation_queue_public_scope.py +5 -0
opik/rest_api/types/annotation_queue_reviewer.py +20 -0
opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
opik/rest_api/types/annotation_queue_scope.py +5 -0
opik/rest_api/types/annotation_queue_write.py +31 -0
opik/rest_api/types/annotation_queue_write_scope.py +5 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +62 -2
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/dataset.py +4 -0
opik/rest_api/types/dataset_expansion.py +42 -0
opik/rest_api/types/dataset_expansion_response.py +39 -0
opik/rest_api/types/dataset_item.py +2 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +2 -0
opik/rest_api/types/dataset_item_filter.py +27 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +5 -0
opik/rest_api/types/dataset_item_page_public.py +5 -0
opik/rest_api/types/dataset_item_public.py +2 -0
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +1 -0
opik/rest_api/types/dataset_public.py +4 -0
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +59 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +7 -2
opik/rest_api/types/experiment_group_response.py +2 -0
opik/rest_api/types/experiment_public.py +7 -2
opik/rest_api/types/experiment_public_status.py +5 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/experiment_status.py +5 -0
opik/rest_api/types/feedback.py +25 -1
opik/rest_api/types/feedback_create.py +20 -1
opik/rest_api/types/feedback_object_public.py +27 -1
opik/rest_api/types/feedback_public.py +25 -1
opik/rest_api/types/feedback_score_batch_item.py +2 -1
opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
opik/rest_api/types/feedback_score_public.py +4 -0
opik/rest_api/types/feedback_update.py +20 -1
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/group_detail.py +19 -0
opik/rest_api/types/group_details.py +20 -0
opik/rest_api/types/guardrail.py +1 -0
opik/rest_api/types/guardrail_write.py +1 -0
opik/rest_api/types/ids_holder.py +19 -0
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/llm_as_judge_message.py +5 -1
opik/rest_api/types/llm_as_judge_message_content.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
opik/rest_api/types/llm_as_judge_message_public.py +5 -1
opik/rest_api/types/llm_as_judge_message_write.py +5 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
opik/rest_api/types/manual_evaluation_request.py +38 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
opik/rest_api/types/manual_evaluation_response.py +27 -0
opik/rest_api/types/optimization.py +4 -2
opik/rest_api/types/optimization_public.py +4 -2
opik/rest_api/types/optimization_public_status.py +3 -1
opik/rest_api/types/optimization_status.py +3 -1
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +4 -2
opik/rest_api/types/optimization_write_status.py +3 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt.py +6 -0
opik/rest_api/types/prompt_detail.py +6 -0
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_public.py +6 -0
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_version.py +3 -0
opik/rest_api/types/prompt_version_detail.py +3 -0
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_link.py +1 -0
opik/rest_api/types/prompt_version_link_public.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +3 -0
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +9 -0
opik/rest_api/types/provider_api_key_provider.py +1 -1
opik/rest_api/types/provider_api_key_public.py +9 -0
opik/rest_api/types/provider_api_key_public_provider.py +1 -1
opik/rest_api/types/score_name.py +1 -0
opik/rest_api/types/service_toggles_config.py +18 -0
opik/rest_api/types/span.py +1 -2
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_public.py +1 -2
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/rest_api/types/span_write.py +1 -2
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/trace.py +11 -2
opik/rest_api/types/trace_enrichment_options.py +32 -0
opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
opik/rest_api/types/trace_filter.py +23 -0
opik/rest_api/types/trace_filter_operator.py +21 -0
opik/rest_api/types/trace_filter_write.py +23 -0
opik/rest_api/types/trace_filter_write_operator.py +21 -0
opik/rest_api/types/trace_public.py +11 -2
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_identifier.py +1 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/trace_write.py +1 -2
opik/rest_api/types/value_entry.py +2 -0
opik/rest_api/types/value_entry_compare.py +2 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
opik/rest_api/types/value_entry_public.py +2 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/rest_api/types/webhook.py +28 -0
opik/rest_api/types/webhook_examples.py +19 -0
opik/rest_api/types/webhook_public.py +28 -0
opik/rest_api/types/webhook_test_result.py +23 -0
opik/rest_api/types/webhook_test_result_status.py +5 -0
opik/rest_api/types/webhook_write.py +23 -0
opik/rest_api/types/welcome_wizard_tracking.py +22 -0
opik/rest_api/types/workspace_configuration.py +5 -0
opik/rest_api/welcome_wizard/__init__.py +4 -0
opik/rest_api/welcome_wizard/client.py +195 -0
opik/rest_api/welcome_wizard/raw_client.py +208 -0
opik/rest_api/workspaces/client.py +14 -2
opik/rest_api/workspaces/raw_client.py +10 -0
opik/s3_httpx_client.py +14 -1
opik/simulation/__init__.py +6 -0
opik/simulation/simulated_user.py +99 -0
opik/simulation/simulator.py +108 -0
opik/synchronization.py +5 -6
opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
opik-1.9.71.dist-info/METADATA +370 -0
opik-1.9.71.dist-info/RECORD +1110 -0
opik/api_objects/prompt/prompt.py +0 -112
opik/cli.py +0 -193
opik/hooks.py +0 -13
opik/integrations/bedrock/chunks_aggregator.py +0 -55
opik/integrations/bedrock/helpers.py +0 -8
opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
opik-1.8.39.dist-info/METADATA +0 -339
opik-1.8.39.dist-info/RECORD +0 -790
/opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
/opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
/opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
/opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
/opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
{opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0

opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Meta Llama chunk aggregator."""
+import json
+import logging
+from typing import Any, Dict, List
+from .. import usage_converters
+from .base import ChunkAggregator
+LOGGER = logging.getLogger(__name__)
+class LlamaAggregator(ChunkAggregator):
+    """
+    Chunk aggregator for the Meta Llama streaming format.
+    The result keeps Llama's native field names and adds Bedrock-style usage.
+    Keys read from every decoded chunk:
+    - generation: text fragment, appended to the accumulated output
+    - prompt_token_count: input token count
+    - generation_token_count: output token count; the last truthy value is kept
+    - stop_reason: why generation stopped
+    - amazon-bedrock-invocationMetrics: when present, overrides both token counts
+    """
+    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Merge streamed Llama chunks into a single Llama-style response dict.
+        Items without a "chunk" key are ignored. If decoding or reading a chunk
+        raises JSONDecodeError, KeyError or TypeError, the error is logged at
+        debug level and the remainder of that chunk is skipped.
+        """
+        LOGGER.debug("Llama aggregator processing %d items", len(items))
+        text = ""
+        finish_reason = None
+        tokens_in = 0
+        tokens_out = 0
+        for event in items:
+            if "chunk" in event:
+                try:
+                    payload = json.loads(event["chunk"]["bytes"])
+                    # Append this chunk's text fragment, if any
+                    if "generation" in payload:
+                        fragment = payload["generation"]
+                        if fragment:
+                            text = text + fragment
+                    # Input token count reported by the model
+                    if "prompt_token_count" in payload:
+                        reported_in = payload["prompt_token_count"]
+                        if reported_in:
+                            tokens_in = reported_in
+                            LOGGER.debug("Llama prompt_token_count: %d", tokens_in)
+                    # Output token count reported by the model
+                    if "generation_token_count" in payload:
+                        reported_out = payload["generation_token_count"]
+                        if reported_out:
+                            tokens_out = reported_out
+                    # Reason the generation stopped
+                    if "stop_reason" in payload:
+                        reported_stop = payload["stop_reason"]
+                        if reported_stop:
+                            finish_reason = reported_stop
+                            LOGGER.debug("Llama stop_reason: %s", finish_reason)
+                    # Bedrock invocation metrics take precedence over model counts
+                    bedrock_metrics = payload.get(
+                        "amazon-bedrock-invocationMetrics", {}
+                    )
+                    if bedrock_metrics:
+                        tokens_in = bedrock_metrics.get("inputTokenCount", tokens_in)
+                        tokens_out = bedrock_metrics.get(
+                            "outputTokenCount", tokens_out
+                        )
+                        LOGGER.debug(
+                            "Llama bedrock metrics: input=%d, output=%d",
+                            tokens_in,
+                            tokens_out,
+                        )
+                except (json.JSONDecodeError, KeyError, TypeError) as error:
+                    LOGGER.debug("Llama aggregator error processing chunk: %s", error)
+        LOGGER.debug(
+            "Llama aggregated: %d chars, prompt_tokens=%d, generation_tokens=%d",
+            len(text),
+            tokens_in,
+            tokens_out,
+        )
+        # Shared converter turns the Llama counters into Bedrock usage format
+        llama_counters = {
+            "prompt_token_count": tokens_in,
+            "generation_token_count": tokens_out,
+        }
+        usage = usage_converters.llama_to_bedrock_usage(llama_counters)
+        # Llama-native fields plus the Bedrock usage block
+        return {
+            "generation": text,
+            "prompt_token_count": tokens_in,
+            "generation_token_count": tokens_out,
+            "stop_reason": finish_reason,
+            "usage": usage,
+        }

opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py ADDED Viewed

@@ -0,0 +1,118 @@
+"""Mistral (Pixtral) chunk aggregator."""
+import json
+import logging
+from typing import Any, Dict, List
+from .. import usage_converters
+from .base import ChunkAggregator
+LOGGER = logging.getLogger(__name__)
+class MistralAggregator(ChunkAggregator):
+    """
+    Chunk aggregator for the Mistral (Pixtral) streaming format.
+    The result is an OpenAI-like "chat.completion" dict with Bedrock-style usage.
+    Keys read from every decoded chunk:
+    - model / id: kept from the first chunk that carries them
+    - choices[0].message.content: text fragment, appended to the output
+    - choices[0].stop_reason: why generation stopped
+    - usage: prompt_tokens / completion_tokens
+    - amazon-bedrock-invocationMetrics: when present, overrides both token counts
+    """
+    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Merge streamed Mistral chunks into a single OpenAI-like response dict.
+        Items without a "chunk" key are ignored. If decoding or reading a chunk
+        raises JSONDecodeError, KeyError or TypeError, the error is logged at
+        debug level and the remainder of that chunk is skipped.
+        """
+        LOGGER.debug("Mistral aggregator processing %d items", len(items))
+        text = ""
+        finish_reason = None
+        prompt_count = 0
+        completion_count = 0
+        model_name = None
+        response_id = None
+        for event in items:
+            if "chunk" in event:
+                try:
+                    payload = json.loads(event["chunk"]["bytes"])
+                    # Remember model and completion identifiers once
+                    if model_name is None:
+                        if "model" in payload:
+                            model_name = payload["model"]
+                    if response_id is None:
+                        if "id" in payload:
+                            response_id = payload["id"]
+                    # Only the first choice is inspected
+                    if "choices" in payload:
+                        choices = payload["choices"]
+                        if choices:
+                            first_choice = choices[0]
+                            if "message" in first_choice:
+                                message = first_choice["message"]
+                                if message:
+                                    fragment = message.get("content")
+                                    if fragment:
+                                        text = text + fragment
+                            if "stop_reason" in first_choice:
+                                reported_stop = first_choice["stop_reason"]
+                                if reported_stop:
+                                    finish_reason = reported_stop
+                                    LOGGER.debug(
+                                        "Mistral stop_reason: %s", finish_reason
+                                    )
+                    # Token usage reported by the model
+                    if "usage" in payload:
+                        model_usage = payload["usage"]
+                        if model_usage:
+                            if "prompt_tokens" in model_usage:
+                                prompt_count = model_usage["prompt_tokens"]
+                            if "completion_tokens" in model_usage:
+                                completion_count = model_usage["completion_tokens"]
+                            LOGGER.debug(
+                                "Mistral usage: prompt=%d, completion=%d",
+                                prompt_count,
+                                completion_count,
+                            )
+                    # Bedrock invocation metrics take precedence over model usage
+                    bedrock_metrics = payload.get(
+                        "amazon-bedrock-invocationMetrics", {}
+                    )
+                    if bedrock_metrics:
+                        prompt_count = bedrock_metrics.get(
+                            "inputTokenCount", prompt_count
+                        )
+                        completion_count = bedrock_metrics.get(
+                            "outputTokenCount", completion_count
+                        )
+                        LOGGER.debug(
+                            "Mistral bedrock metrics: input=%d, output=%d",
+                            prompt_count,
+                            completion_count,
+                        )
+                except (json.JSONDecodeError, KeyError, TypeError) as error:
+                    LOGGER.debug(
+                        "Mistral aggregator error processing chunk: %s", error
+                    )
+        LOGGER.debug(
+            "Mistral aggregated: %d chars, input_tokens=%d, output_tokens=%d",
+            len(text),
+            prompt_count,
+            completion_count,
+        )
+        # Shared converter turns OpenAI-style counters into Bedrock usage format
+        openai_counters = {
+            "prompt_tokens": prompt_count,
+            "completion_tokens": completion_count,
+        }
+        usage = usage_converters.openai_to_bedrock_usage(openai_counters)
+        # Single-choice OpenAI-like envelope carrying the Bedrock usage block
+        only_choice = {
+            "index": 0,
+            "message": {"role": "assistant", "content": text},
+            "stop_reason": finish_reason,
+        }
+        return {
+            "id": response_id,
+            "object": "chat.completion",
+            "model": model_name,
+            "choices": [only_choice],
+            "usage": usage,
+        }

opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""Amazon Nova chunk aggregator."""
+import json
+import logging
+from typing import Any, Dict, List
+from .. import usage_converters
+from .base import ChunkAggregator
+LOGGER = logging.getLogger(__name__)
+class NovaAggregator(ChunkAggregator):
+    """
+    Aggregator for Amazon Nova streaming format.
+    Returns Nova's native output format with Bedrock usage.
+    Nova chunk structure uses camelCase:
+    - messageStart: Contains role information
+    - contentBlockDelta: Contains text in delta.text
+    - contentBlockStop: End of content block
+    - messageStop: Contains stopReason
+    - metadata: Contains usage information with inputTokens/outputTokens
+    - amazon-bedrock-invocationMetrics: Bedrock token counts, override metadata usage
+    """
+    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Aggregate Nova chunks into native Nova output structure with Bedrock usage.
+        Items without a "chunk" key are ignored. If decoding or reading a chunk
+        raises JSONDecodeError, KeyError or TypeError, the error is logged at
+        debug level and the remainder of that chunk is skipped.
+        """
+        LOGGER.debug("Nova aggregator processing %d items", len(items))
+        output_text = ""
+        stop_reason = None
+        input_tokens = 0
+        output_tokens = 0
+        for item in items:
+            if "chunk" not in item:
+                continue
+            try:
+                chunk_data = json.loads(item["chunk"]["bytes"])
+                if "contentBlockDelta" in chunk_data:
+                    delta = chunk_data["contentBlockDelta"].get("delta", {})
+                    if "text" in delta:
+                        output_text += delta["text"]
+                elif "messageStop" in chunk_data:
+                    stop_data = chunk_data["messageStop"]
+                    if "stopReason" in stop_data:
+                        stop_reason = stop_data["stopReason"]
+                        LOGGER.debug("Nova stop_reason: %s", stop_reason)
+                elif "metadata" in chunk_data:
+                    if "usage" in chunk_data["metadata"]:
+                        metadata_usage = chunk_data["metadata"]["usage"]
+                        input_tokens = metadata_usage.get("inputTokens", 0)
+                        output_tokens = metadata_usage.get("outputTokens", 0)
+                        LOGGER.debug(
+                            "Nova metadata usage: input=%d, output=%d",
+                            input_tokens,
+                            output_tokens,
+                        )
+                # Use bedrock invocation metrics as authoritative source.
+                # Checked on every chunk (after the metadata usage above, so it
+                # still wins) rather than only on chunks that carry "metadata";
+                # otherwise metrics attached to any other chunk would be ignored.
+                metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
+                if metrics:
+                    input_tokens = metrics.get("inputTokenCount", input_tokens)
+                    output_tokens = metrics.get("outputTokenCount", output_tokens)
+                    LOGGER.debug(
+                        "Nova bedrock metrics: input=%d, output=%d",
+                        input_tokens,
+                        output_tokens,
+                    )
+            except (json.JSONDecodeError, KeyError, TypeError) as e:
+                LOGGER.debug("Nova aggregator error processing chunk: %s", e)
+                continue
+        LOGGER.debug(
+            "Nova aggregated: %d chars, input_tokens=%d, output_tokens=%d",
+            len(output_text),
+            input_tokens,
+            output_tokens,
+        )
+        # Convert to Bedrock usage format using shared converter
+        bedrock_usage = usage_converters.nova_to_bedrock_usage(
+            {"inputTokens": input_tokens, "outputTokens": output_tokens}
+        )
+        # Return Nova's native output format with Bedrock usage
+        return {
+            "output": {
+                "message": {"role": "assistant", "content": [{"text": output_text}]}
+            },
+            "stopReason": stop_reason,
+            "usage": bedrock_usage,
+        }

opik/integrations/bedrock/invoke_model/invoke_model_decorator.py ADDED Viewed

@@ -0,0 +1,178 @@
+import json
+import logging
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
+from typing_extensions import override
+import opik
+import opik.dict_utils as dict_utils
+from opik import llm_usage
+from opik.api_objects import span
+from opik.decorator import arguments_helpers, base_track_decorator
+from .. import types
+from . import stream_wrappers, usage_extraction, response_types
+import botocore.response
+import botocore.eventstream
+LOGGER = logging.getLogger(__name__)
+# Keys to extract from kwargs for input logging
+KWARGS_KEYS_TO_LOG_AS_INPUTS = ["body", "modelId"]
+# Keys to extract from response for output logging
+RESPONSE_KEYS_TO_LOG_AS_OUTPUTS = ["body"]
+class BedrockInvokeModelDecorator(base_track_decorator.BaseTrackDecorator):
+    """
+    Track decorator for the AWS Bedrock runtime client functions
+    `invoke_model` and `invoke_model_with_response_stream`.
+    It customises how call arguments and responses become span data and
+    overrides _streams_handler() so that Bedrock response bodies are wrapped
+    by the Bedrock-specific stream wrappers.
+    """
+    @override
+    def _start_span_inputs_preprocessor(
+        self,
+        func: Callable,
+        track_options: arguments_helpers.TrackOptions,
+        args: Tuple,
+        kwargs: Dict[str, Any],
+    ) -> arguments_helpers.StartSpanParameters:
+        """Build the span start parameters from the tracked call's keyword arguments."""
+        assert (
+            kwargs is not None
+        ), "Expected kwargs to be not None in BedrockRuntime.Client.invoke_model(**kwargs)"
+        if track_options.name is None:
+            span_name = func.__name__
+        else:
+            span_name = track_options.name
+        # The request body is JSON-encoded; log its decoded form instead
+        decoded_body = json.loads(kwargs.get("body", "{}"))
+        decoded_kwargs = dict(kwargs)
+        decoded_kwargs["body"] = decoded_body
+        # Keys listed in KWARGS_KEYS_TO_LOG_AS_INPUTS become input, the rest metadata
+        span_input, span_metadata = dict_utils.split_dict_by_keys(
+            decoded_kwargs, KWARGS_KEYS_TO_LOG_AS_INPUTS
+        )
+        span_metadata["created_from"] = "bedrock"
+        return arguments_helpers.StartSpanParameters(
+            name=span_name,
+            input=span_input,
+            type=track_options.type,
+            tags=["bedrock", "invoke_model"],
+            metadata=span_metadata,
+            project_name=track_options.project_name,
+            model=kwargs.get("modelId", None),
+            provider=opik.LLMProvider.BEDROCK,
+        )
+    @override
+    def _end_span_inputs_preprocessor(
+        self,
+        output: Any,
+        capture_output: bool,
+        current_span_data: span.SpanData,
+    ) -> arguments_helpers.EndSpanParameters:
+        """Build the span end parameters from either an aggregated stream or a plain response dict."""
+        if isinstance(output, response_types.BedrockAggregatedResponse):
+            # Aggregated streaming response: usage is already in Bedrock format
+            streamed_usage = llm_usage.build_opik_usage(
+                provider=opik.LLMProvider.BEDROCK, usage=output.usage
+            )
+            return arguments_helpers.EndSpanParameters(
+                output=output.to_output_format(),  # Native format in body
+                provider=opik.LLMProvider.BEDROCK,
+                usage=streamed_usage,
+                metadata=output.to_metadata_format(),
+            )
+        # Regular non-streaming response (dict)
+        logged_output, response_metadata = dict_utils.split_dict_by_keys(
+            output, RESPONSE_KEYS_TO_LOG_AS_OUTPUTS
+        )
+        model_subprovider = usage_extraction.extract_subprovider_from_model_id(
+            cast(str, current_span_data.model)
+        )
+        extracted_usage = usage_extraction.try_extract_usage_from_bedrock_response(  # type: ignore
+            model_subprovider, logged_output
+        )
+        return arguments_helpers.EndSpanParameters(
+            output=logged_output,
+            provider=opik.LLMProvider.BEDROCK,
+            usage=extracted_usage,
+            metadata=response_metadata,
+        )
+    @override
+    def _streams_handler(  # type: ignore
+        self,
+        output: Any,
+        capture_output: bool,
+        generations_aggregator: Optional[Callable[[List[Any]], Any]],
+    ) -> Union[
+        types.InvokeModelOutput,
+        None,
+    ]:
+        """
+        Wrap the response body when it is a StreamingBody or an EventStream.
+        Returns the response with tracking attached, or None when no supported
+        body type is found (or no aggregator was supplied for an EventStream).
+        """
+        has_body = isinstance(output, dict) and "body" in output
+        # Despite the name, StreamingBody is not a stream in traditional LLM provider sense (response chunks).
+        # It's an interface to a stream of bytes representing the response body.
+        if has_body and isinstance(output["body"], botocore.response.StreamingBody):
+            span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()
+            return stream_wrappers.wrap_invoke_model_response(
+                output=output,
+                span_to_end=span_to_end,
+                trace_to_end=trace_to_end,
+                finally_callback=self._after_call,
+            )
+        if generations_aggregator is None:
+            # The decorated function is not expected to return a generator
+            return None
+        if not (
+            has_body and isinstance(output["body"], botocore.eventstream.EventStream)
+        ):
+            # No stream found
+            return None
+        span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()
+        output["body"] = (
+            stream_wrappers.wrap_invoke_model_with_response_stream_response(
+                stream=output["body"],
+                capture_output=capture_output,
+                span_to_end=span_to_end,
+                trace_to_end=trace_to_end,
+                generations_aggregator=generations_aggregator,
+                response_metadata=output["ResponseMetadata"],
+                finally_callback=self._after_call,
+            )
+        )
+        return cast(types.InvokeModelWithResponseStreamOutput, output)

opik/integrations/bedrock/invoke_model/response_types.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Response types for Bedrock invoke_model operations."""
+from dataclasses import dataclass
+from typing import Any, Dict
+@dataclass
+class BedrockAggregatedResponse:
+    """
+    Aggregated result of an invoke_model_with_response_stream call.
+    Holds the provider-native response rebuilt from the streamed chunks,
+    the usage in Bedrock format and the Bedrock response metadata.
+    """
+    # Response rebuilt in the provider's own structure
+    native_response: Dict[str, Any]
+    # Usage in Bedrock format: {inputTokens, outputTokens, totalTokens}
+    usage: Dict[str, Any]
+    # Metadata of the Bedrock response
+    response_metadata: Dict[str, Any]
+    def to_output_format(self) -> Dict[str, Any]:
+        """Return the native response wrapped under a "body" key for span output."""
+        span_output: Dict[str, Any] = dict(body=self.native_response)
+        return span_output
+    def to_metadata_format(self) -> Dict[str, Any]:
+        """Return span metadata: a "created_from" marker merged with the response metadata."""
+        span_metadata: Dict[str, Any] = {"created_from": "bedrock"}
+        # Response metadata is applied last, so its keys win on collision
+        span_metadata.update(self.response_metadata)
+        return span_metadata

opik/integrations/bedrock/invoke_model/stream_wrappers.py ADDED Viewed

@@ -0,0 +1,122 @@
+import logging
+import json
+from typing import Optional, Callable, List, Any, Dict, Generator
+import botocore.response
+import functools
+import opik.api_objects.span as span
+import opik.api_objects.trace as trace
+from opik.types import ErrorInfoDict
+from opik.decorator import generator_wrappers, error_info_collector
+from .. import types
+import botocore.eventstream
+LOGGER = logging.getLogger(__name__)
+# Reference to the unpatched method, taken at import time:
+# wrap_invoke_model_response replaces StreamingBody.read at class level and
+# its wrapper delegates the actual read to this original.
+__original_streaming_body_read = botocore.response.StreamingBody.read
+def wrap_invoke_model_response(
+    output: types.InvokeModelOutput,
+    span_to_end: span.SpanData,
+    trace_to_end: Optional[trace.TraceData],
+    finally_callback: generator_wrappers.FinishGeneratorCallback,
+) -> types.InvokeModelOutput:
+    """
+    Finish the span/trace when the body of an `invoke_model` response is read.
+    `StreamingBody.read` is replaced at class level by a wrapper. Only a body
+    marked with `opik_tracked_instance` triggers `finally_callback`, and only on
+    its first `read()`; the marker is removed afterwards. Every other read is
+    delegated unchanged to the original method.
+    Args:
+        output: Response dict; its "body" and "ResponseMetadata" entries are used.
+        span_to_end: Span passed to `finally_callback` once the body is read.
+        trace_to_end: Trace passed to `finally_callback` together with the span.
+        finally_callback: Receives the JSON-decoded body (an empty dict when the
+            read failed, returned None or did not yield decodable JSON) and the
+            collected error info, if any.
+    Returns:
+        The `output` object that was passed in.
+    """
+    streaming_body = output["body"]
+    # Everything the wrapper has to report for THIS response
+    call_context = (
+        output["ResponseMetadata"],
+        span_to_end,
+        trace_to_end,
+        finally_callback,
+    )
+    @functools.wraps(__original_streaming_body_read)
+    def wrapped_read(self: botocore.response.StreamingBody, *args, **kwargs):  # type: ignore
+        if not hasattr(self, "opik_tracked_instance"):
+            # Untracked body, or a tracked one that was already reported.
+            # This must not be handled with a `return` inside the `finally`
+            # below: that would replace the bytes just read with None and
+            # silently discard any exception raised by the original read.
+            return __original_streaming_body_read(self, *args, **kwargs)
+        # The class-level patch is replaced on every wrap call, so this closure
+        # may belong to a later response; prefer the context stored on the body.
+        metadata, tracked_span, tracked_trace, callback = vars(self).pop(
+            "opik_tracking_context", call_context
+        )
+        error_info: Optional[ErrorInfoDict] = None
+        result = None
+        try:
+            result = __original_streaming_body_read(self, *args, **kwargs)
+            return result
+        except Exception as exception:
+            LOGGER.debug(
+                "Exception raised from botocore.response.StreamingBody: %s",
+                str(exception),
+                exc_info=True,
+            )
+            error_info = error_info_collector.collect(exception)
+            raise
+        finally:
+            delattr(self, "opik_tracked_instance")
+            tracked_output = {"body": {}, "ResponseMetadata": metadata}
+            if error_info is None and result is not None:
+                try:
+                    parsed_body = json.loads(result)
+                except (ValueError, TypeError) as e:
+                    # ValueError covers JSONDecodeError and undecodable bytes
+                    LOGGER.debug("Failed to parse response body as JSON: %s", e)
+                else:
+                    tracked_output["body"] = parsed_body
+                    # Valid JSON need not be an object; only a dict has keys
+                    LOGGER.debug(
+                        "Successfully parsed response body with keys: %s",
+                        list(parsed_body) if isinstance(parsed_body, dict) else [],
+                    )
+            else:
+                LOGGER.debug("Error occurred or result is None, using empty body")
+            callback(
+                output=tracked_output,
+                error_info=error_info,
+                generators_span_to_end=tracked_span,
+                generators_trace_to_end=tracked_trace,
+                capture_output=True,
+            )
+    botocore.response.StreamingBody.read = wrapped_read
+    streaming_body.opik_tracking_context = call_context
+    streaming_body.opik_tracked_instance = True
+    return output
+def wrap_invoke_model_with_response_stream_response(
+    stream: botocore.eventstream.EventStream,
+    capture_output: bool,
+    span_to_end: span.SpanData,
+    trace_to_end: Optional[trace.TraceData],
+    generations_aggregator: Callable[[List[Any]], Any],
+    response_metadata: Dict[str, Any],
+    finally_callback: generator_wrappers.FinishGeneratorCallback,
+) -> Generator[Any, None, None]:
+    """
+    Re-yield every event of a Bedrock event stream while recording it.
+    When the generator finishes without an Exception having been caught here,
+    the recorded events are passed to `generations_aggregator`,
+    `response_metadata` is attached to the aggregated result and that result is
+    handed to `finally_callback`. If an Exception propagates through the loop,
+    the callback receives `output=None` together with the collected error info
+    and the exception is re-raised.
+    """
+    collected: List[Dict[str, Any]] = []
+    failure: Optional[ErrorInfoDict] = None
+    try:
+        for event in stream:
+            collected.append(event)
+            yield event
+    except Exception as exception:
+        LOGGER.debug(
+            "Exception raised from botocore.eventstream.EventStream: %s",
+            str(exception),
+            exc_info=True,
+        )
+        failure = error_info_collector.collect(exception)
+        raise exception
+    finally:
+        aggregated = None
+        if failure is None:
+            # Aggregate only when no error was recorded
+            aggregated = generations_aggregator(collected)
+            aggregated.response_metadata = response_metadata
+        finally_callback(
+            output=aggregated,
+            error_info=failure,
+            generators_span_to_end=span_to_end,
+            generators_trace_to_end=trace_to_end,
+            capture_output=capture_output,
+        )

opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

opik 1.8.39py3-none-any.whl → 1.9.71py3-none-any.whl