opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,188 @@
1
+ import logging
2
+ from typing import Any, Dict, List
3
+
4
+ LOGGER = logging.getLogger(__name__)
5
+
6
+
7
+ def _handle_message_start(event: Dict[str, Any], result: Dict[str, Any]) -> None:
8
+ """Extract role from messageStart event."""
9
+ message_start = event.get("messageStart")
10
+ if isinstance(message_start, dict):
11
+ role = message_start.get("role")
12
+ if role:
13
+ result["output"]["message"]["role"] = role
14
+
15
+
16
+ def _handle_content_block_delta(event: Dict[str, Any], result: Dict[str, Any]) -> None:
17
+ """
18
+ Extract content from contentBlockDelta event.
19
+
20
+ Handles multiple delta types:
21
+ - delta.text: Regular text streaming
22
+ - delta.toolUse: Structured output / tool calls (Issue #3829)
23
+ """
24
+ content_block_delta = event.get("contentBlockDelta")
25
+ if not isinstance(content_block_delta, dict):
26
+ return
27
+
28
+ delta = content_block_delta.get("delta")
29
+ if not isinstance(delta, dict):
30
+ return
31
+
32
+ content = result["output"]["message"]["content"][0]
33
+
34
+ # Handle regular text streaming
35
+ if "text" in delta:
36
+ content["text"] += delta["text"]
37
+ return
38
+
39
+ # Handle structured output / tool use (Issue #3829)
40
+ # Ref: https://github.com/comet-ml/opik/issues/3829
41
+ if "toolUse" in delta:
42
+ if "toolUse" not in content:
43
+ content["toolUse"] = {}
44
+ content["toolUse"].update(delta["toolUse"])
45
+ return
46
+
47
+ # Log other delta types for future compatibility
48
+ LOGGER.debug("Unknown delta type in contentBlockDelta: %s", list(delta.keys()))
49
+
50
+
51
+ def _handle_message_stop(event: Dict[str, Any], result: Dict[str, Any]) -> None:
52
+ """Extract stopReason from messageStop event."""
53
+ message_stop = event.get("messageStop")
54
+ if isinstance(message_stop, dict):
55
+ stop_reason = message_stop.get("stopReason")
56
+ if stop_reason:
57
+ result["stopReason"] = stop_reason
58
+
59
+
60
+ def _handle_metadata(event: Dict[str, Any], result: Dict[str, Any]) -> None:
61
+ """Extract usage and metrics from metadata event."""
62
+ metadata = event.get("metadata")
63
+ if not isinstance(metadata, dict):
64
+ return
65
+
66
+ # Extract usage information
67
+ if "usage" in metadata:
68
+ result["usage"] = metadata["usage"]
69
+
70
+ # Extract metrics information
71
+ if "metrics" in metadata:
72
+ metrics = metadata["metrics"]
73
+ if isinstance(metrics, dict) and "latencyMs" in metrics:
74
+ result["metrics"] = {"latencyMs": metrics["latencyMs"]}
75
+
76
+
77
def aggregate_converse_stream_chunks(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Collapse the events of a Bedrock ``converse_stream`` call into one response.

    Each dictionary event is inspected for four keys, in this order, and handed
    to the matching helper:

    - ``messageStart``      -> ``_handle_message_start`` (role)
    - ``contentBlockDelta`` -> ``_handle_content_block_delta`` (text / toolUse)
    - ``messageStop``       -> ``_handle_message_stop`` (stopReason)
    - ``metadata``          -> ``_handle_metadata`` (usage, latencyMs)

    Events with other keys (for example ``contentBlockStart`` or
    ``contentBlockStop``) contribute nothing. Non-dictionary items are skipped
    with a debug log. If a helper raises, the error is logged as a warning, the
    remaining helpers for that same event are skipped, and aggregation carries
    on with the next event.

    The upstream notes attached to this function mention provider-specific
    variations (DeepSeek R1 reasoning output, OPIK-2910; tool use / structured
    output, Issue #3829); see the references for details.

    References:
        - AWS Bedrock Converse API: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html
        - Streaming Events: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ConverseStreamOutput.html
        - ContentBlockDelta: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ContentBlockDelta.html
        - Tool Use Guide: https://docs.aws.amazon.com/bedrock/latest/userguide/tool-use.html
        - DeepSeek R1 Issue: https://comet-ml.atlassian.net/browse/OPIK-2910
        - Tool Use Issue: https://github.com/comet-ml/opik/issues/3829

    Args:
        items: List of streaming event dictionaries from Bedrock converse_stream.

    Returns:
        Aggregated response dictionary with structure:
        {
            "output": {
                "message": {
                    "role": "assistant",
                    "content": [{"text": "aggregated text"}]
                }
            },
            "stopReason": "end_turn",  # Optional
            "usage": {...},  # Optional
            "metrics": {"latencyMs": ...}  # Optional
        }
    """
    # Start from an empty assistant message; the helpers fill it in place.
    message: Dict[str, Any] = {"role": "assistant", "content": [{"text": ""}]}
    result: Dict[str, Any] = {"output": {"message": message}}

    for event in items:
        if isinstance(event, dict):
            try:
                if "messageStart" in event:
                    _handle_message_start(event, result)

                if "contentBlockDelta" in event:
                    _handle_content_block_delta(event, result)

                if "messageStop" in event:
                    _handle_message_stop(event, result)

                if "metadata" in event:
                    _handle_metadata(event, result)

            except Exception as e:
                # Best effort: one malformed event must not lose the rest of
                # the stream, so report it and keep aggregating.
                LOGGER.warning(
                    "Unexpected error processing event: %s. Event: %s",
                    str(e),
                    event,
                    exc_info=True,
                )
        else:
            LOGGER.debug("Skipping non-dictionary event: %s", type(event))

    return result
162
+
163
+
164
def aggregate_invoke_agent_chunks(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Join the byte chunks of a Bedrock ``invoke_agent`` stream into one string.

    Only items shaped like ``{"chunk": {"bytes": ...}}`` contribute; anything
    else is ignored. The fragments are concatenated first and decoded as UTF-8
    once at the end, so a multi-byte character split across two chunks still
    decodes correctly.

    Note: per the original implementation notes, this deliberately simple
    approach is used because the completion payload only contained chunks
    without additional metadata at the time of writing.

    Reference:
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/invoke_agent.html

    Args:
        items: List of chunk event dictionaries from the invoke_agent stream.

    Returns:
        ``{"output": <decoded text>}``.
    """
    fragments = []

    for entry in items:
        if not isinstance(entry, dict) or "chunk" not in entry:
            continue

        piece = entry["chunk"]
        if isinstance(piece, dict) and "bytes" in piece:
            fragments.append(piece["bytes"])

    return {"output": b"".join(fragments).decode("utf-8")}
@@ -8,7 +8,8 @@ import opik.llm_usage as llm_usage
8
8
  from opik.api_objects import span
9
9
  from opik.decorator import arguments_helpers, base_track_decorator
10
10
 
11
- from . import helpers, stream_wrappers
11
+ from . import stream_wrappers
12
+ from .. import types
12
13
 
13
14
  LOGGER = logging.getLogger(__name__)
14
15
 
@@ -91,7 +92,7 @@ class BedrockConverseDecorator(base_track_decorator.BaseTrackDecorator):
91
92
  capture_output: bool,
92
93
  generations_aggregator: Optional[Callable[[List[Any]], Any]],
93
94
  ) -> Union[
94
- helpers.ConverseStreamOutput,
95
+ types.ConverseStreamOutput,
95
96
  None,
96
97
  ]:
97
98
  DECORATED_FUNCTION_IS_NOT_EXPECTED_TO_RETURN_GENERATOR = (
@@ -117,7 +118,7 @@ class BedrockConverseDecorator(base_track_decorator.BaseTrackDecorator):
117
118
  )
118
119
 
119
120
  output["stream"] = wrapped_stream
120
- return cast(helpers.ConverseStreamOutput, output)
121
+ return cast(types.ConverseStreamOutput, output)
121
122
 
122
123
  STREAM_NOT_FOUND = None
123
124
 
@@ -6,7 +6,8 @@ import opik.dict_utils as dict_utils
6
6
  from opik.api_objects import span
7
7
  from opik.decorator import arguments_helpers, base_track_decorator
8
8
 
9
- from . import helpers, stream_wrappers
9
+ from . import types
10
+ from .converse import stream_wrappers as converse_stream_wrappers
10
11
 
11
12
  LOGGER = logging.getLogger(__name__)
12
13
 
@@ -77,7 +78,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
77
78
  capture_output: bool,
78
79
  generations_aggregator: Optional[Callable[[List[Any]], Any]],
79
80
  ) -> Union[
80
- helpers.ConverseStreamOutput,
81
+ types.ConverseStreamOutput,
81
82
  None,
82
83
  ]:
83
84
  DECORATED_FUNCTION_IS_NOT_EXPECTED_TO_RETURN_GENERATOR = (
@@ -92,7 +93,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
92
93
  if isinstance(output, dict) and "completion" in output:
93
94
  span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()
94
95
 
95
- wrapped_stream = stream_wrappers.wrap_stream(
96
+ wrapped_stream = converse_stream_wrappers.wrap_stream(
96
97
  stream=output["completion"],
97
98
  capture_output=capture_output,
98
99
  span_to_end=span_to_end,
@@ -103,7 +104,7 @@ class BedrockInvokeAgentDecorator(base_track_decorator.BaseTrackDecorator):
103
104
  )
104
105
 
105
106
  output["completion"] = wrapped_stream
106
- return cast(helpers.ConverseStreamOutput, output)
107
+ return cast(types.ConverseStreamOutput, output)
107
108
 
108
109
  STREAM_NOT_FOUND = None
109
110
 
File without changes
@@ -0,0 +1,78 @@
1
+ """
2
+ Bedrock invoke_model_with_response_stream chunk aggregator.
3
+
4
+ DISCLAIMER: This package was generated with AI assistance.
5
+
6
+ This package provides a modular, extensible architecture for aggregating
7
+ streaming chunks from different Bedrock model formats (Claude, Nova, etc.).
8
+
9
+ ## Public API
10
+
11
+ The main public function is `aggregate_chunks_to_dataclass()` which returns a
12
+ structured `BedrockAggregatedResponse` dataclass for type safety.
13
+
14
+ ```python
15
+ from opik.integrations.bedrock.invoke_model.chunks_aggregator import aggregate_chunks_to_dataclass
16
+
17
+ # Create typed aggregated response
18
+ response = aggregate_chunks_to_dataclass(chunks)
19
+
20
+ # Access native format and usage
21
+ native_output = response.native_response # Provider-specific format
22
+ usage = response.usage # Bedrock format: {inputTokens, outputTokens, totalTokens}
23
+
24
+ # Use in span logging
25
+ output = response.to_output_format() # {"body": native_response}
26
+ metadata = response.to_metadata_format() # {"created_from": "bedrock", ...}
27
+ ```
28
+
29
+ Each format aggregator returns the response in its native structure,
30
+ with usage standardized to Bedrock format (camelCase: inputTokens, outputTokens, totalTokens).
31
+
32
+ ## Architecture
33
+
34
+ - `base`: Base types and protocols
35
+ - `claude`: Claude/Anthropic format aggregator
36
+ - `llama`: Meta Llama format aggregator
37
+ - `mistral`: Mistral/Pixtral format aggregator
38
+ - `nova`: Amazon Nova format aggregator
39
+ - `format_detector`: Format detection and aggregator registry
40
+ - `response_types`: Dataclass definitions for structured responses
41
+
42
+ ## Adding New Formats
43
+
44
+ To add support for a new model format:
45
+
46
+ 1. Create a new aggregator module (e.g., `titan.py`):
47
+ ```python
48
+ class TitanAggregator:
49
+ def aggregate(self, items) -> Dict[str, Any]:
50
+ # Return Titan's native structure with Bedrock usage
51
+ return {
52
+ "outputText": "...",
53
+ "usage": {"inputTokens": 10, "outputTokens": 20, "totalTokens": 30}
54
+ }
55
+ ```
56
+
57
+ 2. Add detection and registration in `format_detector.py`:
58
+ ```python
59
+ from . import titan
60
+
61
+ def _is_titan_format(chunk_data):
62
+ return "titan_specific_field" in chunk_data
63
+
64
+ # Add to _DETECTORS registry
65
+ _DETECTORS["titan"] = _is_titan_format
66
+
67
+ # Add to _AGGREGATORS registry
68
+ _AGGREGATORS["titan"] = titan.TitanAggregator()
69
+ ```
70
+
71
+ The new format will be automatically detected and used.
72
+ """
73
+
74
+ from .api import aggregate_chunks_to_dataclass
75
+
76
+ __all__ = [
77
+ "aggregate_chunks_to_dataclass",
78
+ ]
@@ -0,0 +1,45 @@
1
+ """
2
+ API functions for Bedrock chunk aggregation.
3
+
4
+ This module contains the main public functions for aggregating streaming chunks
5
+ from different Bedrock model formats into structured responses.
6
+ """
7
+
8
+ from typing import Any, Dict, List
9
+
10
+ from . import format_detector
11
+ from .. import response_types
12
+
13
+
14
def aggregate_chunks_to_dataclass(
    items: List[Dict[str, Any]],
) -> response_types.BedrockAggregatedResponse:
    """
    Aggregate streamed chunks into a ``BedrockAggregatedResponse``.

    The stream format is detected with ``format_detector.detect_format``, the
    matching aggregator is fetched with ``format_detector.get_aggregator``, and
    its output is split into two parts: the ``"usage"`` entry (an empty dict
    when absent) and everything else, which becomes ``native_response``.

    Args:
        items: List of chunk items from the event stream.

    Returns:
        BedrockAggregatedResponse built from the aggregated data.
        ``response_metadata`` is passed as an empty dict here (the original
        comment states that the stream wrapper fills it in afterwards).
    """
    aggregator = format_detector.get_aggregator(format_detector.detect_format(items))
    aggregated = aggregator.aggregate(items)

    # Work on a shallow copy so the aggregator's own dict is not modified
    # when "usage" is separated from the provider-native fields.
    native_response = dict(aggregated)
    usage = native_response.pop("usage", {})

    return response_types.BedrockAggregatedResponse(
        native_response=native_response, usage=usage, response_metadata={}
    )
@@ -0,0 +1,23 @@
1
+ """Base types and protocols for chunk aggregators."""
2
+
3
+ from typing import Any, Dict, List, Protocol
4
+
5
+
6
class ChunkAggregator(Protocol):
    """
    Structural interface shared by the per-format chunk aggregators.

    Any object exposing a compatible ``aggregate`` method satisfies this
    protocol; explicit inheritance is optional.
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Combine streamed chunks into a single provider-native dictionary.

        Implementations are expected to return:
        - the provider's own response structure (for instance Claude content
          blocks or Nova output), and
        - a ``"usage"`` entry in Bedrock format:
          ``{inputTokens, outputTokens, totalTokens}``.

        Args:
            items: List of chunk items from the event stream.

        Returns:
            Dict with provider-native structure and Bedrock-standardized usage.
        """
        ...
@@ -0,0 +1,121 @@
1
+ """Claude/Anthropic chunk aggregator."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict, List
6
+
7
+ from .. import usage_converters
8
+ from .base import ChunkAggregator
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
+
13
class ClaudeAggregator(ChunkAggregator):
    """
    Aggregator for Claude/Anthropic streaming format.

    Returns Claude's native message format with content blocks,
    with usage converted through ``usage_converters.anthropic_to_bedrock_usage``.

    Claude chunk types (snake_case ``type`` field) acted upon:
    - message_start: role and initial usage
    - content_block_delta: text fragments in ``delta.text``
    - content_block_stop: closes the current text block
    - message_delta: stop reason and updated output token count
    - message_stop: ``amazon-bedrock-invocationMetrics`` token counts

    Any other chunk type (e.g. content_block_start) is ignored.
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Aggregate Claude chunks into native Claude message structure with Bedrock usage.

        Aggregation is best effort per chunk: a chunk that cannot be decoded or
        does not have the expected shape is logged at debug level and skipped,
        and the remaining chunks are still processed.

        Args:
            items: List of chunk items from the event stream; items without a
                ``"chunk"`` key are ignored.

        Returns:
            Dict with ``role``, ``content`` (list of text blocks),
            ``stop_reason`` (``None`` if never reported) and ``usage``.
        """
        LOGGER.debug("Claude aggregator processing %d items", len(items))

        content_blocks: List[Dict[str, Any]] = []
        current_text = ""
        role = "assistant"
        stop_reason = None
        input_tokens = 0
        output_tokens = 0

        for item in items:
            if "chunk" not in item:
                continue

            try:
                chunk_data = json.loads(item["chunk"]["bytes"])
                chunk_type = chunk_data.get("type", "")

                if chunk_type == "message_start":
                    message = chunk_data.get("message", {})
                    role = message.get("role", "assistant")
                    usage = message.get("usage", {})
                    input_tokens = usage.get("input_tokens", 0)
                    output_tokens = usage.get("output_tokens", 0)
                    LOGGER.debug(
                        "Claude message_start: input_tokens=%d, output_tokens=%d",
                        input_tokens,
                        output_tokens,
                    )

                elif chunk_type == "content_block_delta":
                    delta = chunk_data.get("delta", {})
                    if "text" in delta:
                        current_text += delta["text"]

                elif chunk_type == "content_block_stop":
                    if current_text:
                        content_blocks.append({"type": "text", "text": current_text})
                        current_text = ""

                elif chunk_type == "message_delta":
                    delta = chunk_data.get("delta", {})
                    if "stop_reason" in delta:
                        stop_reason = delta["stop_reason"]
                    usage = chunk_data.get("usage", {})
                    if "output_tokens" in usage:
                        output_tokens = usage["output_tokens"]
                        LOGGER.debug(
                            "Claude message_delta: output_tokens=%d", output_tokens
                        )

                elif chunk_type == "message_stop":
                    metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
                    if metrics:
                        # Bedrock's own counters take precedence when present.
                        input_tokens = metrics.get("inputTokenCount", input_tokens)
                        output_tokens = metrics.get("outputTokenCount", output_tokens)
                        LOGGER.debug(
                            "Claude bedrock metrics: input=%d, output=%d",
                            input_tokens,
                            output_tokens,
                        )

            except (ValueError, KeyError, TypeError, AttributeError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError
                # (undecodable bytes); AttributeError covers payloads or nested
                # fields that are valid JSON but not objects (no ``.get``).
                # Either way the chunk is skipped instead of aborting the run.
                LOGGER.debug("Claude aggregator error processing chunk: %s", e)
                continue

        # Add any remaining text (stream ended without content_block_stop).
        if current_text:
            content_blocks.append({"type": "text", "text": current_text})

        # Only used for the debug summary below.
        total_text = "".join(
            block["text"] for block in content_blocks if block["type"] == "text"
        )
        LOGGER.debug(
            "Claude aggregated: %d chars, input_tokens=%d, output_tokens=%d",
            len(total_text),
            input_tokens,
            output_tokens,
        )

        # Convert to Bedrock usage format using shared converter
        # (presumably yields inputTokens/outputTokens/totalTokens - see
        # usage_converters for the exact shape).
        bedrock_usage = usage_converters.anthropic_to_bedrock_usage(
            {"input_tokens": input_tokens, "output_tokens": output_tokens}
        )

        # Return Claude's native format with Bedrock usage
        return {
            "role": role,
            "content": content_blocks,
            "stop_reason": stop_reason,
            "usage": bedrock_usage,
        }
@@ -0,0 +1,107 @@
1
+ """Format detection and aggregator registry."""
2
+
3
+ import json
4
+ from typing import Any, Callable, Dict, List
5
+
6
+ from .base import ChunkAggregator
7
+ from . import claude
8
+ from . import llama
9
+ from . import mistral
10
+ from . import nova
11
+
12
+
13
+ # Format detection functions
14
+ FormatDetector = Callable[[Dict[str, Any]], bool]
15
+
16
+
17
+ def _is_nova_format(chunk_data: Dict[str, Any]) -> bool:
18
+ """Check if chunk is Nova format (camelCase fields)."""
19
+ return "contentBlockDelta" in chunk_data or "messageStart" in chunk_data
20
+
21
+
22
+ def _is_claude_format(chunk_data: Dict[str, Any]) -> bool:
23
+ """Check if chunk is Claude format (snake_case fields with type)."""
24
+ return "type" in chunk_data
25
+
26
+
27
+ def _is_llama_format(chunk_data: Dict[str, Any]) -> bool:
28
+ """Check if chunk is Llama format (generation field)."""
29
+ return "generation" in chunk_data
30
+
31
+
32
+ def _is_mistral_format(chunk_data: Dict[str, Any]) -> bool:
33
+ """Check if chunk is Mistral/Pixtral format (OpenAI-like with choices and object)."""
34
+ return (
35
+ "object" in chunk_data
36
+ and chunk_data["object"] == "chat.completion.chunk"
37
+ and "choices" in chunk_data
38
+ and chunk_data["choices"]
39
+ and "message" in chunk_data["choices"][0]
40
+ )
41
+
42
+
43
+ # Format detectors registry (ordered by specificity - most specific first)
44
+ _DETECTORS: Dict[str, FormatDetector] = {
45
+ "mistral": _is_mistral_format, # Specific (has object field)
46
+ "llama": _is_llama_format, # Specific (has generation field)
47
+ "nova": _is_nova_format, # Specific (has contentBlockDelta)
48
+ "claude": _is_claude_format, # Generic (has type field)
49
+ }
50
+
51
+ # Aggregators registry
52
+ _AGGREGATORS: Dict[str, ChunkAggregator] = {
53
+ "claude": claude.ClaudeAggregator(),
54
+ "llama": llama.LlamaAggregator(),
55
+ "mistral": mistral.MistralAggregator(),
56
+ "nova": nova.NovaAggregator(),
57
+ }
58
+
59
+
60
def detect_format(items: List[Dict[str, Any]]) -> str:
    """
    Determine the streaming format from the first chunk a detector recognises.

    Items are scanned in order. Items without a ``"chunk"`` key, chunks whose
    bytes cannot be parsed as JSON, and chunks that trigger a ``KeyError`` or
    ``TypeError`` while being inspected are skipped. For each parsed chunk the
    detectors in ``_DETECTORS`` are tried in registry order and the first
    match wins; a chunk matching no detector is skipped as well.

    Args:
        items: List of chunk items from the event stream.

    Returns:
        Name of the detected format (e.g., "claude", "nova"), or "claude" when
        no chunk could be classified.
    """
    for entry in items:
        if "chunk" in entry:
            try:
                payload = json.loads(entry["chunk"]["bytes"])
                # First registered detector that accepts the payload, if any.
                matched_name = next(
                    (
                        name
                        for name, recognises in _DETECTORS.items()
                        if recognises(payload)
                    ),
                    None,
                )
            except (json.JSONDecodeError, KeyError, TypeError):
                continue

            if matched_name is not None:
                return matched_name

    # Default to Claude format
    return "claude"
87
+
88
+
89
def get_aggregator(format_name: str) -> ChunkAggregator:
    """
    Look up the aggregator registered under ``format_name``.

    Args:
        format_name: Name of the format, as returned by ``detect_format``.

    Returns:
        The ChunkAggregator instance stored in ``_AGGREGATORS``.

    Raises:
        ValueError: If no aggregator is registered for ``format_name``; the
            message lists the registered format names.
    """
    if format_name in _AGGREGATORS:
        return _AGGREGATORS[format_name]

    raise ValueError(
        f"Unknown format: {format_name}. Registered formats: {list(_AGGREGATORS.keys())}"
    )
+ return _AGGREGATORS[format_name]