opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592) hide show
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
1
+ """Meta Llama chunk aggregator."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict, List
6
+
7
+ from .. import usage_converters
8
+ from .base import ChunkAggregator
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
+
13
class LlamaAggregator(ChunkAggregator):
    """
    Aggregator for Meta Llama streaming format.

    Returns Llama's native output format with Bedrock usage.

    Llama chunk structure:
    - generation: Contains generated text
    - prompt_token_count: Input tokens (only in first chunk)
    - generation_token_count: Cumulative output tokens
    - stop_reason: Stop reason in final chunk
    - amazon-bedrock-invocationMetrics: Final metrics
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Aggregate Llama chunks into native Llama output with Bedrock usage.

        Args:
            items: Stream events. Only events that have a "chunk" key are
                used; the JSON document under item["chunk"]["bytes"] is parsed.

        Returns:
            A dict with the keys "generation", "prompt_token_count",
            "generation_token_count", "stop_reason" and "usage", where
            "usage" is the result of usage_converters.llama_to_bedrock_usage.

        Chunks that cannot be parsed or do not have the expected structure are
        logged at debug level and skipped; they never abort the aggregation.
        """
        LOGGER.debug("Llama aggregator processing %d items", len(items))

        generation_text = ""
        stop_reason = None
        prompt_token_count = 0
        generation_token_count = 0

        for item in items:
            if "chunk" not in item:
                continue

            try:
                chunk_data = json.loads(item["chunk"]["bytes"])

                # Extract generated text
                if "generation" in chunk_data and chunk_data["generation"]:
                    generation_text += chunk_data["generation"]

                # Extract prompt token count from first chunk
                if (
                    "prompt_token_count" in chunk_data
                    and chunk_data["prompt_token_count"]
                ):
                    prompt_token_count = chunk_data["prompt_token_count"]
                    LOGGER.debug("Llama prompt_token_count: %d", prompt_token_count)

                # Extract generation token count (cumulative, so the latest
                # non-empty value replaces the previous one)
                if (
                    "generation_token_count" in chunk_data
                    and chunk_data["generation_token_count"]
                ):
                    generation_token_count = chunk_data["generation_token_count"]

                # Extract stop reason
                if "stop_reason" in chunk_data and chunk_data["stop_reason"]:
                    stop_reason = chunk_data["stop_reason"]
                    LOGGER.debug("Llama stop_reason: %s", stop_reason)

                # Use bedrock metrics as authoritative source
                metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
                if metrics:
                    prompt_token_count = metrics.get(
                        "inputTokenCount", prompt_token_count
                    )
                    generation_token_count = metrics.get(
                        "outputTokenCount", generation_token_count
                    )
                    LOGGER.debug(
                        "Llama bedrock metrics: input=%d, output=%d",
                        prompt_token_count,
                        generation_token_count,
                    )

            # AttributeError covers payloads whose JSON is not an object (or
            # whose metrics are not a dict), where `.get` does not exist.
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
                LOGGER.debug("Llama aggregator error processing chunk: %s", e)
                continue

        LOGGER.debug(
            "Llama aggregated: %d chars, prompt_tokens=%d, generation_tokens=%d",
            len(generation_text),
            prompt_token_count,
            generation_token_count,
        )

        # Convert to Bedrock usage format using shared converter
        bedrock_usage = usage_converters.llama_to_bedrock_usage(
            {
                "prompt_token_count": prompt_token_count,
                "generation_token_count": generation_token_count,
            }
        )

        # Return Llama's native output format with Bedrock usage
        return {
            "generation": generation_text,
            "prompt_token_count": prompt_token_count,
            "generation_token_count": generation_token_count,
            "stop_reason": stop_reason,
            "usage": bedrock_usage,
        }
@@ -0,0 +1,118 @@
1
+ """Mistral (Pixtral) chunk aggregator."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict, List
6
+
7
+ from .. import usage_converters
8
+ from .base import ChunkAggregator
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
+
13
class MistralAggregator(ChunkAggregator):
    """
    Aggregator for Mistral (Pixtral) streaming format.

    Returns Mistral's native OpenAI-like output format with Bedrock usage.

    Mistral chunk structure (OpenAI-compatible):
    - choices[0].message.content: Generated text
    - choices[0].stop_reason: Stop reason
    - usage: Token usage in last chunk (prompt_tokens, completion_tokens, total_tokens)
    - amazon-bedrock-invocationMetrics: Bedrock metrics
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Aggregate Mistral chunks into native Mistral output with Bedrock usage.

        Args:
            items: Stream events. Only events that have a "chunk" key are
                used; the JSON document under item["chunk"]["bytes"] is parsed.

        Returns:
            A "chat.completion"-shaped dict with a single assistant choice that
            holds the concatenated content and the last seen stop reason, plus
            "usage" produced by usage_converters.openai_to_bedrock_usage.

        Chunks that cannot be parsed or do not have the expected structure are
        logged at debug level and skipped; they never abort the aggregation.
        """
        LOGGER.debug("Mistral aggregator processing %d items", len(items))

        content = ""
        stop_reason = None
        input_tokens = 0
        output_tokens = 0
        model_id = None
        completion_id = None

        for item in items:
            if "chunk" not in item:
                continue

            try:
                chunk_data = json.loads(item["chunk"]["bytes"])

                # Extract model ID and completion ID from the first chunk that
                # carries them; later values are ignored
                if model_id is None and "model" in chunk_data:
                    model_id = chunk_data["model"]
                if completion_id is None and "id" in chunk_data:
                    completion_id = chunk_data["id"]

                # Extract content from choices (only the first choice is read)
                if "choices" in chunk_data and chunk_data["choices"]:
                    choice = chunk_data["choices"][0]

                    # Extract message content
                    if "message" in choice and choice["message"]:
                        message_content = choice["message"].get("content")
                        if message_content:
                            content += message_content

                    # Extract stop reason
                    if "stop_reason" in choice and choice["stop_reason"]:
                        stop_reason = choice["stop_reason"]
                        LOGGER.debug("Mistral stop_reason: %s", stop_reason)

                # Extract usage from last chunk
                if "usage" in chunk_data and chunk_data["usage"]:
                    usage = chunk_data["usage"]
                    if "prompt_tokens" in usage:
                        input_tokens = usage["prompt_tokens"]
                    if "completion_tokens" in usage:
                        output_tokens = usage["completion_tokens"]
                    LOGGER.debug(
                        "Mistral usage: prompt=%d, completion=%d",
                        input_tokens,
                        output_tokens,
                    )

                # Use bedrock metrics as authoritative source
                metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
                if metrics:
                    input_tokens = metrics.get("inputTokenCount", input_tokens)
                    output_tokens = metrics.get("outputTokenCount", output_tokens)
                    LOGGER.debug(
                        "Mistral bedrock metrics: input=%d, output=%d",
                        input_tokens,
                        output_tokens,
                    )

            # AttributeError covers payloads where a value expected to be a
            # dict (chunk, message, metrics) is another JSON type without `.get`.
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
                LOGGER.debug("Mistral aggregator error processing chunk: %s", e)
                continue

        LOGGER.debug(
            "Mistral aggregated: %d chars, input_tokens=%d, output_tokens=%d",
            len(content),
            input_tokens,
            output_tokens,
        )

        # Convert to Bedrock usage format using shared converter
        bedrock_usage = usage_converters.openai_to_bedrock_usage(
            {"prompt_tokens": input_tokens, "completion_tokens": output_tokens}
        )

        # Return Mistral's native OpenAI-like format with Bedrock usage only
        return {
            "id": completion_id,
            "object": "chat.completion",
            "model": model_id,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": content},
                    "stop_reason": stop_reason,
                }
            ],
            "usage": bedrock_usage,
        }
@@ -0,0 +1,99 @@
1
+ """Amazon Nova chunk aggregator."""
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict, List
6
+
7
+ from .. import usage_converters
8
+ from .base import ChunkAggregator
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
+
13
class NovaAggregator(ChunkAggregator):
    """
    Aggregator for Amazon Nova streaming format.

    Returns Nova's native output format with Bedrock usage.

    Nova chunk structure uses camelCase:
    - messageStart: Contains role information
    - contentBlockDelta: Contains text in delta.text
    - contentBlockStop: End of content block
    - messageStop: Contains stopReason
    - metadata: Contains usage information with inputTokens/outputTokens
    """

    def aggregate(self, items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Aggregate Nova chunks into native Nova output structure with Bedrock usage.

        Args:
            items: Stream events. Only events that have a "chunk" key are
                used; the JSON document under item["chunk"]["bytes"] is parsed.

        Returns:
            A dict with "output" (a single assistant message holding the
            concatenated text), "stopReason" and "usage", where "usage" is the
            result of usage_converters.nova_to_bedrock_usage.

        Chunks that cannot be parsed or do not have the expected structure are
        logged at debug level and skipped; they never abort the aggregation.
        """
        LOGGER.debug("Nova aggregator processing %d items", len(items))

        output_text = ""
        stop_reason = None
        input_tokens = 0
        output_tokens = 0

        for item in items:
            if "chunk" not in item:
                continue

            try:
                chunk_data = json.loads(item["chunk"]["bytes"])

                # A chunk is handled as exactly one of: text delta, message
                # stop, or metadata (checked in that order).
                if "contentBlockDelta" in chunk_data:
                    delta = chunk_data["contentBlockDelta"].get("delta", {})
                    if "text" in delta:
                        output_text += delta["text"]

                elif "messageStop" in chunk_data:
                    stop_data = chunk_data["messageStop"]
                    if "stopReason" in stop_data:
                        stop_reason = stop_data["stopReason"]
                        LOGGER.debug("Nova stop_reason: %s", stop_reason)

                elif "metadata" in chunk_data:
                    if "usage" in chunk_data["metadata"]:
                        metadata_usage = chunk_data["metadata"]["usage"]
                        input_tokens = metadata_usage.get("inputTokens", 0)
                        output_tokens = metadata_usage.get("outputTokens", 0)
                        LOGGER.debug(
                            "Nova metadata usage: input=%d, output=%d",
                            input_tokens,
                            output_tokens,
                        )

                # Use bedrock invocation metrics as authoritative source
                metrics = chunk_data.get("amazon-bedrock-invocationMetrics", {})
                if metrics:
                    input_tokens = metrics.get("inputTokenCount", input_tokens)
                    output_tokens = metrics.get("outputTokenCount", output_tokens)
                    LOGGER.debug(
                        "Nova bedrock metrics: input=%d, output=%d",
                        input_tokens,
                        output_tokens,
                    )

            # AttributeError covers payloads where a value expected to be a
            # dict (chunk, contentBlockDelta, usage, metrics) has no `.get`.
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
                LOGGER.debug("Nova aggregator error processing chunk: %s", e)
                continue

        LOGGER.debug(
            "Nova aggregated: %d chars, input_tokens=%d, output_tokens=%d",
            len(output_text),
            input_tokens,
            output_tokens,
        )

        # Convert to Bedrock usage format using shared converter
        bedrock_usage = usage_converters.nova_to_bedrock_usage(
            {"inputTokens": input_tokens, "outputTokens": output_tokens}
        )

        # Return Nova's native output format with Bedrock usage
        return {
            "output": {
                "message": {"role": "assistant", "content": [{"text": output_text}]}
            },
            "stopReason": stop_reason,
            "usage": bedrock_usage,
        }
@@ -0,0 +1,178 @@
1
+ import json
2
+ import logging
3
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
4
+ from typing_extensions import override
5
+
6
+ import opik
7
+ import opik.dict_utils as dict_utils
8
+ from opik import llm_usage
9
+ from opik.api_objects import span
10
+ from opik.decorator import arguments_helpers, base_track_decorator
11
+
12
+ from .. import types
13
+ from . import stream_wrappers, usage_extraction, response_types
14
+
15
+ import botocore.response
16
+ import botocore.eventstream
17
+
18
+ LOGGER = logging.getLogger(__name__)
19
+
20
+ # Keys to extract from kwargs for input logging
21
+ KWARGS_KEYS_TO_LOG_AS_INPUTS = ["body", "modelId"]
22
+ # Keys to extract from response for output logging
23
+ RESPONSE_KEYS_TO_LOG_AS_OUTPUTS = ["body"]
24
+
25
+
26
class BedrockInvokeModelDecorator(base_track_decorator.BaseTrackDecorator):
    """
    Track decorator for the AWS bedrock client functions `invoke_model` and
    `invoke_model_with_response_stream`.

    It customizes how call arguments and responses are converted into span
    data, and it overrides _streams_handler() so that bedrock's response body
    objects are wrapped before they are handed back to the caller.
    """

    @override
    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        """Build the span start parameters from the call's keyword arguments.

        The "body" and "modelId" arguments become the span input; every other
        keyword argument goes to the span metadata.
        """
        assert (
            kwargs is not None
        ), "Expected kwargs to be not None in BedrockRuntime.Client.invoke_model(**kwargs)"

        span_name = func.__name__ if track_options.name is None else track_options.name

        # Log the request body as parsed JSON rather than as the serialized
        # payload. The caller's kwargs are left untouched.
        parsed_body = json.loads(kwargs.get("body", "{}"))
        loggable_kwargs = kwargs.copy()
        loggable_kwargs["body"] = parsed_body

        span_input, span_metadata = dict_utils.split_dict_by_keys(
            loggable_kwargs, KWARGS_KEYS_TO_LOG_AS_INPUTS
        )
        span_metadata["created_from"] = "bedrock"

        return arguments_helpers.StartSpanParameters(
            name=span_name,
            input=span_input,
            type=track_options.type,
            tags=["bedrock", "invoke_model"],
            metadata=span_metadata,
            project_name=track_options.project_name,
            model=kwargs.get("modelId", None),
            provider=opik.LLMProvider.BEDROCK,
        )

    @override
    def _end_span_inputs_preprocessor(
        self,
        output: Any,
        capture_output: bool,
        current_span_data: span.SpanData,
    ) -> arguments_helpers.EndSpanParameters:
        """Build the span end parameters from the (possibly aggregated) response."""
        if isinstance(output, response_types.BedrockAggregatedResponse):
            # Aggregated streaming response: usage is already in Bedrock format
            # and the native response is logged under "body".
            aggregated_usage = llm_usage.build_opik_usage(
                provider=opik.LLMProvider.BEDROCK, usage=output.usage
            )
            return arguments_helpers.EndSpanParameters(
                output=output.to_output_format(),
                provider=opik.LLMProvider.BEDROCK,
                usage=aggregated_usage,
                metadata=output.to_metadata_format(),
            )

        # Regular non-streaming response (dict): "body" is the output, the
        # remaining keys are metadata.
        logged_output, logged_metadata = dict_utils.split_dict_by_keys(
            output, RESPONSE_KEYS_TO_LOG_AS_OUTPUTS
        )
        subprovider = usage_extraction.extract_subprovider_from_model_id(
            cast(str, current_span_data.model)
        )
        extracted_usage = usage_extraction.try_extract_usage_from_bedrock_response(  # type: ignore
            subprovider, logged_output
        )

        return arguments_helpers.EndSpanParameters(
            output=logged_output,
            provider=opik.LLMProvider.BEDROCK,
            usage=extracted_usage,
            metadata=logged_metadata,
        )

    @override
    def _streams_handler(  # type: ignore
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], Any]],
    ) -> Union[
        types.InvokeModelOutput,
        None,
    ]:
        """Wrap bedrock response bodies so the span is finished when they are consumed.

        Returns the response with tracking attached when its "body" is a
        StreamingBody, or an EventStream while an aggregator is configured.
        Returns None when no such body is found.
        """
        response_is_dict_with_body = isinstance(output, dict) and "body" in output

        # Despite the name, StreamingBody is not a stream in traditional LLM provider sense (response chunks).
        # It's an interface to a stream of bytes representing the response body.
        if response_is_dict_with_body and isinstance(
            output["body"], botocore.response.StreamingBody
        ):
            span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()
            return stream_wrappers.wrap_invoke_model_response(
                output=output,
                span_to_end=span_to_end,
                trace_to_end=trace_to_end,
                finally_callback=self._after_call,
            )

        # Without an aggregator the decorated function is not expected to
        # return a generator, so there is nothing else to wrap.
        if generations_aggregator is None:
            return None

        if response_is_dict_with_body and isinstance(
            output["body"], botocore.eventstream.EventStream
        ):
            span_to_end, trace_to_end = base_track_decorator.pop_end_candidates()

            # Swap the event stream for a tracking generator in place.
            output["body"] = (
                stream_wrappers.wrap_invoke_model_with_response_stream_response(
                    stream=output["body"],
                    capture_output=capture_output,
                    span_to_end=span_to_end,
                    trace_to_end=trace_to_end,
                    generations_aggregator=generations_aggregator,
                    response_metadata=output["ResponseMetadata"],
                    finally_callback=self._after_call,
                )
            )
            return cast(types.InvokeModelWithResponseStreamOutput, output)

        # No stream found in the response.
        return None
@@ -0,0 +1,34 @@
1
+ """Response types for Bedrock invoke_model operations."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict
5
+
6
+
7
@dataclass
class BedrockAggregatedResponse:
    """
    Aggregated result of an invoke_model_with_response_stream call.

    Holds the response assembled from the streamed chunks in the provider's
    native structure, together with usage in the standardized Bedrock format.
    """

    # Response in the provider's native structure
    native_response: Dict[str, Any]

    # Usage in the standardized Bedrock format: {inputTokens, outputTokens, totalTokens}
    usage: Dict[str, Any]

    # Response metadata from Bedrock
    response_metadata: Dict[str, Any]

    def to_output_format(self) -> Dict[str, Any]:
        """Return the span output: the native response stored under "body"."""
        span_output: Dict[str, Any] = {}
        span_output["body"] = self.native_response
        return span_output

    def to_metadata_format(self) -> Dict[str, Any]:
        """Return the span metadata: a "created_from" marker plus the response metadata.

        Keys present in the response metadata take precedence over the marker.
        """
        span_metadata: Dict[str, Any] = {"created_from": "bedrock"}
        span_metadata.update(self.response_metadata)
        return span_metadata
@@ -0,0 +1,122 @@
1
+ import logging
2
+ import json
3
+ from typing import Optional, Callable, List, Any, Dict, Generator
4
+
5
+ import botocore.response
6
+ import functools
7
+
8
+ import opik.api_objects.span as span
9
+ import opik.api_objects.trace as trace
10
+ from opik.types import ErrorInfoDict
11
+ from opik.decorator import generator_wrappers, error_info_collector
12
+ from .. import types
13
+
14
+ import botocore.eventstream
15
+
16
+ LOGGER = logging.getLogger(__name__)
17
+
18
+
19
+ __original_streaming_body_read = botocore.response.StreamingBody.read
20
+
21
+
22
def wrap_invoke_model_response(
    output: types.InvokeModelOutput,
    span_to_end: span.SpanData,
    trace_to_end: Optional[trace.TraceData],
    finally_callback: generator_wrappers.FinishGeneratorCallback,
) -> types.InvokeModelOutput:
    """Arrange for the span/trace to be finished when the response body is read.

    `botocore.response.StreamingBody.read` is replaced class-wide by a wrapper,
    and the body of this response is marked as tracked. The first `read()` on a
    tracked body reports the parsed JSON body (or an empty body on failure) to
    `finally_callback` and removes the mark. Bodies that are not marked are
    read exactly as before.

    Args:
        output: The `invoke_model` response; must contain "ResponseMetadata"
            and "body".
        span_to_end: Span to finish once the body has been read.
        trace_to_end: Trace to finish once the body has been read, if any.
        finally_callback: Callback that receives the output and error info.

    Returns:
        The same `output` object that was passed in.
    """
    response_metadata = output["ResponseMetadata"]
    streaming_body = output["body"]

    # Everything the wrapper needs for THIS response. It is stored on the body
    # itself because the class-level patch below is replaced on every call, so
    # the closure alone would always describe the most recent invoke_model.
    tracking_context = (response_metadata, span_to_end, trace_to_end, finally_callback)

    @functools.wraps(__original_streaming_body_read)
    def wrapped_read(self: botocore.response.StreamingBody, *args, **kwargs):  # type: ignore
        # The patch applies to every StreamingBody in the process. Untracked
        # bodies must get the real result and the real exceptions, so the
        # check happens here and not in a `finally` (a `return` there would
        # replace the return value and swallow any exception).
        if not hasattr(self, "opik_tracked_instance"):
            return __original_streaming_body_read(self, *args, **kwargs)

        # Fall back to the closure values for bodies that were marked as
        # tracked without a per-instance context.
        body_metadata, body_span, body_trace, body_callback = getattr(
            self, "opik_tracking_context", tracking_context
        )

        error_info: Optional[ErrorInfoDict] = None
        result = None
        try:
            result = __original_streaming_body_read(self, *args, **kwargs)
            return result
        except Exception as exception:
            LOGGER.debug(
                "Exception raised from botocore.response.StreamingBody: %s",
                str(exception),
                exc_info=True,
            )
            error_info = error_info_collector.collect(exception)
            raise
        finally:
            # Only the first read of a tracked body is reported.
            delattr(self, "opik_tracked_instance")
            if hasattr(self, "opik_tracking_context"):
                delattr(self, "opik_tracking_context")

            if error_info is None and result is not None:
                try:
                    parsed_body = json.loads(result)
                    tracked_output = {
                        "body": parsed_body,
                        "ResponseMetadata": body_metadata,
                    }
                    LOGGER.debug(
                        "Successfully parsed response body with keys: %s",
                        list(parsed_body.keys()),
                    )
                except (json.JSONDecodeError, TypeError) as e:
                    LOGGER.debug("Failed to parse response body as JSON: %s", e)
                    tracked_output = {"body": {}, "ResponseMetadata": body_metadata}
            else:
                LOGGER.debug("Error occurred or result is None, using empty body")
                tracked_output = {"body": {}, "ResponseMetadata": body_metadata}

            body_callback(
                output=tracked_output,
                error_info=error_info,
                generators_span_to_end=body_span,
                generators_trace_to_end=body_trace,
                capture_output=True,
            )

    botocore.response.StreamingBody.read = wrapped_read
    streaming_body.opik_tracked_instance = True
    streaming_body.opik_tracking_context = tracking_context

    return output
82
+
83
+
84
def wrap_invoke_model_with_response_stream_response(
    stream: botocore.eventstream.EventStream,
    capture_output: bool,
    span_to_end: span.SpanData,
    trace_to_end: Optional[trace.TraceData],
    generations_aggregator: Callable[[List[Any]], Any],
    response_metadata: Dict[str, Any],
    finally_callback: generator_wrappers.FinishGeneratorCallback,
) -> Generator[Any, None, None]:
    """Yield the events of `stream` unchanged and report the result when done.

    Every event is collected while it is passed through to the consumer. When
    iteration ends, the collected events are aggregated, `response_metadata` is
    attached to the aggregated output, and `finally_callback` is invoked. If
    the stream raises, the error is recorded, no output is reported, and the
    exception is re-raised to the consumer.

    Args:
        stream: Event stream of the bedrock response.
        capture_output: Forwarded to `finally_callback`.
        span_to_end: Span to finish once the stream is consumed.
        trace_to_end: Trace to finish once the stream is consumed, if any.
        generations_aggregator: Turns the collected events into one output
            object; `response_metadata` is assigned to it as an attribute.
        response_metadata: "ResponseMetadata" of the bedrock response.
        finally_callback: Callback that receives the output and error info.

    Yields:
        The events of `stream`, in order.
    """
    items: List[Dict[str, Any]] = []
    error_info: Optional[ErrorInfoDict] = None

    try:
        for item in stream:
            items.append(item)

            yield item
    except Exception as exception:
        LOGGER.debug(
            "Exception raised from botocore.eventstream.EventStream: %s",
            str(exception),
            exc_info=True,
        )
        error_info = error_info_collector.collect(exception)
        raise
    finally:
        output = None
        if error_info is None:
            # A failure while aggregating must neither surface in the
            # consumer's iteration nor prevent finally_callback from running,
            # otherwise the span/trace would never be finished.
            try:
                output = generations_aggregator(items)
                output.response_metadata = response_metadata
            except Exception:
                LOGGER.warning(
                    "Failed to aggregate bedrock invoke_model stream chunks",
                    exc_info=True,
                )
                output = None

        finally_callback(
            output=output,
            error_info=error_info,
            generators_span_to_end=span_to_end,
            generators_trace_to_end=trace_to_end,
            capture_output=capture_output,
        )