opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592):
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,87 @@
1
+ """Shared usage conversion utilities for Bedrock models."""
2
+
3
+ from typing import Any, Dict
4
+
5
+
6
def anthropic_to_bedrock_usage(anthropic_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert Anthropic-style usage schema into Bedrock-style usage schema.

    Anthropic usage keys (snake_case):
    - input_tokens
    - output_tokens
    - cache_creation_input_tokens
    - cache_read_input_tokens

    Bedrock usage keys (camelCase):
    - inputTokens
    - outputTokens
    - cacheWriteInputTokens
    - cacheReadInputTokens
    - totalTokens (computed here as inputTokens + outputTokens)

    A key that is missing or explicitly set to ``None`` is counted as 0, so
    the total can always be computed.

    Args:
        anthropic_usage: Usage mapping in Anthropic's snake_case layout.

    Returns:
        A new dict with the five Bedrock-style keys listed above.
    """
    # `or 0` (instead of a `.get` default) also covers keys that are present
    # but hold None, which would otherwise break the addition below.
    input_tokens = anthropic_usage.get("input_tokens") or 0
    output_tokens = anthropic_usage.get("output_tokens") or 0
    cache_write = anthropic_usage.get("cache_creation_input_tokens") or 0
    cache_read = anthropic_usage.get("cache_read_input_tokens") or 0

    return {
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "cacheWriteInputTokens": cache_write,
        "cacheReadInputTokens": cache_read,
        "totalTokens": input_tokens + output_tokens,
    }
35
+
36
+
37
def llama_to_bedrock_usage(llama_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert Llama-style usage schema into Bedrock-style usage schema.

    Llama usage keys:
    - prompt_token_count
    - generation_token_count

    A key that is missing or explicitly set to ``None`` is counted as 0.

    Args:
        llama_usage: Mapping holding the Llama token counters. Other keys in
            the mapping are ignored.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``
        (the sum of the two).
    """
    # `or 0` also covers keys present with a None value, keeping the sum safe.
    input_tokens = llama_usage.get("prompt_token_count") or 0
    output_tokens = llama_usage.get("generation_token_count") or 0

    return {
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "totalTokens": input_tokens + output_tokens,
    }
53
+
54
+
55
def openai_to_bedrock_usage(openai_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert OpenAI-style usage schema into Bedrock-style usage schema.
    Used by Mistral/Pixtral models.

    OpenAI usage keys:
    - prompt_tokens
    - completion_tokens
    - total_tokens (not read; the total is recomputed from the two counters)

    A key that is missing or explicitly set to ``None`` is counted as 0.

    Args:
        openai_usage: Usage mapping in the OpenAI layout.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``
        (the sum of the two).
    """
    # `or 0` also covers keys present with a None value, keeping the sum safe.
    input_tokens = openai_usage.get("prompt_tokens") or 0
    output_tokens = openai_usage.get("completion_tokens") or 0

    return {
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "totalTokens": input_tokens + output_tokens,
    }
73
+
74
+
75
def nova_to_bedrock_usage(nova_usage: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize Nova-style usage, which already uses Bedrock key names.

    Nova reports ``inputTokens`` and ``outputTokens`` directly. The result is
    not a literal pass-through: only those two counters are copied and
    ``totalTokens`` is recomputed as their sum; any other keys are dropped.

    A key that is missing or explicitly set to ``None`` is counted as 0.

    Args:
        nova_usage: Usage mapping using Bedrock camelCase keys.

    Returns:
        A new dict with ``inputTokens``, ``outputTokens`` and ``totalTokens``.
    """
    # `or 0` also covers keys present with a None value, keeping the sum safe.
    input_tokens = nova_usage.get("inputTokens") or 0
    output_tokens = nova_usage.get("outputTokens") or 0

    return {
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "totalTokens": input_tokens + output_tokens,
    }
@@ -0,0 +1,108 @@
1
+ from opik import llm_usage
2
+ from typing import Dict, Any, Optional
3
+ import logging
4
+ import opik._logging
5
+
6
+ from . import usage_converters
7
+
8
+ LOGGER = logging.getLogger(__name__)
9
+
10
+
11
def extract_subprovider_from_model_id(model_id: str) -> str:
    """
    Extracts the provider name from a Bedrock modelId.

    Handles plain model ids, cross-region inference profile ids (a geography
    prefix before the provider) and ARN-form ids, where only the resource id
    after the last "/" is inspected.

    Examples:
        ai21.j2-mid-v1 -> ai21
        amazon.nova-lite-v1:0 -> amazon
        anthropic.claude-v2:1 -> anthropic
        us.meta.llama3-1-70b-instruct -> meta
        arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2:1 -> anthropic

    Args:
        model_id: The ``modelId`` value passed to Bedrock.

    Returns:
        The first dot-separated segment of the resource id, or the second one
        when the first is a known geography prefix and a second segment exists.
    """
    # ARNs keep the actual model / inference-profile id after the final "/".
    resource_id = model_id.rsplit("/", 1)[-1]
    parts = resource_id.split(".")

    # The length guard avoids an IndexError for a bare prefix such as "us".
    if len(parts) > 1 and parts[0] in {
        "us",
        "eu",
        "apac",
        "global",
        "us-gov",
        "jp",
        "au",
    }:
        return parts[1]

    return parts[0]
27
+
28
+
29
def try_extract_usage_from_bedrock_response(
    subprovider: str, response: Dict[str, Any]
) -> Optional[llm_usage.OpikUsage]:
    """
    Best-effort extraction of token usage from an invoke_model response.

    Since Bedrock's invoke_model response format is not standardized, the
    location and layout of the usage data depend on the subprovider:

    - "anthropic": ``response["body"]["usage"]`` in Anthropic layout (required;
      a missing key ends up in the warning path below).
    - "meta": counters sit directly in ``response["body"]``.
    - "mistral": ``response["body"]["usage"]`` in OpenAI-like layout.
    - "amazon": ``response["body"]["usage"]`` already in Bedrock layout.

    When the subprovider branch finds nothing (or the subprovider is not one
    of the above), ``response["body"]["usage"]`` is handed to the generic
    unknown-provider builder.

    Args:
        subprovider: Provider name, e.g. as derived from the Bedrock modelId.
        response: The invoke_model response with an already-parsed "body".

    Returns:
        The extracted usage, or None when no usage was found or extraction
        failed. Never raises: any exception is logged and swallowed.
    """
    try:
        LOGGER.debug("Extracting usage for subprovider: %s", subprovider)

        if subprovider == "anthropic":
            usage_dict = response["body"]["usage"]
            bedrock_formatted_usage = usage_converters.anthropic_to_bedrock_usage(
                usage_dict
            )
            opik_usage = llm_usage.OpikUsage.from_bedrock_dict(bedrock_formatted_usage)
            LOGGER.debug("Anthropic usage extracted: %s", bedrock_formatted_usage)
            return opik_usage

        elif subprovider == "meta":
            # Llama models have usage fields directly in body (not in body.usage)
            body = response.get("body", {})
            if "prompt_token_count" in body or "generation_token_count" in body:
                bedrock_formatted_usage = usage_converters.llama_to_bedrock_usage(body)
                opik_usage = llm_usage.OpikUsage.from_bedrock_dict(
                    bedrock_formatted_usage
                )
                LOGGER.debug("Llama usage extracted: %s", bedrock_formatted_usage)
                return opik_usage

        elif subprovider == "mistral":
            # Mistral/Pixtral models use OpenAI-like usage format
            usage_dict = response["body"].get("usage", {})
            if usage_dict:
                bedrock_formatted_usage = usage_converters.openai_to_bedrock_usage(
                    usage_dict
                )
                opik_usage = llm_usage.OpikUsage.from_bedrock_dict(
                    bedrock_formatted_usage
                )
                LOGGER.debug("Mistral usage extracted: %s", bedrock_formatted_usage)
                return opik_usage

        elif subprovider == "amazon":
            # Nova models already use Bedrock format
            usage_dict = response["body"].get("usage", {})
            if usage_dict:
                bedrock_formatted_usage = usage_converters.nova_to_bedrock_usage(
                    usage_dict
                )
                opik_usage = llm_usage.OpikUsage.from_bedrock_dict(
                    bedrock_formatted_usage
                )
                LOGGER.debug("Nova usage extracted: %s", bedrock_formatted_usage)
                return opik_usage

        # Fallback: This is the default case, but it's not guaranteed to find the usage here for all possible subproviders
        presumably_usage_dict = response["body"].get("usage", {})
        if presumably_usage_dict:
            # If it's already in Bedrock's format, we are good (tested with amazon.nova-pro-v1:0, it has bedrock usage format)
            # If it's not, but it's in some other format that Opik supports, we will at least extract
            # completion and prompt tokens count so that backend could calculate cost based on them.
            opik_usage = llm_usage.build_opik_usage_from_unknown_provider(
                presumably_usage_dict
            )
            LOGGER.debug("Fallback usage extracted: %s", presumably_usage_dict)
            return opik_usage

        LOGGER.debug("No usage found in response body")
        return None

    except Exception as e:
        # Usage extraction is auxiliary: a failure here must not break the
        # traced call, so it is reported and None is returned.
        LOGGER.debug("Exception during usage extraction: %s", e)
        # NOTE(review): the message embeds the whole response; if
        # log_once_at_level de-duplicates by message text, distinct responses
        # would each produce a warning — confirm.
        opik._logging.log_once_at_level(
            logging.WARNING,
            f"Failed to extract usage from Bedrock's invoke_model response: {response}. It may be because this model response format is currently not supported: please create an issue at https://github.com/comet-ml/opik/issues and we will add support for it.",
            LOGGER,
        )
        return None
@@ -1,6 +1,12 @@
1
1
  from typing import Optional, TYPE_CHECKING
2
2
 
3
- from . import chunks_aggregator, converse_decorator, invoke_agent_decorator
3
+ from . import invoke_agent_decorator
4
+ from .converse import chunks_aggregator as converse_chunks_aggregator
5
+ from .converse import converse_decorator
6
+
7
+ from .invoke_model import invoke_model_decorator
8
+ from .invoke_model import chunks_aggregator as invoke_model_chunks_aggregator
9
+
4
10
 
5
11
  if TYPE_CHECKING:
6
12
  from mypy_boto3_bedrock_runtime.client import BedrockRuntimeClient
@@ -12,9 +18,15 @@ def track_bedrock(
12
18
  ) -> "BedrockRuntimeClient":
13
19
  """Adds Opik tracking to an AWS Bedrock client.
14
20
 
15
- Tracks calls to `converse()` and `converse_stream()` methods
21
+ Tracks calls to `converse()`, `converse_stream()`, `invoke_model()`, and `invoke_model_with_response_stream()` methods.
16
22
  Can be used within other Opik-tracked functions.
17
23
 
24
+ Supported Model subproviders for InvokeModel API (both streaming and non-streaming):
25
+ - **Anthropic** (Claude)
26
+ - **Amazon** (Nova)
27
+ - **Meta** (Llama)
28
+ - **Mistral** (Pixtral)
29
+
18
30
  Args:
19
31
  client: An instance of an AWS Bedrock client (botocore.client.BedrockRuntime or botocore.client.AgentsforBedrockRuntime).
20
32
  project_name: The name of the project to log data.
@@ -25,6 +37,7 @@ def track_bedrock(
25
37
 
26
38
  decorator_for_converse = converse_decorator.BedrockConverseDecorator()
27
39
  decorator_for_invoke_agent = invoke_agent_decorator.BedrockInvokeAgentDecorator()
40
+ decorator_for_invoke_model = invoke_model_decorator.BedrockInvokeModelDecorator()
28
41
 
29
42
  if hasattr(client, "invoke_agent") and not hasattr(
30
43
  client.invoke_agent, "opik_tracked"
@@ -33,7 +46,7 @@ def track_bedrock(
33
46
  type="llm",
34
47
  name="bedrock_invoke_agent",
35
48
  project_name=project_name,
36
- generations_aggregator=chunks_aggregator.aggregate_invoke_agent_chunks,
49
+ generations_aggregator=converse_chunks_aggregator.aggregate_invoke_agent_chunks,
37
50
  )
38
51
  tracked_invoke_agent = wrapper(client.invoke_agent)
39
52
  client.invoke_agent = tracked_invoke_agent
@@ -54,9 +67,34 @@ def track_bedrock(
54
67
  type="llm",
55
68
  name="bedrock_converse_stream",
56
69
  project_name=project_name,
57
- generations_aggregator=chunks_aggregator.aggregate_converse_stream_chunks,
70
+ generations_aggregator=converse_chunks_aggregator.aggregate_converse_stream_chunks,
58
71
  )
59
72
  tracked_converse_stream = stream_wrapper(client.converse_stream)
60
73
  client.converse_stream = tracked_converse_stream
61
74
 
75
+ if hasattr(client, "invoke_model") and not hasattr(
76
+ client.invoke_model, "opik_tracked"
77
+ ):
78
+ wrapper = decorator_for_invoke_model.track(
79
+ type="llm",
80
+ name="bedrock_invoke_model",
81
+ project_name=project_name,
82
+ )
83
+ tracked_invoke_model = wrapper(client.invoke_model)
84
+ client.invoke_model = tracked_invoke_model
85
+
86
+ if hasattr(client, "invoke_model_with_response_stream") and not hasattr(
87
+ client.invoke_model_with_response_stream, "opik_tracked"
88
+ ):
89
+ stream_wrapper = decorator_for_invoke_model.track(
90
+ type="llm",
91
+ name="bedrock_invoke_model_stream",
92
+ project_name=project_name,
93
+ generations_aggregator=invoke_model_chunks_aggregator.aggregate_chunks_to_dataclass,
94
+ )
95
+ tracked_invoke_model_stream = stream_wrapper(
96
+ client.invoke_model_with_response_stream
97
+ )
98
+ client.invoke_model_with_response_stream = tracked_invoke_model_stream
99
+
62
100
  return client
@@ -0,0 +1,19 @@
1
+ from typing import Any, Dict, TypedDict
2
+
3
+ from botocore import eventstream
4
+ import botocore.response
5
+
6
+
7
class ConverseStreamOutput(TypedDict):
    """Typed mapping holding an event stream plus response metadata.

    NOTE(review): presumably mirrors the response of the Bedrock
    ``converse_stream`` call — confirm against the caller.
    """

    # Event stream object carried under the "stream" key.
    stream: eventstream.EventStream
    # Free-form metadata mapping carried under the "ResponseMetadata" key.
    ResponseMetadata: Dict[str, Any]
10
+
11
+
12
class InvokeModelOutput(TypedDict):
    """Typed mapping holding a streaming body plus response metadata.

    NOTE(review): presumably mirrors the response of the Bedrock
    ``invoke_model`` call — confirm against the caller.
    """

    # Body object carried under the "body" key.
    body: botocore.response.StreamingBody
    # Free-form metadata mapping carried under the "ResponseMetadata" key.
    ResponseMetadata: Dict[str, Any]
15
+
16
+
17
class InvokeModelWithResponseStreamOutput(TypedDict):
    """Typed mapping holding an event-stream body plus response metadata.

    NOTE(review): presumably mirrors the response of the Bedrock
    ``invoke_model_with_response_stream`` call — confirm against the caller.
    """

    # Event stream object carried under the "body" key.
    body: eventstream.EventStream
    # Free-form metadata mapping carried under the "ResponseMetadata" key.
    ResponseMetadata: Dict[str, Any]
@@ -13,10 +13,9 @@ from typing import (
13
13
  from typing_extensions import override
14
14
 
15
15
  from opik.decorator import arguments_helpers, base_track_decorator
16
- from opik.types import SpanType, LLMProvider
16
+ from opik.types import SpanType
17
17
  from opik.api_objects import span
18
18
  import opik.jsonable_encoder as jsonable_encoder
19
- import opik.llm_usage as llm_usage
20
19
  import opik.dict_utils as dict_utils
21
20
 
22
21
  LOGGER = logging.getLogger(__name__)
@@ -144,12 +143,6 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
144
143
  input_dict["task"] = task_dict
145
144
  name = f"Task: {task.name}"
146
145
 
147
- elif name == "completion":
148
- metadata["object_type"] = "completion"
149
- input_dict = {"messages": kwargs.get("messages")}
150
- span_type = "llm"
151
- name = "llm call"
152
-
153
146
  return input_dict, name, span_type
154
147
 
155
148
  @override
@@ -161,38 +154,12 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
161
154
  ) -> arguments_helpers.EndSpanParameters:
162
155
  object_type = None
163
156
  metadata = {}
157
+ output_dict = {}
164
158
 
165
159
  if current_span_data and current_span_data.metadata:
166
160
  metadata = current_span_data.metadata
167
161
  object_type = metadata.pop("object_type")
168
162
 
169
- model, provider, output_dict, usage = self._parse_outputs(object_type, output)
170
-
171
- result = arguments_helpers.EndSpanParameters(
172
- output=output_dict,
173
- usage=usage,
174
- metadata=metadata,
175
- model=model,
176
- provider=provider,
177
- )
178
-
179
- return result
180
-
181
- def _parse_outputs(
182
- self,
183
- object_type: Optional[str],
184
- output: Any,
185
- ) -> Tuple[
186
- Optional[str],
187
- Optional[str],
188
- Dict[str, Any],
189
- Optional[llm_usage.OpikUsage],
190
- ]:
191
- model = None
192
- provider = None
193
- usage = None
194
- output_dict = {}
195
-
196
163
  if object_type == "crew":
197
164
  output_dict = jsonable_encoder.encode(output)
198
165
  output_dict.pop("token_usage", None)
@@ -202,23 +169,13 @@ class CrewAITrackDecorator(base_track_decorator.BaseTrackDecorator):
202
169
  output_dict = _encode_dict_and_keep_keys(
203
170
  output, TASK_KWARGS_KEYS_TO_LOG_AS_OUTPUT
204
171
  )
205
- elif object_type == "completion":
206
- output_dict = jsonable_encoder.encode(output)
207
- if output_dict.get("usage", None) is not None:
208
- usage = llm_usage.try_build_opik_usage_or_log_error(
209
- provider=LLMProvider.OPENAI, # even if it's not openai, we know the format is openai-like
210
- usage=output_dict["usage"],
211
- logger=LOGGER,
212
- error_message="Failed to log token usage from CrewAI LLM call",
213
- )
214
- else:
215
- usage = None
216
- model = output_dict.pop("model", None)
217
- provider = (
218
- "openai" if output_dict.get("object") == "chat.completion" else None
219
- )
220
172
 
221
- return model, provider, output_dict, usage
173
+ result = arguments_helpers.EndSpanParameters(
174
+ output=output_dict,
175
+ metadata=metadata,
176
+ )
177
+
178
+ return result
222
179
 
223
180
  @override
224
181
  def _streams_handler(
@@ -1,14 +1,19 @@
1
+ import importlib.metadata
2
+ import logging
1
3
  from typing import Optional
2
4
 
3
5
  import crewai
4
6
 
5
- from . import crewai_decorator
7
+ import opik.semantic_version
6
8
 
7
- __IS_TRACKING_ENABLED = False
9
+ from . import crewai_decorator, patchers
10
+
11
+ LOGGER = logging.getLogger(__name__)
8
12
 
9
13
 
10
14
  def track_crewai(
11
15
  project_name: Optional[str] = None,
16
+ crew: Optional[crewai.Crew] = None,
12
17
  ) -> None:
13
18
  """
14
19
  Tracks CrewAI activities by enabling tracking decorators for various critical methods.
@@ -21,11 +26,8 @@ def track_crewai(
21
26
 
22
27
  Parameters:
23
28
  project_name: The name of the project to associate with the tracking.
29
+ crew: The Crew instance to track. Required for CrewAI v1.0.0+ to properly track LLM calls.
24
30
  """
25
- global __IS_TRACKING_ENABLED
26
- if __IS_TRACKING_ENABLED:
27
- return
28
- __IS_TRACKING_ENABLED = True
29
31
 
30
32
  decorator_factory = crewai_decorator.CrewAITrackDecorator()
31
33
 
@@ -33,12 +35,31 @@ def track_crewai(
33
35
  project_name=project_name,
34
36
  )
35
37
 
36
- import litellm
37
-
38
38
  crewai.Crew.kickoff = crewai_wrapper(crewai.Crew.kickoff)
39
39
  crewai.Crew.kickoff_for_each = crewai_wrapper(crewai.Crew.kickoff_for_each)
40
40
  crewai.Agent.execute_task = crewai_wrapper(crewai.Agent.execute_task)
41
41
  crewai.Task.execute_sync = crewai_wrapper(crewai.Task.execute_sync)
42
- litellm.completion = crewai_wrapper(litellm.completion)
43
42
 
44
- return None
43
+ # Patch LiteLLM functions used by CrewAI
44
+ patchers.patch_litellm_completion(project_name=project_name)
45
+
46
+ # Patch Flow class (v1.0.0+)
47
+ patchers.patch_flow(project_name=project_name)
48
+
49
+ # Patch LLM clients used by CrewAI agents (v1.0.0+)
50
+ if crew is not None and is_crewai_v1():
51
+ patchers.patch_llm_client(crew, project_name)
52
+
53
+
54
def is_crewai_v1() -> bool:
    """
    Report whether the installed CrewAI distribution is version 1.0.0 or newer.

    The version string is read from the installed package metadata and parsed
    as a semantic version before being compared.

    Returns:
        True if CrewAI v1.0.0+ is detected. False otherwise, including when
        the version cannot be looked up, parsed or compared.
    """
    try:
        installed_version = importlib.metadata.version("crewai")
        parsed_version = opik.semantic_version.SemanticVersion.parse(installed_version)
        return parsed_version >= "1.0.0"  # type: ignore
    except Exception:
        # Any failure is treated as "not v1".
        return False
@@ -0,0 +1,5 @@
1
+ from .flow import patch_flow
2
+ from .llm_client import patch_llm_client
3
+ from .litellm_completion import patch_litellm_completion
4
+
5
+ __all__ = ["patch_flow", "patch_llm_client", "patch_litellm_completion"]
@@ -0,0 +1,118 @@
1
+ """
2
+ Patcher for CrewAI Flow class (v1.0.0+).
3
+
4
+ This module patches the Flow class to automatically track flow methods and execution.
5
+ """
6
+
7
+ import functools
8
+ import logging
9
+ from typing import Optional
10
+
11
+ import opik.decorator.tracker as opik_tracker
12
+
13
+ LOGGER = logging.getLogger(__name__)
14
+
15
+
16
def patch_flow(project_name: Optional[str] = None) -> None:
    """
    Install Opik tracking on the CrewAI Flow class.

    Applies both Flow patchers in order: first the ``__init__`` patch that
    tracks registered flow methods, then the ``kickoff_async`` patch.

    If Flow class is not available (CrewAI < v1.0.0), this function does nothing.

    Args:
        project_name: The name of the project to associate with tracking.
    """
    for apply_patch in (_patch_flow_init, _patch_flow_kickoff_async):
        apply_patch(project_name)
27
+
28
+
29
def _patch_flow_init(project_name: Optional[str] = None) -> None:
    """
    Replace ``crewai.Flow.__init__`` with a wrapper that tracks flow methods.

    After the original initializer runs, every entry of the instance's
    ``_methods`` mapping that is not already marked ``opik_tracked`` is swapped
    for its Opik-tracked version. The patch is applied at most once; later
    calls return early, so their ``project_name`` is not used.

    If Flow class is not available (CrewAI < v1.0.0), this function does nothing.

    Args:
        project_name: The name of the project to associate with tracking.
    """
    try:
        import crewai

        if not hasattr(crewai, "Flow"):
            LOGGER.debug("CrewAI Flow class not available, skipping Flow patching")
            return

        # The marker lives on this function object and is set after patching.
        already_patched = hasattr(_patch_flow_init, "_patched")
        if already_patched:
            return

        wrapped_init = crewai.Flow.__init__

        @functools.wraps(wrapped_init)
        def _init_wrapper(self, *args, **kwargs) -> None:  # type: ignore
            wrapped_init(self, *args, **kwargs)

            try:
                registry = getattr(self, "_methods", {})
                # Iterate over a snapshot because entries are reassigned below.
                for name, flow_method in list(registry.items()):
                    if not getattr(flow_method, "opik_tracked", False):
                        track_method = opik_tracker.track(
                            project_name=project_name,
                            tags=["crewai"],
                            metadata={"created_from": "crewai"},
                        )
                        registry[name] = track_method(flow_method)
            except Exception:
                # Instrumentation problems are logged and do not propagate
                # out of Flow construction.
                LOGGER.error(
                    "An error occurred during Opik instrumentation of CrewAI Flow",
                    exc_info=True,
                )

        crewai.Flow.__init__ = _init_wrapper  # type: ignore[assignment]

        setattr(_patch_flow_init, "_patched", True)  # type: ignore[attr-defined]
    except (ImportError, AttributeError):
        LOGGER.debug(
            "CrewAI Flow class not available, skipping Flow patching", exc_info=True
        )
77
+
78
+
79
def _patch_flow_kickoff_async(project_name: Optional[str] = None) -> None:
    """
    Patches CrewAI Flow.kickoff_async to track flow execution.

    The tracked version of ``kickoff_async`` is built once, when the patch is
    installed, and reused for every call. The patch is applied at most once;
    later calls return early, so their ``project_name`` is not used.

    If Flow class is not available (CrewAI < v1.0.0), this function does nothing.

    Args:
        project_name: The name of the project to associate with tracking.
    """
    try:
        import crewai

        if not hasattr(crewai, "Flow"):
            LOGGER.debug(
                "CrewAI Flow class not available, skipping Flow.kickoff_async patching"
            )
            return

        if hasattr(_patch_flow_kickoff_async, "_patched"):
            return

        # We only need to patch the async version of the kickoff method because
        # the sync version calls it internally
        original_kickoff_async = crewai.Flow.kickoff_async

        # Decorate once here rather than on every invocation: the decorator
        # arguments never change between calls.
        tracked_kickoff_async = opik_tracker.track(
            project_name=project_name,
            tags=["crewai"],
            name="Flow.kickoff_async",
            metadata={"created_from": "crewai"},
        )(original_kickoff_async)

        @functools.wraps(original_kickoff_async)
        async def _kickoff_async_wrapper(self, *args, **kwargs):  # type: ignore
            return await tracked_kickoff_async(self, *args, **kwargs)

        crewai.Flow.kickoff_async = _kickoff_async_wrapper  # type: ignore[assignment]

        setattr(_patch_flow_kickoff_async, "_patched", True)  # type: ignore[attr-defined]
    except (ImportError, AttributeError):
        LOGGER.debug(
            "CrewAI Flow class not available, skipping Flow.kickoff_async patching",
            exc_info=True,
        )
@@ -0,0 +1,30 @@
1
+ """
2
+ Patcher for LiteLLM completion functions used by CrewAI.
3
+
4
+ This module patches litellm.completion and litellm.acompletion with Opik tracking.
5
+ CrewAI v0.x uses LiteLLM internally for LLM calls.
6
+ """
7
+
8
+ import logging
9
+ from typing import Optional
10
+
11
+ import litellm
12
+
13
+ import opik.integrations.litellm
14
+
15
+ LOGGER = logging.getLogger(__name__)
16
+
17
+
18
def patch_litellm_completion(project_name: Optional[str] = None) -> None:
    """
    Patches LiteLLM completion functions used by CrewAI.

    Replaces ``litellm.completion`` and ``litellm.acompletion`` with their
    Opik-tracked versions. A function that is already marked ``opik_tracked``
    is left untouched, so calling this more than once does not stack wrappers.

    Args:
        project_name: The name of the project to associate with tracking.
            Not used for a function that is already tracked.
    """
    for function_name in ("completion", "acompletion"):
        current = getattr(litellm, function_name)

        # NOTE(review): assumes the tracked wrapper exposes an `opik_tracked`
        # attribute; without it this check never skips and every call patches.
        if getattr(current, "opik_tracked", False):
            continue

        tracked = opik.integrations.litellm.track_completion(
            project_name=project_name
        )(current)
        setattr(litellm, function_name, tracked)