opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592):
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,207 @@
1
+ """
2
+ Client patchers for CrewAI LLM providers (v1.0.0+).
3
+
4
+ This module handles patching of LLM clients used by CrewAI agents with Opik tracking.
5
+ Each provider has its own patching function that handles missing dependencies gracefully.
6
+ """
7
+
8
+ import logging
9
+ from typing import Any, Optional, TYPE_CHECKING
10
+
11
+ import crewai
12
+
13
+ if TYPE_CHECKING:
14
+ import crewai.llms.providers.openai.completion as openai_completion
15
+ import crewai.llms.providers.anthropic.completion as anthropic_completion
16
+ import crewai.llms.providers.gemini.completion as gemini_completion
17
+ import crewai.llms.providers.bedrock.completion as bedrock_completion
18
+
19
+ LOGGER = logging.getLogger(__name__)
20
+
21
+
22
def patch_llm_client(crew: crewai.Crew, project_name: Optional[str]) -> None:
    """
    Apply Opik tracking to the LLM of every agent in a crew.

    Each agent's ``llm`` attribute is passed, one agent at a time, to
    ``_patch_single_llm_client`` together with the project name.

    Args:
        crew: The Crew instance whose agents' LLMs should be patched.
        project_name: The name of the project to associate with tracking.
    """
    # Generator keeps the original read-then-patch ordering per agent.
    agent_llms = (agent.llm for agent in crew.agents)
    for agent_llm in agent_llms:
        _patch_single_llm_client(agent_llm, project_name)
34
+
35
+
36
def _patch_single_llm_client(llm: Any, project_name: Optional[str]) -> None:
    """
    Dispatch an LLM to the patcher that matches its provider type.

    The provider checks are tried in a fixed order (OpenAI, Anthropic,
    Gemini, Bedrock); only the first matching patcher is invoked. An LLM
    that matches none of them is left untouched.

    Args:
        llm: The CrewAI LLM instance to patch.
        project_name: The name of the project to associate with tracking.
    """
    # Ordered (predicate, patcher) pairs - the first predicate that matches wins.
    provider_patchers = (
        (_is_openai_llm, _patch_openai_client),
        (_is_anthropic_llm, _patch_anthropic_client),
        (_is_gemini_llm, _patch_gemini_client),
        (_is_bedrock_llm, _patch_bedrock_client),
    )

    for matches_provider, apply_patch in provider_patchers:
        if matches_provider(llm):
            apply_patch(llm, project_name)
            return
52
+
53
+
54
+ def _is_openai_llm(llm: Any) -> bool:
55
+ """
56
+ Checks if LLM is an OpenAI provider.
57
+
58
+ Args:
59
+ llm: The CrewAI LLM to check.
60
+
61
+ Returns:
62
+ True if LLM is OpenAI provider, False otherwise.
63
+ """
64
+ try:
65
+ import crewai.llms.providers.openai.completion
66
+
67
+ return isinstance(llm, crewai.llms.providers.openai.completion.OpenAICompletion)
68
+ except ImportError:
69
+ return False
70
+
71
+
72
+ def _is_anthropic_llm(llm: Any) -> bool:
73
+ """
74
+ Checks if LLM is an Anthropic provider.
75
+
76
+ Args:
77
+ llm: The CrewAI LLM to check.
78
+
79
+ Returns:
80
+ True if LLM is Anthropic provider, False otherwise.
81
+ """
82
+ try:
83
+ import crewai.llms.providers.anthropic.completion
84
+
85
+ return isinstance(
86
+ llm, crewai.llms.providers.anthropic.completion.AnthropicCompletion
87
+ )
88
+ except ImportError:
89
+ return False
90
+
91
+
92
+ def _is_gemini_llm(llm: Any) -> bool:
93
+ """
94
+ Checks if LLM is a Gemini provider.
95
+
96
+ Args:
97
+ llm: The CrewAI LLM to check.
98
+
99
+ Returns:
100
+ True if LLM is Gemini provider, False otherwise.
101
+ """
102
+ try:
103
+ import crewai.llms.providers.gemini.completion
104
+
105
+ return isinstance(llm, crewai.llms.providers.gemini.completion.GeminiCompletion)
106
+ except ImportError:
107
+ return False
108
+
109
+
110
+ def _is_bedrock_llm(llm: Any) -> bool:
111
+ """
112
+ Checks if LLM is a Bedrock provider.
113
+
114
+ Args:
115
+ llm: The CrewAI LLM to check.
116
+
117
+ Returns:
118
+ True if LLM is Bedrock provider, False otherwise.
119
+ """
120
+ try:
121
+ import crewai.llms.providers.bedrock.completion
122
+
123
+ return isinstance(
124
+ llm, crewai.llms.providers.bedrock.completion.BedrockCompletion
125
+ )
126
+ except ImportError:
127
+ return False
128
+
129
+
130
def _patch_openai_client(
    llm: "openai_completion.OpenAICompletion", project_name: Optional[str]
) -> None:
    """
    Replace the LLM's OpenAI client with an Opik-tracked one.

    Any failure (including a failing import of the Opik OpenAI integration)
    is logged as a warning with traceback and otherwise ignored, so the LLM
    keeps its current client.

    Args:
        llm: The CrewAI LLM instance with OpenAI client to patch.
        project_name: The name of the project to associate with tracking.
    """
    try:
        import opik.integrations.openai as opik_openai

        tracked_client = opik_openai.track_openai(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        # Best-effort: tracking problems must not break the crew itself.
        LOGGER.warning("Failed to track OpenAI client for LLM", exc_info=True)
148
+
149
+
150
def _patch_anthropic_client(
    llm: "anthropic_completion.AnthropicCompletion", project_name: Optional[str]
) -> None:
    """
    Replace the LLM's Anthropic client with an Opik-tracked one.

    Any failure (including a failing import of the Opik Anthropic
    integration) is logged as a warning with traceback and otherwise
    ignored, so the LLM keeps its current client.

    Args:
        llm: The CrewAI LLM instance with Anthropic client to patch.
        project_name: The name of the project to associate with tracking.
    """
    try:
        import opik.integrations.anthropic as opik_anthropic

        tracked_client = opik_anthropic.track_anthropic(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        # Best-effort: tracking problems must not break the crew itself.
        LOGGER.warning("Failed to track Anthropic client for LLM", exc_info=True)
168
+
169
+
170
def _patch_gemini_client(
    llm: "gemini_completion.GeminiCompletion", project_name: Optional[str]
) -> None:
    """
    Replace the LLM's Gemini client with an Opik-tracked one.

    Any failure (including a failing import of the Opik GenAI integration)
    is logged as a warning with traceback and otherwise ignored, so the LLM
    keeps its current client.

    Args:
        llm: The CrewAI LLM instance with Gemini client to patch.
        project_name: The name of the project to associate with tracking.
    """
    try:
        import opik.integrations.genai as opik_genai

        tracked_client = opik_genai.track_genai(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        # Best-effort: tracking problems must not break the crew itself.
        LOGGER.warning("Failed to track Gemini client for LLM", exc_info=True)
188
+
189
+
190
def _patch_bedrock_client(
    llm: "bedrock_completion.BedrockCompletion", project_name: Optional[str]
) -> None:
    """
    Replace the LLM's Bedrock client with an Opik-tracked one.

    Any failure (including a failing import of the Opik Bedrock integration)
    is logged as a warning with traceback and otherwise ignored, so the LLM
    keeps its current client.

    Args:
        llm: The CrewAI LLM instance with Bedrock client to patch.
        project_name: The name of the project to associate with tracking.
    """
    try:
        import opik.integrations.bedrock as opik_bedrock

        tracked_client = opik_bedrock.track_bedrock(
            llm.client, project_name=project_name
        )
        llm.client = tracked_client
    except Exception:
        # Best-effort: tracking problems must not break the crew itself.
        LOGGER.warning("Failed to track Bedrock client for LLM", exc_info=True)
@@ -1,17 +1,16 @@
1
- from typing import Any, Dict, Optional, Union
1
+ from typing import Any, Dict, Optional, Tuple, Union
2
2
  import logging
3
3
 
4
4
  import dspy
5
5
  from dspy.utils import callback as dspy_callback
6
6
 
7
- import opik.types as types
8
- import opik.opik_context as opik_context
9
- import opik.context_storage as context_storage
7
+ from opik import context_storage, opik_context, tracing_runtime_config
8
+ from opik import llm_usage
10
9
  from opik.api_objects import helpers, span, trace, opik_client
11
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
12
10
  from opik.decorator import error_info_collector
13
11
 
14
12
  from .graph import build_mermaid_graph_from_module
13
+ from .parsers import LMHistoryInfo, extract_lm_info_from_history, get_span_type
15
14
 
16
15
  LOGGER = logging.getLogger(__name__)
17
16
 
@@ -35,6 +34,8 @@ class OpikCallback(dspy_callback.BaseCallback):
35
34
  ):
36
35
  self._map_call_id_to_span_data: Dict[str, span.SpanData] = {}
37
36
  self._map_call_id_to_trace_data: Dict[str, trace.TraceData] = {}
37
+ # Store (lm_instance, expected_messages) for extracting usage and verifying correct history entry
38
+ self._map_call_id_to_lm_info: Dict[str, Tuple[Any, Optional[Any]]] = {}
38
39
 
39
40
  self._origins_metadata: Dict[str, Any] = {"created_from": "dspy"}
40
41
 
@@ -106,7 +107,7 @@ class OpikCallback(dspy_callback.BaseCallback):
106
107
  parent_project_name=current_span_data.project_name,
107
108
  child_project_name=self._project_name,
108
109
  )
109
- span_type = self._get_span_type(instance)
110
+ span_type = get_span_type(instance)
110
111
 
111
112
  span_data = span.SpanData(
112
113
  trace_id=current_span_data.trace_id,
@@ -130,7 +131,7 @@ class OpikCallback(dspy_callback.BaseCallback):
130
131
  current_trace_data.project_name,
131
132
  self._project_name,
132
133
  )
133
- span_type = self._get_span_type(instance)
134
+ span_type = get_span_type(instance)
134
135
 
135
136
  span_data = span.SpanData(
136
137
  trace_id=current_trace_data.id,
@@ -201,13 +202,39 @@ class OpikCallback(dspy_callback.BaseCallback):
201
202
  call_id: str,
202
203
  outputs: Optional[Any],
203
204
  exception: Optional[Exception] = None,
205
+ usage: Optional[llm_usage.OpikUsage] = None,
206
+ extra_metadata: Optional[Dict[str, Any]] = None,
207
+ actual_provider: Optional[str] = None,
208
+ total_cost: Optional[float] = None,
204
209
  ) -> None:
205
210
  if span_data := self._map_call_id_to_span_data.pop(call_id, None):
206
211
  if exception:
207
212
  error_info = error_info_collector.collect(exception)
208
213
  span_data.update(error_info=error_info)
209
214
 
210
- span_data.update(output={"output": outputs}).init_end_time()
215
+ # Prepare the update dict
216
+ update_kwargs: Dict[str, Any] = {
217
+ "output": {"output": outputs},
218
+ "usage": usage,
219
+ "total_cost": total_cost,
220
+ }
221
+
222
+ # Handle LLM routers like OpenRouter that return the actual serving provider
223
+ if (
224
+ actual_provider is not None
225
+ and span_data.provider is not None
226
+ and span_data.provider.lower() != actual_provider.lower()
227
+ ):
228
+ # Store the original provider (e.g., "openrouter") in metadata
229
+ if extra_metadata is None:
230
+ extra_metadata = {}
231
+ extra_metadata["llm_router"] = span_data.provider
232
+ # Update to the actual provider for accurate cost tracking
233
+ update_kwargs["provider"] = actual_provider
234
+
235
+ update_kwargs["metadata"] = extra_metadata
236
+
237
+ span_data.update(**update_kwargs).init_end_time()
211
238
  if tracing_runtime_config.is_tracing_active():
212
239
  self._opik_client.span(**span_data.as_parameters)
213
240
 
@@ -234,7 +261,7 @@ class OpikCallback(dspy_callback.BaseCallback):
234
261
  trace_id = current_callback_context_data.id
235
262
  parent_span_id = None
236
263
 
237
- span_type = self._get_span_type(instance)
264
+ span_type = get_span_type(instance)
238
265
 
239
266
  return span.SpanData(
240
267
  trace_id=trace_id,
@@ -266,6 +293,13 @@ class OpikCallback(dspy_callback.BaseCallback):
266
293
  name=f"{span_data.name}: {provider} - {model}",
267
294
  )
268
295
  self._map_call_id_to_span_data[call_id] = span_data
296
+
297
+ # Store LM instance and expected messages for extracting usage
298
+ self._map_call_id_to_lm_info[call_id] = (
299
+ instance,
300
+ inputs.get("messages"),
301
+ )
302
+
269
303
  self._set_current_context_data(span_data)
270
304
 
271
305
  def on_lm_end(
@@ -274,10 +308,21 @@ class OpikCallback(dspy_callback.BaseCallback):
274
308
  outputs: Optional[Dict[str, Any]],
275
309
  exception: Optional[Exception] = None,
276
310
  ) -> None:
311
+ lm_info = self._extract_lm_info_from_history(call_id)
312
+
313
+ # Add cache_hit to span metadata only when we have a definitive value
314
+ extra_metadata = (
315
+ {"cache_hit": lm_info.cache_hit} if lm_info.cache_hit is not None else None
316
+ )
317
+
277
318
  self._end_span(
278
319
  call_id=call_id,
279
320
  exception=exception,
280
321
  outputs=outputs,
322
+ usage=lm_info.usage,
323
+ extra_metadata=extra_metadata,
324
+ actual_provider=lm_info.actual_provider,
325
+ total_cost=lm_info.total_cost,
281
326
  )
282
327
 
283
328
  def on_tool_start(
@@ -319,14 +364,32 @@ class OpikCallback(dspy_callback.BaseCallback):
319
364
  return span_data
320
365
  return self._context_storage.get_trace_data()
321
366
 
322
- def _get_span_type(self, instance: Any) -> types.SpanType:
323
- if isinstance(instance, dspy.Predict):
324
- return "llm"
325
- elif isinstance(instance, dspy.LM):
326
- return "llm"
327
- elif isinstance(instance, dspy.Tool):
328
- return "tool"
329
- return "general"
367
def _extract_lm_info_from_history(self, call_id: str) -> LMHistoryInfo:
    """
    Look up the LM call registered under ``call_id`` and parse its history.

    The ``(lm_instance, expected_messages)`` pair stored for the call is
    removed from ``_map_call_id_to_lm_info`` and handed to
    ``extract_lm_info_from_history``, which reads usage, cache status,
    actual provider and cost from the LM's history.

    Args:
        call_id: Identifier of the LM call whose stored info should be consumed.

    Returns:
        LMHistoryInfo containing usage, cache_hit, actual_provider, and
        total_cost; every field is None when nothing was stored for
        ``call_id``.
    """
    stored_info = self._map_call_id_to_lm_info.pop(call_id, None)

    if stored_info is not None:
        lm_instance, expected_messages = stored_info
        return extract_lm_info_from_history(lm_instance, expected_messages)

    # No LM start was recorded for this call id - nothing to extract.
    return LMHistoryInfo(
        usage=None,
        cache_hit=None,
        actual_provider=None,
        total_cost=None,
    )
330
393
 
331
394
  def _get_opik_metadata(self, instance: Any) -> Dict[str, Any]:
332
395
  graph = None
@@ -0,0 +1,168 @@
1
+ """
2
+ Parsers and data structures for extracting information from DSPy LM responses.
3
+
4
+ This module contains utilities for parsing DSPy LM history entries and
5
+ extracting relevant information like usage, provider, and cost data.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any, Optional, Tuple
10
+ import logging
11
+
12
+ import dspy
13
+
14
+ from opik import llm_usage, types
15
+
16
+ LOGGER = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class LMHistoryInfo:
    """
    Parsed details of a single DSPy LM history entry.

    Groups the values pulled out of an LM call's history: token usage,
    cache status, the provider that served the request, and the cost.

    Attributes:
        usage: Token usage information for the call, or None when absent.
        cache_hit: Whether the response was served from cache.
            True if cached, False if not, None if unknown.
        actual_provider: The actual provider that served the request.
            Relevant for LLM routers such as OpenRouter, which may route
            to different underlying providers (e.g., "Novita", "Together").
        total_cost: The total cost of the request as reported by the
            provider, or None when no cost was reported.
    """

    usage: Optional[llm_usage.OpikUsage]
    cache_hit: Optional[bool]
    actual_provider: Optional[str]
    total_cost: Optional[float]

    def as_tuple(
        self,
    ) -> Tuple[
        Optional[llm_usage.OpikUsage],
        Optional[bool],
        Optional[str],
        Optional[float],
    ]:
        """Return ``(usage, cache_hit, actual_provider, total_cost)`` for backwards compatibility."""
        usage, cache_hit = self.usage, self.cache_hit
        provider, cost = self.actual_provider, self.total_cost
        return (usage, cache_hit, provider, cost)
53
+
54
+
55
def get_span_type(instance: Any) -> types.SpanType:
    """
    Map a DSPy instance to the Opik span type used to record it.

    Args:
        instance: A DSPy module, LM, or tool instance.

    Returns:
        "llm" for ``dspy.Predict`` and ``dspy.LM`` instances, "tool" for
        ``dspy.Tool`` instances, and "general" for anything else.
    """
    # The LLM-like check comes first so it takes precedence over Tool.
    if isinstance(instance, (dspy.Predict, dspy.LM)):
        return "llm"
    if isinstance(instance, dspy.Tool):
        return "tool"
    return "general"
73
+
74
+
75
def extract_lm_info_from_history(
    lm_instance: Any,
    expected_messages: Optional[Any],
) -> LMHistoryInfo:
    """
    Read usage, cache status, actual provider, and cost from an LM's history.

    Only the most recent history entry is inspected, and only if its
    ``"messages"`` value equals ``expected_messages``; otherwise the entry is
    assumed to belong to another (possibly concurrent) call and nothing is
    extracted.

    The provider is taken from the response's ``provider`` attribute when
    present (routers such as OpenRouter report the provider that actually
    served the request). The cost is taken from the entry's ``"cost"`` and,
    failing that, from the usage dict's ``"cost"``. The cache flag is the
    response's explicit ``cache_hit`` when set; otherwise it is inferred from
    the usage dict (empty usage means cached).

    Args:
        lm_instance: The DSPy LM instance that has the history.
        expected_messages: The expected messages to match in the history entry.

    Returns:
        LMHistoryInfo containing usage, cache_hit, actual_provider, and
        total_cost. All fields are None when there is no history, the last
        entry does not match, or extraction fails.
    """
    nothing_extracted = LMHistoryInfo(
        usage=None,
        cache_hit=None,
        actual_provider=None,
        total_cost=None,
    )

    history = getattr(lm_instance, "history", None)
    if not history:
        return nothing_extracted

    try:
        latest_entry = history[-1]

        # Guard against reading another call's entry.
        if latest_entry.get("messages") != expected_messages:
            LOGGER.debug(
                "History entry messages don't match expected messages, "
                "skipping usage extraction (possibly due to concurrent LM calls)"
            )
            return nothing_extracted

        response = latest_entry.get("response")
        usage_dict = latest_entry.get("usage")

        # getattr on a None response simply yields the default.
        actual_provider: Optional[str] = getattr(response, "provider", None)

        # Entry-level cost wins; fall back to the cost inside the usage dict.
        total_cost: Optional[float] = latest_entry.get("cost")
        if total_cost is None and usage_dict:
            total_cost = usage_dict.get("cost")

        # Explicit flag if the response carries one, else infer (empty = cached).
        explicit_cache_hit = getattr(response, "cache_hit", None)
        if explicit_cache_hit is not None:
            cache_hit = explicit_cache_hit
        else:
            cache_hit = not usage_dict

        usage = (
            llm_usage.build_opik_usage_from_unknown_provider(usage_dict)
            if usage_dict
            else None
        )
        return LMHistoryInfo(
            usage=usage,
            cache_hit=cache_hit,
            actual_provider=actual_provider,
            total_cost=total_cost,
        )
    except Exception:
        # Best-effort: a parsing problem must never break span finalization.
        LOGGER.debug(
            "Failed to extract info from DSPy LM history",
            exc_info=True,
        )
        return nothing_extracted
@@ -0,0 +1,17 @@
1
+ """
2
+ Opik integration for Harbor benchmark evaluation framework.
3
+
4
+ Example:
5
+ >>> from opik.integrations.harbor import track_harbor
6
+ >>> job = Job(config)
7
+ >>> tracked_job = track_harbor(job)
8
+ >>> result = await tracked_job.run()
9
+
10
+ Or enable tracking globally (for CLI usage):
11
+ >>> from opik.integrations.harbor import track_harbor
12
+ >>> track_harbor()
13
+ """
14
+
15
+ from .opik_tracker import track_harbor, reset_harbor_tracking
16
+
17
+ __all__ = ["track_harbor", "reset_harbor_tracking"]