opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,26 @@
1
+ import contextvars
1
2
  import logging
2
- from typing import Optional, Dict, List, Any
3
+ from typing import Optional, Dict, Any, List, Union
3
4
  import uuid
4
5
 
5
6
  from llama_index.core.callbacks import schema as llama_index_schema
6
7
  from llama_index.core.callbacks import base_handler
7
8
 
8
- import opik.opik_context as opik_context
9
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
9
+ from opik import context_storage, tracing_runtime_config
10
+ from opik.decorator import arguments_helpers, span_creation_handler
10
11
  from opik.api_objects import opik_client, span, trace
11
12
 
12
13
  from . import event_parsing_utils
13
- from ...api_objects import helpers
14
14
 
15
15
  LOGGER = logging.getLogger(__name__)
16
16
 
17
-
18
17
  INDEX_CONSTRUCTION_TRACE_NAME = "index_construction"
18
+ LLAMA_INDEX_METADATA = {"created_from": "llama_index"}
19
+
20
+ # Context variable for root trace/span created by LlamaIndex
21
+ _llama_root: contextvars.ContextVar[Optional[Union[span.SpanData, trace.TraceData]]] = (
22
+ contextvars.ContextVar("_llama_root", default=None)
23
+ )
19
24
 
20
25
 
21
26
  def _get_last_event(trace_map: Dict[str, List[str]]) -> str:
@@ -36,20 +41,13 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
36
41
  project_name: Optional[str] = None,
37
42
  skip_index_construction_trace: bool = False,
38
43
  ):
39
- """
40
- Initialize the instance with optional customization to define event filters and project-
41
- specific data handling. The constructor sets up the necessary client and data mappings
42
- for operational processing.
43
-
44
- Parameters:
45
- event_starts_to_ignore: Optional list of event start types to be ignored during
46
- processing.
47
- event_ends_to_ignore: Optional list of event end types to be ignored during
48
- processing.
49
- project_name: Optional string representing the project name to establish context in
50
- client operations.
51
- skip_index_construction_trace: A boolean value determining whether to skip creation of trace/spans of index
52
- construction.
44
+ """Initialize LlamaIndex callback handler for Opik tracing.
45
+
46
+ Args:
47
+ event_starts_to_ignore: Event start types to be ignored during processing.
48
+ event_ends_to_ignore: Event end types to be ignored during processing.
49
+ project_name: Project name for trace/span context.
50
+ skip_index_construction_trace: Whether to skip index construction traces.
53
51
  """
54
52
  event_starts_to_ignore = (
55
53
  event_starts_to_ignore if event_starts_to_ignore else []
@@ -62,30 +60,27 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
62
60
 
63
61
  self._skip_index_construction_trace = skip_index_construction_trace
64
62
  self._project_name = project_name
65
- self._opik_client = opik_client.Opik(
66
- _use_batching=True,
67
- project_name=project_name,
68
- )
69
-
70
- self._opik_trace_data: Optional[trace.TraceData] = None
63
+ self._opik_client = opik_client.get_client_cached()
64
+ self._opik_context_storage = context_storage.get_current_context_instance()
71
65
 
66
+ # Event tracking - shared across contexts, but events have unique IDs
72
67
  self._map_event_id_to_span_data: Dict[str, span.SpanData] = {}
73
68
  self._map_event_id_to_output: Dict[str, Any] = {}
74
69
 
75
- def _create_trace_data(self, trace_name: Optional[str]) -> trace.TraceData:
76
- trace_data = trace.TraceData(
77
- name=trace_name,
78
- metadata={"created_from": "llama_index"},
79
- project_name=self._project_name,
80
- )
81
-
82
- if (
83
- self._opik_client.config.log_start_trace_span
84
- and tracing_runtime_config.is_tracing_active()
85
- ):
86
- self._opik_client.trace(**trace_data.as_start_parameters)
70
+ # For streaming: end_trace may be called before event_end, so we need to
71
+ # defer the trace output update until the event output is available
72
+ self._pending_root_output_updates: Dict[
73
+ str, Union[span.SpanData, trace.TraceData]
74
+ ] = {}
87
75
 
88
- return trace_data
76
+ def _send_root_to_backend(
77
+ self, root: Union[span.SpanData, trace.TraceData]
78
+ ) -> None:
79
+ """Send root trace or span data to the backend."""
80
+ if isinstance(root, span.SpanData):
81
+ self._opik_client.span(**root.as_parameters)
82
+ elif isinstance(root, trace.TraceData):
83
+ self._opik_client.trace(**root.as_parameters)
89
84
 
90
85
  def start_trace(self, trace_id: Optional[str] = None) -> None:
91
86
  if (
@@ -94,18 +89,29 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
94
89
  ):
95
90
  return
96
91
 
97
- # When a new LLama Index trace is started, create a new trace in Opik
98
- existing_trace_data = opik_context.get_current_trace_data()
99
- if existing_trace_data is not None:
100
- self._opik_trace_data = existing_trace_data
101
- else:
102
- self._opik_trace_data = self._create_trace_data(trace_name=trace_id)
92
+ trace_name = trace_id if trace_id else "llama_index_operation"
103
93
 
104
- if (
105
- self._opik_client.config.log_start_trace_span
106
- and tracing_runtime_config.is_tracing_active()
107
- ):
108
- self._opik_client.trace(**self._opik_trace_data.as_start_parameters)
94
+ span_creation_result = span_creation_handler.create_span_respecting_context(
95
+ start_span_arguments=arguments_helpers.StartSpanParameters(
96
+ name=trace_name,
97
+ type="general",
98
+ project_name=self._project_name,
99
+ metadata=LLAMA_INDEX_METADATA,
100
+ ),
101
+ distributed_trace_headers=None,
102
+ opik_context_storage=self._opik_context_storage,
103
+ )
104
+
105
+ if span_creation_result.trace_data is not None:
106
+ self._opik_context_storage.set_trace_data(span_creation_result.trace_data)
107
+ self._opik_client.trace(
108
+ **span_creation_result.trace_data.as_start_parameters
109
+ )
110
+ _llama_root.set(span_creation_result.trace_data)
111
+ else:
112
+ self._opik_context_storage.add_span_data(span_creation_result.span_data)
113
+ self._opik_client.span(**span_creation_result.span_data.as_start_parameters)
114
+ _llama_root.set(span_creation_result.span_data)
109
115
 
110
116
  def end_trace(
111
117
  self,
@@ -115,21 +121,38 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
115
121
  if not trace_map:
116
122
  return
117
123
 
118
- # When a trace finishes, we first get the last event output
124
+ root = _llama_root.get()
125
+ if root is None:
126
+ return
127
+
119
128
  last_event = _get_last_event(trace_map)
120
- last_event_output = self._map_event_id_to_output.get(last_event, None)
121
129
 
122
- # And then end the trace with the optional output
123
- if self._opik_trace_data is not None:
124
- self._opik_trace_data.init_end_time().update(output=last_event_output)
125
- if tracing_runtime_config.is_tracing_active():
126
- self._opik_client.trace(**self._opik_trace_data.as_parameters)
127
- self._opik_trace_data = None
130
+ # Check if the output for the last event is already available.
131
+ # For streaming calls, LlamaIndex calls end_trace() BEFORE event_end(),
132
+ # so the output won't be stored yet.
133
+ if last_event in self._map_event_id_to_output:
134
+ last_event_output = self._map_event_id_to_output.get(last_event)
135
+ root.init_end_time().update(output=last_event_output)
128
136
 
129
- # Do not clean _map_event_id_to_span_data as streaming LLM events can
130
- # end after this method is called. _map_event_id_to_span_data is
131
- # individually cleaned after each event is ended
132
- self._map_event_id_to_output.clear()
137
+ # Send the trace/span with output
138
+ self._send_root_to_backend(root)
139
+ else:
140
+ # Output not available yet (streaming scenario).
141
+ # Store the root so we can update it when event_end is called.
142
+ # Don't send the trace/span yet - it will be sent in on_event_end
143
+ # with the output and correct end_time to avoid race conditions.
144
+ # Note: We don't set end_time here because the actual end is when
145
+ # the last event ends, not when LlamaIndex calls end_trace().
146
+ self._pending_root_output_updates[last_event] = root
147
+
148
+ # Clean up context storage
149
+ if isinstance(root, span.SpanData):
150
+ self._opik_context_storage.pop_span_data(ensure_id=root.id)
151
+ elif isinstance(root, trace.TraceData):
152
+ self._opik_context_storage.pop_trace_data(ensure_id=root.id)
153
+
154
+ # Clean up
155
+ _llama_root.set(None)
133
156
 
134
157
  def on_event_start(
135
158
  self,
@@ -142,53 +165,60 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
142
165
  if not event_id:
143
166
  event_id = str(uuid.uuid4())
144
167
 
145
- # the event is not part of a trace probably because we are skipping the index construction trace
146
- if self._opik_trace_data is None:
168
+ root_span_or_trace = _llama_root.get()
169
+
170
+ if root_span_or_trace is None:
147
171
  if not self._skip_index_construction_trace:
148
172
  LOGGER.warning(
149
- "No trace data found in context for event start. "
150
- "This is likely due to the fact that the trace is not started properly. "
151
- f"The parent_id: {parent_id}, event_type: {event_type}, event_id: {event_id}."
173
+ "No active LlamaIndex trace/span found in context. "
174
+ "parent_id=%s, event_type=%s, event_id=%s",
175
+ parent_id,
176
+ event_type,
177
+ event_id,
152
178
  )
153
-
154
179
  return event_id
155
180
 
156
- # Get parent span Id if it exists
157
- if parent_id and parent_id in self._map_event_id_to_span_data:
158
- opik_parent_id = self._map_event_id_to_span_data[parent_id].id
159
- else:
160
- opik_parent_id = None
161
-
162
- # Compute the span input based on the event payload
163
181
  span_input = event_parsing_utils.get_span_input_from_events(event_type, payload)
164
182
 
165
- project_name = helpers.resolve_child_span_project_name(
166
- parent_project_name=self._opik_trace_data.project_name,
167
- child_project_name=self._project_name,
168
- show_warning=self._opik_trace_data.created_by != "evaluation",
183
+ # Skip creating span if event duplicates root operation name
184
+ root_name = root_span_or_trace.name if root_span_or_trace else None
185
+ event_duplicates_root = (
186
+ parent_id == llama_index_schema.BASE_TRACE_EVENT
187
+ and event_type.value == root_name
169
188
  )
189
+ if event_duplicates_root:
190
+ if span_input:
191
+ root_span_or_trace.update(input=span_input)
192
+ return event_id
170
193
 
171
- # Create a new span for this event
172
- span_data = span.SpanData(
173
- trace_id=self._opik_trace_data.id,
174
- name=event_type.value,
175
- parent_span_id=opik_parent_id,
176
- type=(
177
- "llm" if event_type == llama_index_schema.CBEventType.LLM else "general"
194
+ span_creation_result = span_creation_handler.create_span_respecting_context(
195
+ start_span_arguments=arguments_helpers.StartSpanParameters(
196
+ name=event_type.value,
197
+ input=span_input,
198
+ type=(
199
+ "llm"
200
+ if event_type == llama_index_schema.CBEventType.LLM
201
+ else "general"
202
+ ),
203
+ project_name=self._project_name,
204
+ metadata=LLAMA_INDEX_METADATA,
178
205
  ),
179
- input=span_input,
180
- project_name=project_name,
206
+ distributed_trace_headers=None,
207
+ opik_context_storage=self._opik_context_storage,
181
208
  )
209
+ span_data = span_creation_result.span_data
182
210
  self._map_event_id_to_span_data[event_id] = span_data
211
+ self._opik_context_storage.add_span_data(span_data)
212
+
183
213
  if (
184
214
  self._opik_client.config.log_start_trace_span
185
215
  and tracing_runtime_config.is_tracing_active()
186
216
  ):
187
217
  self._opik_client.span(**span_data.as_start_parameters)
188
218
 
189
- # If the parent_id is a BASE_TRACE_EVENT, update the trace with the span input
190
- if parent_id == llama_index_schema.BASE_TRACE_EVENT and span_input:
191
- self._opik_trace_data.update(input=span_input)
219
+ # Update root input from first child event
220
+ if parent_id == llama_index_schema.BASE_TRACE_EVENT and span_input is not None:
221
+ root_span_or_trace.update(input=span_input)
192
222
 
193
223
  return event_id
194
224
 
@@ -199,32 +229,41 @@ class LlamaIndexCallbackHandler(base_handler.BaseCallbackHandler):
199
229
  event_id: Optional[str] = None,
200
230
  **kwargs: Any,
201
231
  ) -> None:
202
- # Get the span output from the event and store it so we can use it if needed
203
- # when finishing the trace
204
232
  span_output = event_parsing_utils.get_span_output_from_event(
205
233
  event_type, payload
206
234
  )
207
-
208
235
  error_info = event_parsing_utils.get_span_error_info(payload)
209
236
 
210
- if event_id:
211
- self._map_event_id_to_output[event_id] = span_output
237
+ if not event_id:
238
+ return
212
239
 
213
- # Log the output to the span with the matching id
214
- if event_id in self._map_event_id_to_span_data:
215
- span_data = self._map_event_id_to_span_data[event_id]
240
+ # Store output for end_trace
241
+ self._map_event_id_to_output[event_id] = span_output
216
242
 
217
- llm_usage_info = event_parsing_utils.get_usage_data(payload)
218
- span_data.update(**llm_usage_info.__dict__)
243
+ # Check if there's a pending root trace/span output update for this event.
244
+ # This happens when end_trace() was called before event_end() (streaming scenario).
245
+ if event_id in self._pending_root_output_updates:
246
+ root = self._pending_root_output_updates.pop(event_id)
247
+ # Set end_time now (the actual end) and update with output
248
+ root.init_end_time().update(output=span_output)
219
249
 
220
- span_data.update(
221
- output=span_output, error_info=error_info
222
- ).init_end_time()
223
- if tracing_runtime_config.is_tracing_active():
224
- self._opik_client.span(**span_data.as_parameters)
250
+ # Send the trace/span to the backend with correct end_time and output
251
+ self._send_root_to_backend(root)
252
+
253
+ # Finalize span if it exists
254
+ if event_id in self._map_event_id_to_span_data:
255
+ span_data = self._map_event_id_to_span_data[event_id]
256
+
257
+ llm_usage_info = event_parsing_utils.get_usage_data(payload)
258
+ span_data.update(**llm_usage_info.__dict__)
259
+ span_data.update(output=span_output, error_info=error_info).init_end_time()
260
+
261
+ if tracing_runtime_config.is_tracing_active():
262
+ self._opik_client.span(**span_data.as_parameters)
225
263
 
226
- del self._map_event_id_to_span_data[event_id]
264
+ self._opik_context_storage.pop_span_data(ensure_id=span_data.id)
265
+ del self._map_event_id_to_span_data[event_id]
227
266
 
228
267
  def flush(self) -> None:
229
- """Sends pending Opik data to the backend"""
268
+ """Flush pending Opik data to backend."""
230
269
  self._opik_client.flush()
@@ -3,12 +3,11 @@ from agents import tracing
3
3
 
4
4
  import logging
5
5
 
6
+ from opik import context_storage, tracing_runtime_config
6
7
  from opik.api_objects.span import span_data
7
8
  from opik.api_objects.trace import trace_data
8
9
  from opik.api_objects import opik_client
9
10
  from opik.decorator import span_creation_handler, arguments_helpers
10
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
11
- import opik.context_storage as context_storage
12
11
 
13
12
  from . import span_data_parsers
14
13
 
@@ -56,9 +56,7 @@ class OpenaiChatCompletionsTrackDecorator(base_track_decorator.BaseTrackDecorato
56
56
  ), "Expected kwargs to be not None in chat.completion.create(**kwargs), chat.completion.parse(**kwargs) or chat.completion.stream(**kwargs)"
57
57
 
58
58
  name = track_options.name if track_options.name is not None else func.__name__
59
- if _is_completions_stream_call(
60
- name_passed_to_track_decorator=name, kwargs=kwargs
61
- ):
59
+ if kwargs.get("stream") is True:
62
60
  kwargs = _remove_not_given_sentinel_values(kwargs)
63
61
  name = "chat_completion_stream"
64
62
 
@@ -196,17 +194,5 @@ def _remove_not_given_sentinel_values(dict_: Dict[str, Any]) -> Dict[str, Any]:
196
194
  key: value
197
195
  for key, value in dict_.items()
198
196
  if value is not _openai_types.NOT_GIVEN
197
+ and not isinstance(value, _openai_types.Omit)
199
198
  }
200
-
201
-
202
- def _is_completions_stream_call(
203
- name_passed_to_track_decorator: str, kwargs: Dict[str, Any]
204
- ) -> bool:
205
- if not name_passed_to_track_decorator == "chat_completion_create":
206
- return False
207
-
208
- for _, value in kwargs.items():
209
- if value is _openai_types.NOT_GIVEN:
210
- return True
211
-
212
- return False
@@ -18,7 +18,7 @@ def track_openai(
18
18
  """Adds Opik tracking wrappers to an OpenAI client.
19
19
 
20
20
  The client is always patched; however every wrapped call checks
21
- `opik.decorator.tracing_runtime_config.is_tracing_active()` before emitting
21
+ `opik.is_tracing_active()` before emitting
22
22
  any telemetry. If tracing is disabled at call time, the wrapped function
23
23
  executes normally but no span/trace is sent.
24
24
 
@@ -43,4 +43,8 @@ def setup_aws_sagemaker_session_hook() -> None:
43
43
 
44
44
  client.auth = sagemaker_auth
45
45
 
46
- opik.hooks.register_httpx_client_hook(sagemaker_auth_client_hook)
46
+ opik.hooks.add_httpx_client_hook(
47
+ opik.hooks.HttpxClientHook(
48
+ client_modifier=sagemaker_auth_client_hook, client_init_arguments=None
49
+ )
50
+ )
@@ -1,11 +1,13 @@
1
1
  from typing import Optional, Dict, Any
2
+
3
+
2
4
  from . import base_original_provider_usage
3
5
 
4
6
 
5
7
  class GoogleGeminiUsage(base_original_provider_usage.BaseOriginalProviderUsage):
6
8
  """Google AI / VertexAI calls token usage data. Updated 11.03.2025"""
7
9
 
8
- candidates_token_count: int
10
+ candidates_token_count: Optional[int]
9
11
  """Number of tokens in the response(s)."""
10
12
 
11
13
  prompt_token_count: int
@@ -93,22 +93,21 @@ class OpikUsage(pydantic.BaseModel):
93
93
  # The completions token should include both the candidates token_count and the thought tokens. Usage differs depending on the models and between VertexAI and Google AI
94
94
  # See https://github.com/BerriAI/litellm/pull/10141#discussion_r2052272035
95
95
  # Do something similar as: https://github.com/BerriAI/litellm/blob/4854482af4a2a56060bbfeb4345bce4f1bb7ec41/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py#L980-L995
96
+ candidates_token_count = provider_usage.candidates_token_count or 0
96
97
 
97
- if (
98
- provider_usage.total_token_count
99
- == provider_usage.prompt_token_count + provider_usage.candidates_token_count
100
- ):
101
- completion_tokens = provider_usage.candidates_token_count
98
+ total_token_count = provider_usage.prompt_token_count + candidates_token_count
99
+
100
+ if provider_usage.total_token_count == total_token_count:
101
+ completion_tokens = candidates_token_count
102
102
  elif provider_usage.thoughts_token_count is not None:
103
103
  completion_tokens = (
104
- provider_usage.candidates_token_count
105
- + provider_usage.thoughts_token_count
104
+ candidates_token_count + provider_usage.thoughts_token_count
106
105
  )
107
106
  else:
108
107
  LOGGER.debug(
109
108
  "Something is wrong in Google usage, completion_tokens might be invalid"
110
109
  )
111
- completion_tokens = provider_usage.candidates_token_count
110
+ completion_tokens = candidates_token_count
112
111
 
113
112
  return cls(
114
113
  completion_tokens=completion_tokens,
@@ -27,15 +27,17 @@ def build_opik_usage(
27
27
  ) -> opik_usage.OpikUsage:
28
28
  build_functions = _PROVIDER_TO_OPIK_USAGE_BUILDERS[provider]
29
29
 
30
+ exc = None
30
31
  for build_function in build_functions:
31
32
  try:
32
33
  result = build_function(usage)
33
34
  return result
34
- except Exception:
35
+ except Exception as exc_info:
36
+ exc = exc_info
35
37
  pass
36
38
 
37
39
  raise ValueError(
38
- f"Failed to build OpikUsage for provider {provider} and usage {usage}"
40
+ f"Failed to build OpikUsage for provider {provider} and usage {usage}, reason: {exc}"
39
41
  )
40
42
 
41
43
 
opik/logging_messages.py CHANGED
@@ -29,6 +29,8 @@ FAILED_TO_PARSE_OPENAI_STREAM_CONTENT = "Failed to parse openai Stream content.
29
29
 
30
30
  FAILED_TO_PROCESS_MESSAGE_IN_BACKGROUND_STREAMER = "Failed to process %s. Error: %s"
31
31
 
32
+ MAKE_SURE_OPIK_IS_CONFIGURED_CORRECTLY = "This error may be due to incorrect configuration. For configuration help, see: https://www.comet.com/docs/opik/tracing/sdk_configuration"
33
+
32
34
  HALLUCINATION_DETECTION_FAILED = "Failed hallucination detection"
33
35
 
34
36
  FACTUALITY_SCORE_CALC_FAILED = "Failed to calculate factuality score"
@@ -37,6 +39,10 @@ ANSWER_RELEVANCE_SCORE_CALC_FAILED = "Failed to calculate answer relevance score
37
39
 
38
40
  MODERATION_SCORE_CALC_FAILED = "Failed to calculate moderation score"
39
41
 
42
+ STRUCTURED_OUTPUT_COMPLIANCE_FAILED = (
43
+ "Failed to parse structured output compliance metric output."
44
+ )
45
+
40
46
  CONTEXT_RECALL_SCORE_CALC_FAILED = "Failed to calculate context recall score"
41
47
 
42
48
  GEVAL_SCORE_CALC_FAILED = "Failed to calculate g-eval score"
@@ -1,4 +1,3 @@
1
- import threading
2
1
  import time
3
2
  import abc
4
3
 
@@ -24,25 +23,22 @@ class BaseBatcher(abc.ABC):
24
23
  self._batch_memory_limit_mb: int = batch_memory_limit_mb
25
24
 
26
25
  self._last_time_flush_callback_called: float = time.time()
27
- self._lock = threading.RLock()
28
26
 
29
27
  def flush(self) -> None:
30
- with self._lock:
31
- if len(self._accumulated_messages) > 0:
32
- batch_messages = self._create_batches_from_accumulated_messages()
33
- self._accumulated_messages = []
28
+ if len(self._accumulated_messages) > 0:
29
+ batch_messages = self._create_batches_from_accumulated_messages()
30
+ self._accumulated_messages = []
34
31
 
35
- for batch_message in batch_messages:
36
- self._flush_callback(batch_message)
37
- self._last_time_flush_callback_called = time.time()
32
+ for batch_message in batch_messages:
33
+ self._flush_callback(batch_message)
34
+ self._last_time_flush_callback_called = time.time()
38
35
 
39
36
  def is_ready_to_flush(self) -> bool:
40
37
  elapsed = time.time() - self._last_time_flush_callback_called
41
38
  return elapsed >= self._flush_interval_seconds
42
39
 
43
40
  def is_empty(self) -> bool:
44
- with self._lock:
45
- return len(self._accumulated_messages) == 0
41
+ return len(self._accumulated_messages) == 0
46
42
 
47
43
  @abc.abstractmethod
48
44
  def _create_batches_from_accumulated_messages(
@@ -51,10 +47,9 @@ class BaseBatcher(abc.ABC):
51
47
 
52
48
  @abc.abstractmethod
53
49
  def add(self, message: messages.BaseMessage) -> None:
54
- with self._lock:
55
- self._accumulated_messages.append(message)
56
- if len(self._accumulated_messages) >= self._max_batch_size:
57
- self.flush()
50
+ self._accumulated_messages.append(message)
51
+ if len(self._accumulated_messages) >= self._max_batch_size:
52
+ self.flush()
58
53
 
59
54
  def _remove_matching_messages(
60
55
  self, filter_func: Callable[[messages.BaseMessage], bool]
@@ -65,10 +60,9 @@ class BaseBatcher(abc.ABC):
65
60
  Args:
66
61
  filter_func: A function that takes a BaseMessage and returns True if the message should be removed
67
62
  """
68
- with self._lock:
69
- self._accumulated_messages = list(
70
- filter(lambda x: not filter_func(x), self._accumulated_messages)
71
- )
63
+ self._accumulated_messages = list(
64
+ filter(lambda x: not filter_func(x), self._accumulated_messages)
65
+ )
72
66
 
73
67
  def size(self) -> int:
74
68
  """
@@ -81,5 +75,4 @@ class BaseBatcher(abc.ABC):
81
75
  Returns:
82
76
  int: The total number of accumulated messages.
83
77
  """
84
- with self._lock:
85
- return len(self._accumulated_messages)
78
+ return len(self._accumulated_messages)
@@ -1,3 +1,4 @@
1
+ import threading
1
2
  from typing import Type, Dict
2
3
  from .. import messages
3
4
  from . import base_batcher
@@ -15,30 +16,41 @@ class BatchManager:
15
16
  self._flushing_thread = flushing_thread.FlushingThread(
16
17
  batchers=list(self._message_to_batcher_mapping.values())
17
18
  )
19
+ self._lock = threading.RLock()
18
20
 
19
21
  def start(self) -> None:
20
22
  self._flushing_thread.start()
21
23
 
22
24
  def stop(self) -> None:
23
- self._flushing_thread.close()
25
+ with self._lock:
26
+ # stop the flushing thread
27
+ self._flushing_thread.close()
28
+ # force flush all pending messages
29
+ self.flush()
24
30
 
25
31
  def message_supports_batching(self, message: messages.BaseMessage) -> bool:
32
+ if message is None:
33
+ return False
34
+
26
35
  if hasattr(message, "supports_batching"):
27
36
  return message.supports_batching
28
37
 
29
38
  return message.__class__ in self._message_to_batcher_mapping
30
39
 
31
40
  def process_message(self, message: messages.BaseMessage) -> None:
32
- self._message_to_batcher_mapping[type(message)].add(message)
41
+ with self._lock:
42
+ self._message_to_batcher_mapping[type(message)].add(message)
33
43
 
34
44
  def is_empty(self) -> bool:
35
- return all(
36
- [
37
- batcher.is_empty()
38
- for batcher in self._message_to_batcher_mapping.values()
39
- ]
40
- )
45
+ with self._lock:
46
+ return all(
47
+ [
48
+ batcher.is_empty()
49
+ for batcher in self._message_to_batcher_mapping.values()
50
+ ]
51
+ )
41
52
 
42
53
  def flush(self) -> None:
43
- for batcher in self._message_to_batcher_mapping.values():
44
- batcher.flush()
54
+ with self._lock:
55
+ for batcher in self._message_to_batcher_mapping.values():
56
+ batcher.flush()
@@ -15,6 +15,9 @@ FEEDBACK_SCORES_BATCH_MESSAGE_BATCHER_MAX_BATCH_SIZE = 1000
15
15
  GUARDRAIL_BATCH_MESSAGE_BATCHER_FLUSH_INTERVAL_SECONDS = 1.0
16
16
  GUARDRAIL_BATCH_MESSAGE_BATCHER_MAX_BATCH_SIZE = 1000
17
17
 
18
+ EXPERIMENT_ITEMS_BATCH_MESSAGE_BATCHER_FLUSH_INTERVAL_SECONDS = 3.0
19
+ EXPERIMENT_ITEMS_BATCH_MESSAGE_BATCHER_MAX_BATCH_SIZE = 1000
20
+
18
21
 
19
22
  def create_batch_manager(
20
23
  queue: message_queue.MessageQueue[messages.BaseMessage],
@@ -55,6 +58,12 @@ def create_batch_manager(
55
58
  flush_callback=queue.put,
56
59
  )
57
60
 
61
+ experiment_items_batch_message_batcher = batchers.CreateExperimentItemsBatchMessageBatcher(
62
+ flush_interval_seconds=EXPERIMENT_ITEMS_BATCH_MESSAGE_BATCHER_FLUSH_INTERVAL_SECONDS,
63
+ max_batch_size=EXPERIMENT_ITEMS_BATCH_MESSAGE_BATCHER_MAX_BATCH_SIZE,
64
+ flush_callback=queue.put,
65
+ )
66
+
58
67
  message_to_batcher_mapping: Dict[
59
68
  Type[messages.BaseMessage], base_batcher.BaseBatcher
60
69
  ] = {
@@ -64,6 +73,7 @@ def create_batch_manager(
64
73
  messages.AddTraceFeedbackScoresBatchMessage: add_trace_feedback_scores_batch_message_batcher,
65
74
  messages.AddThreadsFeedbackScoresBatchMessage: add_threads_feedback_scores_batch_message_batcher,
66
75
  messages.GuardrailBatchMessage: guardrail_batch_message_batcher,
76
+ messages.CreateExperimentItemsBatchMessage: experiment_items_batch_message_batcher,
67
77
  }
68
78
 
69
79
  batch_manager_ = batch_manager.BatchManager(