opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (592):
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any
1
+ from typing import Dict, Any, Optional
2
2
  import dataclasses
3
3
 
4
4
 
@@ -6,5 +6,6 @@ import dataclasses
6
6
  class TestCase:
7
7
  trace_id: str
8
8
  dataset_item_id: str
9
- scoring_inputs: Dict[str, Any]
10
9
  task_output: Dict[str, Any]
10
+ dataset_item_content: Dict[str, Any] = dataclasses.field(default_factory=dict)
11
+ mapped_scoring_inputs: Optional[Dict[str, Any]] = None
@@ -10,3 +10,4 @@ from .metrics import score_result
10
10
  class TestResult:
11
11
  test_case: test_case.TestCase
12
12
  score_results: List[score_result.ScoreResult]
13
+ trial_id: int
@@ -27,16 +27,44 @@ def evaluate_threads(
27
27
 
28
28
  Args:
29
29
  project_name: The name of the project containing the threads to evaluate.
30
- filter_string: Optional filter string to select specific threads for evaluation.
30
+ filter_string: Optional filter string to select specific threads for evaluation using Opik Query Language (OQL).
31
+ The format is: "<COLUMN> <OPERATOR> <VALUE> [AND <COLUMN> <OPERATOR> <VALUE>]*"
32
+
33
+ Supported columns include:
34
+ - `id`, `name`, `created_by`, `thread_id`, `type`, `model`, `provider`: String fields with full operator support
35
+ - `status`: String field (=, contains, not_contains only)
36
+ - `start_time`, `end_time`: DateTime fields (use ISO 8601 format, e.g., "2024-01-01T00:00:00Z")
37
+ - `input`, `output`: String fields for content (=, contains, not_contains only)
38
+ - `metadata`: Dictionary field (use dot notation, e.g., "metadata.model")
39
+ - `feedback_scores`: Numeric field (use dot notation, e.g., "feedback_scores.accuracy")
40
+ - `tags`: List field (use "contains" operator only)
41
+ - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`: Numeric usage fields
42
+ - `duration`, `number_of_messages`, `total_estimated_cost`: Numeric fields
43
+
44
+ Examples: 'status = "inactive"', 'id = "thread_123"', 'duration > 300'
31
45
  If None, all threads in the project will be evaluated.
32
46
  eval_project_name: Optional name for the evaluation project where evaluation traces will be stored.
33
47
  If None, the same project_name will be used.
34
48
  metrics: List of ConversationThreadMetric instances to apply to each thread.
35
49
  Must contain at least one metric.
36
50
  trace_input_transform: Function to transform trace input JSON to string representation.
37
- This is used when converting traces to conversation threads.
51
+ This function extracts the relevant user message from your trace's input structure.
52
+ The function receives the raw trace input as a dictionary and should return a string.
53
+
54
+ Example: If your trace input is {"content": {"user_question": "Hello"}},
55
+ use: lambda x: x["content"]["user_question"]
56
+
57
+ This transformation is essential because trace inputs vary by framework, but metrics
58
+ expect a standardized string format representing the user's message.
38
59
  trace_output_transform: Function to transform trace output JSON to string representation.
39
- This is used when converting traces to conversation threads.
60
+ This function extracts the relevant agent response from your trace's output structure.
61
+ The function receives the raw trace output as a dictionary and should return a string.
62
+
63
+ Example: If your trace output is {"response": {"text": "Hi there"}},
64
+ use: lambda x: x["response"]["text"]
65
+
66
+ This transformation is essential because trace outputs vary by framework, but metrics
67
+ expect a standardized string format representing the agent's response.
40
68
  verbose: Verbosity level for progress reporting (0=silent, 1=progress).
41
69
  Default is 1.
42
70
  num_workers: Number of concurrent workers for thread evaluation.
@@ -4,7 +4,7 @@ from . import evaluation_result
4
4
  from ...api_objects import opik_client
5
5
  from ...api_objects.conversation import conversation_thread, conversation_factory
6
6
  from ...rest_api import TraceThread, JsonListStringPublic
7
- from ...types import FeedbackScoreDict
7
+ from ...types import BatchFeedbackScoreDict
8
8
  from ...api_objects.threads import threads_client
9
9
 
10
10
 
@@ -15,7 +15,7 @@ def log_feedback_scores(
15
15
  ) -> None:
16
16
  for result in results:
17
17
  feedback_scores = [
18
- FeedbackScoreDict(
18
+ BatchFeedbackScoreDict(
19
19
  id=result.thread_id,
20
20
  name=score.name,
21
21
  value=score.value,
@@ -42,6 +42,7 @@ def load_conversation_thread(
42
42
  project_name=project_name,
43
43
  filter_string=f'thread_id = "{thread.id}"',
44
44
  max_results=max_results,
45
+ truncate=False,
45
46
  )
46
47
  return conversation_factory.create_conversation_from_traces(
47
48
  traces=traces,
opik/evaluation/types.py CHANGED
@@ -1,5 +1,13 @@
1
- from typing import Any, Callable, Dict, Union
1
+ from typing import Any, Callable, Dict, List, Union
2
+
3
+ from . import test_result
4
+ from .metrics import score_result
2
5
 
3
6
  LLMTask = Callable[[Dict[str, Any]], Dict[str, Any]]
4
7
 
5
8
  ScoringKeyMappingType = Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]
9
+
10
+ ExperimentScoreFunction = Callable[
11
+ [List[test_result.TestResult]],
12
+ Union[score_result.ScoreResult, List[score_result.ScoreResult]],
13
+ ]
opik/exceptions.py CHANGED
@@ -81,6 +81,23 @@ class PromptPlaceholdersDontMatchFormatArguments(OpikException):
81
81
  )
82
82
 
83
83
 
84
class PromptTemplateStructureMismatch(OpikException):
    """Raised when a new prompt version uses a template structure that differs
    from the structure of the already existing prompt."""

    def __init__(
        self, prompt_name: str, existing_structure: str, attempted_structure: str
    ):
        # Stored as plain attributes; __str__ renders them into the message.
        self.prompt_name = prompt_name
        self.existing_structure = existing_structure
        self.attempted_structure = attempted_structure

    def __str__(self) -> str:
        # The message ends with a period followed by a single space.
        name_part = (
            f"Prompt with name '{self.prompt_name}' already exists and has immutable "
        )
        structure_part = f"'{self.existing_structure}' template structure, not '{self.attempted_structure}'. "
        return name_part + structure_part
99
+
100
+
84
101
  class ExperimentNotFound(OpikException):
85
102
  pass
86
103
 
@@ -130,3 +147,19 @@ class ValidationError(OpikException):
130
147
 
131
148
  def __repr__(self) -> str:
132
149
  return f"ValidationError(prefix={self._prefix}, failure_reasons={self._failure_reasons})"
150
+
151
+
152
class BaseLLMError(OpikException):
    """Common ancestor for LLM-related errors raised during evaluation."""

    def __init__(self, message: str) -> None:
        # Kept as an attribute; __str__ prepends a fixed prefix to it.
        self.message = message

    def __str__(self) -> str:
        return "LLM infrastructure error: {}".format(self.message)
160
+
161
+
162
class SearchTimeoutError(OpikException):
    """Exception raised when a search times out."""
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  from typing import Optional
3
4
 
4
5
  import httpx
@@ -28,6 +29,10 @@ def upload_attachment(
28
29
  httpx_client=upload_httpx_client,
29
30
  monitor=monitor,
30
31
  )
32
+
33
+ # delete the file after upload if requested
34
+ if upload_options.delete_after_upload:
35
+ _delete_attachment_file(upload_options.file_path)
31
36
  except Exception as e:
32
37
  LOGGER.error(
33
38
  "Failed to upload attachment: '%s' from file: [%s] with size: [%s]. Error: %s",
@@ -40,6 +45,14 @@ def upload_attachment(
40
45
  raise
41
46
 
42
47
 
48
def _delete_attachment_file(file_path: str) -> None:
    """Remove the attachment file at ``file_path`` on a best-effort basis.

    Args:
        file_path: Path of the file to unlink.

    An ``OSError`` raised by the removal is logged at INFO level and
    swallowed, so a failed cleanup never propagates to the caller.
    """
    try:
        os.unlink(file_path)
    except OSError as e:
        # Lazy %-style arguments: the message is only rendered if INFO is enabled.
        LOGGER.info(
            "Failed to delete attachment file: '%s'. Reason: %s.", file_path, e
        )
54
+
55
+
43
56
  def _do_upload_attachment(
44
57
  upload_options: file_upload_options.FileUploadOptions,
45
58
  rest_client: rest_api_client.OpikApi,
@@ -16,6 +16,7 @@ class FileUploadOptions:
16
16
  entity_id: str
17
17
  project_name: str
18
18
  encoded_url_override: str
19
+ delete_after_upload: bool
19
20
 
20
21
 
21
22
  def file_upload_options_from_attachment(
@@ -32,4 +33,5 @@ def file_upload_options_from_attachment(
32
33
  entity_id=attachment.entity_id,
33
34
  project_name=attachment.project_name,
34
35
  encoded_url_override=attachment.encoded_url_override,
36
+ delete_after_upload=attachment.delete_after_upload,
35
37
  )
opik/hooks/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ from .httpx_client_hook import (
2
+ HttpxClientHook,
3
+ add_httpx_client_hook,
4
+ register_httpx_client_hook,
5
+ )
6
+ from .anonymizer_hook import (
7
+ has_anonymizers,
8
+ add_anonymizer,
9
+ apply_anonymizers,
10
+ get_anonymizers,
11
+ clear_anonymizers,
12
+ )
13
+
14
+ __all__ = (
15
+ "HttpxClientHook",
16
+ "add_httpx_client_hook",
17
+ "register_httpx_client_hook",
18
+ "add_anonymizer",
19
+ "apply_anonymizers",
20
+ "clear_anonymizers",
21
+ "get_anonymizers",
22
+ "has_anonymizers",
23
+ )
@@ -0,0 +1,36 @@
1
+ from typing import List
2
+
3
+ from opik.anonymizer import anonymizer
4
+
5
+
6
+ # holder for a global list of anonymizers
7
+ _anonymizers: List[anonymizer.Anonymizer] = []
8
+
9
+
10
def add_anonymizer(anonymizer_hook: anonymizer.Anonymizer) -> None:
    """Append ``anonymizer_hook`` to the module-level list of anonymizers.

    Anonymizers are applied by ``apply_anonymizers`` in the order they were added.
    """
    _anonymizers.append(anonymizer_hook)
13
+
14
+
15
def clear_anonymizers() -> None:
    """Remove every registered anonymizer, emptying the module-level list in place."""
    _anonymizers.clear()
18
+
19
+
20
def has_anonymizers() -> bool:
    """Return True when at least one anonymizer is currently registered."""
    return bool(_anonymizers)
23
+
24
+
25
def get_anonymizers() -> List[anonymizer.Anonymizer]:
    """Return the registered anonymizers.

    The module-level list itself is returned (not a copy), so mutating the
    result mutates the registry.
    """
    return _anonymizers
28
+
29
+
30
def apply_anonymizers(
    data: anonymizer.AnonymizerDataType,
) -> anonymizer.AnonymizerDataType:
    """Run ``data`` through every registered anonymizer, in registration order.

    Each anonymizer receives the output of the previous one; the final result
    is returned. With no anonymizers registered, ``data`` is returned as is.
    """
    result = data
    for registered in _anonymizers:
        result = registered.anonymize(result)
    return result
@@ -0,0 +1,112 @@
1
+ import logging
2
+
3
+ import httpx
4
+ from typing import Any, Callable, List, Optional, Dict
5
+
6
+
7
+ _deprecated_httpx_client_hooks: List[Callable[[httpx.Client], httpx.Client]] = []
8
+
9
+ # holder for the global httpx client hook
10
+ _httpx_client_hooks: List["HttpxClientHook"] = []
11
+
12
+
13
+ LOGGER = logging.getLogger(__name__)
14
+
15
+
16
class HttpxClientHook:
    """Provides a means to customize an `httpx.Client` instance used by Opik.

    A hook can contribute extra `httpx.Client` constructor arguments, modify
    the client after it has been created, or both.
    """

    def __init__(
        self,
        client_modifier: Optional[Callable[[httpx.Client], None]] = None,
        client_init_arguments: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Create a hook.

        Args:
            client_modifier: Optional callable that accepts an `httpx.Client`
                instance and customizes it in place. Its return value is ignored.
            client_init_arguments: Optional dictionary of additional
                `httpx.Client` initialization arguments. Its entries override
                entries with the same key in the arguments passed to
                `update_init_arguments`.
        """
        self._hook = client_modifier
        self._httpx_client_arguments = client_init_arguments

    def update_init_arguments(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Merge this hook's init arguments into ``kwargs`` and return it.

        ``kwargs`` is updated in place and the same dictionary object is
        returned. When the hook has no init arguments, ``kwargs`` is returned
        unchanged.
        """
        if self._httpx_client_arguments is not None:
            kwargs.update(self._httpx_client_arguments)

        return kwargs

    def __call__(self, client: httpx.Client) -> None:
        """Invoke the client modifier on ``client``, if one was provided."""
        if self._hook is not None:
            self._hook(client)
47
+
48
+
49
def add_httpx_client_hook(hook: HttpxClientHook) -> None:
    """
    Adds an HttpxClientHook to the list of hooks to be used with HTTPX clients.

    The hook is appended to the module-level hook list, so hooks are kept in
    the order in which they were added.

    Args:
        hook (HttpxClientHook): The hook to register.

    """
    # The list is mutated in place and never rebound, so no `global` is needed.
    _httpx_client_hooks.append(hook)
63
+
64
+
65
def build_init_arguments(default_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Builds the `httpx.Client` initialization arguments by applying registered hooks.

    Every registered hook gets a chance to update the arguments, in
    registration order; each hook receives the result of the previous one.

    Args:
        default_kwargs: A dictionary containing default initialization
            arguments.

    Returns:
        Dict[str, Any]: The dictionary returned by the last hook, or
            `default_kwargs` itself when no hooks are registered.
    """
    init_arguments = default_kwargs
    for registered_hook in _httpx_client_hooks:
        init_arguments = registered_hook.update_init_arguments(init_arguments)

    return init_arguments
83
+
84
+
85
def apply_httpx_client_hooks(client: httpx.Client) -> None:
    """Calls every registered hook with `client`: current hooks first, then deprecated ones."""
    for current_hook in _httpx_client_hooks:
        current_hook(client)

    # hooks registered through the deprecated API run last
    for legacy_hook in _deprecated_httpx_client_hooks:
        legacy_hook(client)
93
+
94
+
95
def register_httpx_client_hook(hook: Callable[[httpx.Client], httpx.Client]) -> None:
    """
    Deprecated: This function will be removed in a future release. Please use `add_httpx_client_hook` instead.

    Registers a plain callable for the customization of `httpx.Client`
    instances. The callable is stored in the list of deprecated hooks and a
    deprecation warning is logged on every registration.

    Args:
        hook: A callable that takes an `httpx.Client` instance and returns a
            customized `httpx.Client`.
    """
    _deprecated_httpx_client_hooks.append(hook)

    deprecation_message = (
        "register_httpx_client_hook is deprecated and will be removed in a future release. "
        "Please use add_httpx_client_hook instead."
    )
    LOGGER.warning(deprecation_message)
opik/httpx_client.py CHANGED
@@ -40,19 +40,22 @@ def get(
40
40
  pool=POOL_TIMEOUT_SECONDS,
41
41
  )
42
42
 
43
- client = OpikHttpxClient(
44
- compress_json_requests=compress_json_requests,
45
- limits=limits,
46
- verify=verify,
47
- timeout=timeout,
48
- follow_redirects=True,
49
- proxy=proxy,
50
- )
43
+ # build HTTPX client arguments
44
+ kwargs = {
45
+ "limits": limits,
46
+ "verify": verify,
47
+ "timeout": timeout,
48
+ "follow_redirects": True,
49
+ "proxy": proxy,
50
+ }
51
+ kwargs = hooks.httpx_client_hook.build_init_arguments(kwargs)
52
+
53
+ client = OpikHttpxClient(compress_json_requests=compress_json_requests, **kwargs)
51
54
 
52
55
  headers = _prepare_headers(workspace=workspace, api_key=api_key)
53
56
  client.headers.update(headers)
54
57
 
55
- hooks.run_httpx_client_hooks(client)
58
+ hooks.httpx_client_hook.apply_httpx_client_hooks(client)
56
59
 
57
60
  return client
58
61
 
opik/id_helpers.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from datetime import datetime
2
2
  from typing import Optional
3
+ import random
4
+ import string
3
5
  import uuid
4
6
  import uuid6
5
7
 
@@ -12,6 +14,22 @@ def generate_id(timestamp: Optional[datetime] = None) -> str:
12
14
  return str(uuid6.uuid7())
13
15
 
14
16
 
17
def generate_random_alphanumeric_string(length: int) -> str:
    """Generate a random alphanumeric string of the specified length.

    The characters are drawn with the `random` module, so the result is not
    suitable for secrets or security tokens.

    Args:
        length: The length of the string to generate.

    Returns:
        A random string containing only alphanumeric characters (a-z, A-Z, 0-9).
        An empty string when `length` is 0.

    Raises:
        ValueError: If `length` is negative.
    """
    if length < 0:
        raise ValueError("Length must be non-negative")

    alphabet = string.ascii_letters + string.digits
    # One call draws all characters (with replacement).
    return "".join(random.choices(alphabet, k=length))
31
+
32
+
15
33
  def uuid4_to_uuid7(user_datetime: datetime, user_uuid: str) -> uuid.UUID:
16
34
  """Convert a UUID v4 into a UUID v7 following RFC draft specification."""
17
35
  # Get Unix timestamp in milliseconds
@@ -1,6 +1,5 @@
1
1
  from typing import List, Optional
2
2
  from . import nodes
3
- import itertools
4
3
 
5
4
 
6
5
  def build_edge_definitions_for_parallel_subagents(
@@ -16,7 +15,7 @@ def build_edge_definitions_for_sequential_subagents(
16
15
  return [f"{children[0].name}"]
17
16
 
18
17
  result: List[str] = []
19
- for current, next in itertools.pairwise(children):
18
+ for current, next in zip(children, children[1:]):
20
19
  edge_definition = f"{current.name} ==> {next.name}"
21
20
  result.append(edge_definition)
22
21
 
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import os
2
3
  from typing import (
3
4
  Any,
@@ -8,6 +9,8 @@ from google.adk.models import LlmResponse
8
9
  import opik.types as opik_types
9
10
  import pydantic
10
11
 
12
+ LOGGER = logging.getLogger(__name__)
13
+
11
14
 
12
15
  def convert_adk_base_model_to_dict(value: pydantic.BaseModel) -> Dict[str, Any]:
13
16
  """Most ADK objects are Pydantic Base Models"""
@@ -27,13 +30,19 @@ def get_adk_provider() -> opik_types.LLMProvider:
27
30
 
28
31
 
29
32
  def has_empty_text_part_content(llm_response: LlmResponse) -> bool:
30
- if llm_response.content is None or len(llm_response.content.parts) == 0:
31
- return True
33
+ try:
34
+ if llm_response.content is None:
35
+ return True
32
36
 
33
- # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
34
- if len(llm_response.content.parts) == 1:
35
- part = llm_response.content.parts[0]
36
- if part.text is not None and len(part.text) == 0:
37
+ if not llm_response.content.parts:
37
38
  return True
38
39
 
39
- return False
40
+ # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
41
+ if len(llm_response.content.parts) == 1:
42
+ part = llm_response.content.parts[0]
43
+ if part.text is not None and len(part.text) == 0:
44
+ return True
45
+ return False
46
+ except Exception as e:
47
+ LOGGER.warning(f"Exception in has_empty_text_part_content {e}", exc_info=True)
48
+ return True
@@ -8,6 +8,7 @@ from google.adk import models
8
8
  from google.adk.tools import base_tool
9
9
  from google.adk.tools import tool_context
10
10
 
11
+ import opik
11
12
  from opik import context_storage
12
13
  from opik.decorator import arguments_helpers, span_creation_handler
13
14
  from opik.api_objects import opik_client, span, trace
@@ -77,7 +78,8 @@ class LegacyOpikTracer:
77
78
  trace_data = self._context_storage.pop_trace_data()
78
79
  assert trace_data is not None
79
80
  trace_data.init_end_time()
80
- self._opik_client.trace(**trace_data.as_parameters)
81
+ if opik.is_tracing_active():
82
+ self._opik_client.trace(**trace_data.as_parameters)
81
83
 
82
84
  def _end_current_span(
83
85
  self,
@@ -85,20 +87,21 @@ class LegacyOpikTracer:
85
87
  span_data = self._context_storage.pop_span_data()
86
88
  assert span_data is not None
87
89
  span_data.init_end_time()
88
- self._opik_client.span(**span_data.as_parameters)
90
+ if opik.is_tracing_active():
91
+ self._opik_client.span(**span_data.as_parameters)
89
92
 
90
93
  def _start_span(self, span_data: span.SpanData) -> None:
91
94
  self._context_storage.add_span_data(span_data)
92
95
  self._opik_created_spans.add(span_data.id)
93
96
 
94
- if self._opik_client.config.log_start_trace_span:
97
+ if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
95
98
  self._opik_client.span(**span_data.as_start_parameters)
96
99
 
97
100
  def _start_trace(self, trace_data: trace.TraceData) -> None:
98
101
  self._context_storage.set_trace_data(trace_data)
99
102
  self._current_trace_created_by_opik_tracer.set(trace_data.id)
100
103
 
101
- if self._opik_client.config.log_start_trace_span:
104
+ if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
102
105
  self._opik_client.trace(**trace_data.as_start_parameters)
103
106
 
104
107
  def _set_current_context_data(self, value: SpanOrTraceData) -> None:
@@ -7,6 +7,7 @@ from google.adk import models
7
7
  from google.adk.tools import base_tool
8
8
  from google.adk.tools import tool_context
9
9
 
10
+ import opik
10
11
  from opik import context_storage
11
12
  from opik.api_objects import opik_client, span, trace
12
13
  from opik.types import DistributedTraceHeadersDict
@@ -253,7 +254,8 @@ class OpikTracer:
253
254
  current_span.init_end_time()
254
255
  # We close this span manually because otherwise ADK will close it too late,
255
256
  # and it will also add tool spans inside of it, which we want to avoid.
256
- self._opik_client.span(**current_span.as_parameters)
257
+ if opik.is_tracing_active():
258
+ self._opik_client.span(**current_span.as_parameters)
257
259
  self._last_model_output = output
258
260
 
259
261
  except Exception as e:
@@ -288,6 +290,7 @@ class OpikTracer:
288
290
  LOGGER.warning(
289
291
  f"No current span found in context for tool: {tool.name}"
290
292
  )
293
+ _log_tool_context_warning(context=tool_context)
291
294
 
292
295
  except Exception as e:
293
296
  LOGGER.error(f"Failed during before_tool_callback(): {e}", exc_info=True)
@@ -321,6 +324,7 @@ class OpikTracer:
321
324
  LOGGER.warning(
322
325
  f"No current span found in context for tool output update: {tool.name}"
323
326
  )
327
+ _log_tool_context_warning(context=tool_context)
324
328
  except Exception as e:
325
329
  LOGGER.error(f"Failed during after_tool_callback(): {e}", exc_info=True)
326
330
 
@@ -355,3 +359,12 @@ def _try_add_agent_graph_to_metadata(
355
359
  }
356
360
  except Exception:
357
361
  LOGGER.error("Failed to build mermaid graph for agent.", exc_info=True)
362
+
363
+
364
def _log_tool_context_warning(context: tool_context.ToolContext) -> None:
    """Log identifying details of a tool context at WARNING level.

    The message contains the function call id and agent name and, when
    `context.actions` is set, the escalate flag and transfer target.
    Nothing is logged when `context` is None.
    """
    # Guard first so the message is never referenced without being built.
    if context is None:
        return

    warning = f"Function call id: {context.function_call_id}, agent name: {context.agent_name}"
    if context.actions is not None:
        warning += f", is escalate: {context.actions.escalate}, transfer to: {context.actions.transfer_to_agent}"

    LOGGER.warning(warning)
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import Iterator, Optional, Tuple
3
3
 
4
4
  import opentelemetry.trace
5
+ import opik
5
6
  import opik.context_storage
6
7
  from opik.api_objects import trace, span
7
8
  from opik.decorator import (
@@ -100,7 +101,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
100
101
  # so we manually finalize it here to avoid incorrect span nesting.
101
102
  opik.context_storage.pop_span_data(ensure_id=current_span_data.id)
102
103
  current_span_data.init_end_time()
103
- self.opik_client.span(**current_span_data.as_parameters)
104
+ if opik.is_tracing_active():
105
+ self.opik_client.span(**current_span_data.as_parameters)
104
106
  current_span_data = opik.context_storage.top_span_data()
105
107
 
106
108
  try:
@@ -145,7 +147,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
145
147
  trace_data = opik.context_storage.pop_trace_data(ensure_id=trace_id)
146
148
  if trace_data is not None:
147
149
  trace_data.init_end_time()
148
- self.opik_client.trace(**trace_data.as_parameters)
150
+ if opik.is_tracing_active():
151
+ self.opik_client.trace(**trace_data.as_parameters)
149
152
 
150
153
  def _ensure_span_is_finalized(self, span_id: str) -> None:
151
154
  opik.context_storage.trim_span_data_stack_to_certain_span(span_id)
@@ -153,7 +156,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
153
156
  span_data = opik.context_storage.pop_span_data(ensure_id=span_id)
154
157
  if span_data is not None:
155
158
  span_data.init_end_time()
156
- self.opik_client.span(**span_data.as_parameters)
159
+ if opik.is_tracing_active():
160
+ self.opik_client.span(**span_data.as_parameters)
157
161
 
158
162
 
159
163
  def _prepare_trace_and_span_to_be_finalized(
@@ -2,7 +2,6 @@ import types
2
2
  from typing import TypeVar, List, Any, Set
3
3
  from . import opik_tracer
4
4
  import logging
5
- from opik import _logging
6
5
 
7
6
  from google.adk.tools import agent_tool
8
7
  from google.adk import agents
@@ -46,7 +45,9 @@ class RecursiveCallbackInjector:
46
45
  callbacks=current_callback_value
47
46
  ):
48
47
  current_callback_value.append(callback_func)
49
- elif not self._is_opik_callback_function(current_callback_value):
48
+ elif not self._is_opik_callback_function(
49
+ current_callback_value
50
+ ) and callable(current_callback_value):
50
51
  setattr(
51
52
  agent, callback_field_name, [current_callback_value, callback_func]
52
53
  )
@@ -118,11 +119,7 @@ def track_adk_agent_recursive(
118
119
  Returns:
119
120
  The modified root agent with tracking enabled
120
121
  """
121
- _logging.log_once_at_level(
122
- logging.INFO,
123
- "`track_adk_agent_recursive` is experimental feature. Please let us know if something is not working as expected: https://github.com/comet-ml/opik/issues",
124
- logger=LOGGER,
125
- )
122
+
126
123
  recursive_callback_injector = RecursiveCallbackInjector(tracer)
127
124
  recursive_callback_injector.inject(root_agent)
128
125
 
File without changes