opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1021) hide show
  1. opik/__init__.py +33 -2
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/__init__.py +5 -0
  9. opik/api_objects/attachment/attachment.py +20 -0
  10. opik/api_objects/attachment/attachment_context.py +36 -0
  11. opik/api_objects/attachment/attachments_extractor.py +153 -0
  12. opik/api_objects/attachment/client.py +220 -0
  13. opik/api_objects/attachment/converters.py +51 -0
  14. opik/api_objects/attachment/decoder.py +18 -0
  15. opik/api_objects/attachment/decoder_base64.py +83 -0
  16. opik/api_objects/attachment/decoder_helpers.py +137 -0
  17. opik/api_objects/conversation/__init__.py +0 -0
  18. opik/api_objects/conversation/conversation_factory.py +43 -0
  19. opik/api_objects/conversation/conversation_thread.py +49 -0
  20. opik/api_objects/data_helpers.py +79 -0
  21. opik/api_objects/dataset/dataset.py +107 -45
  22. opik/api_objects/dataset/rest_operations.py +12 -3
  23. opik/api_objects/experiment/experiment.py +81 -45
  24. opik/api_objects/experiment/experiment_item.py +2 -1
  25. opik/api_objects/experiment/experiments_client.py +64 -0
  26. opik/api_objects/experiment/helpers.py +35 -11
  27. opik/api_objects/experiment/rest_operations.py +88 -19
  28. opik/api_objects/helpers.py +104 -7
  29. opik/api_objects/local_recording.py +81 -0
  30. opik/api_objects/opik_client.py +872 -174
  31. opik/api_objects/opik_query_language.py +136 -18
  32. opik/api_objects/optimization/__init__.py +3 -0
  33. opik/api_objects/optimization/optimization.py +39 -0
  34. opik/api_objects/prompt/__init__.py +13 -1
  35. opik/api_objects/prompt/base_prompt.py +69 -0
  36. opik/api_objects/prompt/base_prompt_template.py +29 -0
  37. opik/api_objects/prompt/chat/__init__.py +1 -0
  38. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  39. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  40. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  41. opik/api_objects/prompt/client.py +193 -41
  42. opik/api_objects/prompt/text/__init__.py +1 -0
  43. opik/api_objects/prompt/text/prompt.py +174 -0
  44. opik/api_objects/prompt/text/prompt_template.py +55 -0
  45. opik/api_objects/prompt/types.py +29 -0
  46. opik/api_objects/rest_stream_parser.py +98 -0
  47. opik/api_objects/search_helpers.py +89 -0
  48. opik/api_objects/span/span_client.py +165 -45
  49. opik/api_objects/span/span_data.py +136 -25
  50. opik/api_objects/threads/__init__.py +0 -0
  51. opik/api_objects/threads/threads_client.py +185 -0
  52. opik/api_objects/trace/trace_client.py +72 -36
  53. opik/api_objects/trace/trace_data.py +112 -26
  54. opik/api_objects/validation_helpers.py +3 -3
  55. opik/cli/__init__.py +5 -0
  56. opik/cli/__main__.py +6 -0
  57. opik/cli/configure.py +66 -0
  58. opik/cli/exports/__init__.py +131 -0
  59. opik/cli/exports/dataset.py +278 -0
  60. opik/cli/exports/experiment.py +784 -0
  61. opik/cli/exports/project.py +685 -0
  62. opik/cli/exports/prompt.py +578 -0
  63. opik/cli/exports/utils.py +406 -0
  64. opik/cli/harbor.py +39 -0
  65. opik/cli/healthcheck.py +21 -0
  66. opik/cli/imports/__init__.py +439 -0
  67. opik/cli/imports/dataset.py +143 -0
  68. opik/cli/imports/experiment.py +1192 -0
  69. opik/cli/imports/project.py +262 -0
  70. opik/cli/imports/prompt.py +177 -0
  71. opik/cli/imports/utils.py +280 -0
  72. opik/cli/main.py +49 -0
  73. opik/cli/proxy.py +93 -0
  74. opik/cli/usage_report/__init__.py +16 -0
  75. opik/cli/usage_report/charts.py +783 -0
  76. opik/cli/usage_report/cli.py +274 -0
  77. opik/cli/usage_report/constants.py +9 -0
  78. opik/cli/usage_report/extraction.py +749 -0
  79. opik/cli/usage_report/pdf.py +244 -0
  80. opik/cli/usage_report/statistics.py +78 -0
  81. opik/cli/usage_report/utils.py +235 -0
  82. opik/config.py +62 -4
  83. opik/configurator/configure.py +45 -6
  84. opik/configurator/opik_rest_helpers.py +4 -1
  85. opik/context_storage.py +164 -65
  86. opik/datetime_helpers.py +12 -0
  87. opik/decorator/arguments_helpers.py +9 -1
  88. opik/decorator/base_track_decorator.py +298 -146
  89. opik/decorator/context_manager/__init__.py +0 -0
  90. opik/decorator/context_manager/span_context_manager.py +123 -0
  91. opik/decorator/context_manager/trace_context_manager.py +84 -0
  92. opik/decorator/generator_wrappers.py +3 -2
  93. opik/decorator/inspect_helpers.py +11 -0
  94. opik/decorator/opik_args/__init__.py +13 -0
  95. opik/decorator/opik_args/api_classes.py +71 -0
  96. opik/decorator/opik_args/helpers.py +120 -0
  97. opik/decorator/span_creation_handler.py +49 -21
  98. opik/decorator/tracker.py +9 -1
  99. opik/dict_utils.py +3 -3
  100. opik/environment.py +13 -1
  101. opik/error_tracking/api.py +1 -1
  102. opik/error_tracking/before_send.py +6 -5
  103. opik/error_tracking/environment_details.py +29 -7
  104. opik/error_tracking/error_filtering/filter_by_response_status_code.py +42 -0
  105. opik/error_tracking/error_filtering/filter_chain_builder.py +14 -3
  106. opik/evaluation/__init__.py +14 -2
  107. opik/evaluation/engine/engine.py +280 -82
  108. opik/evaluation/engine/evaluation_tasks_executor.py +15 -10
  109. opik/evaluation/engine/helpers.py +34 -9
  110. opik/evaluation/engine/metrics_evaluator.py +237 -0
  111. opik/evaluation/engine/types.py +5 -4
  112. opik/evaluation/evaluation_result.py +169 -2
  113. opik/evaluation/evaluator.py +659 -58
  114. opik/evaluation/metrics/__init__.py +121 -6
  115. opik/evaluation/metrics/aggregated_metric.py +92 -0
  116. opik/evaluation/metrics/arguments_helpers.py +15 -21
  117. opik/evaluation/metrics/arguments_validator.py +38 -0
  118. opik/evaluation/metrics/base_metric.py +20 -10
  119. opik/evaluation/metrics/conversation/__init__.py +48 -0
  120. opik/evaluation/metrics/conversation/conversation_thread_metric.py +79 -0
  121. opik/evaluation/metrics/conversation/conversation_turns_factory.py +39 -0
  122. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  123. opik/evaluation/metrics/conversation/helpers.py +84 -0
  124. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  125. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  126. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  127. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  128. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  129. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  130. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  131. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/__init__.py +0 -0
  132. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +274 -0
  133. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/schema.py +16 -0
  134. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/templates.py +95 -0
  135. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  136. opik/evaluation/metrics/conversation/llm_judges/session_completeness/__init__.py +0 -0
  137. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +295 -0
  138. opik/evaluation/metrics/conversation/llm_judges/session_completeness/schema.py +22 -0
  139. opik/evaluation/metrics/conversation/llm_judges/session_completeness/templates.py +139 -0
  140. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  141. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +277 -0
  142. opik/evaluation/metrics/conversation/llm_judges/user_frustration/schema.py +16 -0
  143. opik/evaluation/metrics/conversation/llm_judges/user_frustration/templates.py +135 -0
  144. opik/evaluation/metrics/conversation/types.py +34 -0
  145. opik/evaluation/metrics/conversation_types.py +9 -0
  146. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  147. opik/evaluation/metrics/heuristics/bleu.py +43 -16
  148. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  149. opik/evaluation/metrics/heuristics/contains.py +50 -11
  150. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  151. opik/evaluation/metrics/heuristics/equals.py +4 -1
  152. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  153. opik/evaluation/metrics/heuristics/is_json.py +9 -3
  154. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  155. opik/evaluation/metrics/heuristics/levenshtein_ratio.py +6 -5
  156. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  157. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  158. opik/evaluation/metrics/heuristics/readability.py +129 -0
  159. opik/evaluation/metrics/heuristics/regex_match.py +4 -1
  160. opik/evaluation/metrics/heuristics/rouge.py +148 -0
  161. opik/evaluation/metrics/heuristics/sentiment.py +98 -0
  162. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  163. opik/evaluation/metrics/heuristics/tone.py +155 -0
  164. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  165. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +27 -30
  166. opik/evaluation/metrics/llm_judges/answer_relevance/parser.py +27 -0
  167. opik/evaluation/metrics/llm_judges/answer_relevance/templates.py +10 -10
  168. opik/evaluation/metrics/llm_judges/context_precision/metric.py +28 -31
  169. opik/evaluation/metrics/llm_judges/context_precision/parser.py +27 -0
  170. opik/evaluation/metrics/llm_judges/context_precision/template.py +7 -7
  171. opik/evaluation/metrics/llm_judges/context_recall/metric.py +27 -31
  172. opik/evaluation/metrics/llm_judges/context_recall/parser.py +27 -0
  173. opik/evaluation/metrics/llm_judges/context_recall/template.py +7 -7
  174. opik/evaluation/metrics/llm_judges/factuality/metric.py +7 -26
  175. opik/evaluation/metrics/llm_judges/factuality/parser.py +35 -0
  176. opik/evaluation/metrics/llm_judges/factuality/template.py +1 -1
  177. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  178. opik/evaluation/metrics/llm_judges/g_eval/metric.py +244 -113
  179. opik/evaluation/metrics/llm_judges/g_eval/parser.py +161 -0
  180. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  181. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  182. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  183. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  184. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  185. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  186. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  187. opik/evaluation/metrics/llm_judges/hallucination/metric.py +23 -27
  188. opik/evaluation/metrics/llm_judges/hallucination/parser.py +29 -0
  189. opik/evaluation/metrics/llm_judges/hallucination/template.py +2 -4
  190. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  191. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  192. opik/evaluation/metrics/llm_judges/moderation/metric.py +23 -28
  193. opik/evaluation/metrics/llm_judges/moderation/parser.py +27 -0
  194. opik/evaluation/metrics/llm_judges/moderation/template.py +2 -2
  195. opik/evaluation/metrics/llm_judges/parsing_helpers.py +26 -0
  196. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  197. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  198. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  199. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  200. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  201. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  202. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  203. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  204. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  205. opik/evaluation/metrics/llm_judges/trajectory_accuracy/__init__.py +3 -0
  206. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +171 -0
  207. opik/evaluation/metrics/llm_judges/trajectory_accuracy/parser.py +38 -0
  208. opik/evaluation/metrics/llm_judges/trajectory_accuracy/templates.py +65 -0
  209. opik/evaluation/metrics/llm_judges/usefulness/metric.py +23 -32
  210. opik/evaluation/metrics/llm_judges/usefulness/parser.py +28 -0
  211. opik/evaluation/metrics/ragas_metric.py +112 -0
  212. opik/evaluation/models/__init__.py +10 -0
  213. opik/evaluation/models/base_model.py +140 -18
  214. opik/evaluation/models/langchain/__init__.py +3 -0
  215. opik/evaluation/models/langchain/langchain_chat_model.py +166 -0
  216. opik/evaluation/models/langchain/message_converters.py +106 -0
  217. opik/evaluation/models/langchain/opik_monitoring.py +23 -0
  218. opik/evaluation/models/litellm/litellm_chat_model.py +186 -40
  219. opik/evaluation/models/litellm/opik_monitor.py +24 -21
  220. opik/evaluation/models/litellm/util.py +125 -0
  221. opik/evaluation/models/litellm/warning_filters.py +16 -4
  222. opik/evaluation/models/model_capabilities.py +187 -0
  223. opik/evaluation/models/models_factory.py +25 -3
  224. opik/evaluation/preprocessing.py +92 -0
  225. opik/evaluation/report.py +70 -12
  226. opik/evaluation/rest_operations.py +49 -45
  227. opik/evaluation/samplers/__init__.py +4 -0
  228. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  229. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  230. opik/evaluation/score_statistics.py +66 -0
  231. opik/evaluation/scorers/__init__.py +4 -0
  232. opik/evaluation/scorers/scorer_function.py +55 -0
  233. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  234. opik/evaluation/test_case.py +3 -2
  235. opik/evaluation/test_result.py +1 -0
  236. opik/evaluation/threads/__init__.py +0 -0
  237. opik/evaluation/threads/context_helper.py +32 -0
  238. opik/evaluation/threads/evaluation_engine.py +181 -0
  239. opik/evaluation/threads/evaluation_result.py +18 -0
  240. opik/evaluation/threads/evaluator.py +120 -0
  241. opik/evaluation/threads/helpers.py +51 -0
  242. opik/evaluation/types.py +9 -1
  243. opik/exceptions.py +116 -3
  244. opik/file_upload/__init__.py +0 -0
  245. opik/file_upload/base_upload_manager.py +39 -0
  246. opik/file_upload/file_upload_monitor.py +14 -0
  247. opik/file_upload/file_uploader.py +141 -0
  248. opik/file_upload/mime_type.py +9 -0
  249. opik/file_upload/s3_multipart_upload/__init__.py +0 -0
  250. opik/file_upload/s3_multipart_upload/file_parts_strategy.py +89 -0
  251. opik/file_upload/s3_multipart_upload/s3_file_uploader.py +86 -0
  252. opik/file_upload/s3_multipart_upload/s3_upload_error.py +29 -0
  253. opik/file_upload/thread_pool.py +17 -0
  254. opik/file_upload/upload_client.py +114 -0
  255. opik/file_upload/upload_manager.py +255 -0
  256. opik/file_upload/upload_options.py +37 -0
  257. opik/format_helpers.py +17 -0
  258. opik/guardrails/__init__.py +4 -0
  259. opik/guardrails/guardrail.py +157 -0
  260. opik/guardrails/guards/__init__.py +5 -0
  261. opik/guardrails/guards/guard.py +17 -0
  262. opik/guardrails/guards/pii.py +47 -0
  263. opik/guardrails/guards/topic.py +76 -0
  264. opik/guardrails/rest_api_client.py +34 -0
  265. opik/guardrails/schemas.py +24 -0
  266. opik/guardrails/tracing.py +61 -0
  267. opik/healthcheck/__init__.py +2 -1
  268. opik/healthcheck/checks.py +2 -2
  269. opik/healthcheck/rich_representation.py +1 -1
  270. opik/hooks/__init__.py +23 -0
  271. opik/hooks/anonymizer_hook.py +36 -0
  272. opik/hooks/httpx_client_hook.py +112 -0
  273. opik/httpx_client.py +75 -4
  274. opik/id_helpers.py +18 -0
  275. opik/integrations/adk/__init__.py +14 -0
  276. opik/integrations/adk/callback_context_info_extractors.py +32 -0
  277. opik/integrations/adk/graph/__init__.py +0 -0
  278. opik/integrations/adk/graph/mermaid_graph_builder.py +128 -0
  279. opik/integrations/adk/graph/nodes.py +101 -0
  280. opik/integrations/adk/graph/subgraph_edges_builders.py +41 -0
  281. opik/integrations/adk/helpers.py +48 -0
  282. opik/integrations/adk/legacy_opik_tracer.py +381 -0
  283. opik/integrations/adk/opik_tracer.py +370 -0
  284. opik/integrations/adk/patchers/__init__.py +4 -0
  285. opik/integrations/adk/patchers/adk_otel_tracer/__init__.py +0 -0
  286. opik/integrations/adk/patchers/adk_otel_tracer/llm_span_helpers.py +30 -0
  287. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +201 -0
  288. opik/integrations/adk/patchers/litellm_wrappers.py +91 -0
  289. opik/integrations/adk/patchers/llm_response_wrapper.py +105 -0
  290. opik/integrations/adk/patchers/patchers.py +64 -0
  291. opik/integrations/adk/recursive_callback_injector.py +126 -0
  292. opik/integrations/aisuite/aisuite_decorator.py +8 -3
  293. opik/integrations/aisuite/opik_tracker.py +1 -0
  294. opik/integrations/anthropic/messages_create_decorator.py +8 -3
  295. opik/integrations/anthropic/opik_tracker.py +0 -1
  296. opik/integrations/bedrock/converse/__init__.py +0 -0
  297. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  298. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +18 -8
  299. opik/integrations/bedrock/invoke_agent_decorator.py +12 -7
  300. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  301. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  302. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  303. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  304. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  305. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  306. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  307. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  308. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  309. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  310. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  311. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  312. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  313. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  314. opik/integrations/bedrock/opik_tracker.py +43 -4
  315. opik/integrations/bedrock/types.py +19 -0
  316. opik/integrations/crewai/crewai_decorator.py +34 -56
  317. opik/integrations/crewai/opik_tracker.py +31 -10
  318. opik/integrations/crewai/patchers/__init__.py +5 -0
  319. opik/integrations/crewai/patchers/flow.py +118 -0
  320. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  321. opik/integrations/crewai/patchers/llm_client.py +207 -0
  322. opik/integrations/dspy/callback.py +246 -84
  323. opik/integrations/dspy/graph.py +88 -0
  324. opik/integrations/dspy/parsers.py +168 -0
  325. opik/integrations/genai/encoder_extension.py +2 -6
  326. opik/integrations/genai/generate_content_decorator.py +20 -13
  327. opik/integrations/guardrails/guardrails_decorator.py +4 -0
  328. opik/integrations/harbor/__init__.py +17 -0
  329. opik/integrations/harbor/experiment_service.py +269 -0
  330. opik/integrations/harbor/opik_tracker.py +528 -0
  331. opik/integrations/haystack/constants.py +35 -0
  332. opik/integrations/haystack/converters.py +1 -2
  333. opik/integrations/haystack/opik_connector.py +28 -6
  334. opik/integrations/haystack/opik_span_bridge.py +284 -0
  335. opik/integrations/haystack/opik_tracer.py +124 -222
  336. opik/integrations/langchain/__init__.py +3 -1
  337. opik/integrations/langchain/helpers.py +96 -0
  338. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  339. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  340. opik/integrations/langchain/opik_encoder_extension.py +2 -2
  341. opik/integrations/langchain/opik_tracer.py +641 -206
  342. opik/integrations/langchain/provider_usage_extractors/__init__.py +5 -0
  343. opik/integrations/langchain/provider_usage_extractors/anthropic_usage_extractor.py +101 -0
  344. opik/integrations/langchain/provider_usage_extractors/anthropic_vertexai_usage_extractor.py +67 -0
  345. opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py +94 -0
  346. opik/integrations/langchain/provider_usage_extractors/google_generative_ai_usage_extractor.py +109 -0
  347. opik/integrations/langchain/provider_usage_extractors/groq_usage_extractor.py +92 -0
  348. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/__init__.py +15 -0
  349. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +134 -0
  350. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py +163 -0
  351. opik/integrations/langchain/provider_usage_extractors/openai_usage_extractor.py +124 -0
  352. opik/integrations/langchain/provider_usage_extractors/provider_usage_extractor_protocol.py +29 -0
  353. opik/integrations/langchain/provider_usage_extractors/usage_extractor.py +48 -0
  354. opik/integrations/langchain/provider_usage_extractors/vertexai_usage_extractor.py +109 -0
  355. opik/integrations/litellm/__init__.py +5 -0
  356. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  357. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  358. opik/integrations/litellm/opik_tracker.py +43 -0
  359. opik/integrations/litellm/stream_patchers.py +151 -0
  360. opik/integrations/llama_index/callback.py +179 -78
  361. opik/integrations/llama_index/event_parsing_utils.py +29 -9
  362. opik/integrations/openai/agents/opik_tracing_processor.py +204 -32
  363. opik/integrations/openai/agents/span_data_parsers.py +15 -6
  364. opik/integrations/openai/chat_completion_chunks_aggregator.py +1 -1
  365. opik/integrations/openai/{openai_decorator.py → openai_chat_completions_decorator.py} +45 -35
  366. opik/integrations/openai/openai_responses_decorator.py +158 -0
  367. opik/integrations/openai/opik_tracker.py +94 -13
  368. opik/integrations/openai/response_events_aggregator.py +36 -0
  369. opik/integrations/openai/stream_patchers.py +125 -15
  370. opik/integrations/sagemaker/auth.py +5 -1
  371. opik/jsonable_encoder.py +29 -1
  372. opik/llm_usage/base_original_provider_usage.py +15 -8
  373. opik/llm_usage/bedrock_usage.py +8 -2
  374. opik/llm_usage/google_usage.py +6 -1
  375. opik/llm_usage/llm_usage_info.py +6 -0
  376. opik/llm_usage/{openai_usage.py → openai_chat_completions_usage.py} +2 -12
  377. opik/llm_usage/{openai_agent_usage.py → openai_responses_usage.py} +7 -15
  378. opik/llm_usage/opik_usage.py +36 -10
  379. opik/llm_usage/opik_usage_factory.py +35 -19
  380. opik/logging_messages.py +19 -7
  381. opik/message_processing/arguments_utils.py +22 -0
  382. opik/message_processing/batching/base_batcher.py +45 -17
  383. opik/message_processing/batching/batch_manager.py +22 -10
  384. opik/message_processing/batching/batch_manager_constuctors.py +36 -11
  385. opik/message_processing/batching/batchers.py +167 -44
  386. opik/message_processing/batching/flushing_thread.py +0 -3
  387. opik/message_processing/batching/sequence_splitter.py +50 -5
  388. opik/message_processing/emulation/__init__.py +0 -0
  389. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  390. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  391. opik/message_processing/emulation/models.py +162 -0
  392. opik/message_processing/encoder_helpers.py +79 -0
  393. opik/message_processing/message_queue.py +79 -0
  394. opik/message_processing/messages.py +154 -12
  395. opik/message_processing/preprocessing/__init__.py +0 -0
  396. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  397. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  398. opik/message_processing/preprocessing/constants.py +1 -0
  399. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  400. opik/message_processing/preprocessing/preprocessor.py +36 -0
  401. opik/message_processing/processors/__init__.py +0 -0
  402. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  403. opik/message_processing/processors/message_processors.py +92 -0
  404. opik/message_processing/processors/message_processors_chain.py +96 -0
  405. opik/message_processing/processors/online_message_processor.py +324 -0
  406. opik/message_processing/queue_consumer.py +61 -13
  407. opik/message_processing/streamer.py +102 -31
  408. opik/message_processing/streamer_constructors.py +67 -12
  409. opik/opik_context.py +103 -11
  410. opik/plugins/pytest/decorator.py +2 -2
  411. opik/plugins/pytest/experiment_runner.py +3 -2
  412. opik/plugins/pytest/hooks.py +6 -4
  413. opik/rate_limit/__init__.py +0 -0
  414. opik/rate_limit/rate_limit.py +25 -0
  415. opik/rest_api/__init__.py +643 -11
  416. opik/rest_api/alerts/__init__.py +7 -0
  417. opik/rest_api/alerts/client.py +667 -0
  418. opik/rest_api/alerts/raw_client.py +1015 -0
  419. opik/rest_api/alerts/types/__init__.py +7 -0
  420. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  421. opik/rest_api/annotation_queues/__init__.py +4 -0
  422. opik/rest_api/annotation_queues/client.py +668 -0
  423. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  424. opik/rest_api/attachments/__init__.py +17 -0
  425. opik/rest_api/attachments/client.py +752 -0
  426. opik/rest_api/attachments/raw_client.py +1125 -0
  427. opik/rest_api/attachments/types/__init__.py +15 -0
  428. opik/rest_api/attachments/types/attachment_list_request_entity_type.py +5 -0
  429. opik/rest_api/attachments/types/download_attachment_request_entity_type.py +5 -0
  430. opik/rest_api/attachments/types/start_multipart_upload_request_entity_type.py +5 -0
  431. opik/rest_api/attachments/types/upload_attachment_request_entity_type.py +5 -0
  432. opik/rest_api/automation_rule_evaluators/__init__.py +2 -0
  433. opik/rest_api/automation_rule_evaluators/client.py +182 -1162
  434. opik/rest_api/automation_rule_evaluators/raw_client.py +598 -0
  435. opik/rest_api/chat_completions/__init__.py +2 -0
  436. opik/rest_api/chat_completions/client.py +115 -149
  437. opik/rest_api/chat_completions/raw_client.py +339 -0
  438. opik/rest_api/check/__init__.py +2 -0
  439. opik/rest_api/check/client.py +88 -106
  440. opik/rest_api/check/raw_client.py +258 -0
  441. opik/rest_api/client.py +112 -212
  442. opik/rest_api/core/__init__.py +5 -0
  443. opik/rest_api/core/api_error.py +12 -6
  444. opik/rest_api/core/client_wrapper.py +4 -14
  445. opik/rest_api/core/datetime_utils.py +1 -3
  446. opik/rest_api/core/file.py +2 -5
  447. opik/rest_api/core/http_client.py +42 -120
  448. opik/rest_api/core/http_response.py +55 -0
  449. opik/rest_api/core/jsonable_encoder.py +1 -4
  450. opik/rest_api/core/pydantic_utilities.py +79 -147
  451. opik/rest_api/core/query_encoder.py +1 -3
  452. opik/rest_api/core/serialization.py +10 -10
  453. opik/rest_api/dashboards/__init__.py +4 -0
  454. opik/rest_api/dashboards/client.py +462 -0
  455. opik/rest_api/dashboards/raw_client.py +648 -0
  456. opik/rest_api/datasets/__init__.py +5 -0
  457. opik/rest_api/datasets/client.py +1638 -1091
  458. opik/rest_api/datasets/raw_client.py +3389 -0
  459. opik/rest_api/datasets/types/__init__.py +8 -0
  460. opik/rest_api/datasets/types/dataset_update_visibility.py +5 -0
  461. opik/rest_api/datasets/types/dataset_write_visibility.py +5 -0
  462. opik/rest_api/errors/__init__.py +2 -0
  463. opik/rest_api/errors/bad_request_error.py +4 -3
  464. opik/rest_api/errors/conflict_error.py +4 -3
  465. opik/rest_api/errors/forbidden_error.py +4 -2
  466. opik/rest_api/errors/not_found_error.py +4 -3
  467. opik/rest_api/errors/not_implemented_error.py +4 -3
  468. opik/rest_api/errors/unauthorized_error.py +4 -3
  469. opik/rest_api/errors/unprocessable_entity_error.py +4 -3
  470. opik/rest_api/experiments/__init__.py +5 -0
  471. opik/rest_api/experiments/client.py +676 -752
  472. opik/rest_api/experiments/raw_client.py +1872 -0
  473. opik/rest_api/experiments/types/__init__.py +10 -0
  474. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  475. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  476. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  477. opik/rest_api/experiments/types/experiment_write_type.py +5 -0
  478. opik/rest_api/feedback_definitions/__init__.py +2 -0
  479. opik/rest_api/feedback_definitions/client.py +96 -370
  480. opik/rest_api/feedback_definitions/raw_client.py +541 -0
  481. opik/rest_api/feedback_definitions/types/__init__.py +2 -0
  482. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -3
  483. opik/rest_api/guardrails/__init__.py +4 -0
  484. opik/rest_api/guardrails/client.py +104 -0
  485. opik/rest_api/guardrails/raw_client.py +102 -0
  486. opik/rest_api/llm_provider_key/__init__.py +2 -0
  487. opik/rest_api/llm_provider_key/client.py +166 -440
  488. opik/rest_api/llm_provider_key/raw_client.py +643 -0
  489. opik/rest_api/llm_provider_key/types/__init__.py +2 -0
  490. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  491. opik/rest_api/manual_evaluation/__init__.py +4 -0
  492. opik/rest_api/manual_evaluation/client.py +347 -0
  493. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  494. opik/rest_api/open_telemetry_ingestion/__init__.py +2 -0
  495. opik/rest_api/open_telemetry_ingestion/client.py +38 -63
  496. opik/rest_api/open_telemetry_ingestion/raw_client.py +88 -0
  497. opik/rest_api/optimizations/__init__.py +7 -0
  498. opik/rest_api/optimizations/client.py +704 -0
  499. opik/rest_api/optimizations/raw_client.py +920 -0
  500. opik/rest_api/optimizations/types/__init__.py +7 -0
  501. opik/rest_api/optimizations/types/optimization_update_status.py +7 -0
  502. opik/rest_api/projects/__init__.py +10 -1
  503. opik/rest_api/projects/client.py +180 -855
  504. opik/rest_api/projects/raw_client.py +1216 -0
  505. opik/rest_api/projects/types/__init__.py +11 -4
  506. opik/rest_api/projects/types/project_metric_request_public_interval.py +1 -3
  507. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +11 -1
  508. opik/rest_api/projects/types/project_update_visibility.py +5 -0
  509. opik/rest_api/projects/types/project_write_visibility.py +5 -0
  510. opik/rest_api/prompts/__init__.py +4 -2
  511. opik/rest_api/prompts/client.py +381 -970
  512. opik/rest_api/prompts/raw_client.py +1634 -0
  513. opik/rest_api/prompts/types/__init__.py +5 -1
  514. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  515. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  516. opik/rest_api/raw_client.py +156 -0
  517. opik/rest_api/redirect/__init__.py +4 -0
  518. opik/rest_api/redirect/client.py +375 -0
  519. opik/rest_api/redirect/raw_client.py +566 -0
  520. opik/rest_api/service_toggles/__init__.py +4 -0
  521. opik/rest_api/service_toggles/client.py +91 -0
  522. opik/rest_api/service_toggles/raw_client.py +93 -0
  523. opik/rest_api/spans/__init__.py +2 -0
  524. opik/rest_api/spans/client.py +659 -1354
  525. opik/rest_api/spans/raw_client.py +2383 -0
  526. opik/rest_api/spans/types/__init__.py +2 -0
  527. opik/rest_api/spans/types/find_feedback_score_names_1_request_type.py +1 -3
  528. opik/rest_api/spans/types/get_span_stats_request_type.py +1 -3
  529. opik/rest_api/spans/types/get_spans_by_project_request_type.py +1 -3
  530. opik/rest_api/spans/types/span_search_stream_request_public_type.py +1 -3
  531. opik/rest_api/system_usage/__init__.py +2 -0
  532. opik/rest_api/system_usage/client.py +157 -216
  533. opik/rest_api/system_usage/raw_client.py +455 -0
  534. opik/rest_api/traces/__init__.py +2 -0
  535. opik/rest_api/traces/client.py +2102 -1625
  536. opik/rest_api/traces/raw_client.py +4144 -0
  537. opik/rest_api/types/__init__.py +629 -24
  538. opik/rest_api/types/aggregation_data.py +27 -0
  539. opik/rest_api/types/alert.py +33 -0
  540. opik/rest_api/types/alert_alert_type.py +5 -0
  541. opik/rest_api/types/alert_page_public.py +24 -0
  542. opik/rest_api/types/alert_public.py +33 -0
  543. opik/rest_api/types/alert_public_alert_type.py +5 -0
  544. opik/rest_api/types/alert_trigger.py +27 -0
  545. opik/rest_api/types/alert_trigger_config.py +28 -0
  546. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  547. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  548. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  549. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  550. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  551. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  552. opik/rest_api/types/alert_trigger_public.py +27 -0
  553. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  554. opik/rest_api/types/alert_trigger_write.py +23 -0
  555. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  556. opik/rest_api/types/alert_write.py +28 -0
  557. opik/rest_api/types/alert_write_alert_type.py +5 -0
  558. opik/rest_api/types/annotation_queue.py +42 -0
  559. opik/rest_api/types/annotation_queue_batch.py +27 -0
  560. opik/rest_api/types/{json_schema_element.py → annotation_queue_item_ids.py} +5 -7
  561. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  562. opik/rest_api/types/annotation_queue_public.py +38 -0
  563. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  564. opik/rest_api/types/{workspace_metadata.py → annotation_queue_reviewer.py} +6 -7
  565. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  566. opik/rest_api/types/annotation_queue_scope.py +5 -0
  567. opik/rest_api/types/annotation_queue_write.py +31 -0
  568. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  569. opik/rest_api/types/assistant_message.py +7 -8
  570. opik/rest_api/types/assistant_message_role.py +1 -3
  571. opik/rest_api/types/attachment.py +22 -0
  572. opik/rest_api/types/attachment_page.py +28 -0
  573. opik/rest_api/types/audio_url.py +19 -0
  574. opik/rest_api/types/audio_url_public.py +19 -0
  575. opik/rest_api/types/audio_url_write.py +19 -0
  576. opik/rest_api/types/automation_rule_evaluator.py +160 -0
  577. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +6 -6
  578. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +6 -6
  579. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +6 -6
  580. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  581. opik/rest_api/types/automation_rule_evaluator_page_public.py +6 -6
  582. opik/rest_api/types/automation_rule_evaluator_public.py +155 -0
  583. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  584. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  585. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  586. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  587. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  588. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  589. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +22 -0
  590. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +22 -0
  591. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +22 -0
  592. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +22 -0
  593. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +22 -0
  594. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +22 -0
  595. opik/rest_api/types/automation_rule_evaluator_update.py +143 -0
  596. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +6 -6
  597. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  598. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  599. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +22 -0
  600. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +22 -0
  601. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +6 -6
  602. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +6 -6
  603. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +6 -6
  604. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +6 -6
  605. opik/rest_api/types/automation_rule_evaluator_write.py +143 -0
  606. opik/rest_api/types/avg_value_stat_public.py +3 -5
  607. opik/rest_api/types/batch_delete.py +3 -5
  608. opik/rest_api/types/batch_delete_by_project.py +20 -0
  609. opik/rest_api/types/bi_information.py +3 -5
  610. opik/rest_api/types/bi_information_response.py +4 -6
  611. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  612. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  613. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  614. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  615. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  616. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  617. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  618. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  619. opik/rest_api/types/categorical_feedback_definition.py +5 -7
  620. opik/rest_api/types/categorical_feedback_definition_create.py +4 -6
  621. opik/rest_api/types/categorical_feedback_definition_public.py +5 -7
  622. opik/rest_api/types/categorical_feedback_definition_update.py +4 -6
  623. opik/rest_api/types/categorical_feedback_detail.py +3 -5
  624. opik/rest_api/types/categorical_feedback_detail_create.py +3 -5
  625. opik/rest_api/types/categorical_feedback_detail_public.py +3 -5
  626. opik/rest_api/types/categorical_feedback_detail_update.py +3 -5
  627. opik/rest_api/types/chat_completion_choice.py +4 -6
  628. opik/rest_api/types/chat_completion_response.py +5 -6
  629. opik/rest_api/types/check.py +22 -0
  630. opik/rest_api/types/{json_node_compare.py → check_name.py} +1 -1
  631. opik/rest_api/types/check_public.py +22 -0
  632. opik/rest_api/types/check_public_name.py +5 -0
  633. opik/rest_api/types/check_public_result.py +5 -0
  634. opik/rest_api/types/check_result.py +5 -0
  635. opik/rest_api/types/chunked_output_json_node.py +4 -6
  636. opik/rest_api/types/chunked_output_json_node_public.py +4 -6
  637. opik/rest_api/types/chunked_output_json_node_public_type.py +6 -10
  638. opik/rest_api/types/chunked_output_json_node_type.py +6 -10
  639. opik/rest_api/types/column.py +8 -10
  640. opik/rest_api/types/column_compare.py +8 -10
  641. opik/rest_api/types/column_public.py +8 -10
  642. opik/rest_api/types/column_types_item.py +1 -3
  643. opik/rest_api/types/comment.py +4 -6
  644. opik/rest_api/types/comment_compare.py +4 -6
  645. opik/rest_api/types/comment_public.py +4 -6
  646. opik/rest_api/types/complete_multipart_upload_request.py +33 -0
  647. opik/rest_api/types/complete_multipart_upload_request_entity_type.py +5 -0
  648. opik/rest_api/types/completion_tokens_details.py +3 -5
  649. opik/rest_api/types/count_value_stat_public.py +3 -5
  650. opik/rest_api/types/dashboard_page_public.py +24 -0
  651. opik/rest_api/types/dashboard_public.py +30 -0
  652. opik/rest_api/types/data_point_double.py +21 -0
  653. opik/rest_api/types/data_point_number_public.py +3 -5
  654. opik/rest_api/types/dataset.py +14 -6
  655. opik/rest_api/types/dataset_expansion.py +42 -0
  656. opik/rest_api/types/dataset_expansion_response.py +39 -0
  657. opik/rest_api/types/dataset_item.py +9 -8
  658. opik/rest_api/types/dataset_item_batch.py +3 -5
  659. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  660. opik/rest_api/types/dataset_item_compare.py +9 -8
  661. opik/rest_api/types/dataset_item_compare_source.py +1 -3
  662. opik/rest_api/types/dataset_item_filter.py +27 -0
  663. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  664. opik/rest_api/types/dataset_item_page_compare.py +10 -7
  665. opik/rest_api/types/dataset_item_page_public.py +10 -7
  666. opik/rest_api/types/dataset_item_public.py +9 -8
  667. opik/rest_api/types/dataset_item_public_source.py +1 -3
  668. opik/rest_api/types/dataset_item_source.py +1 -3
  669. opik/rest_api/types/dataset_item_update.py +39 -0
  670. opik/rest_api/types/dataset_item_write.py +5 -6
  671. opik/rest_api/types/dataset_item_write_source.py +1 -3
  672. opik/rest_api/types/dataset_page_public.py +9 -6
  673. opik/rest_api/types/dataset_public.py +14 -6
  674. opik/rest_api/types/dataset_public_status.py +5 -0
  675. opik/rest_api/types/dataset_public_visibility.py +5 -0
  676. opik/rest_api/types/dataset_status.py +5 -0
  677. opik/rest_api/types/dataset_version_diff.py +22 -0
  678. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  679. opik/rest_api/types/dataset_version_page_public.py +23 -0
  680. opik/rest_api/types/dataset_version_public.py +59 -0
  681. opik/rest_api/types/dataset_version_summary.py +46 -0
  682. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  683. opik/rest_api/types/dataset_visibility.py +5 -0
  684. opik/rest_api/types/delete_attachments_request.py +23 -0
  685. opik/rest_api/types/delete_attachments_request_entity_type.py +5 -0
  686. opik/rest_api/types/delete_feedback_score.py +4 -5
  687. opik/rest_api/types/delete_ids_holder.py +19 -0
  688. opik/rest_api/types/delta.py +7 -9
  689. opik/rest_api/types/error_count_with_deviation.py +21 -0
  690. opik/rest_api/types/error_count_with_deviation_detailed.py +21 -0
  691. opik/rest_api/types/error_info.py +3 -5
  692. opik/rest_api/types/error_info_experiment_item_bulk_write_view.py +21 -0
  693. opik/rest_api/types/error_info_public.py +3 -5
  694. opik/rest_api/types/error_info_write.py +3 -5
  695. opik/rest_api/types/error_message.py +3 -5
  696. opik/rest_api/types/error_message_detail.py +3 -5
  697. opik/rest_api/types/error_message_detailed.py +3 -5
  698. opik/rest_api/types/error_message_public.py +3 -5
  699. opik/rest_api/types/experiment.py +21 -10
  700. opik/rest_api/types/experiment_group_aggregations_response.py +20 -0
  701. opik/rest_api/types/experiment_group_response.py +22 -0
  702. opik/rest_api/types/experiment_item.py +14 -11
  703. opik/rest_api/types/experiment_item_bulk_record.py +27 -0
  704. opik/rest_api/types/experiment_item_bulk_record_experiment_item_bulk_write_view.py +27 -0
  705. opik/rest_api/types/experiment_item_bulk_upload.py +27 -0
  706. opik/rest_api/types/experiment_item_compare.py +14 -11
  707. opik/rest_api/types/experiment_item_compare_trace_visibility_mode.py +5 -0
  708. opik/rest_api/types/experiment_item_public.py +6 -6
  709. opik/rest_api/types/experiment_item_public_trace_visibility_mode.py +5 -0
  710. opik/rest_api/types/experiment_item_trace_visibility_mode.py +5 -0
  711. opik/rest_api/types/experiment_page_public.py +9 -6
  712. opik/rest_api/types/experiment_public.py +21 -10
  713. opik/rest_api/types/experiment_public_status.py +5 -0
  714. opik/rest_api/types/experiment_public_type.py +5 -0
  715. opik/rest_api/types/experiment_score.py +20 -0
  716. opik/rest_api/types/experiment_score_public.py +20 -0
  717. opik/rest_api/types/experiment_score_write.py +20 -0
  718. opik/rest_api/types/experiment_status.py +5 -0
  719. opik/rest_api/types/experiment_type.py +5 -0
  720. opik/rest_api/types/export_trace_service_request.py +5 -0
  721. opik/rest_api/types/feedback.py +40 -27
  722. opik/rest_api/types/feedback_create.py +27 -13
  723. opik/rest_api/types/feedback_definition_page_public.py +4 -6
  724. opik/rest_api/types/feedback_object_public.py +40 -27
  725. opik/rest_api/types/feedback_public.py +40 -27
  726. opik/rest_api/types/feedback_score.py +7 -7
  727. opik/rest_api/types/feedback_score_average.py +3 -5
  728. opik/rest_api/types/feedback_score_average_detailed.py +3 -5
  729. opik/rest_api/types/feedback_score_average_public.py +3 -5
  730. opik/rest_api/types/feedback_score_batch.py +4 -6
  731. opik/rest_api/types/feedback_score_batch_item.py +6 -6
  732. opik/rest_api/types/feedback_score_batch_item_source.py +1 -3
  733. opik/rest_api/types/feedback_score_batch_item_thread.py +32 -0
  734. opik/rest_api/types/feedback_score_batch_item_thread_source.py +5 -0
  735. opik/rest_api/types/feedback_score_compare.py +7 -7
  736. opik/rest_api/types/feedback_score_compare_source.py +1 -3
  737. opik/rest_api/types/feedback_score_experiment_item_bulk_write_view.py +31 -0
  738. opik/rest_api/types/feedback_score_experiment_item_bulk_write_view_source.py +5 -0
  739. opik/rest_api/types/feedback_score_names.py +4 -6
  740. opik/rest_api/types/feedback_score_public.py +11 -7
  741. opik/rest_api/types/feedback_score_public_source.py +1 -3
  742. opik/rest_api/types/feedback_score_source.py +1 -3
  743. opik/rest_api/types/feedback_update.py +27 -13
  744. opik/rest_api/types/function.py +4 -7
  745. opik/rest_api/types/function_call.py +3 -5
  746. opik/rest_api/types/group_content.py +19 -0
  747. opik/rest_api/types/group_content_with_aggregations.py +21 -0
  748. opik/rest_api/types/group_detail.py +19 -0
  749. opik/rest_api/types/group_details.py +20 -0
  750. opik/rest_api/types/guardrail.py +34 -0
  751. opik/rest_api/types/guardrail_batch.py +20 -0
  752. opik/rest_api/types/guardrail_name.py +5 -0
  753. opik/rest_api/types/guardrail_result.py +5 -0
  754. opik/rest_api/types/guardrail_write.py +33 -0
  755. opik/rest_api/types/guardrail_write_name.py +5 -0
  756. opik/rest_api/types/guardrail_write_result.py +5 -0
  757. opik/rest_api/types/guardrails_validation.py +21 -0
  758. opik/rest_api/types/guardrails_validation_public.py +21 -0
  759. opik/rest_api/types/ids_holder.py +19 -0
  760. opik/rest_api/types/image_url.py +20 -0
  761. opik/rest_api/types/image_url_public.py +20 -0
  762. opik/rest_api/types/image_url_write.py +20 -0
  763. opik/rest_api/types/json_list_string.py +7 -0
  764. opik/rest_api/types/json_list_string_compare.py +7 -0
  765. opik/rest_api/types/json_list_string_experiment_item_bulk_write_view.py +7 -0
  766. opik/rest_api/types/json_list_string_public.py +7 -0
  767. opik/rest_api/types/json_list_string_write.py +7 -0
  768. opik/rest_api/types/json_schema.py +5 -8
  769. opik/rest_api/types/llm_as_judge_code.py +8 -12
  770. opik/rest_api/types/llm_as_judge_code_public.py +8 -12
  771. opik/rest_api/types/llm_as_judge_code_write.py +8 -12
  772. opik/rest_api/types/llm_as_judge_message.py +9 -7
  773. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  774. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  775. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  776. opik/rest_api/types/llm_as_judge_message_public.py +9 -7
  777. opik/rest_api/types/llm_as_judge_message_public_role.py +1 -1
  778. opik/rest_api/types/llm_as_judge_message_role.py +1 -1
  779. opik/rest_api/types/llm_as_judge_message_write.py +9 -7
  780. opik/rest_api/types/llm_as_judge_message_write_role.py +1 -1
  781. opik/rest_api/types/llm_as_judge_model_parameters.py +6 -5
  782. opik/rest_api/types/llm_as_judge_model_parameters_public.py +6 -5
  783. opik/rest_api/types/llm_as_judge_model_parameters_write.py +6 -5
  784. opik/rest_api/types/llm_as_judge_output_schema.py +4 -6
  785. opik/rest_api/types/llm_as_judge_output_schema_public.py +4 -6
  786. opik/rest_api/types/llm_as_judge_output_schema_public_type.py +1 -3
  787. opik/rest_api/types/llm_as_judge_output_schema_type.py +1 -3
  788. opik/rest_api/types/llm_as_judge_output_schema_write.py +4 -6
  789. opik/rest_api/types/llm_as_judge_output_schema_write_type.py +1 -3
  790. opik/rest_api/types/log_item.py +5 -7
  791. opik/rest_api/types/log_item_level.py +1 -3
  792. opik/rest_api/types/log_page.py +4 -6
  793. opik/rest_api/types/manual_evaluation_request.py +38 -0
  794. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  795. opik/rest_api/types/manual_evaluation_response.py +27 -0
  796. opik/rest_api/types/multipart_upload_part.py +20 -0
  797. opik/rest_api/types/numerical_feedback_definition.py +5 -7
  798. opik/rest_api/types/numerical_feedback_definition_create.py +4 -6
  799. opik/rest_api/types/numerical_feedback_definition_public.py +5 -7
  800. opik/rest_api/types/numerical_feedback_definition_update.py +4 -6
  801. opik/rest_api/types/numerical_feedback_detail.py +3 -5
  802. opik/rest_api/types/numerical_feedback_detail_create.py +3 -5
  803. opik/rest_api/types/numerical_feedback_detail_public.py +3 -5
  804. opik/rest_api/types/numerical_feedback_detail_update.py +3 -5
  805. opik/rest_api/types/optimization.py +37 -0
  806. opik/rest_api/types/optimization_page_public.py +28 -0
  807. opik/rest_api/types/optimization_public.py +37 -0
  808. opik/rest_api/types/optimization_public_status.py +7 -0
  809. opik/rest_api/types/optimization_status.py +7 -0
  810. opik/rest_api/types/optimization_studio_config.py +27 -0
  811. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  812. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  813. opik/rest_api/types/optimization_studio_log.py +22 -0
  814. opik/rest_api/types/optimization_write.py +30 -0
  815. opik/rest_api/types/optimization_write_status.py +7 -0
  816. opik/rest_api/types/page_columns.py +4 -6
  817. opik/rest_api/types/percentage_value_stat_public.py +4 -6
  818. opik/rest_api/types/percentage_values.py +8 -16
  819. opik/rest_api/types/percentage_values_detailed.py +8 -16
  820. opik/rest_api/types/percentage_values_public.py +8 -16
  821. opik/rest_api/types/project.py +12 -7
  822. opik/rest_api/types/project_detailed.py +12 -7
  823. opik/rest_api/types/project_detailed_visibility.py +5 -0
  824. opik/rest_api/types/project_metric_response_public.py +5 -9
  825. opik/rest_api/types/project_metric_response_public_interval.py +1 -3
  826. opik/rest_api/types/project_metric_response_public_metric_type.py +11 -1
  827. opik/rest_api/types/project_page_public.py +8 -10
  828. opik/rest_api/types/project_public.py +6 -6
  829. opik/rest_api/types/project_public_visibility.py +5 -0
  830. opik/rest_api/types/project_reference.py +31 -0
  831. opik/rest_api/types/project_reference_public.py +31 -0
  832. opik/rest_api/types/project_stat_item_object_public.py +8 -17
  833. opik/rest_api/types/project_stats_public.py +4 -6
  834. opik/rest_api/types/project_stats_summary.py +4 -6
  835. opik/rest_api/types/project_stats_summary_item.py +9 -6
  836. opik/rest_api/types/project_visibility.py +5 -0
  837. opik/rest_api/types/prompt.py +12 -7
  838. opik/rest_api/types/prompt_detail.py +12 -7
  839. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  840. opik/rest_api/types/prompt_page_public.py +9 -6
  841. opik/rest_api/types/prompt_public.py +11 -6
  842. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  843. opik/rest_api/types/prompt_template_structure.py +5 -0
  844. opik/rest_api/types/prompt_tokens_details.py +19 -0
  845. opik/rest_api/types/prompt_version.py +7 -6
  846. opik/rest_api/types/prompt_version_detail.py +7 -6
  847. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  848. opik/rest_api/types/prompt_version_link.py +4 -5
  849. opik/rest_api/types/prompt_version_link_public.py +4 -5
  850. opik/rest_api/types/prompt_version_link_write.py +3 -5
  851. opik/rest_api/types/prompt_version_page_public.py +9 -6
  852. opik/rest_api/types/prompt_version_public.py +7 -6
  853. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  854. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  855. opik/rest_api/types/prompt_version_update.py +33 -0
  856. opik/rest_api/types/provider_api_key.py +18 -8
  857. opik/rest_api/types/provider_api_key_page_public.py +27 -0
  858. opik/rest_api/types/provider_api_key_provider.py +1 -1
  859. opik/rest_api/types/provider_api_key_public.py +18 -8
  860. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  861. opik/rest_api/types/response_format.py +5 -7
  862. opik/rest_api/types/response_format_type.py +1 -3
  863. opik/rest_api/types/result.py +21 -0
  864. opik/rest_api/types/results_number_public.py +4 -6
  865. opik/rest_api/types/score_name.py +4 -5
  866. opik/rest_api/types/service_toggles_config.py +44 -0
  867. opik/rest_api/types/span.py +13 -15
  868. opik/rest_api/types/span_batch.py +4 -6
  869. opik/rest_api/types/span_enrichment_options.py +31 -0
  870. opik/rest_api/types/span_experiment_item_bulk_write_view.py +39 -0
  871. opik/rest_api/types/span_experiment_item_bulk_write_view_type.py +5 -0
  872. opik/rest_api/types/span_filter.py +23 -0
  873. opik/rest_api/types/span_filter_operator.py +21 -0
  874. opik/rest_api/types/span_filter_public.py +4 -6
  875. opik/rest_api/types/span_filter_public_operator.py +2 -0
  876. opik/rest_api/types/span_filter_write.py +23 -0
  877. opik/rest_api/types/span_filter_write_operator.py +21 -0
  878. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  879. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  880. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  881. opik/rest_api/types/span_page_public.py +9 -6
  882. opik/rest_api/types/span_public.py +19 -16
  883. opik/rest_api/types/span_public_type.py +1 -1
  884. opik/rest_api/types/span_type.py +1 -1
  885. opik/rest_api/types/span_update.py +46 -0
  886. opik/rest_api/types/span_update_type.py +5 -0
  887. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  888. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  889. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  890. opik/rest_api/types/span_write.py +13 -14
  891. opik/rest_api/types/span_write_type.py +1 -1
  892. opik/rest_api/types/spans_count_response.py +20 -0
  893. opik/rest_api/types/start_multipart_upload_response.py +20 -0
  894. opik/rest_api/types/stream_options.py +3 -5
  895. opik/rest_api/types/studio_evaluation.py +20 -0
  896. opik/rest_api/types/studio_evaluation_public.py +20 -0
  897. opik/rest_api/types/studio_evaluation_write.py +20 -0
  898. opik/rest_api/types/studio_llm_model.py +21 -0
  899. opik/rest_api/types/studio_llm_model_public.py +21 -0
  900. opik/rest_api/types/studio_llm_model_write.py +21 -0
  901. opik/rest_api/types/studio_message.py +20 -0
  902. opik/rest_api/types/studio_message_public.py +20 -0
  903. opik/rest_api/types/studio_message_write.py +20 -0
  904. opik/rest_api/types/studio_metric.py +21 -0
  905. opik/rest_api/types/studio_metric_public.py +21 -0
  906. opik/rest_api/types/studio_metric_write.py +21 -0
  907. opik/rest_api/types/studio_optimizer.py +21 -0
  908. opik/rest_api/types/studio_optimizer_public.py +21 -0
  909. opik/rest_api/types/studio_optimizer_write.py +21 -0
  910. opik/rest_api/types/studio_prompt.py +20 -0
  911. opik/rest_api/types/studio_prompt_public.py +20 -0
  912. opik/rest_api/types/studio_prompt_write.py +20 -0
  913. opik/rest_api/types/tool.py +4 -6
  914. opik/rest_api/types/tool_call.py +4 -6
  915. opik/rest_api/types/trace.py +26 -12
  916. opik/rest_api/types/trace_batch.py +4 -6
  917. opik/rest_api/types/trace_count_response.py +4 -6
  918. opik/rest_api/types/trace_enrichment_options.py +32 -0
  919. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +41 -0
  920. opik/rest_api/types/trace_filter.py +23 -0
  921. opik/rest_api/types/trace_filter_operator.py +21 -0
  922. opik/rest_api/types/trace_filter_public.py +23 -0
  923. opik/rest_api/types/trace_filter_public_operator.py +21 -0
  924. opik/rest_api/types/trace_filter_write.py +23 -0
  925. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  926. opik/rest_api/types/trace_page_public.py +8 -10
  927. opik/rest_api/types/trace_public.py +27 -13
  928. opik/rest_api/types/trace_public_visibility_mode.py +5 -0
  929. opik/rest_api/types/trace_thread.py +18 -9
  930. opik/rest_api/types/trace_thread_filter.py +23 -0
  931. opik/rest_api/types/trace_thread_filter_operator.py +21 -0
  932. opik/rest_api/types/trace_thread_filter_public.py +23 -0
  933. opik/rest_api/types/trace_thread_filter_public_operator.py +21 -0
  934. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  935. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  936. opik/rest_api/types/trace_thread_identifier.py +22 -0
  937. opik/rest_api/types/trace_thread_llm_as_judge_code.py +26 -0
  938. opik/rest_api/types/trace_thread_llm_as_judge_code_public.py +26 -0
  939. opik/rest_api/types/trace_thread_llm_as_judge_code_write.py +26 -0
  940. opik/rest_api/types/trace_thread_page.py +9 -6
  941. opik/rest_api/types/trace_thread_status.py +5 -0
  942. opik/rest_api/types/trace_thread_update.py +19 -0
  943. opik/rest_api/types/trace_thread_user_defined_metric_python_code.py +19 -0
  944. opik/rest_api/types/trace_thread_user_defined_metric_python_code_public.py +19 -0
  945. opik/rest_api/types/trace_thread_user_defined_metric_python_code_write.py +19 -0
  946. opik/rest_api/types/trace_update.py +39 -0
  947. opik/rest_api/types/trace_visibility_mode.py +5 -0
  948. opik/rest_api/types/trace_write.py +10 -11
  949. opik/rest_api/types/usage.py +6 -6
  950. opik/rest_api/types/user_defined_metric_python_code.py +3 -5
  951. opik/rest_api/types/user_defined_metric_python_code_public.py +3 -5
  952. opik/rest_api/types/user_defined_metric_python_code_write.py +3 -5
  953. opik/rest_api/types/value_entry.py +27 -0
  954. opik/rest_api/types/value_entry_compare.py +27 -0
  955. opik/rest_api/types/value_entry_compare_source.py +5 -0
  956. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +27 -0
  957. opik/rest_api/types/value_entry_experiment_item_bulk_write_view_source.py +5 -0
  958. opik/rest_api/types/value_entry_public.py +27 -0
  959. opik/rest_api/types/value_entry_public_source.py +5 -0
  960. opik/rest_api/types/value_entry_source.py +5 -0
  961. opik/rest_api/types/video_url.py +19 -0
  962. opik/rest_api/types/video_url_public.py +19 -0
  963. opik/rest_api/types/video_url_write.py +19 -0
  964. opik/rest_api/types/webhook.py +28 -0
  965. opik/rest_api/types/webhook_examples.py +19 -0
  966. opik/rest_api/types/webhook_public.py +28 -0
  967. opik/rest_api/types/webhook_test_result.py +23 -0
  968. opik/rest_api/types/webhook_test_result_status.py +5 -0
  969. opik/rest_api/types/webhook_write.py +23 -0
  970. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  971. opik/rest_api/types/workspace_configuration.py +27 -0
  972. opik/rest_api/types/workspace_metric_request.py +24 -0
  973. opik/rest_api/types/workspace_metric_response.py +20 -0
  974. opik/rest_api/types/workspace_metrics_summary_request.py +23 -0
  975. opik/rest_api/types/workspace_metrics_summary_response.py +20 -0
  976. opik/rest_api/types/workspace_name_holder.py +19 -0
  977. opik/rest_api/types/workspace_spans_count.py +20 -0
  978. opik/rest_api/types/workspace_trace_count.py +3 -5
  979. opik/rest_api/welcome_wizard/__init__.py +4 -0
  980. opik/rest_api/welcome_wizard/client.py +195 -0
  981. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  982. opik/rest_api/workspaces/__init__.py +2 -0
  983. opik/rest_api/workspaces/client.py +550 -77
  984. opik/rest_api/workspaces/raw_client.py +923 -0
  985. opik/rest_client_configurator/api.py +1 -0
  986. opik/rest_client_configurator/retry_decorator.py +1 -0
  987. opik/s3_httpx_client.py +67 -0
  988. opik/simulation/__init__.py +6 -0
  989. opik/simulation/simulated_user.py +99 -0
  990. opik/simulation/simulator.py +108 -0
  991. opik/synchronization.py +11 -24
  992. opik/tracing_runtime_config.py +48 -0
  993. opik/types.py +48 -2
  994. opik/url_helpers.py +13 -3
  995. opik/validation/chat_prompt_messages.py +241 -0
  996. opik/validation/feedback_score.py +4 -5
  997. opik/validation/parameter.py +122 -0
  998. opik/validation/parameters_validator.py +175 -0
  999. opik/validation/validator.py +30 -2
  1000. opik/validation/validator_helpers.py +147 -0
  1001. opik-1.9.71.dist-info/METADATA +370 -0
  1002. opik-1.9.71.dist-info/RECORD +1110 -0
  1003. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/WHEEL +1 -1
  1004. opik-1.9.71.dist-info/licenses/LICENSE +203 -0
  1005. opik/api_objects/prompt/prompt.py +0 -107
  1006. opik/api_objects/prompt/prompt_template.py +0 -35
  1007. opik/cli.py +0 -193
  1008. opik/evaluation/metrics/models.py +0 -8
  1009. opik/hooks.py +0 -13
  1010. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  1011. opik/integrations/bedrock/helpers.py +0 -8
  1012. opik/integrations/langchain/google_run_helpers.py +0 -75
  1013. opik/integrations/langchain/openai_run_helpers.py +0 -122
  1014. opik/message_processing/message_processors.py +0 -203
  1015. opik/rest_api/types/delta_role.py +0 -7
  1016. opik/rest_api/types/json_object_schema.py +0 -34
  1017. opik-1.6.4.dist-info/METADATA +0 -270
  1018. opik-1.6.4.dist-info/RECORD +0 -507
  1019. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  1020. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  1021. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,84 @@
1
+ from typing import Any, Generator, List
2
+
3
+ from . import types
4
+ from .conversation_turns_factory import build_conversation_turns
5
+
6
+
7
+ def get_turns_in_sliding_window(
8
+ turns: List[types.ConversationTurn], window_size: int
9
+ ) -> Generator[List[types.ConversationTurn], Any, None]:
10
+ """
11
+ Generates windows of conversation turns of a fixed size from a list of turns.
12
+ This function creates a sliding window over the list of conversation turns.
13
+ Each window includes the current turn and up to `window_size - 1` previous
14
+ conversation turns. If there are fewer turns available than the `window_size`,
15
+ the window will consist of all available turns up to the current turn.
16
+ Args:
17
+ turns: List of conversation turn objects representing the interactions
18
+ in a conversation.
19
+ window_size: Integer specifying the maximum number of turns to include
20
+ in each window.
21
+ Yields:
22
+ A generator that produces lists of conversation turns, where each list
23
+ represents a sliding window of turns.
24
+ """
25
+
26
+ for i in range(len(turns)):
27
+ yield turns[max(0, i - window_size + 1) : i + 1]
28
+
29
+
30
+ def merge_turns(turns: List[types.ConversationTurn]) -> types.Conversation:
31
+ """
32
+ Merges a list of conversation turns into a single conversation.
33
+ This function takes a list of conversation turns and combines them
34
+ into a single conversation by extending the output list with the data
35
+ from each turn.
36
+ Args:
37
+ turns: A list of conversation turn objects to be combined.
38
+ Returns:
39
+ types.Conversation: A combined conversation object containing all
40
+ the turns from the input list.
41
+ """
42
+
43
+ output: types.Conversation = []
44
+ for turn in turns:
45
+ output.extend(turn.as_list())
46
+ return output
47
+
48
+
49
+ def extract_turns_windows_from_conversation(
50
+ conversation: types.Conversation, window_size: int
51
+ ) -> List[types.Conversation]:
52
+ """
53
+ Extracts a list of conversation windows based on turns using a sliding window
54
+ approach. This function divides a conversation into consecutive overlapping
55
+ windows, where each window contains a specified number of turns.
56
+ Args:
57
+ conversation: The input conversation from which turns will be processed.
58
+ window_size: The number of turns to include in each sliding window.
59
+ Returns:
60
+ A list of conversations, each representing a window of turns specified
61
+ by the given window size.
62
+ Raises:
63
+ ValueError: If the conversation is empty or if it has no turns.
64
+ """
65
+
66
+ if len(conversation) == 0:
67
+ raise ValueError("Conversation is empty")
68
+
69
+ turns = build_conversation_turns(conversation=conversation)
70
+ if len(turns) == 0:
71
+ raise ValueError("Conversation has no turns")
72
+
73
+ turns_windows: List[types.Conversation] = [
74
+ merge_turns(turns_window)
75
+ for turns_window in get_turns_in_sliding_window(turns, window_size)
76
+ ]
77
+ return turns_windows
78
+
79
+
80
+ __all__ = [
81
+ "get_turns_in_sliding_window",
82
+ "merge_turns",
83
+ "extract_turns_windows_from_conversation",
84
+ ]
@@ -0,0 +1,14 @@
1
+ """Heuristic conversation-level metrics.
2
+
3
+ Exposes the reusable conversation-level heuristics under the public namespace
4
+ ``opik.evaluation.metrics.conversation.heuristics.*`` so documentation and downstream
5
+ code can import them directly.
6
+ """
7
+
8
+ from .degeneration.metric import ConversationDegenerationMetric
9
+ from .knowledge_retention.metric import KnowledgeRetentionMetric
10
+
11
+ __all__ = [
12
+ "ConversationDegenerationMetric",
13
+ "KnowledgeRetentionMetric",
14
+ ]
@@ -0,0 +1,3 @@
1
+ from .metric import ConversationDegenerationMetric
2
+
3
+ __all__ = ["ConversationDegenerationMetric"]
@@ -0,0 +1,189 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import re
5
+ from collections import Counter
6
+ from typing import Dict, List, Optional
7
+
8
+ from opik.evaluation.metrics.conversation import types as conversation_types
9
+ from opik.evaluation.metrics.conversation.conversation_thread_metric import (
10
+ ConversationThreadMetric,
11
+ )
12
+ from opik.evaluation.metrics.score_result import ScoreResult
13
+ from opik.exceptions import MetricComputationError
14
+ from .phrases import DEFAULT_FALLBACK_PHRASES
15
+
16
+
17
+ def _tokenize(text: str) -> List[str]:
18
+ return re.findall(r"\b\w+\b", text.lower())
19
+
20
+
21
+ def _ngram_counts(tokens: List[str], n: int) -> Counter:
22
+ if len(tokens) < n:
23
+ return Counter()
24
+ return Counter(tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1))
25
+
26
+
27
+ class ConversationDegenerationMetric(ConversationThreadMetric):
28
+ """
29
+ Score how strongly an assistant conversation shows degeneration or repetition.
30
+
31
+ The metric inspects each assistant turn, measuring repeated n-grams, overlap with
32
+ the previous reply, low lexical diversity, and presence of known fallback
33
+ phrases (for example, "as an AI language model..."). Each turn receives a
34
+ degeneration score between `0.0` and `1.0`; the overall metric reports the peak
35
+ risk observed so you can quickly flag sections where the assistant got stuck or
36
+ stopped being helpful. Detailed per-turn diagnostics are returned in the
37
+ ``ScoreResult.metadata`` payload.
38
+
39
+ Args:
40
+ name: Display name for the metric result. Defaults to
41
+ ``"conversation_degeneration_metric"``.
42
+ track: Whether the metric should automatically track to an Opik project.
43
+ Defaults to ``True``.
44
+ project_name: Optional project to store tracked results in. Defaults to
45
+ ``None`` (inherit global setting).
46
+ ngram_size: Size of the n-grams used to detect repetition within a single
47
+ response. Must be at least ``2``. Defaults to ``3``.
48
+ fallback_phrases: Custom list of phrases that should be treated as
49
+ degeneration signatures. If ``None``, a sensible default list is used.
50
+
51
+ Example:
52
+ >>> from opik.evaluation.metrics import ConversationDegenerationMetric
53
+ >>> conversation = [
54
+ ... {"role": "user", "content": "Can you draft a short bio for Ada?"},
55
+ ... {"role": "assistant", "content": "Sure, here is a short bio for Ada."},
56
+ ... {"role": "user", "content": "Could you add more detail?"},
57
+ ... {"role": "assistant", "content": "Sure, here is a short bio for Ada."},
58
+ ... ]
59
+ >>> metric = ConversationDegenerationMetric(ngram_size=3)
60
+ >>> result = metric.score(conversation)
61
+ >>> float(result.value) # doctest: +SKIP
62
+ 0.75
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ name: str = "conversation_degeneration_metric",
68
+ track: bool = True,
69
+ project_name: Optional[str] = None,
70
+ ngram_size: int = 3,
71
+ fallback_phrases: Optional[List[str]] = None,
72
+ ) -> None:
73
+ super().__init__(name=name, track=track, project_name=project_name)
74
+ if ngram_size < 2:
75
+ raise MetricComputationError("ngram_size must be >= 2")
76
+ self._ngram_size = ngram_size
77
+ phrases = (
78
+ fallback_phrases
79
+ if fallback_phrases is not None
80
+ else DEFAULT_FALLBACK_PHRASES
81
+ )
82
+ self._fallback_phrases = [phrase.lower() for phrase in phrases]
83
+
84
+ def score(
85
+ self,
86
+ conversation: conversation_types.Conversation,
87
+ **ignored_kwargs: object,
88
+ ) -> ScoreResult:
89
+ assistant_turns = [
90
+ turn["content"]
91
+ for turn in conversation
92
+ if turn.get("role") == "assistant" and turn.get("content")
93
+ ]
94
+ if not assistant_turns:
95
+ raise MetricComputationError("Conversation contains no assistant messages")
96
+
97
+ per_turn_metadata: List[Dict[str, float]] = []
98
+ degeneracy_scores: List[float] = []
99
+
100
+ prev_tokens: Optional[List[str]] = None
101
+ for content in assistant_turns:
102
+ tokens = _tokenize(content)
103
+ if not tokens:
104
+ continue
105
+
106
+ entropy_norm = self._token_entropy(tokens)
107
+ repetition_ratio = self._repetition_ratio(tokens)
108
+ prev_overlap = self._overlap_with_previous(tokens, prev_tokens)
109
+ fallback_score = 1.0 if self._contains_fallback_phrase(content) else 0.0
110
+
111
+ normalized_entropy = 1.0 - entropy_norm
112
+ # Combine all four risk factors with equal weight; this keeps the
113
+ # heuristic interpretable and matches the legacy scoring behaviour.
114
+ deg_score = min(
115
+ 1.0,
116
+ (repetition_ratio + prev_overlap + fallback_score + normalized_entropy)
117
+ / 4.0,
118
+ )
119
+
120
+ per_turn_metadata.append(
121
+ {
122
+ "repetition_ratio": repetition_ratio,
123
+ "overlap_previous": prev_overlap,
124
+ "fallback_hit": fallback_score,
125
+ "normalized_entropy": normalized_entropy,
126
+ "degeneration_score": deg_score,
127
+ }
128
+ )
129
+ degeneracy_scores.append(deg_score)
130
+ prev_tokens = tokens
131
+
132
+ if not degeneracy_scores:
133
+ raise MetricComputationError(
134
+ "Assistant messages were empty after tokenization"
135
+ )
136
+
137
+ average_score = sum(degeneracy_scores) / len(degeneracy_scores)
138
+ peak_score = max(degeneracy_scores)
139
+
140
+ return ScoreResult(
141
+ value=peak_score,
142
+ name=self.name,
143
+ reason=(
144
+ f"Peak degeneration risk ({len(degeneracy_scores)} turns):"
145
+ f" {peak_score:.3f}"
146
+ ),
147
+ metadata={
148
+ "per_turn": per_turn_metadata,
149
+ "average_score": average_score,
150
+ "peak_score": peak_score,
151
+ },
152
+ )
153
+
154
+ def _token_entropy(self, tokens: List[str]) -> float:
155
+ counts = Counter(tokens)
156
+ total = float(len(tokens))
157
+ entropy = 0.0
158
+ for count in counts.values():
159
+ prob = count / total
160
+ entropy -= prob * math.log(prob, 2)
161
+ max_entropy = math.log(len(counts), 2) if counts else 1.0
162
+ if max_entropy == 0:
163
+ return 0.0
164
+ return min(1.0, entropy / max_entropy)
165
+
166
+ def _repetition_ratio(self, tokens: List[str]) -> float:
167
+ ngram_counts = _ngram_counts(tokens, self._ngram_size)
168
+ total = sum(ngram_counts.values())
169
+ if total == 0:
170
+ return 0.0
171
+ repeated = sum(count for count in ngram_counts.values() if count > 1)
172
+ return repeated / total
173
+
174
+ def _overlap_with_previous(
175
+ self, tokens: List[str], prev_tokens: Optional[List[str]]
176
+ ) -> float:
177
+ if not prev_tokens:
178
+ return 0.0
179
+ current_set = set(tokens)
180
+ prev_set = set(prev_tokens)
181
+ if not current_set or not prev_set:
182
+ return 0.0
183
+ intersection = len(current_set & prev_set)
184
+ union = len(current_set | prev_set)
185
+ return intersection / union
186
+
187
+ def _contains_fallback_phrase(self, content: str) -> bool:
188
+ lowered = content.lower()
189
+ return any(phrase in lowered for phrase in self._fallback_phrases)
@@ -0,0 +1,12 @@
1
+ """Phrase lists used by the conversation degeneration metric."""
2
+
3
+ DEFAULT_FALLBACK_PHRASES = [
4
+ "i'm sorry",
5
+ "as an ai language model",
6
+ "i cannot",
7
+ "i'm unable",
8
+ "please provide",
9
+ "i don't have access",
10
+ "i don't understand",
11
+ "could you please clarify",
12
+ ]
@@ -0,0 +1,3 @@
1
+ from .metric import KnowledgeRetentionMetric
2
+
3
+ __all__ = ["KnowledgeRetentionMetric"]
@@ -0,0 +1,172 @@
1
+ """Heuristic metric for knowledge retention across conversation turns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ from typing import Callable, List, Optional, Sequence, Set
7
+
8
+ from opik.evaluation.metrics.conversation import types as conversation_types
9
+ from opik.evaluation.metrics.conversation.conversation_thread_metric import (
10
+ ConversationThreadMetric,
11
+ )
12
+ from opik.evaluation.metrics.score_result import ScoreResult
13
+ from opik.evaluation.preprocessing import normalize_text
14
+
15
+ _MIN_TOKEN_LENGTH = 4
16
+ _STOPWORDS = {
17
+ "the",
18
+ "this",
19
+ "that",
20
+ "with",
21
+ "have",
22
+ "from",
23
+ "about",
24
+ "there",
25
+ "which",
26
+ "would",
27
+ "could",
28
+ "should",
29
+ "name",
30
+ "number",
31
+ }
32
+
33
+ _REQUEST_KEYWORDS = {
34
+ "need",
35
+ "please",
36
+ "help",
37
+ "can",
38
+ "could",
39
+ "would",
40
+ "should",
41
+ "shall",
42
+ "may",
43
+ "might",
44
+ "want",
45
+ "tell",
46
+ "explain",
47
+ "show",
48
+ "give",
49
+ "provide",
50
+ "let",
51
+ "allow",
52
+ "request",
53
+ }
54
+
55
+
56
+ class KnowledgeRetentionMetric(ConversationThreadMetric):
57
+ """
58
+ Measure how many user-provided facts resurface in the closing assistant reply.
59
+
60
+ The metric extracts salient, non-stopword terms from earlier user turns,
61
+ excluding explicit requests (for example, questions or pleas for help). When the
62
+ final assistant response omits those key terms, the score drops, signalling the
63
+ assistant may have forgotten essential context. Scores range from `0.0`
64
+ (nothing retained) to `1.0` (all extracted facts referenced).
65
+
66
+ Args:
67
+ name: Display name for the metric result. Defaults to
68
+ ``"knowledge_retention_metric"``.
69
+ track: Whether the metric should automatically track results. Defaults to
70
+ ``True``.
71
+ project_name: Optional Opik project name for tracking. Defaults to ``None``.
72
+ turns_to_consider: How many of the earliest user turns should be mined for
73
+ facts. Defaults to ``5``.
74
+
75
+ Example:
76
+ >>> from opik.evaluation.metrics import KnowledgeRetentionMetric
77
+ >>> conversation = [
78
+ ... {"role": "user", "content": "My new router is a Netgear Nighthawk."},
79
+ ... {"role": "assistant", "content": "Great choice!"},
80
+ ... {"role": "user", "content": "Please remind me about the Netgear router setup."},
81
+ ... {
82
+ ... "role": "assistant",
83
+ ... "content": "Be sure to update the Netgear Nighthawk firmware first.",
84
+ ... },
85
+ ... ]
86
+ >>> metric = KnowledgeRetentionMetric(turns_to_consider=3)
87
+ >>> result = metric.score(conversation)
88
+ >>> float(result.value) # doctest: +SKIP
89
+ 1.0
90
+ """
91
+
92
+ def __init__(
93
+ self,
94
+ name: str = "knowledge_retention_metric",
95
+ track: bool = True,
96
+ project_name: Optional[str] = None,
97
+ turns_to_consider: int = 5,
98
+ ) -> None:
99
+ super().__init__(name=name, track=track, project_name=project_name)
100
+ self._turns_to_consider = turns_to_consider
101
+ self._normalizer: Callable[[str], str] = functools.partial(
102
+ normalize_text,
103
+ keep_emoji=False,
104
+ remove_punctuation=True,
105
+ )
106
+
107
+ def score(
108
+ self,
109
+ conversation: conversation_types.Conversation,
110
+ **ignored_kwargs: object,
111
+ ) -> ScoreResult:
112
+ user_turns: List[str] = []
113
+ assistant_turns: List[str] = []
114
+ for message in conversation:
115
+ content = message.get("content")
116
+ role = message.get("role")
117
+ if not content or not role:
118
+ continue
119
+ if role == "user":
120
+ if self._is_user_request(content):
121
+ continue
122
+ user_turns.append(content)
123
+ elif role == "assistant":
124
+ assistant_turns.append(content)
125
+
126
+ if not assistant_turns:
127
+ return ScoreResult(
128
+ value=0.0, name=self.name, reason="No assistant turns", metadata={}
129
+ )
130
+
131
+ reference_facts = self._extract_terms(user_turns[: self._turns_to_consider])
132
+ if not reference_facts:
133
+ return ScoreResult(
134
+ value=1.0, name=self.name, reason="No facts to retain", metadata={}
135
+ )
136
+
137
+ final_response = assistant_turns[-1]
138
+ final_terms = self._extract_terms([final_response])
139
+
140
+ retained = reference_facts & final_terms
141
+ score = len(retained) / len(reference_facts)
142
+
143
+ metadata = {
144
+ "reference_terms": sorted(reference_facts),
145
+ "retained_terms": sorted(retained),
146
+ }
147
+
148
+ return ScoreResult(
149
+ value=score,
150
+ name=self.name,
151
+ reason=f"Retained {len(retained)} of {len(reference_facts)} reference terms",
152
+ metadata=metadata,
153
+ )
154
+
155
+ def _extract_terms(self, texts: Sequence[str]) -> Set[str]:
156
+ tokens: Set[str] = set()
157
+ for text in texts:
158
+ normalized = self._normalizer(text)
159
+ for token in normalized.split():
160
+ token = token.strip()
161
+ if len(token) < _MIN_TOKEN_LENGTH or token in _STOPWORDS:
162
+ continue
163
+ tokens.add(token)
164
+ return tokens
165
+
166
+ def _is_user_request(self, text: str) -> bool:
167
+ if "?" in text:
168
+ return True
169
+
170
+ normalized = self._normalizer(text)
171
+ tokens = set(normalized.split())
172
+ return any(token in _REQUEST_KEYWORDS for token in tokens)
@@ -0,0 +1,32 @@
1
+ """
2
+ LLM-judge conversation adapters.
3
+
4
+ Expose the conversation-focused judges under
5
+ `opik.evaluation.metrics.conversation.llm_judges.*`.
6
+ """
7
+
8
+ from .conversational_coherence.metric import ConversationalCoherenceMetric
9
+ from .g_eval_wrappers import (
10
+ ConversationComplianceRiskMetric,
11
+ ConversationDialogueHelpfulnessMetric,
12
+ ConversationPromptUncertaintyMetric,
13
+ ConversationQARelevanceMetric,
14
+ ConversationSummarizationCoherenceMetric,
15
+ ConversationSummarizationConsistencyMetric,
16
+ GEvalConversationMetric,
17
+ )
18
+ from .session_completeness.metric import SessionCompletenessQuality
19
+ from .user_frustration.metric import UserFrustrationMetric
20
+
21
+ __all__ = [
22
+ "ConversationalCoherenceMetric",
23
+ "ConversationComplianceRiskMetric",
24
+ "ConversationDialogueHelpfulnessMetric",
25
+ "ConversationPromptUncertaintyMetric",
26
+ "ConversationQARelevanceMetric",
27
+ "ConversationSummarizationCoherenceMetric",
28
+ "ConversationSummarizationConsistencyMetric",
29
+ "GEvalConversationMetric",
30
+ "SessionCompletenessQuality",
31
+ "UserFrustrationMetric",
32
+ ]