opik 1.6.4__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1021) hide show
  1. opik/__init__.py +33 -2
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/__init__.py +5 -0
  9. opik/api_objects/attachment/attachment.py +20 -0
  10. opik/api_objects/attachment/attachment_context.py +36 -0
  11. opik/api_objects/attachment/attachments_extractor.py +153 -0
  12. opik/api_objects/attachment/client.py +220 -0
  13. opik/api_objects/attachment/converters.py +51 -0
  14. opik/api_objects/attachment/decoder.py +18 -0
  15. opik/api_objects/attachment/decoder_base64.py +83 -0
  16. opik/api_objects/attachment/decoder_helpers.py +137 -0
  17. opik/api_objects/conversation/__init__.py +0 -0
  18. opik/api_objects/conversation/conversation_factory.py +43 -0
  19. opik/api_objects/conversation/conversation_thread.py +49 -0
  20. opik/api_objects/data_helpers.py +79 -0
  21. opik/api_objects/dataset/dataset.py +107 -45
  22. opik/api_objects/dataset/rest_operations.py +12 -3
  23. opik/api_objects/experiment/experiment.py +81 -45
  24. opik/api_objects/experiment/experiment_item.py +2 -1
  25. opik/api_objects/experiment/experiments_client.py +64 -0
  26. opik/api_objects/experiment/helpers.py +35 -11
  27. opik/api_objects/experiment/rest_operations.py +88 -19
  28. opik/api_objects/helpers.py +104 -7
  29. opik/api_objects/local_recording.py +81 -0
  30. opik/api_objects/opik_client.py +872 -174
  31. opik/api_objects/opik_query_language.py +136 -18
  32. opik/api_objects/optimization/__init__.py +3 -0
  33. opik/api_objects/optimization/optimization.py +39 -0
  34. opik/api_objects/prompt/__init__.py +13 -1
  35. opik/api_objects/prompt/base_prompt.py +69 -0
  36. opik/api_objects/prompt/base_prompt_template.py +29 -0
  37. opik/api_objects/prompt/chat/__init__.py +1 -0
  38. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  39. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  40. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  41. opik/api_objects/prompt/client.py +193 -41
  42. opik/api_objects/prompt/text/__init__.py +1 -0
  43. opik/api_objects/prompt/text/prompt.py +174 -0
  44. opik/api_objects/prompt/text/prompt_template.py +55 -0
  45. opik/api_objects/prompt/types.py +29 -0
  46. opik/api_objects/rest_stream_parser.py +98 -0
  47. opik/api_objects/search_helpers.py +89 -0
  48. opik/api_objects/span/span_client.py +165 -45
  49. opik/api_objects/span/span_data.py +136 -25
  50. opik/api_objects/threads/__init__.py +0 -0
  51. opik/api_objects/threads/threads_client.py +185 -0
  52. opik/api_objects/trace/trace_client.py +72 -36
  53. opik/api_objects/trace/trace_data.py +112 -26
  54. opik/api_objects/validation_helpers.py +3 -3
  55. opik/cli/__init__.py +5 -0
  56. opik/cli/__main__.py +6 -0
  57. opik/cli/configure.py +66 -0
  58. opik/cli/exports/__init__.py +131 -0
  59. opik/cli/exports/dataset.py +278 -0
  60. opik/cli/exports/experiment.py +784 -0
  61. opik/cli/exports/project.py +685 -0
  62. opik/cli/exports/prompt.py +578 -0
  63. opik/cli/exports/utils.py +406 -0
  64. opik/cli/harbor.py +39 -0
  65. opik/cli/healthcheck.py +21 -0
  66. opik/cli/imports/__init__.py +439 -0
  67. opik/cli/imports/dataset.py +143 -0
  68. opik/cli/imports/experiment.py +1192 -0
  69. opik/cli/imports/project.py +262 -0
  70. opik/cli/imports/prompt.py +177 -0
  71. opik/cli/imports/utils.py +280 -0
  72. opik/cli/main.py +49 -0
  73. opik/cli/proxy.py +93 -0
  74. opik/cli/usage_report/__init__.py +16 -0
  75. opik/cli/usage_report/charts.py +783 -0
  76. opik/cli/usage_report/cli.py +274 -0
  77. opik/cli/usage_report/constants.py +9 -0
  78. opik/cli/usage_report/extraction.py +749 -0
  79. opik/cli/usage_report/pdf.py +244 -0
  80. opik/cli/usage_report/statistics.py +78 -0
  81. opik/cli/usage_report/utils.py +235 -0
  82. opik/config.py +62 -4
  83. opik/configurator/configure.py +45 -6
  84. opik/configurator/opik_rest_helpers.py +4 -1
  85. opik/context_storage.py +164 -65
  86. opik/datetime_helpers.py +12 -0
  87. opik/decorator/arguments_helpers.py +9 -1
  88. opik/decorator/base_track_decorator.py +298 -146
  89. opik/decorator/context_manager/__init__.py +0 -0
  90. opik/decorator/context_manager/span_context_manager.py +123 -0
  91. opik/decorator/context_manager/trace_context_manager.py +84 -0
  92. opik/decorator/generator_wrappers.py +3 -2
  93. opik/decorator/inspect_helpers.py +11 -0
  94. opik/decorator/opik_args/__init__.py +13 -0
  95. opik/decorator/opik_args/api_classes.py +71 -0
  96. opik/decorator/opik_args/helpers.py +120 -0
  97. opik/decorator/span_creation_handler.py +49 -21
  98. opik/decorator/tracker.py +9 -1
  99. opik/dict_utils.py +3 -3
  100. opik/environment.py +13 -1
  101. opik/error_tracking/api.py +1 -1
  102. opik/error_tracking/before_send.py +6 -5
  103. opik/error_tracking/environment_details.py +29 -7
  104. opik/error_tracking/error_filtering/filter_by_response_status_code.py +42 -0
  105. opik/error_tracking/error_filtering/filter_chain_builder.py +14 -3
  106. opik/evaluation/__init__.py +14 -2
  107. opik/evaluation/engine/engine.py +280 -82
  108. opik/evaluation/engine/evaluation_tasks_executor.py +15 -10
  109. opik/evaluation/engine/helpers.py +34 -9
  110. opik/evaluation/engine/metrics_evaluator.py +237 -0
  111. opik/evaluation/engine/types.py +5 -4
  112. opik/evaluation/evaluation_result.py +169 -2
  113. opik/evaluation/evaluator.py +659 -58
  114. opik/evaluation/metrics/__init__.py +121 -6
  115. opik/evaluation/metrics/aggregated_metric.py +92 -0
  116. opik/evaluation/metrics/arguments_helpers.py +15 -21
  117. opik/evaluation/metrics/arguments_validator.py +38 -0
  118. opik/evaluation/metrics/base_metric.py +20 -10
  119. opik/evaluation/metrics/conversation/__init__.py +48 -0
  120. opik/evaluation/metrics/conversation/conversation_thread_metric.py +79 -0
  121. opik/evaluation/metrics/conversation/conversation_turns_factory.py +39 -0
  122. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  123. opik/evaluation/metrics/conversation/helpers.py +84 -0
  124. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  125. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  126. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  127. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  128. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  129. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  130. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  131. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/__init__.py +0 -0
  132. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +274 -0
  133. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/schema.py +16 -0
  134. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/templates.py +95 -0
  135. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  136. opik/evaluation/metrics/conversation/llm_judges/session_completeness/__init__.py +0 -0
  137. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +295 -0
  138. opik/evaluation/metrics/conversation/llm_judges/session_completeness/schema.py +22 -0
  139. opik/evaluation/metrics/conversation/llm_judges/session_completeness/templates.py +139 -0
  140. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  141. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +277 -0
  142. opik/evaluation/metrics/conversation/llm_judges/user_frustration/schema.py +16 -0
  143. opik/evaluation/metrics/conversation/llm_judges/user_frustration/templates.py +135 -0
  144. opik/evaluation/metrics/conversation/types.py +34 -0
  145. opik/evaluation/metrics/conversation_types.py +9 -0
  146. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  147. opik/evaluation/metrics/heuristics/bleu.py +43 -16
  148. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  149. opik/evaluation/metrics/heuristics/contains.py +50 -11
  150. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  151. opik/evaluation/metrics/heuristics/equals.py +4 -1
  152. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  153. opik/evaluation/metrics/heuristics/is_json.py +9 -3
  154. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  155. opik/evaluation/metrics/heuristics/levenshtein_ratio.py +6 -5
  156. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  157. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  158. opik/evaluation/metrics/heuristics/readability.py +129 -0
  159. opik/evaluation/metrics/heuristics/regex_match.py +4 -1
  160. opik/evaluation/metrics/heuristics/rouge.py +148 -0
  161. opik/evaluation/metrics/heuristics/sentiment.py +98 -0
  162. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  163. opik/evaluation/metrics/heuristics/tone.py +155 -0
  164. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  165. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +27 -30
  166. opik/evaluation/metrics/llm_judges/answer_relevance/parser.py +27 -0
  167. opik/evaluation/metrics/llm_judges/answer_relevance/templates.py +10 -10
  168. opik/evaluation/metrics/llm_judges/context_precision/metric.py +28 -31
  169. opik/evaluation/metrics/llm_judges/context_precision/parser.py +27 -0
  170. opik/evaluation/metrics/llm_judges/context_precision/template.py +7 -7
  171. opik/evaluation/metrics/llm_judges/context_recall/metric.py +27 -31
  172. opik/evaluation/metrics/llm_judges/context_recall/parser.py +27 -0
  173. opik/evaluation/metrics/llm_judges/context_recall/template.py +7 -7
  174. opik/evaluation/metrics/llm_judges/factuality/metric.py +7 -26
  175. opik/evaluation/metrics/llm_judges/factuality/parser.py +35 -0
  176. opik/evaluation/metrics/llm_judges/factuality/template.py +1 -1
  177. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  178. opik/evaluation/metrics/llm_judges/g_eval/metric.py +244 -113
  179. opik/evaluation/metrics/llm_judges/g_eval/parser.py +161 -0
  180. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  181. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  182. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  183. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  184. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  185. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  186. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  187. opik/evaluation/metrics/llm_judges/hallucination/metric.py +23 -27
  188. opik/evaluation/metrics/llm_judges/hallucination/parser.py +29 -0
  189. opik/evaluation/metrics/llm_judges/hallucination/template.py +2 -4
  190. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  191. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  192. opik/evaluation/metrics/llm_judges/moderation/metric.py +23 -28
  193. opik/evaluation/metrics/llm_judges/moderation/parser.py +27 -0
  194. opik/evaluation/metrics/llm_judges/moderation/template.py +2 -2
  195. opik/evaluation/metrics/llm_judges/parsing_helpers.py +26 -0
  196. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  197. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  198. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  199. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  200. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  201. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  202. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  203. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  204. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  205. opik/evaluation/metrics/llm_judges/trajectory_accuracy/__init__.py +3 -0
  206. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +171 -0
  207. opik/evaluation/metrics/llm_judges/trajectory_accuracy/parser.py +38 -0
  208. opik/evaluation/metrics/llm_judges/trajectory_accuracy/templates.py +65 -0
  209. opik/evaluation/metrics/llm_judges/usefulness/metric.py +23 -32
  210. opik/evaluation/metrics/llm_judges/usefulness/parser.py +28 -0
  211. opik/evaluation/metrics/ragas_metric.py +112 -0
  212. opik/evaluation/models/__init__.py +10 -0
  213. opik/evaluation/models/base_model.py +140 -18
  214. opik/evaluation/models/langchain/__init__.py +3 -0
  215. opik/evaluation/models/langchain/langchain_chat_model.py +166 -0
  216. opik/evaluation/models/langchain/message_converters.py +106 -0
  217. opik/evaluation/models/langchain/opik_monitoring.py +23 -0
  218. opik/evaluation/models/litellm/litellm_chat_model.py +186 -40
  219. opik/evaluation/models/litellm/opik_monitor.py +24 -21
  220. opik/evaluation/models/litellm/util.py +125 -0
  221. opik/evaluation/models/litellm/warning_filters.py +16 -4
  222. opik/evaluation/models/model_capabilities.py +187 -0
  223. opik/evaluation/models/models_factory.py +25 -3
  224. opik/evaluation/preprocessing.py +92 -0
  225. opik/evaluation/report.py +70 -12
  226. opik/evaluation/rest_operations.py +49 -45
  227. opik/evaluation/samplers/__init__.py +4 -0
  228. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  229. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  230. opik/evaluation/score_statistics.py +66 -0
  231. opik/evaluation/scorers/__init__.py +4 -0
  232. opik/evaluation/scorers/scorer_function.py +55 -0
  233. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  234. opik/evaluation/test_case.py +3 -2
  235. opik/evaluation/test_result.py +1 -0
  236. opik/evaluation/threads/__init__.py +0 -0
  237. opik/evaluation/threads/context_helper.py +32 -0
  238. opik/evaluation/threads/evaluation_engine.py +181 -0
  239. opik/evaluation/threads/evaluation_result.py +18 -0
  240. opik/evaluation/threads/evaluator.py +120 -0
  241. opik/evaluation/threads/helpers.py +51 -0
  242. opik/evaluation/types.py +9 -1
  243. opik/exceptions.py +116 -3
  244. opik/file_upload/__init__.py +0 -0
  245. opik/file_upload/base_upload_manager.py +39 -0
  246. opik/file_upload/file_upload_monitor.py +14 -0
  247. opik/file_upload/file_uploader.py +141 -0
  248. opik/file_upload/mime_type.py +9 -0
  249. opik/file_upload/s3_multipart_upload/__init__.py +0 -0
  250. opik/file_upload/s3_multipart_upload/file_parts_strategy.py +89 -0
  251. opik/file_upload/s3_multipart_upload/s3_file_uploader.py +86 -0
  252. opik/file_upload/s3_multipart_upload/s3_upload_error.py +29 -0
  253. opik/file_upload/thread_pool.py +17 -0
  254. opik/file_upload/upload_client.py +114 -0
  255. opik/file_upload/upload_manager.py +255 -0
  256. opik/file_upload/upload_options.py +37 -0
  257. opik/format_helpers.py +17 -0
  258. opik/guardrails/__init__.py +4 -0
  259. opik/guardrails/guardrail.py +157 -0
  260. opik/guardrails/guards/__init__.py +5 -0
  261. opik/guardrails/guards/guard.py +17 -0
  262. opik/guardrails/guards/pii.py +47 -0
  263. opik/guardrails/guards/topic.py +76 -0
  264. opik/guardrails/rest_api_client.py +34 -0
  265. opik/guardrails/schemas.py +24 -0
  266. opik/guardrails/tracing.py +61 -0
  267. opik/healthcheck/__init__.py +2 -1
  268. opik/healthcheck/checks.py +2 -2
  269. opik/healthcheck/rich_representation.py +1 -1
  270. opik/hooks/__init__.py +23 -0
  271. opik/hooks/anonymizer_hook.py +36 -0
  272. opik/hooks/httpx_client_hook.py +112 -0
  273. opik/httpx_client.py +75 -4
  274. opik/id_helpers.py +18 -0
  275. opik/integrations/adk/__init__.py +14 -0
  276. opik/integrations/adk/callback_context_info_extractors.py +32 -0
  277. opik/integrations/adk/graph/__init__.py +0 -0
  278. opik/integrations/adk/graph/mermaid_graph_builder.py +128 -0
  279. opik/integrations/adk/graph/nodes.py +101 -0
  280. opik/integrations/adk/graph/subgraph_edges_builders.py +41 -0
  281. opik/integrations/adk/helpers.py +48 -0
  282. opik/integrations/adk/legacy_opik_tracer.py +381 -0
  283. opik/integrations/adk/opik_tracer.py +370 -0
  284. opik/integrations/adk/patchers/__init__.py +4 -0
  285. opik/integrations/adk/patchers/adk_otel_tracer/__init__.py +0 -0
  286. opik/integrations/adk/patchers/adk_otel_tracer/llm_span_helpers.py +30 -0
  287. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +201 -0
  288. opik/integrations/adk/patchers/litellm_wrappers.py +91 -0
  289. opik/integrations/adk/patchers/llm_response_wrapper.py +105 -0
  290. opik/integrations/adk/patchers/patchers.py +64 -0
  291. opik/integrations/adk/recursive_callback_injector.py +126 -0
  292. opik/integrations/aisuite/aisuite_decorator.py +8 -3
  293. opik/integrations/aisuite/opik_tracker.py +1 -0
  294. opik/integrations/anthropic/messages_create_decorator.py +8 -3
  295. opik/integrations/anthropic/opik_tracker.py +0 -1
  296. opik/integrations/bedrock/converse/__init__.py +0 -0
  297. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  298. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +18 -8
  299. opik/integrations/bedrock/invoke_agent_decorator.py +12 -7
  300. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  301. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  302. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  303. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  304. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  305. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  306. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  307. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  308. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  309. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  310. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  311. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  312. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  313. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  314. opik/integrations/bedrock/opik_tracker.py +43 -4
  315. opik/integrations/bedrock/types.py +19 -0
  316. opik/integrations/crewai/crewai_decorator.py +34 -56
  317. opik/integrations/crewai/opik_tracker.py +31 -10
  318. opik/integrations/crewai/patchers/__init__.py +5 -0
  319. opik/integrations/crewai/patchers/flow.py +118 -0
  320. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  321. opik/integrations/crewai/patchers/llm_client.py +207 -0
  322. opik/integrations/dspy/callback.py +246 -84
  323. opik/integrations/dspy/graph.py +88 -0
  324. opik/integrations/dspy/parsers.py +168 -0
  325. opik/integrations/genai/encoder_extension.py +2 -6
  326. opik/integrations/genai/generate_content_decorator.py +20 -13
  327. opik/integrations/guardrails/guardrails_decorator.py +4 -0
  328. opik/integrations/harbor/__init__.py +17 -0
  329. opik/integrations/harbor/experiment_service.py +269 -0
  330. opik/integrations/harbor/opik_tracker.py +528 -0
  331. opik/integrations/haystack/constants.py +35 -0
  332. opik/integrations/haystack/converters.py +1 -2
  333. opik/integrations/haystack/opik_connector.py +28 -6
  334. opik/integrations/haystack/opik_span_bridge.py +284 -0
  335. opik/integrations/haystack/opik_tracer.py +124 -222
  336. opik/integrations/langchain/__init__.py +3 -1
  337. opik/integrations/langchain/helpers.py +96 -0
  338. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  339. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  340. opik/integrations/langchain/opik_encoder_extension.py +2 -2
  341. opik/integrations/langchain/opik_tracer.py +641 -206
  342. opik/integrations/langchain/provider_usage_extractors/__init__.py +5 -0
  343. opik/integrations/langchain/provider_usage_extractors/anthropic_usage_extractor.py +101 -0
  344. opik/integrations/langchain/provider_usage_extractors/anthropic_vertexai_usage_extractor.py +67 -0
  345. opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py +94 -0
  346. opik/integrations/langchain/provider_usage_extractors/google_generative_ai_usage_extractor.py +109 -0
  347. opik/integrations/langchain/provider_usage_extractors/groq_usage_extractor.py +92 -0
  348. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/__init__.py +15 -0
  349. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +134 -0
  350. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py +163 -0
  351. opik/integrations/langchain/provider_usage_extractors/openai_usage_extractor.py +124 -0
  352. opik/integrations/langchain/provider_usage_extractors/provider_usage_extractor_protocol.py +29 -0
  353. opik/integrations/langchain/provider_usage_extractors/usage_extractor.py +48 -0
  354. opik/integrations/langchain/provider_usage_extractors/vertexai_usage_extractor.py +109 -0
  355. opik/integrations/litellm/__init__.py +5 -0
  356. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  357. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  358. opik/integrations/litellm/opik_tracker.py +43 -0
  359. opik/integrations/litellm/stream_patchers.py +151 -0
  360. opik/integrations/llama_index/callback.py +179 -78
  361. opik/integrations/llama_index/event_parsing_utils.py +29 -9
  362. opik/integrations/openai/agents/opik_tracing_processor.py +204 -32
  363. opik/integrations/openai/agents/span_data_parsers.py +15 -6
  364. opik/integrations/openai/chat_completion_chunks_aggregator.py +1 -1
  365. opik/integrations/openai/{openai_decorator.py → openai_chat_completions_decorator.py} +45 -35
  366. opik/integrations/openai/openai_responses_decorator.py +158 -0
  367. opik/integrations/openai/opik_tracker.py +94 -13
  368. opik/integrations/openai/response_events_aggregator.py +36 -0
  369. opik/integrations/openai/stream_patchers.py +125 -15
  370. opik/integrations/sagemaker/auth.py +5 -1
  371. opik/jsonable_encoder.py +29 -1
  372. opik/llm_usage/base_original_provider_usage.py +15 -8
  373. opik/llm_usage/bedrock_usage.py +8 -2
  374. opik/llm_usage/google_usage.py +6 -1
  375. opik/llm_usage/llm_usage_info.py +6 -0
  376. opik/llm_usage/{openai_usage.py → openai_chat_completions_usage.py} +2 -12
  377. opik/llm_usage/{openai_agent_usage.py → openai_responses_usage.py} +7 -15
  378. opik/llm_usage/opik_usage.py +36 -10
  379. opik/llm_usage/opik_usage_factory.py +35 -19
  380. opik/logging_messages.py +19 -7
  381. opik/message_processing/arguments_utils.py +22 -0
  382. opik/message_processing/batching/base_batcher.py +45 -17
  383. opik/message_processing/batching/batch_manager.py +22 -10
  384. opik/message_processing/batching/batch_manager_constuctors.py +36 -11
  385. opik/message_processing/batching/batchers.py +167 -44
  386. opik/message_processing/batching/flushing_thread.py +0 -3
  387. opik/message_processing/batching/sequence_splitter.py +50 -5
  388. opik/message_processing/emulation/__init__.py +0 -0
  389. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  390. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  391. opik/message_processing/emulation/models.py +162 -0
  392. opik/message_processing/encoder_helpers.py +79 -0
  393. opik/message_processing/message_queue.py +79 -0
  394. opik/message_processing/messages.py +154 -12
  395. opik/message_processing/preprocessing/__init__.py +0 -0
  396. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  397. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  398. opik/message_processing/preprocessing/constants.py +1 -0
  399. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  400. opik/message_processing/preprocessing/preprocessor.py +36 -0
  401. opik/message_processing/processors/__init__.py +0 -0
  402. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  403. opik/message_processing/processors/message_processors.py +92 -0
  404. opik/message_processing/processors/message_processors_chain.py +96 -0
  405. opik/message_processing/processors/online_message_processor.py +324 -0
  406. opik/message_processing/queue_consumer.py +61 -13
  407. opik/message_processing/streamer.py +102 -31
  408. opik/message_processing/streamer_constructors.py +67 -12
  409. opik/opik_context.py +103 -11
  410. opik/plugins/pytest/decorator.py +2 -2
  411. opik/plugins/pytest/experiment_runner.py +3 -2
  412. opik/plugins/pytest/hooks.py +6 -4
  413. opik/rate_limit/__init__.py +0 -0
  414. opik/rate_limit/rate_limit.py +25 -0
  415. opik/rest_api/__init__.py +643 -11
  416. opik/rest_api/alerts/__init__.py +7 -0
  417. opik/rest_api/alerts/client.py +667 -0
  418. opik/rest_api/alerts/raw_client.py +1015 -0
  419. opik/rest_api/alerts/types/__init__.py +7 -0
  420. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  421. opik/rest_api/annotation_queues/__init__.py +4 -0
  422. opik/rest_api/annotation_queues/client.py +668 -0
  423. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  424. opik/rest_api/attachments/__init__.py +17 -0
  425. opik/rest_api/attachments/client.py +752 -0
  426. opik/rest_api/attachments/raw_client.py +1125 -0
  427. opik/rest_api/attachments/types/__init__.py +15 -0
  428. opik/rest_api/attachments/types/attachment_list_request_entity_type.py +5 -0
  429. opik/rest_api/attachments/types/download_attachment_request_entity_type.py +5 -0
  430. opik/rest_api/attachments/types/start_multipart_upload_request_entity_type.py +5 -0
  431. opik/rest_api/attachments/types/upload_attachment_request_entity_type.py +5 -0
  432. opik/rest_api/automation_rule_evaluators/__init__.py +2 -0
  433. opik/rest_api/automation_rule_evaluators/client.py +182 -1162
  434. opik/rest_api/automation_rule_evaluators/raw_client.py +598 -0
  435. opik/rest_api/chat_completions/__init__.py +2 -0
  436. opik/rest_api/chat_completions/client.py +115 -149
  437. opik/rest_api/chat_completions/raw_client.py +339 -0
  438. opik/rest_api/check/__init__.py +2 -0
  439. opik/rest_api/check/client.py +88 -106
  440. opik/rest_api/check/raw_client.py +258 -0
  441. opik/rest_api/client.py +112 -212
  442. opik/rest_api/core/__init__.py +5 -0
  443. opik/rest_api/core/api_error.py +12 -6
  444. opik/rest_api/core/client_wrapper.py +4 -14
  445. opik/rest_api/core/datetime_utils.py +1 -3
  446. opik/rest_api/core/file.py +2 -5
  447. opik/rest_api/core/http_client.py +42 -120
  448. opik/rest_api/core/http_response.py +55 -0
  449. opik/rest_api/core/jsonable_encoder.py +1 -4
  450. opik/rest_api/core/pydantic_utilities.py +79 -147
  451. opik/rest_api/core/query_encoder.py +1 -3
  452. opik/rest_api/core/serialization.py +10 -10
  453. opik/rest_api/dashboards/__init__.py +4 -0
  454. opik/rest_api/dashboards/client.py +462 -0
  455. opik/rest_api/dashboards/raw_client.py +648 -0
  456. opik/rest_api/datasets/__init__.py +5 -0
  457. opik/rest_api/datasets/client.py +1638 -1091
  458. opik/rest_api/datasets/raw_client.py +3389 -0
  459. opik/rest_api/datasets/types/__init__.py +8 -0
  460. opik/rest_api/datasets/types/dataset_update_visibility.py +5 -0
  461. opik/rest_api/datasets/types/dataset_write_visibility.py +5 -0
  462. opik/rest_api/errors/__init__.py +2 -0
  463. opik/rest_api/errors/bad_request_error.py +4 -3
  464. opik/rest_api/errors/conflict_error.py +4 -3
  465. opik/rest_api/errors/forbidden_error.py +4 -2
  466. opik/rest_api/errors/not_found_error.py +4 -3
  467. opik/rest_api/errors/not_implemented_error.py +4 -3
  468. opik/rest_api/errors/unauthorized_error.py +4 -3
  469. opik/rest_api/errors/unprocessable_entity_error.py +4 -3
  470. opik/rest_api/experiments/__init__.py +5 -0
  471. opik/rest_api/experiments/client.py +676 -752
  472. opik/rest_api/experiments/raw_client.py +1872 -0
  473. opik/rest_api/experiments/types/__init__.py +10 -0
  474. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  475. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  476. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  477. opik/rest_api/experiments/types/experiment_write_type.py +5 -0
  478. opik/rest_api/feedback_definitions/__init__.py +2 -0
  479. opik/rest_api/feedback_definitions/client.py +96 -370
  480. opik/rest_api/feedback_definitions/raw_client.py +541 -0
  481. opik/rest_api/feedback_definitions/types/__init__.py +2 -0
  482. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -3
  483. opik/rest_api/guardrails/__init__.py +4 -0
  484. opik/rest_api/guardrails/client.py +104 -0
  485. opik/rest_api/guardrails/raw_client.py +102 -0
  486. opik/rest_api/llm_provider_key/__init__.py +2 -0
  487. opik/rest_api/llm_provider_key/client.py +166 -440
  488. opik/rest_api/llm_provider_key/raw_client.py +643 -0
  489. opik/rest_api/llm_provider_key/types/__init__.py +2 -0
  490. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  491. opik/rest_api/manual_evaluation/__init__.py +4 -0
  492. opik/rest_api/manual_evaluation/client.py +347 -0
  493. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  494. opik/rest_api/open_telemetry_ingestion/__init__.py +2 -0
  495. opik/rest_api/open_telemetry_ingestion/client.py +38 -63
  496. opik/rest_api/open_telemetry_ingestion/raw_client.py +88 -0
  497. opik/rest_api/optimizations/__init__.py +7 -0
  498. opik/rest_api/optimizations/client.py +704 -0
  499. opik/rest_api/optimizations/raw_client.py +920 -0
  500. opik/rest_api/optimizations/types/__init__.py +7 -0
  501. opik/rest_api/optimizations/types/optimization_update_status.py +7 -0
  502. opik/rest_api/projects/__init__.py +10 -1
  503. opik/rest_api/projects/client.py +180 -855
  504. opik/rest_api/projects/raw_client.py +1216 -0
  505. opik/rest_api/projects/types/__init__.py +11 -4
  506. opik/rest_api/projects/types/project_metric_request_public_interval.py +1 -3
  507. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +11 -1
  508. opik/rest_api/projects/types/project_update_visibility.py +5 -0
  509. opik/rest_api/projects/types/project_write_visibility.py +5 -0
  510. opik/rest_api/prompts/__init__.py +4 -2
  511. opik/rest_api/prompts/client.py +381 -970
  512. opik/rest_api/prompts/raw_client.py +1634 -0
  513. opik/rest_api/prompts/types/__init__.py +5 -1
  514. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  515. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  516. opik/rest_api/raw_client.py +156 -0
  517. opik/rest_api/redirect/__init__.py +4 -0
  518. opik/rest_api/redirect/client.py +375 -0
  519. opik/rest_api/redirect/raw_client.py +566 -0
  520. opik/rest_api/service_toggles/__init__.py +4 -0
  521. opik/rest_api/service_toggles/client.py +91 -0
  522. opik/rest_api/service_toggles/raw_client.py +93 -0
  523. opik/rest_api/spans/__init__.py +2 -0
  524. opik/rest_api/spans/client.py +659 -1354
  525. opik/rest_api/spans/raw_client.py +2383 -0
  526. opik/rest_api/spans/types/__init__.py +2 -0
  527. opik/rest_api/spans/types/find_feedback_score_names_1_request_type.py +1 -3
  528. opik/rest_api/spans/types/get_span_stats_request_type.py +1 -3
  529. opik/rest_api/spans/types/get_spans_by_project_request_type.py +1 -3
  530. opik/rest_api/spans/types/span_search_stream_request_public_type.py +1 -3
  531. opik/rest_api/system_usage/__init__.py +2 -0
  532. opik/rest_api/system_usage/client.py +157 -216
  533. opik/rest_api/system_usage/raw_client.py +455 -0
  534. opik/rest_api/traces/__init__.py +2 -0
  535. opik/rest_api/traces/client.py +2102 -1625
  536. opik/rest_api/traces/raw_client.py +4144 -0
  537. opik/rest_api/types/__init__.py +629 -24
  538. opik/rest_api/types/aggregation_data.py +27 -0
  539. opik/rest_api/types/alert.py +33 -0
  540. opik/rest_api/types/alert_alert_type.py +5 -0
  541. opik/rest_api/types/alert_page_public.py +24 -0
  542. opik/rest_api/types/alert_public.py +33 -0
  543. opik/rest_api/types/alert_public_alert_type.py +5 -0
  544. opik/rest_api/types/alert_trigger.py +27 -0
  545. opik/rest_api/types/alert_trigger_config.py +28 -0
  546. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  547. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  548. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  549. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  550. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  551. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  552. opik/rest_api/types/alert_trigger_public.py +27 -0
  553. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  554. opik/rest_api/types/alert_trigger_write.py +23 -0
  555. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  556. opik/rest_api/types/alert_write.py +28 -0
  557. opik/rest_api/types/alert_write_alert_type.py +5 -0
  558. opik/rest_api/types/annotation_queue.py +42 -0
  559. opik/rest_api/types/annotation_queue_batch.py +27 -0
  560. opik/rest_api/types/{json_schema_element.py → annotation_queue_item_ids.py} +5 -7
  561. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  562. opik/rest_api/types/annotation_queue_public.py +38 -0
  563. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  564. opik/rest_api/types/{workspace_metadata.py → annotation_queue_reviewer.py} +6 -7
  565. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  566. opik/rest_api/types/annotation_queue_scope.py +5 -0
  567. opik/rest_api/types/annotation_queue_write.py +31 -0
  568. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  569. opik/rest_api/types/assistant_message.py +7 -8
  570. opik/rest_api/types/assistant_message_role.py +1 -3
  571. opik/rest_api/types/attachment.py +22 -0
  572. opik/rest_api/types/attachment_page.py +28 -0
  573. opik/rest_api/types/audio_url.py +19 -0
  574. opik/rest_api/types/audio_url_public.py +19 -0
  575. opik/rest_api/types/audio_url_write.py +19 -0
  576. opik/rest_api/types/automation_rule_evaluator.py +160 -0
  577. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +6 -6
  578. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +6 -6
  579. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +6 -6
  580. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  581. opik/rest_api/types/automation_rule_evaluator_page_public.py +6 -6
  582. opik/rest_api/types/automation_rule_evaluator_public.py +155 -0
  583. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  584. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  585. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  586. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  587. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  588. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  589. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +22 -0
  590. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +22 -0
  591. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +22 -0
  592. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +22 -0
  593. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +22 -0
  594. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +22 -0
  595. opik/rest_api/types/automation_rule_evaluator_update.py +143 -0
  596. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +6 -6
  597. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  598. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  599. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +22 -0
  600. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +22 -0
  601. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +6 -6
  602. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +6 -6
  603. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +6 -6
  604. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +6 -6
  605. opik/rest_api/types/automation_rule_evaluator_write.py +143 -0
  606. opik/rest_api/types/avg_value_stat_public.py +3 -5
  607. opik/rest_api/types/batch_delete.py +3 -5
  608. opik/rest_api/types/batch_delete_by_project.py +20 -0
  609. opik/rest_api/types/bi_information.py +3 -5
  610. opik/rest_api/types/bi_information_response.py +4 -6
  611. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  612. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  613. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  614. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  615. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  616. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  617. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  618. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  619. opik/rest_api/types/categorical_feedback_definition.py +5 -7
  620. opik/rest_api/types/categorical_feedback_definition_create.py +4 -6
  621. opik/rest_api/types/categorical_feedback_definition_public.py +5 -7
  622. opik/rest_api/types/categorical_feedback_definition_update.py +4 -6
  623. opik/rest_api/types/categorical_feedback_detail.py +3 -5
  624. opik/rest_api/types/categorical_feedback_detail_create.py +3 -5
  625. opik/rest_api/types/categorical_feedback_detail_public.py +3 -5
  626. opik/rest_api/types/categorical_feedback_detail_update.py +3 -5
  627. opik/rest_api/types/chat_completion_choice.py +4 -6
  628. opik/rest_api/types/chat_completion_response.py +5 -6
  629. opik/rest_api/types/check.py +22 -0
  630. opik/rest_api/types/{json_node_compare.py → check_name.py} +1 -1
  631. opik/rest_api/types/check_public.py +22 -0
  632. opik/rest_api/types/check_public_name.py +5 -0
  633. opik/rest_api/types/check_public_result.py +5 -0
  634. opik/rest_api/types/check_result.py +5 -0
  635. opik/rest_api/types/chunked_output_json_node.py +4 -6
  636. opik/rest_api/types/chunked_output_json_node_public.py +4 -6
  637. opik/rest_api/types/chunked_output_json_node_public_type.py +6 -10
  638. opik/rest_api/types/chunked_output_json_node_type.py +6 -10
  639. opik/rest_api/types/column.py +8 -10
  640. opik/rest_api/types/column_compare.py +8 -10
  641. opik/rest_api/types/column_public.py +8 -10
  642. opik/rest_api/types/column_types_item.py +1 -3
  643. opik/rest_api/types/comment.py +4 -6
  644. opik/rest_api/types/comment_compare.py +4 -6
  645. opik/rest_api/types/comment_public.py +4 -6
  646. opik/rest_api/types/complete_multipart_upload_request.py +33 -0
  647. opik/rest_api/types/complete_multipart_upload_request_entity_type.py +5 -0
  648. opik/rest_api/types/completion_tokens_details.py +3 -5
  649. opik/rest_api/types/count_value_stat_public.py +3 -5
  650. opik/rest_api/types/dashboard_page_public.py +24 -0
  651. opik/rest_api/types/dashboard_public.py +30 -0
  652. opik/rest_api/types/data_point_double.py +21 -0
  653. opik/rest_api/types/data_point_number_public.py +3 -5
  654. opik/rest_api/types/dataset.py +14 -6
  655. opik/rest_api/types/dataset_expansion.py +42 -0
  656. opik/rest_api/types/dataset_expansion_response.py +39 -0
  657. opik/rest_api/types/dataset_item.py +9 -8
  658. opik/rest_api/types/dataset_item_batch.py +3 -5
  659. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  660. opik/rest_api/types/dataset_item_compare.py +9 -8
  661. opik/rest_api/types/dataset_item_compare_source.py +1 -3
  662. opik/rest_api/types/dataset_item_filter.py +27 -0
  663. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  664. opik/rest_api/types/dataset_item_page_compare.py +10 -7
  665. opik/rest_api/types/dataset_item_page_public.py +10 -7
  666. opik/rest_api/types/dataset_item_public.py +9 -8
  667. opik/rest_api/types/dataset_item_public_source.py +1 -3
  668. opik/rest_api/types/dataset_item_source.py +1 -3
  669. opik/rest_api/types/dataset_item_update.py +39 -0
  670. opik/rest_api/types/dataset_item_write.py +5 -6
  671. opik/rest_api/types/dataset_item_write_source.py +1 -3
  672. opik/rest_api/types/dataset_page_public.py +9 -6
  673. opik/rest_api/types/dataset_public.py +14 -6
  674. opik/rest_api/types/dataset_public_status.py +5 -0
  675. opik/rest_api/types/dataset_public_visibility.py +5 -0
  676. opik/rest_api/types/dataset_status.py +5 -0
  677. opik/rest_api/types/dataset_version_diff.py +22 -0
  678. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  679. opik/rest_api/types/dataset_version_page_public.py +23 -0
  680. opik/rest_api/types/dataset_version_public.py +59 -0
  681. opik/rest_api/types/dataset_version_summary.py +46 -0
  682. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  683. opik/rest_api/types/dataset_visibility.py +5 -0
  684. opik/rest_api/types/delete_attachments_request.py +23 -0
  685. opik/rest_api/types/delete_attachments_request_entity_type.py +5 -0
  686. opik/rest_api/types/delete_feedback_score.py +4 -5
  687. opik/rest_api/types/delete_ids_holder.py +19 -0
  688. opik/rest_api/types/delta.py +7 -9
  689. opik/rest_api/types/error_count_with_deviation.py +21 -0
  690. opik/rest_api/types/error_count_with_deviation_detailed.py +21 -0
  691. opik/rest_api/types/error_info.py +3 -5
  692. opik/rest_api/types/error_info_experiment_item_bulk_write_view.py +21 -0
  693. opik/rest_api/types/error_info_public.py +3 -5
  694. opik/rest_api/types/error_info_write.py +3 -5
  695. opik/rest_api/types/error_message.py +3 -5
  696. opik/rest_api/types/error_message_detail.py +3 -5
  697. opik/rest_api/types/error_message_detailed.py +3 -5
  698. opik/rest_api/types/error_message_public.py +3 -5
  699. opik/rest_api/types/experiment.py +21 -10
  700. opik/rest_api/types/experiment_group_aggregations_response.py +20 -0
  701. opik/rest_api/types/experiment_group_response.py +22 -0
  702. opik/rest_api/types/experiment_item.py +14 -11
  703. opik/rest_api/types/experiment_item_bulk_record.py +27 -0
  704. opik/rest_api/types/experiment_item_bulk_record_experiment_item_bulk_write_view.py +27 -0
  705. opik/rest_api/types/experiment_item_bulk_upload.py +27 -0
  706. opik/rest_api/types/experiment_item_compare.py +14 -11
  707. opik/rest_api/types/experiment_item_compare_trace_visibility_mode.py +5 -0
  708. opik/rest_api/types/experiment_item_public.py +6 -6
  709. opik/rest_api/types/experiment_item_public_trace_visibility_mode.py +5 -0
  710. opik/rest_api/types/experiment_item_trace_visibility_mode.py +5 -0
  711. opik/rest_api/types/experiment_page_public.py +9 -6
  712. opik/rest_api/types/experiment_public.py +21 -10
  713. opik/rest_api/types/experiment_public_status.py +5 -0
  714. opik/rest_api/types/experiment_public_type.py +5 -0
  715. opik/rest_api/types/experiment_score.py +20 -0
  716. opik/rest_api/types/experiment_score_public.py +20 -0
  717. opik/rest_api/types/experiment_score_write.py +20 -0
  718. opik/rest_api/types/experiment_status.py +5 -0
  719. opik/rest_api/types/experiment_type.py +5 -0
  720. opik/rest_api/types/export_trace_service_request.py +5 -0
  721. opik/rest_api/types/feedback.py +40 -27
  722. opik/rest_api/types/feedback_create.py +27 -13
  723. opik/rest_api/types/feedback_definition_page_public.py +4 -6
  724. opik/rest_api/types/feedback_object_public.py +40 -27
  725. opik/rest_api/types/feedback_public.py +40 -27
  726. opik/rest_api/types/feedback_score.py +7 -7
  727. opik/rest_api/types/feedback_score_average.py +3 -5
  728. opik/rest_api/types/feedback_score_average_detailed.py +3 -5
  729. opik/rest_api/types/feedback_score_average_public.py +3 -5
  730. opik/rest_api/types/feedback_score_batch.py +4 -6
  731. opik/rest_api/types/feedback_score_batch_item.py +6 -6
  732. opik/rest_api/types/feedback_score_batch_item_source.py +1 -3
  733. opik/rest_api/types/feedback_score_batch_item_thread.py +32 -0
  734. opik/rest_api/types/feedback_score_batch_item_thread_source.py +5 -0
  735. opik/rest_api/types/feedback_score_compare.py +7 -7
  736. opik/rest_api/types/feedback_score_compare_source.py +1 -3
  737. opik/rest_api/types/feedback_score_experiment_item_bulk_write_view.py +31 -0
  738. opik/rest_api/types/feedback_score_experiment_item_bulk_write_view_source.py +5 -0
  739. opik/rest_api/types/feedback_score_names.py +4 -6
  740. opik/rest_api/types/feedback_score_public.py +11 -7
  741. opik/rest_api/types/feedback_score_public_source.py +1 -3
  742. opik/rest_api/types/feedback_score_source.py +1 -3
  743. opik/rest_api/types/feedback_update.py +27 -13
  744. opik/rest_api/types/function.py +4 -7
  745. opik/rest_api/types/function_call.py +3 -5
  746. opik/rest_api/types/group_content.py +19 -0
  747. opik/rest_api/types/group_content_with_aggregations.py +21 -0
  748. opik/rest_api/types/group_detail.py +19 -0
  749. opik/rest_api/types/group_details.py +20 -0
  750. opik/rest_api/types/guardrail.py +34 -0
  751. opik/rest_api/types/guardrail_batch.py +20 -0
  752. opik/rest_api/types/guardrail_name.py +5 -0
  753. opik/rest_api/types/guardrail_result.py +5 -0
  754. opik/rest_api/types/guardrail_write.py +33 -0
  755. opik/rest_api/types/guardrail_write_name.py +5 -0
  756. opik/rest_api/types/guardrail_write_result.py +5 -0
  757. opik/rest_api/types/guardrails_validation.py +21 -0
  758. opik/rest_api/types/guardrails_validation_public.py +21 -0
  759. opik/rest_api/types/ids_holder.py +19 -0
  760. opik/rest_api/types/image_url.py +20 -0
  761. opik/rest_api/types/image_url_public.py +20 -0
  762. opik/rest_api/types/image_url_write.py +20 -0
  763. opik/rest_api/types/json_list_string.py +7 -0
  764. opik/rest_api/types/json_list_string_compare.py +7 -0
  765. opik/rest_api/types/json_list_string_experiment_item_bulk_write_view.py +7 -0
  766. opik/rest_api/types/json_list_string_public.py +7 -0
  767. opik/rest_api/types/json_list_string_write.py +7 -0
  768. opik/rest_api/types/json_schema.py +5 -8
  769. opik/rest_api/types/llm_as_judge_code.py +8 -12
  770. opik/rest_api/types/llm_as_judge_code_public.py +8 -12
  771. opik/rest_api/types/llm_as_judge_code_write.py +8 -12
  772. opik/rest_api/types/llm_as_judge_message.py +9 -7
  773. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  774. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  775. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  776. opik/rest_api/types/llm_as_judge_message_public.py +9 -7
  777. opik/rest_api/types/llm_as_judge_message_public_role.py +1 -1
  778. opik/rest_api/types/llm_as_judge_message_role.py +1 -1
  779. opik/rest_api/types/llm_as_judge_message_write.py +9 -7
  780. opik/rest_api/types/llm_as_judge_message_write_role.py +1 -1
  781. opik/rest_api/types/llm_as_judge_model_parameters.py +6 -5
  782. opik/rest_api/types/llm_as_judge_model_parameters_public.py +6 -5
  783. opik/rest_api/types/llm_as_judge_model_parameters_write.py +6 -5
  784. opik/rest_api/types/llm_as_judge_output_schema.py +4 -6
  785. opik/rest_api/types/llm_as_judge_output_schema_public.py +4 -6
  786. opik/rest_api/types/llm_as_judge_output_schema_public_type.py +1 -3
  787. opik/rest_api/types/llm_as_judge_output_schema_type.py +1 -3
  788. opik/rest_api/types/llm_as_judge_output_schema_write.py +4 -6
  789. opik/rest_api/types/llm_as_judge_output_schema_write_type.py +1 -3
  790. opik/rest_api/types/log_item.py +5 -7
  791. opik/rest_api/types/log_item_level.py +1 -3
  792. opik/rest_api/types/log_page.py +4 -6
  793. opik/rest_api/types/manual_evaluation_request.py +38 -0
  794. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  795. opik/rest_api/types/manual_evaluation_response.py +27 -0
  796. opik/rest_api/types/multipart_upload_part.py +20 -0
  797. opik/rest_api/types/numerical_feedback_definition.py +5 -7
  798. opik/rest_api/types/numerical_feedback_definition_create.py +4 -6
  799. opik/rest_api/types/numerical_feedback_definition_public.py +5 -7
  800. opik/rest_api/types/numerical_feedback_definition_update.py +4 -6
  801. opik/rest_api/types/numerical_feedback_detail.py +3 -5
  802. opik/rest_api/types/numerical_feedback_detail_create.py +3 -5
  803. opik/rest_api/types/numerical_feedback_detail_public.py +3 -5
  804. opik/rest_api/types/numerical_feedback_detail_update.py +3 -5
  805. opik/rest_api/types/optimization.py +37 -0
  806. opik/rest_api/types/optimization_page_public.py +28 -0
  807. opik/rest_api/types/optimization_public.py +37 -0
  808. opik/rest_api/types/optimization_public_status.py +7 -0
  809. opik/rest_api/types/optimization_status.py +7 -0
  810. opik/rest_api/types/optimization_studio_config.py +27 -0
  811. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  812. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  813. opik/rest_api/types/optimization_studio_log.py +22 -0
  814. opik/rest_api/types/optimization_write.py +30 -0
  815. opik/rest_api/types/optimization_write_status.py +7 -0
  816. opik/rest_api/types/page_columns.py +4 -6
  817. opik/rest_api/types/percentage_value_stat_public.py +4 -6
  818. opik/rest_api/types/percentage_values.py +8 -16
  819. opik/rest_api/types/percentage_values_detailed.py +8 -16
  820. opik/rest_api/types/percentage_values_public.py +8 -16
  821. opik/rest_api/types/project.py +12 -7
  822. opik/rest_api/types/project_detailed.py +12 -7
  823. opik/rest_api/types/project_detailed_visibility.py +5 -0
  824. opik/rest_api/types/project_metric_response_public.py +5 -9
  825. opik/rest_api/types/project_metric_response_public_interval.py +1 -3
  826. opik/rest_api/types/project_metric_response_public_metric_type.py +11 -1
  827. opik/rest_api/types/project_page_public.py +8 -10
  828. opik/rest_api/types/project_public.py +6 -6
  829. opik/rest_api/types/project_public_visibility.py +5 -0
  830. opik/rest_api/types/project_reference.py +31 -0
  831. opik/rest_api/types/project_reference_public.py +31 -0
  832. opik/rest_api/types/project_stat_item_object_public.py +8 -17
  833. opik/rest_api/types/project_stats_public.py +4 -6
  834. opik/rest_api/types/project_stats_summary.py +4 -6
  835. opik/rest_api/types/project_stats_summary_item.py +9 -6
  836. opik/rest_api/types/project_visibility.py +5 -0
  837. opik/rest_api/types/prompt.py +12 -7
  838. opik/rest_api/types/prompt_detail.py +12 -7
  839. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  840. opik/rest_api/types/prompt_page_public.py +9 -6
  841. opik/rest_api/types/prompt_public.py +11 -6
  842. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  843. opik/rest_api/types/prompt_template_structure.py +5 -0
  844. opik/rest_api/types/prompt_tokens_details.py +19 -0
  845. opik/rest_api/types/prompt_version.py +7 -6
  846. opik/rest_api/types/prompt_version_detail.py +7 -6
  847. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  848. opik/rest_api/types/prompt_version_link.py +4 -5
  849. opik/rest_api/types/prompt_version_link_public.py +4 -5
  850. opik/rest_api/types/prompt_version_link_write.py +3 -5
  851. opik/rest_api/types/prompt_version_page_public.py +9 -6
  852. opik/rest_api/types/prompt_version_public.py +7 -6
  853. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  854. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  855. opik/rest_api/types/prompt_version_update.py +33 -0
  856. opik/rest_api/types/provider_api_key.py +18 -8
  857. opik/rest_api/types/provider_api_key_page_public.py +27 -0
  858. opik/rest_api/types/provider_api_key_provider.py +1 -1
  859. opik/rest_api/types/provider_api_key_public.py +18 -8
  860. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  861. opik/rest_api/types/response_format.py +5 -7
  862. opik/rest_api/types/response_format_type.py +1 -3
  863. opik/rest_api/types/result.py +21 -0
  864. opik/rest_api/types/results_number_public.py +4 -6
  865. opik/rest_api/types/score_name.py +4 -5
  866. opik/rest_api/types/service_toggles_config.py +44 -0
  867. opik/rest_api/types/span.py +13 -15
  868. opik/rest_api/types/span_batch.py +4 -6
  869. opik/rest_api/types/span_enrichment_options.py +31 -0
  870. opik/rest_api/types/span_experiment_item_bulk_write_view.py +39 -0
  871. opik/rest_api/types/span_experiment_item_bulk_write_view_type.py +5 -0
  872. opik/rest_api/types/span_filter.py +23 -0
  873. opik/rest_api/types/span_filter_operator.py +21 -0
  874. opik/rest_api/types/span_filter_public.py +4 -6
  875. opik/rest_api/types/span_filter_public_operator.py +2 -0
  876. opik/rest_api/types/span_filter_write.py +23 -0
  877. opik/rest_api/types/span_filter_write_operator.py +21 -0
  878. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  879. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  880. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  881. opik/rest_api/types/span_page_public.py +9 -6
  882. opik/rest_api/types/span_public.py +19 -16
  883. opik/rest_api/types/span_public_type.py +1 -1
  884. opik/rest_api/types/span_type.py +1 -1
  885. opik/rest_api/types/span_update.py +46 -0
  886. opik/rest_api/types/span_update_type.py +5 -0
  887. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  888. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  889. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  890. opik/rest_api/types/span_write.py +13 -14
  891. opik/rest_api/types/span_write_type.py +1 -1
  892. opik/rest_api/types/spans_count_response.py +20 -0
  893. opik/rest_api/types/start_multipart_upload_response.py +20 -0
  894. opik/rest_api/types/stream_options.py +3 -5
  895. opik/rest_api/types/studio_evaluation.py +20 -0
  896. opik/rest_api/types/studio_evaluation_public.py +20 -0
  897. opik/rest_api/types/studio_evaluation_write.py +20 -0
  898. opik/rest_api/types/studio_llm_model.py +21 -0
  899. opik/rest_api/types/studio_llm_model_public.py +21 -0
  900. opik/rest_api/types/studio_llm_model_write.py +21 -0
  901. opik/rest_api/types/studio_message.py +20 -0
  902. opik/rest_api/types/studio_message_public.py +20 -0
  903. opik/rest_api/types/studio_message_write.py +20 -0
  904. opik/rest_api/types/studio_metric.py +21 -0
  905. opik/rest_api/types/studio_metric_public.py +21 -0
  906. opik/rest_api/types/studio_metric_write.py +21 -0
  907. opik/rest_api/types/studio_optimizer.py +21 -0
  908. opik/rest_api/types/studio_optimizer_public.py +21 -0
  909. opik/rest_api/types/studio_optimizer_write.py +21 -0
  910. opik/rest_api/types/studio_prompt.py +20 -0
  911. opik/rest_api/types/studio_prompt_public.py +20 -0
  912. opik/rest_api/types/studio_prompt_write.py +20 -0
  913. opik/rest_api/types/tool.py +4 -6
  914. opik/rest_api/types/tool_call.py +4 -6
  915. opik/rest_api/types/trace.py +26 -12
  916. opik/rest_api/types/trace_batch.py +4 -6
  917. opik/rest_api/types/trace_count_response.py +4 -6
  918. opik/rest_api/types/trace_enrichment_options.py +32 -0
  919. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +41 -0
  920. opik/rest_api/types/trace_filter.py +23 -0
  921. opik/rest_api/types/trace_filter_operator.py +21 -0
  922. opik/rest_api/types/trace_filter_public.py +23 -0
  923. opik/rest_api/types/trace_filter_public_operator.py +21 -0
  924. opik/rest_api/types/trace_filter_write.py +23 -0
  925. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  926. opik/rest_api/types/trace_page_public.py +8 -10
  927. opik/rest_api/types/trace_public.py +27 -13
  928. opik/rest_api/types/trace_public_visibility_mode.py +5 -0
  929. opik/rest_api/types/trace_thread.py +18 -9
  930. opik/rest_api/types/trace_thread_filter.py +23 -0
  931. opik/rest_api/types/trace_thread_filter_operator.py +21 -0
  932. opik/rest_api/types/trace_thread_filter_public.py +23 -0
  933. opik/rest_api/types/trace_thread_filter_public_operator.py +21 -0
  934. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  935. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  936. opik/rest_api/types/trace_thread_identifier.py +22 -0
  937. opik/rest_api/types/trace_thread_llm_as_judge_code.py +26 -0
  938. opik/rest_api/types/trace_thread_llm_as_judge_code_public.py +26 -0
  939. opik/rest_api/types/trace_thread_llm_as_judge_code_write.py +26 -0
  940. opik/rest_api/types/trace_thread_page.py +9 -6
  941. opik/rest_api/types/trace_thread_status.py +5 -0
  942. opik/rest_api/types/trace_thread_update.py +19 -0
  943. opik/rest_api/types/trace_thread_user_defined_metric_python_code.py +19 -0
  944. opik/rest_api/types/trace_thread_user_defined_metric_python_code_public.py +19 -0
  945. opik/rest_api/types/trace_thread_user_defined_metric_python_code_write.py +19 -0
  946. opik/rest_api/types/trace_update.py +39 -0
  947. opik/rest_api/types/trace_visibility_mode.py +5 -0
  948. opik/rest_api/types/trace_write.py +10 -11
  949. opik/rest_api/types/usage.py +6 -6
  950. opik/rest_api/types/user_defined_metric_python_code.py +3 -5
  951. opik/rest_api/types/user_defined_metric_python_code_public.py +3 -5
  952. opik/rest_api/types/user_defined_metric_python_code_write.py +3 -5
  953. opik/rest_api/types/value_entry.py +27 -0
  954. opik/rest_api/types/value_entry_compare.py +27 -0
  955. opik/rest_api/types/value_entry_compare_source.py +5 -0
  956. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +27 -0
  957. opik/rest_api/types/value_entry_experiment_item_bulk_write_view_source.py +5 -0
  958. opik/rest_api/types/value_entry_public.py +27 -0
  959. opik/rest_api/types/value_entry_public_source.py +5 -0
  960. opik/rest_api/types/value_entry_source.py +5 -0
  961. opik/rest_api/types/video_url.py +19 -0
  962. opik/rest_api/types/video_url_public.py +19 -0
  963. opik/rest_api/types/video_url_write.py +19 -0
  964. opik/rest_api/types/webhook.py +28 -0
  965. opik/rest_api/types/webhook_examples.py +19 -0
  966. opik/rest_api/types/webhook_public.py +28 -0
  967. opik/rest_api/types/webhook_test_result.py +23 -0
  968. opik/rest_api/types/webhook_test_result_status.py +5 -0
  969. opik/rest_api/types/webhook_write.py +23 -0
  970. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  971. opik/rest_api/types/workspace_configuration.py +27 -0
  972. opik/rest_api/types/workspace_metric_request.py +24 -0
  973. opik/rest_api/types/workspace_metric_response.py +20 -0
  974. opik/rest_api/types/workspace_metrics_summary_request.py +23 -0
  975. opik/rest_api/types/workspace_metrics_summary_response.py +20 -0
  976. opik/rest_api/types/workspace_name_holder.py +19 -0
  977. opik/rest_api/types/workspace_spans_count.py +20 -0
  978. opik/rest_api/types/workspace_trace_count.py +3 -5
  979. opik/rest_api/welcome_wizard/__init__.py +4 -0
  980. opik/rest_api/welcome_wizard/client.py +195 -0
  981. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  982. opik/rest_api/workspaces/__init__.py +2 -0
  983. opik/rest_api/workspaces/client.py +550 -77
  984. opik/rest_api/workspaces/raw_client.py +923 -0
  985. opik/rest_client_configurator/api.py +1 -0
  986. opik/rest_client_configurator/retry_decorator.py +1 -0
  987. opik/s3_httpx_client.py +67 -0
  988. opik/simulation/__init__.py +6 -0
  989. opik/simulation/simulated_user.py +99 -0
  990. opik/simulation/simulator.py +108 -0
  991. opik/synchronization.py +11 -24
  992. opik/tracing_runtime_config.py +48 -0
  993. opik/types.py +48 -2
  994. opik/url_helpers.py +13 -3
  995. opik/validation/chat_prompt_messages.py +241 -0
  996. opik/validation/feedback_score.py +4 -5
  997. opik/validation/parameter.py +122 -0
  998. opik/validation/parameters_validator.py +175 -0
  999. opik/validation/validator.py +30 -2
  1000. opik/validation/validator_helpers.py +147 -0
  1001. opik-1.9.71.dist-info/METADATA +370 -0
  1002. opik-1.9.71.dist-info/RECORD +1110 -0
  1003. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/WHEEL +1 -1
  1004. opik-1.9.71.dist-info/licenses/LICENSE +203 -0
  1005. opik/api_objects/prompt/prompt.py +0 -107
  1006. opik/api_objects/prompt/prompt_template.py +0 -35
  1007. opik/cli.py +0 -193
  1008. opik/evaluation/metrics/models.py +0 -8
  1009. opik/hooks.py +0 -13
  1010. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  1011. opik/integrations/bedrock/helpers.py +0 -8
  1012. opik/integrations/langchain/google_run_helpers.py +0 -75
  1013. opik/integrations/langchain/openai_run_helpers.py +0 -122
  1014. opik/message_processing/message_processors.py +0 -203
  1015. opik/rest_api/types/delta_role.py +0 -7
  1016. opik/rest_api/types/json_object_schema.py +0 -34
  1017. opik-1.6.4.dist-info/METADATA +0 -270
  1018. opik-1.6.4.dist-info/RECORD +0 -507
  1019. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  1020. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  1021. {opik-1.6.4.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,35 @@
1
+ import logging
2
+ import opik.logging_messages as logging_messages
3
+ import opik.exceptions as exceptions
4
+ from opik.evaluation.metrics import score_result
5
+ from opik.evaluation.metrics.llm_judges import parsing_helpers
6
+
7
+ LOGGER = logging.getLogger(__name__)
8
+
9
+
10
+ def parse_model_output(content: str, name: str) -> score_result.ScoreResult:
11
+ try:
12
+ list_content = parsing_helpers.extract_json_content_or_raise(content)
13
+
14
+ reason = ""
15
+ score = 0.0
16
+
17
+ for claim in list_content:
18
+ claim_score = float(claim["score"])
19
+
20
+ if not (0.0 <= claim_score <= 1.0):
21
+ raise exceptions.MetricComputationError(
22
+ f"Factuality score must be between 0.0 and 1.0, got {claim_score}"
23
+ )
24
+
25
+ score += claim_score
26
+ reason += claim["reason"] + "\n"
27
+
28
+ score /= len(list_content)
29
+
30
+ return score_result.ScoreResult(name=name, value=score, reason=reason)
31
+ except Exception as e:
32
+ LOGGER.error(f"Failed to parse model output: {e}", exc_info=True)
33
+ raise exceptions.MetricComputationError(
34
+ logging_messages.FACTUALITY_SCORE_CALC_FAILED
35
+ )
@@ -46,7 +46,7 @@ def generate_query(
46
46
  ) -> str:
47
47
  examples_str = "\n\n".join(
48
48
  [
49
- f"Example {i+1}:\n"
49
+ f"Example {i + 1}:\n"
50
50
  f"Input: {example['input']}\n"
51
51
  f"Output: {example['output']}\n"
52
52
  f"Contexts: {example['context']}\n"
@@ -0,0 +1,5 @@
1
+ """Public exports for the GEval metric package."""
2
+
3
+ from .metric import GEval
4
+
5
+ __all__ = ["GEval"]
@@ -1,17 +1,13 @@
1
- import math
2
- from functools import cached_property
3
- from typing import Any, Optional, TYPE_CHECKING, Union
1
+ from collections import OrderedDict
2
+ from threading import Lock
3
+ from typing import Any, Dict, Optional, Tuple, Union
4
4
  import pydantic
5
- import json
6
-
7
- if TYPE_CHECKING:
8
- from litellm.types.utils import ModelResponse
9
5
 
10
6
  from opik.evaluation.metrics import base_metric, score_result
11
7
  from opik.evaluation.models import base_model, models_factory
12
- from opik.logging_messages import GEVAL_SCORE_CALC_FAILED
13
- from .template import G_EVAL_COT_TEMPLATE, G_EVAL_QUERY_TEMPLATE
14
- from opik import exceptions
8
+ from opik.evaluation import models
9
+ from . import template, parser
10
+ from .presets import GEVAL_PRESETS
15
11
 
16
12
 
17
13
  class GEvalScoreFormat(pydantic.BaseModel):
@@ -19,56 +15,175 @@ class GEvalScoreFormat(pydantic.BaseModel):
19
15
  reason: str
20
16
 
21
17
 
18
+ def _freeze_for_cache(value: Any) -> Any:
19
+ """Convert nested structures into hashable representations for caching."""
20
+
21
+ if isinstance(value, dict):
22
+ return tuple(
23
+ sorted((key, _freeze_for_cache(val)) for key, val in value.items())
24
+ )
25
+ if isinstance(value, (list, tuple)):
26
+ return tuple(_freeze_for_cache(item) for item in value)
27
+ if isinstance(value, set):
28
+ return tuple(sorted(_freeze_for_cache(item) for item in value))
29
+ return value
30
+
31
+
22
32
  class GEval(base_metric.BaseMetric):
33
+ """
34
+ Generalised evaluation metric that prompts an LLM to grade another LLM output.
35
+
36
+ GEval builds a reusable chain-of-thought using the provided
37
+ ``task_introduction`` and ``evaluation_criteria`` prompts, then requests a
38
+ final score and rationale for each evaluated output.
39
+
40
+ Args:
41
+ task_introduction: Instruction describing the evaluator's persona/purpose.
42
+ evaluation_criteria: Detailed rubric presented to the evaluator.
43
+ model: Optional model identifier or ``OpikBaseModel`` for the judge.
44
+ name: Display name for the metric result. Defaults to ``"g_eval_metric"``.
45
+ track: Whether to automatically track metric results. Defaults to ``True``.
46
+ project_name: Optional tracking project name.
47
+ temperature: Sampling temperature forwarded to the judge model.
48
+ seed: Optional seed for reproducible generation (if supported by the model).
49
+
50
+ Example:
51
+ >>> from opik.evaluation.metrics.llm_judges.g_eval.metric import GEval
52
+ >>> metric = GEval(
53
+ ... task_introduction="You evaluate politeness of responses.",
54
+ ... evaluation_criteria="Score from 1 (rude) to 5 (very polite).",
55
+ ... model="gpt-4",
56
+ ... )
57
+ >>> result = metric.score(output="Thanks so much for your help!") # doctest: +SKIP
58
+ >>> result.value # doctest: +SKIP
59
+ 0.9
60
+ """
61
+
62
+ _CHAIN_OF_THOUGHT_CACHE: "OrderedDict[Tuple[str, str, str, Any], str]" = (
63
+ OrderedDict()
64
+ )
65
+ _CHAIN_OF_THOUGHT_LOCK: Lock = Lock()
66
+ _MAX_CHAIN_OF_THOUGHT_CACHE = 128
67
+
23
68
  def __init__(
24
69
  self,
25
70
  task_introduction: str,
26
71
  evaluation_criteria: str,
27
- model: Optional[Union[str, base_model.OpikBaseModel]] = None,
72
+ model: Optional[Union[str, models.base_model.OpikBaseModel]] = None,
28
73
  name: str = "g_eval_metric",
29
74
  track: bool = True,
75
+ project_name: Optional[str] = None,
76
+ temperature: float = 0.0,
77
+ seed: Optional[int] = None,
30
78
  ):
31
- """
32
- A metric that evaluates an LLM output based on chain-of-thought built with the evaluation criteria provided
33
- by the user.
34
-
35
- For more details see the original paper: https://arxiv.org/pdf/2303.16634
36
-
37
- Args:
38
- task_introduction: An instruction for LLM used to generate an evaluation chain-of-thought and in evaluation call itself.
39
- `opik.evaluation.models.LiteLLMChatModel` is used by default.
40
- evaluation_criteria: The main task for G-Eval metric written in human language.
41
- model: The LLM to use for evaluation. Can be a string (model name) or an `opik.evaluation.models.OpikBaseModel` subclass instance.
42
- name: The name of the metric.
43
- track: Whether to track the metric. Defaults to True.
44
- """
45
79
  super().__init__(
46
80
  name=name,
47
81
  track=track,
82
+ project_name=project_name,
48
83
  )
49
- self._init_model(model)
50
-
51
84
  self.task_introduction = task_introduction
52
85
  self.evaluation_criteria = evaluation_criteria
86
+ self._seed = seed
87
+
88
+ self._log_probs_supported = False
89
+
90
+ self._init_model(model, temperature=temperature)
53
91
 
54
- @cached_property
55
92
  def llm_chain_of_thought(self) -> str:
56
- prompt = G_EVAL_COT_TEMPLATE.format(
93
+ cache_key = self._chain_of_thought_cache_key()
94
+ cached = self._get_cached_chain_of_thought(cache_key)
95
+ if cached is not None:
96
+ return cached
97
+
98
+ prompt = template.G_EVAL_COT_TEMPLATE.format(
57
99
  task_introduction=self.task_introduction,
58
100
  evaluation_criteria=self.evaluation_criteria,
59
101
  )
60
- return self._model.generate_string(input=prompt)
102
+ generated = self._model.generate_string(input=prompt)
103
+ self._store_chain_of_thought(cache_key, generated)
104
+ return generated
105
+
106
+ async def allm_chain_of_thought(self) -> str:
107
+ cache_key = self._chain_of_thought_cache_key()
108
+ cached = self._get_cached_chain_of_thought(cache_key)
109
+ if cached is not None:
110
+ return cached
111
+
112
+ prompt = template.G_EVAL_COT_TEMPLATE.format(
113
+ task_introduction=self.task_introduction,
114
+ evaluation_criteria=self.evaluation_criteria,
115
+ )
116
+ generated = await self._model.agenerate_string(input=prompt)
117
+ self._store_chain_of_thought(cache_key, generated)
118
+ return generated
61
119
 
62
120
  def _init_model(
63
- self, model: Optional[Union[str, base_model.OpikBaseModel]]
121
+ self, model: Optional[Union[str, base_model.OpikBaseModel]], temperature: float
64
122
  ) -> None:
65
123
  if isinstance(model, base_model.OpikBaseModel):
66
124
  self._model = model
67
125
  else:
68
- self._model = models_factory.get(
69
- model_name=model,
70
- must_support_arguments=["logprobs", "top_logprobs"],
71
- )
126
+ model_kwargs = {"temperature": temperature}
127
+ if self._seed is not None:
128
+ model_kwargs["seed"] = self._seed
129
+
130
+ self._model = models_factory.get(model_name=model, **model_kwargs)
131
+
132
+ if (
133
+ hasattr(self._model, "supported_params")
134
+ and "logprobs" in self._model.supported_params
135
+ and "top_logprobs" in self._model.supported_params
136
+ ):
137
+ self._log_probs_supported = True
138
+
139
+ @classmethod
140
+ def _get_cached_chain_of_thought(
141
+ cls, cache_key: Tuple[str, str, str, Any]
142
+ ) -> Optional[str]:
143
+ with cls._CHAIN_OF_THOUGHT_LOCK:
144
+ value = cls._CHAIN_OF_THOUGHT_CACHE.get(cache_key)
145
+ if value is not None:
146
+ cls._CHAIN_OF_THOUGHT_CACHE.move_to_end(cache_key)
147
+ return value
148
+
149
+ @classmethod
150
+ def _store_chain_of_thought(
151
+ cls, cache_key: Tuple[str, str, str, Any], value: str
152
+ ) -> None:
153
+ with cls._CHAIN_OF_THOUGHT_LOCK:
154
+ existing = cls._CHAIN_OF_THOUGHT_CACHE.get(cache_key)
155
+ if existing is not None:
156
+ cls._CHAIN_OF_THOUGHT_CACHE.move_to_end(cache_key)
157
+ return
158
+ cls._CHAIN_OF_THOUGHT_CACHE[cache_key] = value
159
+ cls._CHAIN_OF_THOUGHT_CACHE.move_to_end(cache_key)
160
+ while len(cls._CHAIN_OF_THOUGHT_CACHE) > cls._MAX_CHAIN_OF_THOUGHT_CACHE:
161
+ cls._CHAIN_OF_THOUGHT_CACHE.popitem(last=False)
162
+
163
+ def _chain_of_thought_cache_key(self) -> Tuple[str, str, str, Any]:
164
+ model_name = getattr(self._model, "model_name", "unknown")
165
+ return (
166
+ self.task_introduction,
167
+ self.evaluation_criteria,
168
+ model_name,
169
+ self._model_cache_fingerprint(),
170
+ )
171
+
172
+ def _model_cache_fingerprint(self) -> Any:
173
+ fingerprint_candidate = getattr(self._model, "cache_fingerprint", None)
174
+ if callable(fingerprint_candidate):
175
+ try:
176
+ fingerprint = fingerprint_candidate()
177
+ except Exception:
178
+ fingerprint = None
179
+ else:
180
+ return _freeze_for_cache(fingerprint)
181
+
182
+ completion_kwargs = getattr(self._model, "_completion_kwargs", None)
183
+ if isinstance(completion_kwargs, dict):
184
+ return _freeze_for_cache(completion_kwargs)
185
+
186
+ return id(self._model)
72
187
 
73
188
  def score(
74
189
  self,
@@ -86,10 +201,10 @@ class GEval(base_metric.BaseMetric):
86
201
  score_result.ScoreResult: A ScoreResult object containing the G-Eval score
87
202
  (between 0.0 and 1.0) and a reason for the score.
88
203
  """
89
- llm_query = G_EVAL_QUERY_TEMPLATE.format(
204
+ llm_query = template.G_EVAL_QUERY_TEMPLATE.format(
90
205
  task_introduction=self.task_introduction,
91
206
  evaluation_criteria=self.evaluation_criteria,
92
- chain_of_thought=self.llm_chain_of_thought,
207
+ chain_of_thought=self.llm_chain_of_thought(),
93
208
  input=output,
94
209
  )
95
210
 
@@ -100,33 +215,44 @@ class GEval(base_metric.BaseMetric):
100
215
  },
101
216
  ]
102
217
 
103
- model_output = self._model.generate_provider_response(
104
- messages=request,
105
- logprobs=True,
106
- top_logprobs=20,
107
- response_format=GEvalScoreFormat,
218
+ if isinstance(self._model, models.LiteLLMChatModel):
219
+ provider_kwargs: Dict[str, Any] = {
220
+ "response_format": GEvalScoreFormat,
221
+ }
222
+ if self._log_probs_supported:
223
+ provider_kwargs["logprobs"] = True
224
+ provider_kwargs["top_logprobs"] = 20
225
+
226
+ with base_model.get_provider_response(
227
+ model_provider=self._model,
228
+ messages=request,
229
+ **provider_kwargs,
230
+ ) as model_output:
231
+ return parser.parse_litellm_model_output(
232
+ content=model_output,
233
+ name=self.name,
234
+ log_probs_supported=self._log_probs_supported,
235
+ )
236
+
237
+ model_output_string = self._model.generate_string(
238
+ input=llm_query, response_format=GEvalScoreFormat
108
239
  )
109
240
 
110
- return self._parse_model_output(model_output)
241
+ return parser.parse_model_output_string(model_output_string, self.name)
111
242
 
112
243
  async def ascore(
113
- self, output: str, **ignored_kwargs: Any
244
+ self,
245
+ output: str,
246
+ **ignored_kwargs: Any,
114
247
  ) -> score_result.ScoreResult:
115
248
  """
116
- Calculate the G-Eval score for the given LLM's output.
117
-
118
- Args:
119
- output: The LLM's output to evaluate.
120
- **ignored_kwargs: Additional keyword arguments that are ignored.
121
-
122
- Returns:
123
- score_result.ScoreResult: A ScoreResult object containing the G-Eval score
124
- (between 0.0 and 1.0) and a reason for the score.
249
+ Async variant of :meth:`score`, evaluating the provided LLM output using
250
+ the configured judge model and returning a ``ScoreResult``.
125
251
  """
126
- llm_query = G_EVAL_QUERY_TEMPLATE.format(
252
+ llm_query = template.G_EVAL_QUERY_TEMPLATE.format(
127
253
  task_introduction=self.task_introduction,
128
254
  evaluation_criteria=self.evaluation_criteria,
129
- chain_of_thought=self.llm_chain_of_thought,
255
+ chain_of_thought=await self.allm_chain_of_thought(),
130
256
  input=output,
131
257
  )
132
258
 
@@ -137,69 +263,74 @@ class GEval(base_metric.BaseMetric):
137
263
  },
138
264
  ]
139
265
 
140
- model_output = await self._model.agenerate_provider_response(
141
- messages=request,
142
- logprobs=True,
143
- top_logprobs=20,
144
- response_format=GEvalScoreFormat,
266
+ if isinstance(self._model, models.LiteLLMChatModel):
267
+ provider_kwargs: Dict[str, Any] = {
268
+ "response_format": GEvalScoreFormat,
269
+ }
270
+ if self._log_probs_supported:
271
+ provider_kwargs["logprobs"] = True
272
+ provider_kwargs["top_logprobs"] = 20
273
+
274
+ async with base_model.aget_provider_response(
275
+ model_provider=self._model,
276
+ messages=request,
277
+ **provider_kwargs,
278
+ ) as model_output:
279
+ return parser.parse_litellm_model_output(
280
+ content=model_output,
281
+ name=self.name,
282
+ log_probs_supported=self._log_probs_supported,
283
+ )
284
+
285
+ model_output_string = await self._model.agenerate_string(
286
+ input=llm_query, response_format=GEvalScoreFormat
145
287
  )
146
288
 
147
- return self._parse_model_output(model_output)
148
-
149
- def _parse_model_output(self, content: "ModelResponse") -> score_result.ScoreResult:
150
- """
151
- This method computes the final score based on the model's response. The model's response is a dictionary
152
- with a `score` key and a `reason` key. The prompt template also specifies that the score should be an integer
153
- between 0 and 10.
154
-
155
- In order to make the score computation more robust, we look at the top logprobs of the score token and compute
156
- a weighted average of the scores. Since we try to enforce the format of the model's response, we can assume that
157
- the score token is always the fourth token in the response (first token is `{"`, followed by `score` and `":`).
158
- """
159
- try:
160
- # Compute score using top logprobs
161
- score_token_position = 3
162
- top_score_logprobs = content.choices[0].model_extra["logprobs"]["content"][
163
- score_token_position
164
- ]["top_logprobs"]
289
+ return parser.parse_model_output_string(model_output_string, self.name)
165
290
 
166
- linear_probs_sum = 0.0
167
- weighted_score_sum = 0.0
168
291
 
169
- for token_info in top_score_logprobs:
170
- # litellm in v1.60.2 (or earlier) started provide logprobes
171
- # as pydantic model, not just dict
172
- # we will convert model to dict to provide backward compatability
173
- if not isinstance(token_info, dict):
174
- token_info = token_info.model_dump()
292
+ class GEvalPreset(GEval):
293
+ """
294
+ Pre-configured GEval variant with author-provided prompt templates.
175
295
 
176
- # if not a number
177
- if not token_info["token"].isdecimal():
178
- continue
296
+ Args:
297
+ preset: Key name from ``GEVAL_PRESETS`` describing the evaluation rubric.
298
+ model: Optional model identifier or ``OpikBaseModel`` instance.
299
+ track: Whether to automatically track metric results. Defaults to ``True``.
300
+ project_name: Optional tracking project name.
301
+ temperature: Sampling temperature forwarded to the judge model.
302
+ name: Optional override for the metric name (defaults to preset name).
179
303
 
180
- score = int(token_info["token"])
304
+ Example:
305
+ >>> from opik.evaluation.metrics.llm_judges.g_eval.metric import GEvalPreset
306
+ >>> metric = GEvalPreset(preset="qa_relevance", model="gpt-4")
307
+ >>> result = metric.score(output="Answer addresses the user's question.") # doctest: +SKIP
308
+ >>> result.value # doctest: +SKIP
309
+ 0.85
310
+ """
181
311
 
182
- # if score value not in scale
183
- if not 0 <= score <= 10:
184
- continue
185
-
186
- log_prob = token_info["logprob"]
187
- linear_prob = math.exp(log_prob)
188
-
189
- linear_probs_sum += linear_prob
190
- weighted_score_sum += linear_prob * score
191
-
192
- final_score: float = weighted_score_sum / linear_probs_sum / 10
193
-
194
- if not (0.0 <= final_score <= 1.0):
195
- raise ValueError
196
-
197
- # Get the reason
198
- reason = json.loads(content.choices[0].message.content)["reason"]
312
+ def __init__(
313
+ self,
314
+ preset: str,
315
+ model: Optional[Union[str, models.base_model.OpikBaseModel]] = None,
316
+ track: bool = True,
317
+ project_name: Optional[str] = None,
318
+ temperature: float = 0.0,
319
+ name: Optional[str] = None,
320
+ ):
321
+ try:
322
+ definition = GEVAL_PRESETS[preset]
323
+ except KeyError as error:
324
+ raise ValueError(
325
+ f"Unknown GEval preset '{preset}'. Available presets: {list(GEVAL_PRESETS)}"
326
+ ) from error
199
327
 
200
- # Return the score and the reason
201
- return score_result.ScoreResult(
202
- name=self.name, value=final_score, reason=reason
203
- )
204
- except Exception:
205
- raise exceptions.MetricComputationError(GEVAL_SCORE_CALC_FAILED)
328
+ super().__init__(
329
+ task_introduction=definition.task_introduction,
330
+ evaluation_criteria=definition.evaluation_criteria,
331
+ model=model,
332
+ name=name or definition.name,
333
+ track=track,
334
+ project_name=project_name,
335
+ temperature=temperature,
336
+ )
@@ -0,0 +1,161 @@
1
+ import logging
2
+ import json
3
+ import math
4
+ from typing import Any, Dict, TYPE_CHECKING
5
+ import opik.exceptions as exceptions
6
+ from opik.evaluation.metrics import score_result
7
+ from opik.evaluation.metrics.llm_judges import parsing_helpers
8
+ from opik.logging_messages import GEVAL_SCORE_CALC_FAILED
9
+
10
+ if TYPE_CHECKING: # TODO: Daniel check if this is needed
11
+ from litellm.types.utils import ModelResponse as LiteLLMModelResponse
12
+
13
+ LOGGER = logging.getLogger(__name__)
14
+
15
+
16
+ def parse_model_output_string(
17
+ content: str, metric_name: str
18
+ ) -> score_result.ScoreResult:
19
+ try:
20
+ dict_content = parsing_helpers.extract_json_content_or_raise(content)
21
+
22
+ score_raw = float(dict_content["score"])
23
+
24
+ if not 0 <= score_raw <= 10:
25
+ raise ValueError(
26
+ f"LLM returned score outside of [0, 10] range: {score_raw}"
27
+ )
28
+
29
+ normalised_score = score_raw / 10
30
+
31
+ reason = str(dict_content["reason"])
32
+
33
+ return score_result.ScoreResult(
34
+ name=metric_name,
35
+ value=normalised_score,
36
+ reason=reason,
37
+ )
38
+ except Exception as exception:
39
+ LOGGER.error(f"Failed to parse model output: {exception}", exc_info=True)
40
+ raise exceptions.MetricComputationError(GEVAL_SCORE_CALC_FAILED) from exception
41
+
42
+
43
+ def parse_litellm_model_output(
44
+ content: "LiteLLMModelResponse", name: str, log_probs_supported: bool
45
+ ) -> score_result.ScoreResult:
46
+ """
47
+ This method computes the final score based on the model's response. The model's response is a dictionary
48
+ with a `score` key and a `reason` key. The prompt template also specifies that the score should be an integer
49
+ between 0 and 10.
50
+
51
+ In order to make the score computation more robust, we look at the top logprobs of the score token and compute
52
+ a weighted average of the scores. Since we try to enforce the format of the model's response, we can assume that
53
+ the score token is always the fourth token in the response (first token is `{"`, followed by `score` and `":`).
54
+ """
55
+ try:
56
+ choice_dict = _normalise_first_choice(content)
57
+
58
+ if not log_probs_supported:
59
+ return _extract_score_from_text_content(choice_dict, name=name)
60
+
61
+ log_probs = _to_dict(choice_dict.get("logprobs"))
62
+ entries = log_probs.get("content") or []
63
+ score_token_position = 3
64
+ if len(entries) <= score_token_position:
65
+ return _extract_score_from_text_content(choice_dict, name=name)
66
+
67
+ entry_dict = _to_dict(entries[score_token_position])
68
+ top_logprobs = entry_dict.get("top_logprobs") or []
69
+ token_candidate = str(entry_dict.get("token", ""))
70
+
71
+ linear_probs_sum = 0.0
72
+ weighted_score_sum = 0.0
73
+
74
+ for candidate in top_logprobs:
75
+ token_info = _to_dict(candidate)
76
+ token_str = str(token_info.get("token", ""))
77
+ if not token_str.isdecimal():
78
+ continue
79
+
80
+ score = int(token_str)
81
+ if not 0 <= score <= 10:
82
+ continue
83
+
84
+ log_prob = token_info.get("logprob")
85
+ if log_prob is None:
86
+ continue
87
+
88
+ linear_prob = math.exp(float(log_prob))
89
+ linear_probs_sum += linear_prob
90
+ weighted_score_sum += linear_prob * score
91
+
92
+ if linear_probs_sum != 0.0:
93
+ final_score: float = weighted_score_sum / linear_probs_sum / 10
94
+ else:
95
+ if not token_candidate.isdecimal():
96
+ raise exceptions.MetricComputationError(GEVAL_SCORE_CALC_FAILED)
97
+ final_score = int(token_candidate) / 10
98
+
99
+ if not (0.0 <= final_score <= 1.0):
100
+ raise ValueError(
101
+ f"Failed to compute final score from log_probs, the value is out of [0, 1] range: {final_score}"
102
+ )
103
+
104
+ reason_data = json.loads(_extract_message_content(choice_dict))
105
+ reason = reason_data["reason"]
106
+ return score_result.ScoreResult(name=name, value=final_score, reason=reason)
107
+ except Exception as exception:
108
+ LOGGER.error(f"Failed to parse model output: {exception}", exc_info=True)
109
+ raise exceptions.MetricComputationError(GEVAL_SCORE_CALC_FAILED) from exception
110
+
111
+
112
+ def _extract_score_from_text_content(
113
+ choice: Dict[str, Any], name: str
114
+ ) -> score_result.ScoreResult:
115
+ text_content = _extract_message_content(choice)
116
+ return parse_model_output_string(text_content, name)
117
+
118
+
119
+ def _extract_message_content(choice: Dict[str, Any]) -> str:
120
+ message = choice.get("message")
121
+ if isinstance(message, dict):
122
+ content = message.get("content")
123
+ else:
124
+ content = getattr(message, "content", None)
125
+
126
+ if not isinstance(content, str):
127
+ raise ValueError("LLM response is missing textual content")
128
+
129
+ return content
130
+
131
+
132
+ def _normalise_choice(choice: Any) -> Dict[str, Any]:
133
+ choice_dict = _to_dict(choice)
134
+ if choice_dict:
135
+ return choice_dict
136
+ return {
137
+ "message": getattr(choice, "message", None),
138
+ "logprobs": getattr(choice, "logprobs", None),
139
+ }
140
+
141
+
142
+ def _normalise_first_choice(response: Any) -> Dict[str, Any]:
143
+ choices = getattr(response, "choices", None)
144
+ if not isinstance(choices, list) or not choices:
145
+ raise exceptions.MetricComputationError(
146
+ "LLM response did not contain any choices to parse."
147
+ )
148
+ return _normalise_choice(choices[0])
149
+
150
+
151
+ def _to_dict(value: Any) -> Dict[str, Any]:
152
+ if isinstance(value, dict):
153
+ return value
154
+ if hasattr(value, "model_dump") and callable(value.model_dump):
155
+ try:
156
+ return value.model_dump()
157
+ except TypeError:
158
+ pass
159
+ if hasattr(value, "__dict__"):
160
+ return dict(value.__dict__)
161
+ return {}