opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -4,28 +4,67 @@ import warnings
4
4
  from functools import cached_property
5
5
  from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING, Type
6
6
  import pydantic
7
+ import tenacity
7
8
 
8
9
  if TYPE_CHECKING:
9
10
  from litellm.types.utils import ModelResponse
10
11
 
11
12
  import opik.semantic_version as semantic_version
13
+ import opik.integrations.litellm as litellm_integration
14
+ import opik.config as opik_config
12
15
 
13
16
  from .. import base_model
14
- from . import opik_monitor, warning_filters
17
+ from . import warning_filters, util
18
+ from opik import exceptions
15
19
 
16
20
  LOGGER = logging.getLogger(__name__)
17
21
 
18
22
 
23
def _log_warning(message: str, *args: Any) -> None:
    """Send one warning through the module logger and again through the root logger.

    The record is deliberately emitted twice so that handlers attached to either
    logger receive it: pytest's log capture hooks into the root logger, while
    production runs rely on the module-level ``LOGGER``.

    Args:
        message: %-style format string for the warning.
        *args: Values interpolated lazily into ``message`` by the logging module.
    """
    root = logging.getLogger()
    LOGGER.warning(message, *args)
    # Guard against double emission in the case where LOGGER is the root logger.
    if LOGGER is not root:
        root.warning(message, *args)
35
+
36
+
37
+ def _extract_message_content(choice: Dict[str, Any]) -> Optional[str]:
38
+ message = choice.get("message")
39
+ if isinstance(message, dict):
40
+ content = message.get("content")
41
+ else:
42
+ content = getattr(message, "content", None)
43
+ if content is not None and not isinstance(content, str):
44
+ raise ValueError("LLM choice contains non-text content")
45
+ return content
46
+
47
+
48
def _first_choice(response: Any) -> Dict[str, Any]:
    """Return the first entry of ``response.choices`` as a dict.

    The entry is converted with ``util.normalise_choice``.

    Raises:
        exceptions.BaseLLMError: If ``response`` has no ``choices`` attribute, or
            the attribute is not a list, or the list is empty.
    """
    choices = getattr(response, "choices", None)
    if isinstance(choices, list) and choices:
        return util.normalise_choice(choices[0])
    raise exceptions.BaseLLMError(
        "LLM response did not contain any choices to parse."
    )
55
+
56
+
19
57
  class LiteLLMChatModel(base_model.OpikBaseModel):
20
58
  def __init__(
21
59
  self,
22
- model_name: str = "gpt-4o",
60
+ model_name: str = "gpt-5-nano",
23
61
  must_support_arguments: Optional[List[str]] = None,
24
62
  **completion_kwargs: Any,
25
63
  ) -> None:
64
+ import litellm
65
+
26
66
  """
27
67
  Initializes the base model with a given model name.
28
- Wraps `litellm.completion` function.
29
68
  You can find all possible completion_kwargs parameters here: https://docs.litellm.ai/docs/completion/input.
30
69
 
31
70
  Args:
@@ -39,12 +78,13 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
39
78
 
40
79
  completion_kwargs: key-value arguments to always pass additionally into `litellm.completion` function.
41
80
  """
42
-
43
81
  super().__init__(model_name=model_name)
44
82
 
45
83
  self._check_model_name()
46
84
  self._check_must_support_arguments(must_support_arguments)
47
85
 
86
+ self._unsupported_warned: Set[str] = set()
87
+
48
88
  self._completion_kwargs: Dict[str, Any] = (
49
89
  self._remove_unnecessary_not_supported_params(completion_kwargs)
50
90
  )
@@ -55,11 +95,21 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
55
95
  # Litellm has already fixed that, but it is not released yet, so this filter
56
96
  # should be removed from here soon.
57
97
  warnings.simplefilter("ignore")
58
- import litellm
59
98
 
60
99
  warning_filters.add_warning_filters()
61
100
 
62
- self._engine = litellm
101
+ config = opik_config.OpikConfig()
102
+
103
+ if config.enable_litellm_models_monitoring:
104
+ self._litellm_completion = litellm_integration.track_completion()(
105
+ litellm.completion
106
+ )
107
+ self._litellm_acompletion = litellm_integration.track_completion()(
108
+ litellm.acompletion
109
+ )
110
+ else:
111
+ self._litellm_completion = litellm.completion
112
+ self._litellm_acompletion = litellm.acompletion
63
113
 
64
114
  @cached_property
65
115
  def supported_params(self) -> Set[str]:
@@ -114,6 +164,7 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
114
164
  ) -> Dict[str, Any]:
115
165
  filtered_params = {**params}
116
166
 
167
+ # Fix for impacted providers like Groq and OpenAI
117
168
  if (
118
169
  "response_format" in params
119
170
  and "response_format" not in self.supported_params
@@ -122,9 +173,54 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
122
173
  LOGGER.debug(
123
174
  "This model does not support the response_format parameter and it will be ignored."
124
175
  )
176
+ # NOTE: Filtering based on `supported_params` has been disabled temporarily
177
+ # because LiteLLM does not surface provider-specific connection fields via
178
+ # `get_supported_openai_params`. Dropping those kwargs breaks Azure/Groq
179
+ # users who rely on parameters such as `api_version` and `azure_endpoint`.
180
+ # The old logic is kept here commented for future restoration.
181
+ #
182
+ # for key in list(filtered_params.keys()):
183
+ # if (
184
+ # key not in self.supported_params
185
+ # and not util.should_preserve_provider_param(key)
186
+ # ):
187
+ # filtered_params.pop(key)
188
+ # if key not in self._unsupported_warned:
189
+ # _log_warning(
190
+ # "Parameter '%s' is not supported by model %s and will be ignored.",
191
+ # key,
192
+ # self.model_name,
193
+ # )
194
+ # self._unsupported_warned.add(key)
195
+
196
+ util.apply_model_specific_filters(
197
+ model_name=self.model_name,
198
+ params=filtered_params,
199
+ already_warned=self._unsupported_warned,
200
+ warn=self._warn_about_unsupported_param,
201
+ )
125
202
 
126
203
  return filtered_params
127
204
 
205
def _warn_about_unsupported_param(self, param: str, value: Any) -> None:
    """Log a warning for a parameter that is being dropped for this model.

    ``logprobs`` and ``top_logprobs`` are skipped without logging,
    ``temperature`` gets a dedicated message that includes the dropped value,
    and every other parameter gets a generic message.

    Args:
        param: Name of the dropped parameter.
        value: The value that was supplied; only reported for ``temperature``.
    """
    if param in {"logprobs", "top_logprobs"}:
        # LiteLLM warns noisily when models like gpt-5-nano don't support these
        # fields. They are already dropped gracefully, so stay quiet here to
        # avoid spamming GEval users with repeated warnings.
        return

    if param != "temperature":
        _log_warning(
            "Model %s does not support %s. Dropping the parameter.",
            self.model_name,
            param,
        )
        return

    _log_warning(
        "Model %s only supports temperature=1. Dropping temperature=%s.",
        self.model_name,
        value,
    )
223
+
128
224
  def generate_string(
129
225
  self,
130
226
  input: str,
@@ -155,10 +251,14 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
155
251
  },
156
252
  ]
157
253
 
158
- response = self.generate_provider_response(
159
- messages=request, **valid_litellm_params
160
- )
161
- return response.choices[0].message.content
254
+ with base_model.get_provider_response(
255
+ model_provider=self,
256
+ messages=request,
257
+ **valid_litellm_params,
258
+ ) as response:
259
+ choice = _first_choice(response)
260
+ content = _extract_message_content(choice)
261
+ return base_model.check_model_output_string(content)
162
262
 
163
263
  def generate_provider_response(
164
264
  self,
@@ -166,6 +266,8 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
166
266
  **kwargs: Any,
167
267
  ) -> "ModelResponse":
168
268
  """
269
+ Do not use this method directly. It is intended to be used within `base_model.get_provider_response()` method.
270
+
169
271
  Generate a provider-specific response. Can be used to interface with
170
272
  the underlying model provider (e.g., OpenAI, Anthropic) and get raw output.
171
273
  You can find all possible input parameters here: https://docs.litellm.ai/docs/completion/input
@@ -179,21 +281,29 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
179
281
  Any: The response from the model provider, which can be of any type depending on the use case and LLM.
180
282
  """
181
283
 
284
+ # Extract retry configuration before filtering params
285
+ retries = kwargs.pop("__opik_retries", 3)
286
+ try:
287
+ max_attempts = max(1, int(retries))
288
+ except (TypeError, ValueError):
289
+ max_attempts = 1
290
+
182
291
  # we need to pop messages first, and after we will check the rest params
183
292
  valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
184
293
  all_kwargs = {**self._completion_kwargs, **valid_litellm_params}
185
294
 
186
- if (
187
- opik_monitor.enabled_in_config()
188
- and not opik_monitor.opik_is_misconfigured()
189
- ):
190
- all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)
191
-
192
- response = self._engine.completion(
193
- model=self.model_name, messages=messages, **all_kwargs
295
+ retrying = tenacity.Retrying(
296
+ reraise=True,
297
+ stop=tenacity.stop_after_attempt(max_attempts),
298
+ wait=tenacity.wait_exponential(multiplier=0.5, min=0.5, max=8.0),
194
299
  )
195
300
 
196
- return response
301
+ return retrying(
302
+ self._litellm_completion,
303
+ model=self.model_name,
304
+ messages=messages,
305
+ **all_kwargs,
306
+ )
197
307
 
198
308
  async def agenerate_string(
199
309
  self,
@@ -225,15 +335,19 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
225
335
  },
226
336
  ]
227
337
 
228
- response = await self.agenerate_provider_response(
229
- messages=request, **valid_litellm_params
230
- )
231
- return response.choices[0].message.content
338
+ async with base_model.aget_provider_response(
339
+ model_provider=self, messages=request, **valid_litellm_params
340
+ ) as response:
341
+ choice = _first_choice(response)
342
+ content = _extract_message_content(choice)
343
+ return base_model.check_model_output_string(content)
232
344
 
233
345
  async def agenerate_provider_response(
234
346
  self, messages: List[Dict[str, Any]], **kwargs: Any
235
347
  ) -> "ModelResponse":
236
348
  """
349
+ Do not use this method directly. It is intended to be used within `base_model.aget_provider_response()` method.
350
+
237
351
  Generate a provider-specific response. Can be used to interface with
238
352
  the underlying model provider (e.g., OpenAI, Anthropic) and get raw output. Async version.
239
353
  You can find all possible input parameters here: https://docs.litellm.ai/docs/completion/input
@@ -247,14 +361,27 @@ class LiteLLMChatModel(base_model.OpikBaseModel):
247
361
  Any: The response from the model provider, which can be of any type depending on the use case and LLM.
248
362
  """
249
363
 
364
+ retries = kwargs.pop("__opik_retries", 3)
365
+ try:
366
+ max_attempts = max(1, int(retries))
367
+ except (TypeError, ValueError):
368
+ max_attempts = 1
369
+
250
370
  valid_litellm_params = self._remove_unnecessary_not_supported_params(kwargs)
251
371
  all_kwargs = {**self._completion_kwargs, **valid_litellm_params}
252
372
 
253
- if opik_monitor.enabled_in_config():
254
- all_kwargs = opik_monitor.try_add_opik_monitoring_to_params(all_kwargs)
255
-
256
- response = await self._engine.acompletion(
257
- model=self.model_name, messages=messages, **all_kwargs
373
+ retrying = tenacity.AsyncRetrying(
374
+ reraise=True,
375
+ stop=tenacity.stop_after_attempt(max_attempts),
376
+ wait=tenacity.wait_exponential(multiplier=0.5, min=0.5, max=8.0),
258
377
  )
259
378
 
260
- return response
379
+ async for attempt in retrying:
380
+ with attempt:
381
+ return await self._litellm_acompletion(
382
+ model=self.model_name, messages=messages, **all_kwargs
383
+ )
384
+
385
+ raise exceptions.BaseLLMError(
386
+ "Async LLM completion failed without raising an exception"
387
+ ) # pragma: no cover
@@ -0,0 +1,125 @@
1
+ """Utility helpers shared across LiteLLM models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Callable, Dict, Set
6
+
7
+
8
def normalise_choice(choice: Any) -> Dict[str, Any]:
    """Return a dict view of a LiteLLM choice, whatever its concrete type.

    Resolution order:
      1. A dict is returned as-is (same object, not a copy).
      2. An object with a callable ``model_dump`` returns the result of calling it;
         a ``TypeError`` from that call falls through to step 3.
      3. Otherwise a new dict is built from the ``message`` and ``logprobs``
         attributes, including only those that exist and are not ``None``.
    """
    if isinstance(choice, dict):
        return choice

    dump = getattr(choice, "model_dump", None)
    if callable(dump):
        try:
            return dump()
        except TypeError:
            # Fall back to plain attribute access below.
            pass

    view: Dict[str, Any] = {}
    for field in ("message", "logprobs"):
        attr = getattr(choice, field, None)
        if attr is not None:
            view[field] = attr
    return view
31
+
32
+
33
def apply_model_specific_filters(
    model_name: str,
    params: Dict[str, Any],
    already_warned: Set[str],
    warn: Callable[[str, Any], None],
) -> None:
    """Adjust/drop params for specific model families before calling LiteLLM.

    Dispatch is by model-name prefix; at most one family filter runs:
      - names starting with ``gpt-5`` use the GPT-5 filter;
      - names starting with ``dashscope/`` use the DashScope Qwen filter;
      - any other name leaves ``params`` untouched.

    Args:
        model_name: Model identifier used to pick the filter.
        params: Completion parameters; modified in place by the chosen filter.
        already_warned: Parameter names that were already reported.
        warn: Callback invoked as ``warn(param, value)`` by the chosen filter.
    """
    if model_name.startswith("gpt-5"):
        family_filter = _apply_gpt5_filters
    elif model_name.startswith("dashscope/"):
        family_filter = _apply_qwen_dashscope_filters
    else:
        return

    family_filter(params, already_warned, warn)
52
+
53
+
54
def _apply_gpt5_filters(
    params: Dict[str, Any],
    already_warned: Set[str],
    warn: Callable[[str, Any], None],
) -> None:
    """Drop the parameters that GPT-5 models reject.

    ``temperature`` is dropped when it cannot be converted to a float or differs
    from 1.0 by more than 1e-6. ``logprobs`` and ``top_logprobs`` are dropped
    whenever they are present. Removal and warning are delegated to
    ``_drop_unsupported_params_with_warning``.
    """
    rejected: list[tuple[str, Any]] = []

    if "temperature" in params:
        temperature = params["temperature"]
        try:
            off_target = abs(float(temperature) - 1.0) > 1e-6
        except (TypeError, ValueError):
            # A value that is not numeric at all is treated as unsupported.
            off_target = True
        if off_target:
            rejected.append(("temperature", temperature))

    rejected.extend(
        (name, params[name])
        for name in ("logprobs", "top_logprobs")
        if name in params
    )

    _drop_unsupported_params_with_warning(params, rejected, already_warned, warn)
87
+
88
+
89
def _apply_qwen_dashscope_filters(
    params: Dict[str, Any],
    already_warned: Set[str],
    warn: Callable[[str, Any], None],
) -> None:
    """Drop ``logprobs`` and ``top_logprobs`` for DashScope-hosted models.

    Whichever of the two keys is present in ``params`` is handed, with its
    value, to ``_drop_unsupported_params_with_warning``.
    """
    rejected: list[tuple[str, Any]] = [
        (name, params[name])
        for name in ("logprobs", "top_logprobs")
        if name in params
    ]
    _drop_unsupported_params_with_warning(params, rejected, already_warned, warn)
111
+
112
+
113
+ def _drop_unsupported_params_with_warning(
114
+ params: Dict[str, Any],
115
+ unsupported_params: list[tuple[str, Any]],
116
+ already_warned: Set[str],
117
+ warn: Callable[[str, Any], None],
118
+ ) -> None:
119
+ """Remove unsupported params and emit warnings once per param name."""
120
+ for param, value in unsupported_params:
121
+ params.pop(param, None)
122
+ if param in already_warned:
123
+ continue
124
+ warn(param, value)
125
+ already_warned.add(param)
@@ -3,8 +3,14 @@ import warnings
3
3
  from typing import Any
4
4
 
5
5
 
6
+ _FILTERS_INSTALLED = False
7
+
8
+
6
9
  def add_warning_filters() -> None:
7
- # TODO: This should be removed when we have fixed the error messages in the LiteLLM library
10
+ global _FILTERS_INSTALLED
11
+ if _FILTERS_INSTALLED:
12
+ return
13
+
8
14
  warnings.filterwarnings("ignore", message="coroutine '.*' was never awaited")
9
15
  warnings.filterwarnings(
10
16
  "ignore",
@@ -19,9 +25,15 @@ def add_warning_filters() -> None:
19
25
  )
20
26
 
21
27
  # Add filter to multiple possible loggers
22
- filter = NoEventLoopFilterLiteLLM()
23
- logging.getLogger("LiteLLM").addFilter(filter)
28
+ lite_logger = logging.getLogger("LiteLLM")
29
+ has_filter = any(
30
+ isinstance(f, NoEventLoopFilterLiteLLM) for f in lite_logger.filters
31
+ )
32
+ if not has_filter:
33
+ lite_logger.addFilter(NoEventLoopFilterLiteLLM())
24
34
 
25
35
  import litellm
26
36
 
27
- litellm.suppress_debug_info = True # to disable colorized prints with links to litellm whenever an LLM provider raises an error
37
+ litellm.suppress_debug_info = True
38
+
39
+ _FILTERS_INSTALLED = True
@@ -0,0 +1,187 @@
1
+ """
2
+ Capability registry for evaluation models.
3
+
4
+ The registry is designed to grow beyond vision support (e.g. audio in the future).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Callable, Dict, Iterable, Optional, Set
10
+
11
+ CapabilityDetector = Callable[[str], bool]
12
+
13
+
14
+ VISION_MODEL_PREFIXES: Set[str] = {
15
+ # OpenAI
16
+ "gpt-4-vision",
17
+ "gpt-4o",
18
+ "gpt-4o-mini",
19
+ "gpt-4-turbo",
20
+ "chatgpt-4o-latest",
21
+ "gpt-5-mini",
22
+ "gpt-4.1",
23
+ "gpt-4.1-mini",
24
+ "gpt-4.1-nano",
25
+ "gpt-4.1-preview",
26
+ # Anthropic
27
+ "claude-3",
28
+ "claude-3-5",
29
+ # Google
30
+ "gemini-1.5-pro",
31
+ "gemini-1.5-flash",
32
+ "gemini-pro-vision",
33
+ "gemini-2.0-flash",
34
+ # Meta
35
+ "llama-3.2-11b-vision",
36
+ "llama-3.2-90b-vision",
37
+ # Mistral
38
+ "pixtral",
39
+ # Misc
40
+ "qwen-vl",
41
+ "qwen2-vl",
42
+ "phi-3-vision",
43
+ "phi-3.5-vision",
44
+ "llava",
45
+ "cogvlm",
46
+ "yi-vl",
47
+ }
48
+ VISION_MODEL_PREFIXES = {prefix.lower() for prefix in VISION_MODEL_PREFIXES}
49
+ VISION_MODEL_SUFFIXES: Set[str] = {"-vision", "-vl"}
50
+
51
+
52
+ def _strip_provider_prefix(model_name: str) -> str:
53
+ if "/" not in model_name:
54
+ return model_name
55
+ _, suffix = model_name.split("/", 1)
56
+ return suffix
57
+
58
+
59
+ def _litellm_supports_vision(model_name: str) -> bool:
60
+ try:
61
+ import litellm # type: ignore
62
+
63
+ return litellm.supports_vision(model=model_name)
64
+ except Exception:
65
+ return False
66
+
67
+
68
def vision_capability_detector(model_name: str) -> bool:
    """Return True when ``model_name`` looks like a vision-capable model.

    Both the full name and the name without its provider prefix are checked.
    A candidate matches when LiteLLM reports vision support for it, or when its
    lower-cased form starts with an entry of ``VISION_MODEL_PREFIXES`` or ends
    with an entry of ``VISION_MODEL_SUFFIXES``.
    """
    # Read the module-level sets on every call so later additions are honoured.
    prefixes = tuple(VISION_MODEL_PREFIXES)
    suffixes = tuple(VISION_MODEL_SUFFIXES)

    for name in {model_name, _strip_provider_prefix(model_name)}:
        if _litellm_supports_vision(name):
            return True
        lowered = name.lower()
        if lowered.startswith(prefixes) or lowered.endswith(suffixes):
            return True
    return False
80
+
81
+
82
def video_capability_detector(model_name: str) -> bool:
    """
    Heuristically decide whether a model accepts video inputs.

    Detection is based on naming conventions: a model matches when its name
    (with or without the provider prefix, compared case-insensitively) contains
    ``video``, or contains both ``qwen`` and ``vl``. When neither heuristic
    matches, the answer is delegated to ``vision_capability_detector``.
    """
    for name in {model_name, _strip_provider_prefix(model_name)}:
        lowered = name.lower()
        if "video" in lowered or ("qwen" in lowered and "vl" in lowered):
            return True

    # TODO(opik): litellm/model metadata still treats video + image inputs the same.
    # Fall back to the vision heuristic so we can keep this dedicated capability
    # and tighten detection once providers expose richer metadata.
    return vision_capability_detector(model_name)
103
+
104
+
105
class ModelCapabilitiesRegistry:
    """
    Registry that maps capability names to detector callables.

    A detector takes a model name and returns whether the model has the
    capability. Queries for unknown capabilities, empty model names, or
    detectors that raise all resolve to ``False``.
    """

    def __init__(self) -> None:
        # capability name -> detector callable
        self._capability_detectors: Dict[str, CapabilityDetector] = {}

    def register_capability_detector(
        self, capability: str, detector: CapabilityDetector
    ) -> None:
        """
        Store ``detector`` under ``capability``, replacing any previous entry.
        """
        self._capability_detectors[capability] = detector

    def supports(self, capability: str, model_name: Optional[str]) -> bool:
        """
        Return True when the supplied model name supports the requested capability.

        Returns False for a missing/empty model name, for a capability with no
        registered detector, and when the detector raises.
        """
        detector = self._capability_detectors.get(capability) if model_name else None
        if detector is None:
            return False

        try:
            return detector(model_name)
        except Exception:
            # Detection is best-effort: a failing detector means "not supported".
            return False

    def supports_vision(self, model_name: Optional[str]) -> bool:
        """
        Shorthand for ``supports("vision", model_name)``.
        """
        return self.supports("vision", model_name)

    def supports_video(self, model_name: Optional[str]) -> bool:
        """
        Shorthand for ``supports("video", model_name)``.
        """
        return self.supports("video", model_name)

    def add_vision_model(self, model_name: str) -> None:
        """
        Add a model to the module-level ``VISION_MODEL_PREFIXES`` set.

        The name is stored lower-cased and without its provider prefix.
        """
        # Extend the module-level registry used by vision_capability_detector
        prefix = self._strip_provider_prefix(model_name).lower()
        VISION_MODEL_PREFIXES.add(prefix)

    def add_vision_models(self, model_names: Iterable[str]) -> None:
        """
        Call ``add_vision_model`` for every name in ``model_names``.
        """
        for name in model_names:
            self.add_vision_model(name)

    def _supports_vision(self, model_name: str) -> bool:
        # Direct pass-through to the module-level vision detector.
        return vision_capability_detector(model_name)

    @staticmethod
    def _strip_provider_prefix(model_name: str) -> str:
        # Delegates to the module-level helper of the same name.
        return _strip_provider_prefix(model_name)

    @staticmethod
    def _litellm_supports_vision(model_name: str) -> bool:
        # Delegates to the module-level helper of the same name.
        return _litellm_supports_vision(model_name)
167
+
168
+
169
+ MODEL_CAPABILITIES_REGISTRY = ModelCapabilitiesRegistry()
170
+ MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
171
+ "vision", vision_capability_detector
172
+ )
173
+ MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
174
+ "video", video_capability_detector
175
+ )
176
+
177
+ # Backwards compatibility shim for previous API which exposed a class with classmethods.
178
+ ModelCapabilities = MODEL_CAPABILITIES_REGISTRY
179
+
180
+
181
+ __all__ = [
182
+ "ModelCapabilitiesRegistry",
183
+ "MODEL_CAPABILITIES_REGISTRY",
184
+ "ModelCapabilities",
185
+ "vision_capability_detector",
186
+ "video_capability_detector",
187
+ ]
@@ -1,13 +1,35 @@
1
- from typing import Optional, Any
1
+ from typing import Optional, Any, Dict
2
2
 
3
3
  from .litellm import litellm_chat_model
4
4
  from . import base_model
5
5
 
6
- DEFAULT_GPT_MODEL_NAME = "gpt-4o"
6
+ DEFAULT_GPT_MODEL_NAME = "gpt-5-nano"
7
+
8
+ _MODEL_CACHE: Dict[Any, base_model.OpikBaseModel] = {}
9
+
10
+
11
+ def _freeze(value: Any) -> Any:
12
+ if isinstance(value, dict):
13
+ return frozenset((k, _freeze(v)) for k, v in value.items())
14
+ if isinstance(value, (list, tuple)):
15
+ return tuple(_freeze(v) for v in value)
16
+ if isinstance(value, set):
17
+ return frozenset(_freeze(v) for v in value)
18
+ return value
19
+
20
+
21
def _make_cache_key(model_name: str, model_kwargs: Dict[str, Any]) -> Any:
    """Build the cache key for a model name plus its keyword arguments.

    The key is ``(model_name, frozen_kwargs)``, where ``frozen_kwargs`` is a
    frozenset of ``(name, frozen_value)`` pairs, so keyword order does not
    matter.
    """
    # Freezing the kwargs dict yields exactly the frozenset of (key, frozen value) pairs.
    return (model_name, _freeze(model_kwargs))
7
24
 
8
25
 
9
26
def get(model_name: Optional[str], **model_kwargs: Any) -> base_model.OpikBaseModel:
    """Return a LiteLLM chat model for ``model_name``, reusing cached instances.

    Models are cached in ``_MODEL_CACHE`` under a key built from the model name
    and keyword arguments, so repeated calls with equal arguments return the
    same instance.

    Args:
        model_name: Model identifier; ``None`` selects ``DEFAULT_GPT_MODEL_NAME``.
        **model_kwargs: Extra arguments forwarded to ``LiteLLMChatModel``.

    Returns:
        A cached or newly created model instance. When the arguments cannot
        form a hashable cache key, a new uncached instance is returned.
    """
    if model_name is None:
        model_name = DEFAULT_GPT_MODEL_NAME

    try:
        cache_key = _make_cache_key(model_name, model_kwargs)
        # Force hashing now so an unhashable key is detected here, not at lookup.
        hash(cache_key)
    except TypeError:
        # A keyword value is unhashable even after freezing (e.g. an instance
        # whose class defines __eq__ without __hash__). Caching is only an
        # optimisation, so build an uncached model instead of failing.
        return litellm_chat_model.LiteLLMChatModel(
            model_name=model_name, **model_kwargs
        )

    if cache_key not in _MODEL_CACHE:
        _MODEL_CACHE[cache_key] = litellm_chat_model.LiteLLMChatModel(
            model_name=model_name, **model_kwargs
        )
    return _MODEL_CACHE[cache_key]