opik 1.8.39__py3-none-any.whl → 1.9.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592):
  1. opik/__init__.py +19 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/attachment/attachment_context.py +36 -0
  9. opik/api_objects/attachment/attachments_extractor.py +153 -0
  10. opik/api_objects/attachment/client.py +1 -0
  11. opik/api_objects/attachment/converters.py +2 -0
  12. opik/api_objects/attachment/decoder.py +18 -0
  13. opik/api_objects/attachment/decoder_base64.py +83 -0
  14. opik/api_objects/attachment/decoder_helpers.py +137 -0
  15. opik/api_objects/data_helpers.py +79 -0
  16. opik/api_objects/dataset/dataset.py +64 -4
  17. opik/api_objects/dataset/rest_operations.py +11 -2
  18. opik/api_objects/experiment/experiment.py +57 -57
  19. opik/api_objects/experiment/experiment_item.py +2 -1
  20. opik/api_objects/experiment/experiments_client.py +64 -0
  21. opik/api_objects/experiment/helpers.py +35 -11
  22. opik/api_objects/experiment/rest_operations.py +65 -5
  23. opik/api_objects/helpers.py +8 -5
  24. opik/api_objects/local_recording.py +81 -0
  25. opik/api_objects/opik_client.py +600 -108
  26. opik/api_objects/opik_query_language.py +39 -5
  27. opik/api_objects/prompt/__init__.py +12 -2
  28. opik/api_objects/prompt/base_prompt.py +69 -0
  29. opik/api_objects/prompt/base_prompt_template.py +29 -0
  30. opik/api_objects/prompt/chat/__init__.py +1 -0
  31. opik/api_objects/prompt/chat/chat_prompt.py +210 -0
  32. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  33. opik/api_objects/prompt/chat/content_renderer_registry.py +203 -0
  34. opik/api_objects/prompt/client.py +189 -47
  35. opik/api_objects/prompt/text/__init__.py +1 -0
  36. opik/api_objects/prompt/text/prompt.py +174 -0
  37. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  38. opik/api_objects/prompt/types.py +23 -0
  39. opik/api_objects/search_helpers.py +89 -0
  40. opik/api_objects/span/span_data.py +35 -25
  41. opik/api_objects/threads/threads_client.py +39 -5
  42. opik/api_objects/trace/trace_client.py +52 -2
  43. opik/api_objects/trace/trace_data.py +15 -24
  44. opik/api_objects/validation_helpers.py +3 -3
  45. opik/cli/__init__.py +5 -0
  46. opik/cli/__main__.py +6 -0
  47. opik/cli/configure.py +66 -0
  48. opik/cli/exports/__init__.py +131 -0
  49. opik/cli/exports/dataset.py +278 -0
  50. opik/cli/exports/experiment.py +784 -0
  51. opik/cli/exports/project.py +685 -0
  52. opik/cli/exports/prompt.py +578 -0
  53. opik/cli/exports/utils.py +406 -0
  54. opik/cli/harbor.py +39 -0
  55. opik/cli/healthcheck.py +21 -0
  56. opik/cli/imports/__init__.py +439 -0
  57. opik/cli/imports/dataset.py +143 -0
  58. opik/cli/imports/experiment.py +1192 -0
  59. opik/cli/imports/project.py +262 -0
  60. opik/cli/imports/prompt.py +177 -0
  61. opik/cli/imports/utils.py +280 -0
  62. opik/cli/main.py +49 -0
  63. opik/cli/proxy.py +93 -0
  64. opik/cli/usage_report/__init__.py +16 -0
  65. opik/cli/usage_report/charts.py +783 -0
  66. opik/cli/usage_report/cli.py +274 -0
  67. opik/cli/usage_report/constants.py +9 -0
  68. opik/cli/usage_report/extraction.py +749 -0
  69. opik/cli/usage_report/pdf.py +244 -0
  70. opik/cli/usage_report/statistics.py +78 -0
  71. opik/cli/usage_report/utils.py +235 -0
  72. opik/config.py +13 -7
  73. opik/configurator/configure.py +17 -0
  74. opik/datetime_helpers.py +12 -0
  75. opik/decorator/arguments_helpers.py +9 -1
  76. opik/decorator/base_track_decorator.py +205 -133
  77. opik/decorator/context_manager/span_context_manager.py +123 -0
  78. opik/decorator/context_manager/trace_context_manager.py +84 -0
  79. opik/decorator/opik_args/__init__.py +13 -0
  80. opik/decorator/opik_args/api_classes.py +71 -0
  81. opik/decorator/opik_args/helpers.py +120 -0
  82. opik/decorator/span_creation_handler.py +25 -6
  83. opik/dict_utils.py +3 -3
  84. opik/evaluation/__init__.py +13 -2
  85. opik/evaluation/engine/engine.py +272 -75
  86. opik/evaluation/engine/evaluation_tasks_executor.py +6 -3
  87. opik/evaluation/engine/helpers.py +31 -6
  88. opik/evaluation/engine/metrics_evaluator.py +237 -0
  89. opik/evaluation/evaluation_result.py +168 -2
  90. opik/evaluation/evaluator.py +533 -62
  91. opik/evaluation/metrics/__init__.py +103 -4
  92. opik/evaluation/metrics/aggregated_metric.py +35 -6
  93. opik/evaluation/metrics/base_metric.py +1 -1
  94. opik/evaluation/metrics/conversation/__init__.py +48 -0
  95. opik/evaluation/metrics/conversation/conversation_thread_metric.py +56 -2
  96. opik/evaluation/metrics/conversation/g_eval_wrappers.py +19 -0
  97. opik/evaluation/metrics/conversation/helpers.py +14 -15
  98. opik/evaluation/metrics/conversation/heuristics/__init__.py +14 -0
  99. opik/evaluation/metrics/conversation/heuristics/degeneration/__init__.py +3 -0
  100. opik/evaluation/metrics/conversation/heuristics/degeneration/metric.py +189 -0
  101. opik/evaluation/metrics/conversation/heuristics/degeneration/phrases.py +12 -0
  102. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/__init__.py +3 -0
  103. opik/evaluation/metrics/conversation/heuristics/knowledge_retention/metric.py +172 -0
  104. opik/evaluation/metrics/conversation/llm_judges/__init__.py +32 -0
  105. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/metric.py +22 -17
  106. opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/templates.py +1 -1
  107. opik/evaluation/metrics/conversation/llm_judges/g_eval_wrappers.py +442 -0
  108. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/metric.py +13 -7
  109. opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/templates.py +1 -1
  110. opik/evaluation/metrics/conversation/llm_judges/user_frustration/__init__.py +0 -0
  111. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/metric.py +21 -14
  112. opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/templates.py +1 -1
  113. opik/evaluation/metrics/conversation/types.py +4 -5
  114. opik/evaluation/metrics/conversation_types.py +9 -0
  115. opik/evaluation/metrics/heuristics/bertscore.py +107 -0
  116. opik/evaluation/metrics/heuristics/bleu.py +35 -15
  117. opik/evaluation/metrics/heuristics/chrf.py +127 -0
  118. opik/evaluation/metrics/heuristics/contains.py +47 -11
  119. opik/evaluation/metrics/heuristics/distribution_metrics.py +331 -0
  120. opik/evaluation/metrics/heuristics/gleu.py +113 -0
  121. opik/evaluation/metrics/heuristics/language_adherence.py +123 -0
  122. opik/evaluation/metrics/heuristics/meteor.py +119 -0
  123. opik/evaluation/metrics/heuristics/prompt_injection.py +150 -0
  124. opik/evaluation/metrics/heuristics/readability.py +129 -0
  125. opik/evaluation/metrics/heuristics/rouge.py +26 -9
  126. opik/evaluation/metrics/heuristics/spearman.py +88 -0
  127. opik/evaluation/metrics/heuristics/tone.py +155 -0
  128. opik/evaluation/metrics/heuristics/vader_sentiment.py +77 -0
  129. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +20 -5
  130. opik/evaluation/metrics/llm_judges/context_precision/metric.py +20 -6
  131. opik/evaluation/metrics/llm_judges/context_recall/metric.py +20 -6
  132. opik/evaluation/metrics/llm_judges/g_eval/__init__.py +5 -0
  133. opik/evaluation/metrics/llm_judges/g_eval/metric.py +219 -68
  134. opik/evaluation/metrics/llm_judges/g_eval/parser.py +102 -52
  135. opik/evaluation/metrics/llm_judges/g_eval/presets.py +209 -0
  136. opik/evaluation/metrics/llm_judges/g_eval_presets/__init__.py +36 -0
  137. opik/evaluation/metrics/llm_judges/g_eval_presets/agent_assessment.py +77 -0
  138. opik/evaluation/metrics/llm_judges/g_eval_presets/bias_classifier.py +181 -0
  139. opik/evaluation/metrics/llm_judges/g_eval_presets/compliance_risk.py +41 -0
  140. opik/evaluation/metrics/llm_judges/g_eval_presets/prompt_uncertainty.py +41 -0
  141. opik/evaluation/metrics/llm_judges/g_eval_presets/qa_suite.py +146 -0
  142. opik/evaluation/metrics/llm_judges/hallucination/metric.py +16 -3
  143. opik/evaluation/metrics/llm_judges/llm_juries/__init__.py +3 -0
  144. opik/evaluation/metrics/llm_judges/llm_juries/metric.py +76 -0
  145. opik/evaluation/metrics/llm_judges/moderation/metric.py +16 -4
  146. opik/evaluation/metrics/llm_judges/structure_output_compliance/__init__.py +0 -0
  147. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +144 -0
  148. opik/evaluation/metrics/llm_judges/structure_output_compliance/parser.py +79 -0
  149. opik/evaluation/metrics/llm_judges/structure_output_compliance/schema.py +15 -0
  150. opik/evaluation/metrics/llm_judges/structure_output_compliance/template.py +50 -0
  151. opik/evaluation/metrics/llm_judges/syc_eval/__init__.py +0 -0
  152. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +252 -0
  153. opik/evaluation/metrics/llm_judges/syc_eval/parser.py +82 -0
  154. opik/evaluation/metrics/llm_judges/syc_eval/template.py +155 -0
  155. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +20 -5
  156. opik/evaluation/metrics/llm_judges/usefulness/metric.py +16 -4
  157. opik/evaluation/metrics/ragas_metric.py +43 -23
  158. opik/evaluation/models/__init__.py +8 -0
  159. opik/evaluation/models/base_model.py +107 -1
  160. opik/evaluation/models/langchain/langchain_chat_model.py +15 -7
  161. opik/evaluation/models/langchain/message_converters.py +97 -15
  162. opik/evaluation/models/litellm/litellm_chat_model.py +156 -29
  163. opik/evaluation/models/litellm/util.py +125 -0
  164. opik/evaluation/models/litellm/warning_filters.py +16 -4
  165. opik/evaluation/models/model_capabilities.py +187 -0
  166. opik/evaluation/models/models_factory.py +25 -3
  167. opik/evaluation/preprocessing.py +92 -0
  168. opik/evaluation/report.py +70 -12
  169. opik/evaluation/rest_operations.py +49 -45
  170. opik/evaluation/samplers/__init__.py +4 -0
  171. opik/evaluation/samplers/base_dataset_sampler.py +40 -0
  172. opik/evaluation/samplers/random_dataset_sampler.py +48 -0
  173. opik/evaluation/score_statistics.py +66 -0
  174. opik/evaluation/scorers/__init__.py +4 -0
  175. opik/evaluation/scorers/scorer_function.py +55 -0
  176. opik/evaluation/scorers/scorer_wrapper_metric.py +130 -0
  177. opik/evaluation/test_case.py +3 -2
  178. opik/evaluation/test_result.py +1 -0
  179. opik/evaluation/threads/evaluator.py +31 -3
  180. opik/evaluation/threads/helpers.py +3 -2
  181. opik/evaluation/types.py +9 -1
  182. opik/exceptions.py +33 -0
  183. opik/file_upload/file_uploader.py +13 -0
  184. opik/file_upload/upload_options.py +2 -0
  185. opik/hooks/__init__.py +23 -0
  186. opik/hooks/anonymizer_hook.py +36 -0
  187. opik/hooks/httpx_client_hook.py +112 -0
  188. opik/httpx_client.py +12 -9
  189. opik/id_helpers.py +18 -0
  190. opik/integrations/adk/graph/subgraph_edges_builders.py +1 -2
  191. opik/integrations/adk/helpers.py +16 -7
  192. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  193. opik/integrations/adk/opik_tracer.py +14 -1
  194. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  195. opik/integrations/adk/recursive_callback_injector.py +4 -7
  196. opik/integrations/bedrock/converse/__init__.py +0 -0
  197. opik/integrations/bedrock/converse/chunks_aggregator.py +188 -0
  198. opik/integrations/bedrock/{converse_decorator.py → converse/converse_decorator.py} +4 -3
  199. opik/integrations/bedrock/invoke_agent_decorator.py +5 -4
  200. opik/integrations/bedrock/invoke_model/__init__.py +0 -0
  201. opik/integrations/bedrock/invoke_model/chunks_aggregator/__init__.py +78 -0
  202. opik/integrations/bedrock/invoke_model/chunks_aggregator/api.py +45 -0
  203. opik/integrations/bedrock/invoke_model/chunks_aggregator/base.py +23 -0
  204. opik/integrations/bedrock/invoke_model/chunks_aggregator/claude.py +121 -0
  205. opik/integrations/bedrock/invoke_model/chunks_aggregator/format_detector.py +107 -0
  206. opik/integrations/bedrock/invoke_model/chunks_aggregator/llama.py +108 -0
  207. opik/integrations/bedrock/invoke_model/chunks_aggregator/mistral.py +118 -0
  208. opik/integrations/bedrock/invoke_model/chunks_aggregator/nova.py +99 -0
  209. opik/integrations/bedrock/invoke_model/invoke_model_decorator.py +178 -0
  210. opik/integrations/bedrock/invoke_model/response_types.py +34 -0
  211. opik/integrations/bedrock/invoke_model/stream_wrappers.py +122 -0
  212. opik/integrations/bedrock/invoke_model/usage_converters.py +87 -0
  213. opik/integrations/bedrock/invoke_model/usage_extraction.py +108 -0
  214. opik/integrations/bedrock/opik_tracker.py +42 -4
  215. opik/integrations/bedrock/types.py +19 -0
  216. opik/integrations/crewai/crewai_decorator.py +8 -51
  217. opik/integrations/crewai/opik_tracker.py +31 -10
  218. opik/integrations/crewai/patchers/__init__.py +5 -0
  219. opik/integrations/crewai/patchers/flow.py +118 -0
  220. opik/integrations/crewai/patchers/litellm_completion.py +30 -0
  221. opik/integrations/crewai/patchers/llm_client.py +207 -0
  222. opik/integrations/dspy/callback.py +80 -17
  223. opik/integrations/dspy/parsers.py +168 -0
  224. opik/integrations/harbor/__init__.py +17 -0
  225. opik/integrations/harbor/experiment_service.py +269 -0
  226. opik/integrations/harbor/opik_tracker.py +528 -0
  227. opik/integrations/haystack/opik_connector.py +2 -2
  228. opik/integrations/haystack/opik_tracer.py +3 -7
  229. opik/integrations/langchain/__init__.py +3 -1
  230. opik/integrations/langchain/helpers.py +96 -0
  231. opik/integrations/langchain/langgraph_async_context_bridge.py +131 -0
  232. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  233. opik/integrations/langchain/opik_encoder_extension.py +1 -1
  234. opik/integrations/langchain/opik_tracer.py +474 -229
  235. opik/integrations/litellm/__init__.py +5 -0
  236. opik/integrations/litellm/completion_chunks_aggregator.py +115 -0
  237. opik/integrations/litellm/litellm_completion_decorator.py +242 -0
  238. opik/integrations/litellm/opik_tracker.py +43 -0
  239. opik/integrations/litellm/stream_patchers.py +151 -0
  240. opik/integrations/llama_index/callback.py +146 -107
  241. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  242. opik/integrations/openai/openai_chat_completions_decorator.py +2 -16
  243. opik/integrations/openai/opik_tracker.py +1 -1
  244. opik/integrations/sagemaker/auth.py +5 -1
  245. opik/llm_usage/google_usage.py +3 -1
  246. opik/llm_usage/opik_usage.py +7 -8
  247. opik/llm_usage/opik_usage_factory.py +4 -2
  248. opik/logging_messages.py +6 -0
  249. opik/message_processing/batching/base_batcher.py +14 -21
  250. opik/message_processing/batching/batch_manager.py +22 -10
  251. opik/message_processing/batching/batch_manager_constuctors.py +10 -0
  252. opik/message_processing/batching/batchers.py +59 -27
  253. opik/message_processing/batching/flushing_thread.py +0 -3
  254. opik/message_processing/emulation/__init__.py +0 -0
  255. opik/message_processing/emulation/emulator_message_processor.py +578 -0
  256. opik/message_processing/emulation/local_emulator_message_processor.py +140 -0
  257. opik/message_processing/emulation/models.py +162 -0
  258. opik/message_processing/encoder_helpers.py +79 -0
  259. opik/message_processing/messages.py +56 -1
  260. opik/message_processing/preprocessing/__init__.py +0 -0
  261. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  262. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  263. opik/message_processing/preprocessing/constants.py +1 -0
  264. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  265. opik/message_processing/preprocessing/preprocessor.py +36 -0
  266. opik/message_processing/processors/__init__.py +0 -0
  267. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  268. opik/message_processing/processors/message_processors.py +92 -0
  269. opik/message_processing/processors/message_processors_chain.py +96 -0
  270. opik/message_processing/{message_processors.py → processors/online_message_processor.py} +85 -29
  271. opik/message_processing/queue_consumer.py +9 -3
  272. opik/message_processing/streamer.py +71 -33
  273. opik/message_processing/streamer_constructors.py +43 -10
  274. opik/opik_context.py +16 -4
  275. opik/plugins/pytest/hooks.py +5 -3
  276. opik/rest_api/__init__.py +346 -15
  277. opik/rest_api/alerts/__init__.py +7 -0
  278. opik/rest_api/alerts/client.py +667 -0
  279. opik/rest_api/alerts/raw_client.py +1015 -0
  280. opik/rest_api/alerts/types/__init__.py +7 -0
  281. opik/rest_api/alerts/types/get_webhook_examples_request_alert_type.py +5 -0
  282. opik/rest_api/annotation_queues/__init__.py +4 -0
  283. opik/rest_api/annotation_queues/client.py +668 -0
  284. opik/rest_api/annotation_queues/raw_client.py +1019 -0
  285. opik/rest_api/automation_rule_evaluators/client.py +34 -2
  286. opik/rest_api/automation_rule_evaluators/raw_client.py +24 -0
  287. opik/rest_api/client.py +15 -0
  288. opik/rest_api/dashboards/__init__.py +4 -0
  289. opik/rest_api/dashboards/client.py +462 -0
  290. opik/rest_api/dashboards/raw_client.py +648 -0
  291. opik/rest_api/datasets/client.py +1310 -44
  292. opik/rest_api/datasets/raw_client.py +2269 -358
  293. opik/rest_api/experiments/__init__.py +2 -2
  294. opik/rest_api/experiments/client.py +191 -5
  295. opik/rest_api/experiments/raw_client.py +301 -7
  296. opik/rest_api/experiments/types/__init__.py +4 -1
  297. opik/rest_api/experiments/types/experiment_update_status.py +5 -0
  298. opik/rest_api/experiments/types/experiment_update_type.py +5 -0
  299. opik/rest_api/experiments/types/experiment_write_status.py +5 -0
  300. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  301. opik/rest_api/llm_provider_key/client.py +20 -0
  302. opik/rest_api/llm_provider_key/raw_client.py +20 -0
  303. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +1 -1
  304. opik/rest_api/manual_evaluation/__init__.py +4 -0
  305. opik/rest_api/manual_evaluation/client.py +347 -0
  306. opik/rest_api/manual_evaluation/raw_client.py +543 -0
  307. opik/rest_api/optimizations/client.py +145 -9
  308. opik/rest_api/optimizations/raw_client.py +237 -13
  309. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  310. opik/rest_api/prompts/__init__.py +2 -2
  311. opik/rest_api/prompts/client.py +227 -6
  312. opik/rest_api/prompts/raw_client.py +331 -2
  313. opik/rest_api/prompts/types/__init__.py +3 -1
  314. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  315. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  316. opik/rest_api/spans/__init__.py +0 -2
  317. opik/rest_api/spans/client.py +238 -76
  318. opik/rest_api/spans/raw_client.py +307 -95
  319. opik/rest_api/spans/types/__init__.py +0 -2
  320. opik/rest_api/traces/client.py +572 -161
  321. opik/rest_api/traces/raw_client.py +736 -229
  322. opik/rest_api/types/__init__.py +352 -17
  323. opik/rest_api/types/aggregation_data.py +1 -0
  324. opik/rest_api/types/alert.py +33 -0
  325. opik/rest_api/types/alert_alert_type.py +5 -0
  326. opik/rest_api/types/alert_page_public.py +24 -0
  327. opik/rest_api/types/alert_public.py +33 -0
  328. opik/rest_api/types/alert_public_alert_type.py +5 -0
  329. opik/rest_api/types/alert_trigger.py +27 -0
  330. opik/rest_api/types/alert_trigger_config.py +28 -0
  331. opik/rest_api/types/alert_trigger_config_public.py +28 -0
  332. opik/rest_api/types/alert_trigger_config_public_type.py +10 -0
  333. opik/rest_api/types/alert_trigger_config_type.py +10 -0
  334. opik/rest_api/types/alert_trigger_config_write.py +22 -0
  335. opik/rest_api/types/alert_trigger_config_write_type.py +10 -0
  336. opik/rest_api/types/alert_trigger_event_type.py +19 -0
  337. opik/rest_api/types/alert_trigger_public.py +27 -0
  338. opik/rest_api/types/alert_trigger_public_event_type.py +19 -0
  339. opik/rest_api/types/alert_trigger_write.py +23 -0
  340. opik/rest_api/types/alert_trigger_write_event_type.py +19 -0
  341. opik/rest_api/types/alert_write.py +28 -0
  342. opik/rest_api/types/alert_write_alert_type.py +5 -0
  343. opik/rest_api/types/annotation_queue.py +42 -0
  344. opik/rest_api/types/annotation_queue_batch.py +27 -0
  345. opik/rest_api/types/annotation_queue_item_ids.py +19 -0
  346. opik/rest_api/types/annotation_queue_page_public.py +28 -0
  347. opik/rest_api/types/annotation_queue_public.py +38 -0
  348. opik/rest_api/types/annotation_queue_public_scope.py +5 -0
  349. opik/rest_api/types/annotation_queue_reviewer.py +20 -0
  350. opik/rest_api/types/annotation_queue_reviewer_public.py +20 -0
  351. opik/rest_api/types/annotation_queue_scope.py +5 -0
  352. opik/rest_api/types/annotation_queue_write.py +31 -0
  353. opik/rest_api/types/annotation_queue_write_scope.py +5 -0
  354. opik/rest_api/types/audio_url.py +19 -0
  355. opik/rest_api/types/audio_url_public.py +19 -0
  356. opik/rest_api/types/audio_url_write.py +19 -0
  357. opik/rest_api/types/automation_rule_evaluator.py +62 -2
  358. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  359. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  360. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  361. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +155 -0
  362. opik/rest_api/types/automation_rule_evaluator_page_public.py +3 -2
  363. opik/rest_api/types/automation_rule_evaluator_public.py +57 -2
  364. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  365. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  366. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  367. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  368. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  369. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  370. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  371. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  372. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  373. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  374. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  375. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  376. opik/rest_api/types/automation_rule_evaluator_update.py +51 -1
  377. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  378. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  379. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  380. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  381. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  382. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  383. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  384. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  385. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  386. opik/rest_api/types/automation_rule_evaluator_write.py +51 -1
  387. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  388. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  389. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  390. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  391. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  392. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  393. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  394. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  395. opik/rest_api/types/dashboard_page_public.py +24 -0
  396. opik/rest_api/types/dashboard_public.py +30 -0
  397. opik/rest_api/types/dataset.py +4 -0
  398. opik/rest_api/types/dataset_expansion.py +42 -0
  399. opik/rest_api/types/dataset_expansion_response.py +39 -0
  400. opik/rest_api/types/dataset_item.py +2 -0
  401. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  402. opik/rest_api/types/dataset_item_compare.py +2 -0
  403. opik/rest_api/types/dataset_item_filter.py +27 -0
  404. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  405. opik/rest_api/types/dataset_item_page_compare.py +5 -0
  406. opik/rest_api/types/dataset_item_page_public.py +5 -0
  407. opik/rest_api/types/dataset_item_public.py +2 -0
  408. opik/rest_api/types/dataset_item_update.py +39 -0
  409. opik/rest_api/types/dataset_item_write.py +1 -0
  410. opik/rest_api/types/dataset_public.py +4 -0
  411. opik/rest_api/types/dataset_public_status.py +5 -0
  412. opik/rest_api/types/dataset_status.py +5 -0
  413. opik/rest_api/types/dataset_version_diff.py +22 -0
  414. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  415. opik/rest_api/types/dataset_version_page_public.py +23 -0
  416. opik/rest_api/types/dataset_version_public.py +59 -0
  417. opik/rest_api/types/dataset_version_summary.py +46 -0
  418. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  419. opik/rest_api/types/experiment.py +7 -2
  420. opik/rest_api/types/experiment_group_response.py +2 -0
  421. opik/rest_api/types/experiment_public.py +7 -2
  422. opik/rest_api/types/experiment_public_status.py +5 -0
  423. opik/rest_api/types/experiment_score.py +20 -0
  424. opik/rest_api/types/experiment_score_public.py +20 -0
  425. opik/rest_api/types/experiment_score_write.py +20 -0
  426. opik/rest_api/types/experiment_status.py +5 -0
  427. opik/rest_api/types/feedback.py +25 -1
  428. opik/rest_api/types/feedback_create.py +20 -1
  429. opik/rest_api/types/feedback_object_public.py +27 -1
  430. opik/rest_api/types/feedback_public.py +25 -1
  431. opik/rest_api/types/feedback_score_batch_item.py +2 -1
  432. opik/rest_api/types/feedback_score_batch_item_thread.py +2 -1
  433. opik/rest_api/types/feedback_score_public.py +4 -0
  434. opik/rest_api/types/feedback_update.py +20 -1
  435. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  436. opik/rest_api/types/group_detail.py +19 -0
  437. opik/rest_api/types/group_details.py +20 -0
  438. opik/rest_api/types/guardrail.py +1 -0
  439. opik/rest_api/types/guardrail_write.py +1 -0
  440. opik/rest_api/types/ids_holder.py +19 -0
  441. opik/rest_api/types/image_url.py +20 -0
  442. opik/rest_api/types/image_url_public.py +20 -0
  443. opik/rest_api/types/image_url_write.py +20 -0
  444. opik/rest_api/types/llm_as_judge_message.py +5 -1
  445. opik/rest_api/types/llm_as_judge_message_content.py +26 -0
  446. opik/rest_api/types/llm_as_judge_message_content_public.py +26 -0
  447. opik/rest_api/types/llm_as_judge_message_content_write.py +26 -0
  448. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  449. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  450. opik/rest_api/types/llm_as_judge_model_parameters.py +3 -0
  451. opik/rest_api/types/llm_as_judge_model_parameters_public.py +3 -0
  452. opik/rest_api/types/llm_as_judge_model_parameters_write.py +3 -0
  453. opik/rest_api/types/manual_evaluation_request.py +38 -0
  454. opik/rest_api/types/manual_evaluation_request_entity_type.py +5 -0
  455. opik/rest_api/types/manual_evaluation_response.py +27 -0
  456. opik/rest_api/types/optimization.py +4 -2
  457. opik/rest_api/types/optimization_public.py +4 -2
  458. opik/rest_api/types/optimization_public_status.py +3 -1
  459. opik/rest_api/types/optimization_status.py +3 -1
  460. opik/rest_api/types/optimization_studio_config.py +27 -0
  461. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  462. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  463. opik/rest_api/types/optimization_studio_log.py +22 -0
  464. opik/rest_api/types/optimization_write.py +4 -2
  465. opik/rest_api/types/optimization_write_status.py +3 -1
  466. opik/rest_api/types/project.py +1 -0
  467. opik/rest_api/types/project_detailed.py +1 -0
  468. opik/rest_api/types/project_reference.py +31 -0
  469. opik/rest_api/types/project_reference_public.py +31 -0
  470. opik/rest_api/types/project_stats_summary_item.py +1 -0
  471. opik/rest_api/types/prompt.py +6 -0
  472. opik/rest_api/types/prompt_detail.py +6 -0
  473. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  474. opik/rest_api/types/prompt_public.py +6 -0
  475. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  476. opik/rest_api/types/prompt_template_structure.py +5 -0
  477. opik/rest_api/types/prompt_version.py +3 -0
  478. opik/rest_api/types/prompt_version_detail.py +3 -0
  479. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  480. opik/rest_api/types/prompt_version_link.py +1 -0
  481. opik/rest_api/types/prompt_version_link_public.py +1 -0
  482. opik/rest_api/types/prompt_version_page_public.py +5 -0
  483. opik/rest_api/types/prompt_version_public.py +3 -0
  484. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  485. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  486. opik/rest_api/types/prompt_version_update.py +33 -0
  487. opik/rest_api/types/provider_api_key.py +9 -0
  488. opik/rest_api/types/provider_api_key_provider.py +1 -1
  489. opik/rest_api/types/provider_api_key_public.py +9 -0
  490. opik/rest_api/types/provider_api_key_public_provider.py +1 -1
  491. opik/rest_api/types/score_name.py +1 -0
  492. opik/rest_api/types/service_toggles_config.py +18 -0
  493. opik/rest_api/types/span.py +1 -2
  494. opik/rest_api/types/span_enrichment_options.py +31 -0
  495. opik/rest_api/types/span_experiment_item_bulk_write_view.py +1 -2
  496. opik/rest_api/types/span_filter.py +23 -0
  497. opik/rest_api/types/span_filter_operator.py +21 -0
  498. opik/rest_api/types/span_filter_write.py +23 -0
  499. opik/rest_api/types/span_filter_write_operator.py +21 -0
  500. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  501. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  502. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  503. opik/rest_api/types/span_public.py +1 -2
  504. opik/rest_api/types/span_update.py +46 -0
  505. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  506. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  507. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  508. opik/rest_api/types/span_write.py +1 -2
  509. opik/rest_api/types/studio_evaluation.py +20 -0
  510. opik/rest_api/types/studio_evaluation_public.py +20 -0
  511. opik/rest_api/types/studio_evaluation_write.py +20 -0
  512. opik/rest_api/types/studio_llm_model.py +21 -0
  513. opik/rest_api/types/studio_llm_model_public.py +21 -0
  514. opik/rest_api/types/studio_llm_model_write.py +21 -0
  515. opik/rest_api/types/studio_message.py +20 -0
  516. opik/rest_api/types/studio_message_public.py +20 -0
  517. opik/rest_api/types/studio_message_write.py +20 -0
  518. opik/rest_api/types/studio_metric.py +21 -0
  519. opik/rest_api/types/studio_metric_public.py +21 -0
  520. opik/rest_api/types/studio_metric_write.py +21 -0
  521. opik/rest_api/types/studio_optimizer.py +21 -0
  522. opik/rest_api/types/studio_optimizer_public.py +21 -0
  523. opik/rest_api/types/studio_optimizer_write.py +21 -0
  524. opik/rest_api/types/studio_prompt.py +20 -0
  525. opik/rest_api/types/studio_prompt_public.py +20 -0
  526. opik/rest_api/types/studio_prompt_write.py +20 -0
  527. opik/rest_api/types/trace.py +11 -2
  528. opik/rest_api/types/trace_enrichment_options.py +32 -0
  529. opik/rest_api/types/trace_experiment_item_bulk_write_view.py +1 -2
  530. opik/rest_api/types/trace_filter.py +23 -0
  531. opik/rest_api/types/trace_filter_operator.py +21 -0
  532. opik/rest_api/types/trace_filter_write.py +23 -0
  533. opik/rest_api/types/trace_filter_write_operator.py +21 -0
  534. opik/rest_api/types/trace_public.py +11 -2
  535. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  536. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  537. opik/rest_api/types/trace_thread_identifier.py +1 -0
  538. opik/rest_api/types/trace_update.py +39 -0
  539. opik/rest_api/types/trace_write.py +1 -2
  540. opik/rest_api/types/value_entry.py +2 -0
  541. opik/rest_api/types/value_entry_compare.py +2 -0
  542. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  543. opik/rest_api/types/value_entry_public.py +2 -0
  544. opik/rest_api/types/video_url.py +19 -0
  545. opik/rest_api/types/video_url_public.py +19 -0
  546. opik/rest_api/types/video_url_write.py +19 -0
  547. opik/rest_api/types/webhook.py +28 -0
  548. opik/rest_api/types/webhook_examples.py +19 -0
  549. opik/rest_api/types/webhook_public.py +28 -0
  550. opik/rest_api/types/webhook_test_result.py +23 -0
  551. opik/rest_api/types/webhook_test_result_status.py +5 -0
  552. opik/rest_api/types/webhook_write.py +23 -0
  553. opik/rest_api/types/welcome_wizard_tracking.py +22 -0
  554. opik/rest_api/types/workspace_configuration.py +5 -0
  555. opik/rest_api/welcome_wizard/__init__.py +4 -0
  556. opik/rest_api/welcome_wizard/client.py +195 -0
  557. opik/rest_api/welcome_wizard/raw_client.py +208 -0
  558. opik/rest_api/workspaces/client.py +14 -2
  559. opik/rest_api/workspaces/raw_client.py +10 -0
  560. opik/s3_httpx_client.py +14 -1
  561. opik/simulation/__init__.py +6 -0
  562. opik/simulation/simulated_user.py +99 -0
  563. opik/simulation/simulator.py +108 -0
  564. opik/synchronization.py +5 -6
  565. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  566. opik/types.py +36 -0
  567. opik/validation/chat_prompt_messages.py +241 -0
  568. opik/validation/feedback_score.py +3 -3
  569. opik/validation/validator.py +28 -0
  570. opik-1.9.71.dist-info/METADATA +370 -0
  571. opik-1.9.71.dist-info/RECORD +1110 -0
  572. opik/api_objects/prompt/prompt.py +0 -112
  573. opik/cli.py +0 -193
  574. opik/hooks.py +0 -13
  575. opik/integrations/bedrock/chunks_aggregator.py +0 -55
  576. opik/integrations/bedrock/helpers.py +0 -8
  577. opik/rest_api/types/automation_rule_evaluator_object_public.py +0 -100
  578. opik/rest_api/types/json_node_experiment_item_bulk_write_view.py +0 -5
  579. opik-1.8.39.dist-info/METADATA +0 -339
  580. opik-1.8.39.dist-info/RECORD +0 -790
  581. /opik/{evaluation/metrics/conversation/conversational_coherence → decorator/context_manager}/__init__.py +0 -0
  582. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/conversational_coherence}/__init__.py +0 -0
  583. /opik/evaluation/metrics/conversation/{conversational_coherence → llm_judges/conversational_coherence}/schema.py +0 -0
  584. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/session_completeness}/__init__.py +0 -0
  585. /opik/evaluation/metrics/conversation/{session_completeness → llm_judges/session_completeness}/schema.py +0 -0
  586. /opik/evaluation/metrics/conversation/{user_frustration → llm_judges/user_frustration}/schema.py +0 -0
  587. /opik/integrations/bedrock/{stream_wrappers.py → converse/stream_wrappers.py} +0 -0
  588. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  589. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/WHEEL +0 -0
  590. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/entry_points.txt +0 -0
  591. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/licenses/LICENSE +0 -0
  592. {opik-1.8.39.dist-info → opik-1.9.71.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ """Multi-turn simulation module for Opik."""
2
+
3
+ from .simulated_user import SimulatedUser
4
+ from .simulator import run_simulation
5
+
6
+ __all__ = ["SimulatedUser", "run_simulation"]
@@ -0,0 +1,99 @@
1
+ """SimulatedUser class for multi-turn conversation simulation."""
2
+
3
+ from typing import List, Dict, Optional
4
+ from opik.evaluation.models.models_factory import get as get_model
5
+
6
+
7
class SimulatedUser:
    """
    Stand-in for a human user in multi-turn conversation simulations.

    Every call to ``generate_response`` yields exactly one user message as a
    plain string. The message is either the next entry of ``fixed_responses``
    (cycled endlessly) or, when no fixed responses were supplied, text
    produced by the configured LLM. Adding the returned string to the
    conversation is left to the caller.
    """

    def __init__(
        self,
        persona: str,
        model: str = "gpt-4o-mini",
        fixed_responses: Optional[List[str]] = None,
    ):
        """
        Create a simulated user.

        Args:
            persona: Free-text description of the user's personality and
                behavior; embedded verbatim in the LLM system prompt.
            model: Name of the LLM used to generate responses
                (default: gpt-4o-mini).
            fixed_responses: Optional scripted replies. When non-empty they
                are returned in order, wrapping around, and the LLM is never
                queried by ``generate_response``.
        """
        self.persona = persona
        self.model = model
        self.fixed_responses = fixed_responses or []
        self._response_index = 0

        # The backend is always resolved through models_factory, even when
        # scripted replies are supplied.
        self._llm = get_model(model_name=model)

    def generate_response(self, conversation_history: List[Dict[str, str]]) -> str:
        """
        Produce the next user message.

        Args:
            conversation_history: Message dicts with 'role' and 'content'
                keys, oldest first. Ignored when scripted replies are in use.

        Returns:
            The simulated user's message as a string.
        """
        scripted = self.fixed_responses
        if not scripted:
            return self._generate_llm_response(conversation_history)

        # Scripted replies take priority and are replayed round-robin.
        position = self._response_index % len(scripted)
        self._response_index += 1
        return scripted[position]

    def _generate_llm_response(self, conversation_history: List[Dict[str, str]]) -> str:
        """Ask the LLM backend for the next user message.

        Any exception raised by the backend is turned into a fallback
        message containing the error text instead of propagating.
        """
        system_prompt = f"""You are a simulated user with the following persona: {self.persona}

Your task is to generate realistic user messages that this persona would send in a conversation.
Respond as if you are the user, not as an assistant describing the user.
Generate a single user message that fits your persona and the conversation context."""

        # generate_string takes a single text input, so the system prompt and
        # the whole history are flattened into one transcript.
        transcript = self._format_messages_as_text(
            [{"role": "system", "content": system_prompt}, *conversation_history]
        )

        try:
            return self._llm.generate_string(input=transcript)
        except Exception as error:
            return f"I'm having trouble responding right now. ({error})"

    def _format_messages_as_text(self, messages: List[Dict[str, str]]) -> str:
        """Render messages as newline-separated ``"<Role>: <content>"`` lines."""
        lines = []
        for entry in messages:
            role, content = entry["role"], entry["content"]
            # Title-casing covers "system"/"user"/"assistant" as well as any
            # other role name with a single rule.
            lines.append(f"{role.title()}: {content}")
        return "\n".join(lines)
@@ -0,0 +1,108 @@
1
+ """Multi-turn simulation functionality."""
2
+
3
+ from typing import Callable, Optional, Dict, Any, List
4
+ from opik import id_helpers, track
5
+ from .simulated_user import SimulatedUser
6
+
7
+
8
def run_simulation(
    app: Callable,
    user_simulator: SimulatedUser,
    initial_message: Optional[str] = None,
    max_turns: int = 5,
    thread_id: Optional[str] = None,
    project_name: Optional[str] = None,
    **app_kwargs: Any,
) -> Dict[str, Any]:
    """
    Drive a multi-turn conversation between a simulated user and an app.

    On every turn the app receives only the latest user message as a string;
    it is expected to keep the full conversation itself, keyed by thread_id.
    The history collected here exists for the simulator and for the result.

    Args:
        app: Callable handling one message per call.
            Signature: app(message: str, *, thread_id: str, **kwargs) -> Dict[str, str]
            If it has no ``opik_tracked`` attribute it is wrapped with
            ``track`` first. Each call also receives an ``opik_args`` keyword
            carrying the thread_id and trace metadata.
        user_simulator: SimulatedUser instance that generates user messages.
        initial_message: First user message. Generated by the simulator when None.
        max_turns: Number of user/assistant exchanges to run (default: 5).
        thread_id: Thread ID used to group traces. Generated when None.
        project_name: Optional project name; stored in each trace's metadata
            and echoed back in the result.
        **app_kwargs: Extra keyword arguments forwarded to every app call.

    Returns:
        Dict containing:
            - thread_id: The thread ID used for this simulation
            - conversation_history: List of message dicts from the simulation
            - project_name: Project name if provided
    """
    if thread_id is None:
        thread_id = id_helpers.generate_id()

    # Wrap the app for tracing unless it has already been decorated.
    if not hasattr(app, "opik_tracked"):
        app = track(name=getattr(app, "__name__", "simulation_app"))(app)

    # Simulator-side view of the conversation.
    history: List[Dict[str, str]] = []

    if initial_message is None:
        initial_message = user_simulator.generate_response(history)

    for turn in range(max_turns):
        # The opening turn uses the initial message; later turns ask the simulator.
        user_text = (
            initial_message
            if turn == 0
            else user_simulator.generate_response(history)
        )
        history.append({"role": "user", "content": user_text})

        tracing_args = {
            "trace": {
                "thread_id": thread_id,
                "metadata": {"turn": turn + 1, "project_name": project_name},
            }
        }

        try:
            reply = app(
                user_text,
                thread_id=thread_id,
                **app_kwargs,
                opik_args=tracing_args,
            )
        except Exception as error:
            # A failing app must not abort the simulation; record the error
            # as the assistant's reply instead.
            reply = {
                "role": "assistant",
                "content": f"Error processing message: {error}",
            }

        # Anything that is not a dict with 'role' and 'content' is coerced
        # into an assistant message.
        well_formed = (
            isinstance(reply, dict) and "role" in reply and "content" in reply
        )
        if not well_formed:
            fallback_text = str(reply) if reply else "No response"
            reply = {"role": "assistant", "content": fallback_text}

        history.append(reply)

    return {
        "thread_id": thread_id,
        "conversation_history": history,
        "project_name": project_name,
    }
opik/synchronization.py CHANGED
@@ -40,15 +40,14 @@ def until(
40
40
  while True:
41
41
  try:
42
42
  if function():
43
- break
43
+ return True
44
44
  except Exception:
45
45
  LOGGER.debug(
46
46
  f"{function.__name__} raised error in 'until' function.", exc_info=True
47
47
  )
48
48
  if not allow_errors:
49
49
  raise
50
- finally:
51
- if (time.time() - start_time) > max_try_seconds:
52
- return False
53
- time.sleep(sleep)
54
- return True
50
+
51
+ if (time.time() - start_time) > max_try_seconds:
52
+ return False
53
+ time.sleep(sleep)
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import threading
4
4
  from typing import Optional
5
+ from . import config
5
6
 
6
7
 
7
8
  class TracingRuntimeConfig:
@@ -23,9 +24,7 @@ class TracingRuntimeConfig:
23
24
  return self._tracing_active
24
25
 
25
26
  try:
26
- from .. import config as _config_module
27
-
28
- enabled = not _config_module.OpikConfig().track_disable
27
+ enabled = not config.OpikConfig().track_disable
29
28
  self._tracing_active = enabled
30
29
 
31
30
  except Exception:
@@ -34,16 +33,16 @@ class TracingRuntimeConfig:
34
33
  return enabled
35
34
 
36
35
 
37
- runtime_cfg = TracingRuntimeConfig()
36
+ runtime_config = TracingRuntimeConfig()
38
37
 
39
38
 
40
39
  def set_tracing_active(active: bool) -> None:
41
- runtime_cfg.set_tracing_active(active)
40
+ runtime_config.set_tracing_active(active)
42
41
 
43
42
 
44
43
  def is_tracing_active() -> bool:
45
- return runtime_cfg.is_tracing_active()
44
+ return runtime_config.is_tracing_active()
46
45
 
47
46
 
48
47
  def reset_tracing_to_config_default() -> None:
49
- runtime_cfg.reset_to_config_default()
48
+ runtime_config.reset_to_config_default()
opik/types.py CHANGED
@@ -2,6 +2,7 @@ import enum
2
2
  import sys
3
3
  from typing import Literal, Optional
4
4
 
5
+ from pydantic import StrictStr
5
6
  from typing_extensions import TypedDict
6
7
 
7
8
  if sys.version_info < (3, 11):
@@ -79,6 +80,41 @@ class FeedbackScoreDict(TypedDict):
79
80
  """An optional explanation or justification for the given score."""
80
81
 
81
82
 
83
class BatchFeedbackScoreDict(TypedDict):
    """
    A TypedDict representing a feedback score for batch operations.

    This class defines the structure for feedback scores used in batch logging
    operations, with a required id field and optional per-score project_name.

    The keys ``id``, ``name`` and ``value`` are required. The keys
    ``project_name``, ``category_name`` and ``reason`` may be omitted
    entirely or set to None.
    """

    id: Required[str]
    """
    A unique identifier for the object this score should be assigned to.
    Refers to either the trace_id, span_id or thread_id depending on how the score is logged.
    Required for batch operations.
    """

    name: Required[str]
    """The name of the feedback metric or criterion."""

    value: Required[float]
    """The numerical value of the feedback score."""

    project_name: NotRequired[Optional[StrictStr]]
    """
    The name of the project for this specific score.
    If not provided, falls back to the project_name parameter in the method call,
    or the default project name configured in the Opik instance.
    """

    category_name: NotRequired[Optional[str]]
    """An optional category name for the given score."""

    reason: NotRequired[Optional[str]]
    """An optional explanation or justification for the given score."""
+
117
+
82
118
  class ErrorInfoDict(TypedDict):
83
119
  """
84
120
  A TypedDict representing the information about the error occurred.
@@ -0,0 +1,241 @@
1
+ from typing import Any, List, Optional
2
+
3
+ import opik.exceptions as exceptions
4
+ from . import validator, result
5
+
6
+
7
class ChatPromptMessagesValidator(validator.RaisableValidator):
    """
    Validator for ChatPrompt messages list.

    Validates that messages is a list of dicts where each dict has:
    - exactly the keys "role" and "content"
    - a "role" value of "system", "user", or "assistant"
    - a "content" value that is either a string or a list of dicts
    - if content is a list of dicts, each dict must have a "type" key;
      "text" parts need a string "text" value, and URL-based parts
      ("image_url", "video_url", "audio_url") need a dict stored under the
      key named after the type, holding a string "url"

    All problems found are collected as human-readable strings, each
    prefixed with the path of the offending element (e.g. ``messages[0].role``).
    """

    VALID_ROLES = {"system", "user", "assistant"}
    URL_BASED_CONTENT_TYPES = {"image_url", "video_url", "audio_url"}

    def __init__(self, messages: Any):
        """Store the object to validate; no validation happens here."""
        self.messages = messages
        self.validation_result: Optional[result.ValidationResult] = None

    def validate(self) -> result.ValidationResult:
        """
        Run all checks and return the result.

        The result is also stored in ``self.validation_result`` for
        ``raise_if_validation_failed``.
        """
        failure_reasons: List[str] = []

        # Nothing else can be checked if messages is not a list.
        if not self._validate_messages_is_list(failure_reasons):
            self.validation_result = result.ValidationResult(
                failed=True, failure_reasons=failure_reasons
            )
            return self.validation_result

        # Validate each message; failures accumulate across messages.
        for idx, message in enumerate(self.messages):
            prefix = f"messages[{idx}]"
            self._validate_message(prefix, message, failure_reasons)

        if len(failure_reasons) > 0:
            self.validation_result = result.ValidationResult(
                failed=True, failure_reasons=failure_reasons
            )
        else:
            self.validation_result = result.ValidationResult(failed=False)

        return self.validation_result

    def _validate_messages_is_list(self, failure_reasons: List[str]) -> bool:
        """Validate that messages is a list. Returns False if validation fails."""
        if not isinstance(self.messages, list):
            msg = (
                f"messages must be a list but {type(self.messages).__name__} was given"
            )
            failure_reasons.append(msg)
            return False
        return True

    def _validate_message(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single message structure, role, and content."""
        # Role/content checks only make sense on a structurally valid message.
        if not self._validate_message_structure(prefix, message, failure_reasons):
            return

        self._validate_role(prefix, message, failure_reasons)
        self._validate_content(prefix, message, failure_reasons)

    def _validate_message_structure(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> bool:
        """
        Validate that message is a dict with exactly 'role' and 'content' keys.

        Missing keys and unexpected keys are reported separately, and only
        when they actually occur. Returns False if validation fails.
        """
        if not isinstance(message, dict):
            msg = f"{prefix}: must be a dict but {type(message).__name__} was given"
            failure_reasons.append(msg)
            return False

        expected_keys = {"role", "content"}
        message_keys = set(message.keys())
        if message_keys == expected_keys:
            return True

        missing_keys = expected_keys - message_keys
        if missing_keys:
            msg = f"{prefix}: missing required keys: {sorted(missing_keys)}"
            failure_reasons.append(msg)

        extra_keys = message_keys - expected_keys
        if extra_keys:
            # key=str keeps the ordering well-defined even if the dict has
            # keys of mixed types, which plain sorted() cannot compare.
            msg = (
                f"{prefix}: unexpected keys: {sorted(extra_keys, key=str)}. "
                f"Expected only: {sorted(expected_keys)}"
            )
            failure_reasons.append(msg)

        return False

    def _validate_role(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the role field of a message."""
        role = message.get("role")
        if role not in self.VALID_ROLES:
            valid_roles_str = ", ".join([f"'{r}'" for r in sorted(self.VALID_ROLES)])
            msg = (
                f"{prefix}.role: must be one of [{valid_roles_str}] "
                f"but {repr(role)} was given"
            )
            failure_reasons.append(msg)

    def _validate_content(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the content field of a message."""
        content = message.get("content")
        if content is None:
            msg = f"{prefix}.content: must not be None"
            failure_reasons.append(msg)
        elif not isinstance(content, (str, list)):
            msg = (
                f"{prefix}.content: must be either str or list of dicts "
                f"but {type(content).__name__} was given"
            )
            failure_reasons.append(msg)
        elif isinstance(content, list):
            self._validate_content_list(prefix, content, failure_reasons)

    def _validate_content_list(
        self, prefix: str, content: list, failure_reasons: List[str]
    ) -> None:
        """Validate content when it is a list of content parts."""
        for content_idx, content_part in enumerate(content):
            content_prefix = f"{prefix}.content[{content_idx}]"
            self._validate_content_part(content_prefix, content_part, failure_reasons)

    def _validate_content_part(
        self, content_prefix: str, content_part: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single content part in the content list."""
        if not isinstance(content_part, dict):
            msg = (
                f"{content_prefix}: must be a dict "
                f"but {type(content_part).__name__} was given"
            )
            failure_reasons.append(msg)
            return

        if "type" not in content_part:
            msg = f"{content_prefix}: must have 'type' key"
            failure_reasons.append(msg)
            return

        # Validate type-specific requirements
        content_type = content_part.get("type")
        self._validate_content_type_specific(
            content_prefix, content_type, content_part, failure_reasons
        )

    def _validate_content_type_specific(
        self,
        content_prefix: str,
        content_type: Any,
        content_part: dict,
        failure_reasons: List[str],
    ) -> None:
        """
        Validate type-specific requirements for content parts.

        Content types other than "text" and the URL-based ones are accepted
        without further checks.
        """
        if content_type in self.URL_BASED_CONTENT_TYPES:
            # For URL-based parts the payload key has the same name as the
            # type, hence content_type is passed for both arguments.
            self._validate_required_url_object(
                content_prefix,
                content_part,
                content_type,
                content_type,
                failure_reasons,
            )
        elif content_type == "text":
            self._validate_required_string_key(
                content_prefix, content_part, "text", "text", failure_reasons
            )

    def _validate_required_string_key(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a string."""
        if key_name not in content_part:
            msg = f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            failure_reasons.append(msg)
        elif not isinstance(content_part.get(key_name), str):
            msg = (
                f"{prefix}.{key_name}: must be a string "
                f"but {type(content_part.get(key_name)).__name__} was given"
            )
            failure_reasons.append(msg)

    def _validate_required_url_object(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a dict with a 'url' key that is a string."""
        if key_name not in content_part:
            msg = f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            failure_reasons.append(msg)
            return

        url_object = content_part.get(key_name)
        if not isinstance(url_object, dict):
            msg = (
                f"{prefix}.{key_name}: must be a dict "
                f"but {type(url_object).__name__} was given"
            )
            failure_reasons.append(msg)
            return

        if "url" not in url_object:
            msg = f"{prefix}.{key_name}: must have 'url' key"
            failure_reasons.append(msg)
        elif not isinstance(url_object.get("url"), str):
            msg = (
                f"{prefix}.{key_name}.url: must be a string "
                f"but {type(url_object.get('url')).__name__} was given"
            )
            failure_reasons.append(msg)

    def raise_if_validation_failed(self) -> None:
        """
        Raise ``exceptions.ValidationError`` if the last ``validate`` call
        recorded any failure reasons. Does nothing if ``validate`` has not
        been called yet.
        """
        if (
            self.validation_result is not None
            and len(self.validation_result.failure_reasons) > 0
        ):
            raise exceptions.ValidationError(
                prefix="ChatPrompt.__init__",
                failure_reasons=self.validation_result.failure_reasons,
            )
@@ -1,16 +1,16 @@
1
1
  import pydantic
2
2
 
3
3
  from typing import Any
4
- from ..types import FeedbackScoreDict
4
+ from ..types import BatchFeedbackScoreDict
5
5
  from . import validator, result
6
6
 
7
7
 
8
8
  class PydanticWrapper(pydantic.BaseModel):
9
9
  model_config = pydantic.ConfigDict(extra="forbid")
10
- feedback_score: FeedbackScoreDict
10
+ feedback_score: BatchFeedbackScoreDict
11
11
 
12
12
 
13
- EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str]}"
13
+ EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str], 'project_name': NotRequired[str]}"
14
14
 
15
15
 
16
16
  class FeedbackScoreValidator(validator.Validator):
@@ -1,4 +1,5 @@
1
1
  import abc
2
+ from typing import Any
2
3
 
3
4
  from . import result
4
5
 
@@ -7,3 +8,30 @@ class Validator(abc.ABC):
7
8
  @abc.abstractmethod
8
9
  def validate(self) -> result.ValidationResult:
9
10
  pass
11
+
12
+
13
class RaisableValidator(Validator):
    """
    Abstract validator that can also raise on failure.

    Extends Validator with ``raise_if_validation_failed`` for validators
    whose failures should surface as ValidationError exceptions, typically
    during class initialization.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Accept any arguments and do nothing with them.

        Subclasses can override this method with their own initialization signature.
        """

    @abc.abstractmethod
    def raise_if_validation_failed(self) -> None:
        """
        Raise a ValidationError if validation failed.

        Implementations should inspect the validation result and raise an
        appropriate ValidationError exception when it indicates failure.
        """