opik 1.9.5__py3-none-any.whl → 1.9.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. opik/__init__.py +10 -3
  2. opik/anonymizer/__init__.py +5 -0
  3. opik/anonymizer/anonymizer.py +12 -0
  4. opik/anonymizer/factory.py +80 -0
  5. opik/anonymizer/recursive_anonymizer.py +64 -0
  6. opik/anonymizer/rules.py +56 -0
  7. opik/anonymizer/rules_anonymizer.py +35 -0
  8. opik/api_objects/dataset/rest_operations.py +5 -0
  9. opik/api_objects/experiment/experiment.py +46 -49
  10. opik/api_objects/experiment/helpers.py +34 -10
  11. opik/api_objects/local_recording.py +8 -3
  12. opik/api_objects/opik_client.py +230 -48
  13. opik/api_objects/opik_query_language.py +9 -0
  14. opik/api_objects/prompt/__init__.py +11 -3
  15. opik/api_objects/prompt/base_prompt.py +69 -0
  16. opik/api_objects/prompt/base_prompt_template.py +29 -0
  17. opik/api_objects/prompt/chat/__init__.py +1 -0
  18. opik/api_objects/prompt/chat/chat_prompt.py +193 -0
  19. opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
  20. opik/api_objects/prompt/{chat_content_renderer_registry.py → chat/content_renderer_registry.py} +37 -35
  21. opik/api_objects/prompt/client.py +101 -30
  22. opik/api_objects/prompt/text/__init__.py +1 -0
  23. opik/api_objects/prompt/text/prompt.py +174 -0
  24. opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
  25. opik/api_objects/prompt/types.py +1 -1
  26. opik/cli/export.py +6 -2
  27. opik/cli/usage_report/charts.py +39 -10
  28. opik/cli/usage_report/cli.py +164 -45
  29. opik/cli/usage_report/pdf.py +14 -1
  30. opik/config.py +0 -5
  31. opik/decorator/base_track_decorator.py +37 -40
  32. opik/decorator/context_manager/span_context_manager.py +9 -0
  33. opik/decorator/context_manager/trace_context_manager.py +5 -0
  34. opik/dict_utils.py +3 -3
  35. opik/evaluation/__init__.py +13 -2
  36. opik/evaluation/engine/engine.py +195 -223
  37. opik/evaluation/engine/helpers.py +8 -7
  38. opik/evaluation/engine/metrics_evaluator.py +237 -0
  39. opik/evaluation/evaluation_result.py +35 -1
  40. opik/evaluation/evaluator.py +318 -30
  41. opik/evaluation/models/litellm/util.py +78 -6
  42. opik/evaluation/models/model_capabilities.py +33 -0
  43. opik/evaluation/report.py +14 -2
  44. opik/evaluation/rest_operations.py +36 -33
  45. opik/evaluation/test_case.py +2 -2
  46. opik/evaluation/types.py +9 -1
  47. opik/exceptions.py +17 -0
  48. opik/hooks/__init__.py +17 -1
  49. opik/hooks/anonymizer_hook.py +36 -0
  50. opik/id_helpers.py +18 -0
  51. opik/integrations/adk/helpers.py +16 -7
  52. opik/integrations/adk/legacy_opik_tracer.py +7 -4
  53. opik/integrations/adk/opik_tracer.py +3 -1
  54. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
  55. opik/integrations/adk/recursive_callback_injector.py +1 -6
  56. opik/integrations/dspy/callback.py +1 -4
  57. opik/integrations/haystack/opik_connector.py +2 -2
  58. opik/integrations/haystack/opik_tracer.py +2 -4
  59. opik/integrations/langchain/opik_tracer.py +273 -82
  60. opik/integrations/llama_index/callback.py +110 -108
  61. opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
  62. opik/integrations/openai/opik_tracker.py +1 -1
  63. opik/message_processing/batching/batchers.py +11 -7
  64. opik/message_processing/encoder_helpers.py +79 -0
  65. opik/message_processing/messages.py +25 -1
  66. opik/message_processing/online_message_processor.py +23 -8
  67. opik/opik_context.py +7 -7
  68. opik/rest_api/__init__.py +188 -12
  69. opik/rest_api/client.py +3 -0
  70. opik/rest_api/dashboards/__init__.py +4 -0
  71. opik/rest_api/dashboards/client.py +462 -0
  72. opik/rest_api/dashboards/raw_client.py +648 -0
  73. opik/rest_api/datasets/client.py +893 -89
  74. opik/rest_api/datasets/raw_client.py +1328 -87
  75. opik/rest_api/experiments/client.py +30 -2
  76. opik/rest_api/experiments/raw_client.py +26 -0
  77. opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
  78. opik/rest_api/optimizations/client.py +302 -0
  79. opik/rest_api/optimizations/raw_client.py +463 -0
  80. opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
  81. opik/rest_api/prompts/__init__.py +2 -2
  82. opik/rest_api/prompts/client.py +34 -4
  83. opik/rest_api/prompts/raw_client.py +32 -2
  84. opik/rest_api/prompts/types/__init__.py +3 -1
  85. opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
  86. opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
  87. opik/rest_api/spans/__init__.py +0 -2
  88. opik/rest_api/spans/client.py +148 -64
  89. opik/rest_api/spans/raw_client.py +210 -83
  90. opik/rest_api/spans/types/__init__.py +0 -2
  91. opik/rest_api/traces/client.py +241 -73
  92. opik/rest_api/traces/raw_client.py +344 -90
  93. opik/rest_api/types/__init__.py +200 -15
  94. opik/rest_api/types/aggregation_data.py +1 -0
  95. opik/rest_api/types/alert_trigger_config_public_type.py +6 -1
  96. opik/rest_api/types/alert_trigger_config_type.py +6 -1
  97. opik/rest_api/types/alert_trigger_config_write_type.py +6 -1
  98. opik/rest_api/types/automation_rule_evaluator.py +23 -1
  99. opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
  100. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
  101. opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
  102. opik/rest_api/types/{automation_rule_evaluator_object_public.py → automation_rule_evaluator_object_object_public.py} +32 -10
  103. opik/rest_api/types/automation_rule_evaluator_page_public.py +2 -2
  104. opik/rest_api/types/automation_rule_evaluator_public.py +23 -1
  105. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
  106. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
  107. opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
  108. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
  109. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
  110. opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
  111. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
  112. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
  113. opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
  114. opik/rest_api/types/automation_rule_evaluator_update.py +23 -1
  115. opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
  116. opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
  117. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
  118. opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
  119. opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
  120. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
  121. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
  122. opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
  123. opik/rest_api/types/automation_rule_evaluator_write.py +23 -1
  124. opik/rest_api/types/boolean_feedback_definition.py +25 -0
  125. opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
  126. opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
  127. opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
  128. opik/rest_api/types/boolean_feedback_detail.py +29 -0
  129. opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
  130. opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
  131. opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
  132. opik/rest_api/types/dashboard_page_public.py +24 -0
  133. opik/rest_api/types/dashboard_public.py +30 -0
  134. opik/rest_api/types/dataset.py +2 -0
  135. opik/rest_api/types/dataset_item.py +2 -0
  136. opik/rest_api/types/dataset_item_compare.py +2 -0
  137. opik/rest_api/types/dataset_item_filter.py +23 -0
  138. opik/rest_api/types/dataset_item_filter_operator.py +21 -0
  139. opik/rest_api/types/dataset_item_page_compare.py +1 -0
  140. opik/rest_api/types/dataset_item_page_public.py +1 -0
  141. opik/rest_api/types/dataset_item_public.py +2 -0
  142. opik/rest_api/types/dataset_item_update.py +39 -0
  143. opik/rest_api/types/dataset_item_write.py +1 -0
  144. opik/rest_api/types/dataset_public.py +2 -0
  145. opik/rest_api/types/dataset_public_status.py +5 -0
  146. opik/rest_api/types/dataset_status.py +5 -0
  147. opik/rest_api/types/dataset_version_diff.py +22 -0
  148. opik/rest_api/types/dataset_version_diff_stats.py +24 -0
  149. opik/rest_api/types/dataset_version_page_public.py +23 -0
  150. opik/rest_api/types/dataset_version_public.py +49 -0
  151. opik/rest_api/types/experiment.py +2 -0
  152. opik/rest_api/types/experiment_public.py +2 -0
  153. opik/rest_api/types/experiment_score.py +20 -0
  154. opik/rest_api/types/experiment_score_public.py +20 -0
  155. opik/rest_api/types/experiment_score_write.py +20 -0
  156. opik/rest_api/types/feedback.py +20 -1
  157. opik/rest_api/types/feedback_create.py +16 -1
  158. opik/rest_api/types/feedback_object_public.py +22 -1
  159. opik/rest_api/types/feedback_public.py +20 -1
  160. opik/rest_api/types/feedback_score_public.py +4 -0
  161. opik/rest_api/types/feedback_update.py +16 -1
  162. opik/rest_api/types/image_url.py +20 -0
  163. opik/rest_api/types/image_url_public.py +20 -0
  164. opik/rest_api/types/image_url_write.py +20 -0
  165. opik/rest_api/types/llm_as_judge_message.py +5 -1
  166. opik/rest_api/types/llm_as_judge_message_content.py +24 -0
  167. opik/rest_api/types/llm_as_judge_message_content_public.py +24 -0
  168. opik/rest_api/types/llm_as_judge_message_content_write.py +24 -0
  169. opik/rest_api/types/llm_as_judge_message_public.py +5 -1
  170. opik/rest_api/types/llm_as_judge_message_write.py +5 -1
  171. opik/rest_api/types/llm_as_judge_model_parameters.py +2 -0
  172. opik/rest_api/types/llm_as_judge_model_parameters_public.py +2 -0
  173. opik/rest_api/types/llm_as_judge_model_parameters_write.py +2 -0
  174. opik/rest_api/types/optimization.py +2 -0
  175. opik/rest_api/types/optimization_public.py +2 -0
  176. opik/rest_api/types/optimization_public_status.py +3 -1
  177. opik/rest_api/types/optimization_status.py +3 -1
  178. opik/rest_api/types/optimization_studio_config.py +27 -0
  179. opik/rest_api/types/optimization_studio_config_public.py +27 -0
  180. opik/rest_api/types/optimization_studio_config_write.py +27 -0
  181. opik/rest_api/types/optimization_studio_log.py +22 -0
  182. opik/rest_api/types/optimization_write.py +2 -0
  183. opik/rest_api/types/optimization_write_status.py +3 -1
  184. opik/rest_api/types/prompt.py +6 -0
  185. opik/rest_api/types/prompt_detail.py +6 -0
  186. opik/rest_api/types/prompt_detail_template_structure.py +5 -0
  187. opik/rest_api/types/prompt_public.py +6 -0
  188. opik/rest_api/types/prompt_public_template_structure.py +5 -0
  189. opik/rest_api/types/prompt_template_structure.py +5 -0
  190. opik/rest_api/types/prompt_version.py +2 -0
  191. opik/rest_api/types/prompt_version_detail.py +2 -0
  192. opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
  193. opik/rest_api/types/prompt_version_public.py +2 -0
  194. opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
  195. opik/rest_api/types/prompt_version_template_structure.py +5 -0
  196. opik/rest_api/types/score_name.py +1 -0
  197. opik/rest_api/types/service_toggles_config.py +6 -0
  198. opik/rest_api/types/span_enrichment_options.py +31 -0
  199. opik/rest_api/types/span_filter.py +23 -0
  200. opik/rest_api/types/span_filter_operator.py +21 -0
  201. opik/rest_api/types/span_filter_write.py +23 -0
  202. opik/rest_api/types/span_filter_write_operator.py +21 -0
  203. opik/rest_api/types/span_llm_as_judge_code.py +27 -0
  204. opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
  205. opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
  206. opik/rest_api/types/span_update.py +46 -0
  207. opik/rest_api/types/studio_evaluation.py +20 -0
  208. opik/rest_api/types/studio_evaluation_public.py +20 -0
  209. opik/rest_api/types/studio_evaluation_write.py +20 -0
  210. opik/rest_api/types/studio_llm_model.py +21 -0
  211. opik/rest_api/types/studio_llm_model_public.py +21 -0
  212. opik/rest_api/types/studio_llm_model_write.py +21 -0
  213. opik/rest_api/types/studio_message.py +20 -0
  214. opik/rest_api/types/studio_message_public.py +20 -0
  215. opik/rest_api/types/studio_message_write.py +20 -0
  216. opik/rest_api/types/studio_metric.py +21 -0
  217. opik/rest_api/types/studio_metric_public.py +21 -0
  218. opik/rest_api/types/studio_metric_write.py +21 -0
  219. opik/rest_api/types/studio_optimizer.py +21 -0
  220. opik/rest_api/types/studio_optimizer_public.py +21 -0
  221. opik/rest_api/types/studio_optimizer_write.py +21 -0
  222. opik/rest_api/types/studio_prompt.py +20 -0
  223. opik/rest_api/types/studio_prompt_public.py +20 -0
  224. opik/rest_api/types/studio_prompt_write.py +20 -0
  225. opik/rest_api/types/trace.py +6 -0
  226. opik/rest_api/types/trace_public.py +6 -0
  227. opik/rest_api/types/trace_thread_filter_write.py +23 -0
  228. opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
  229. opik/rest_api/types/trace_thread_update.py +19 -0
  230. opik/rest_api/types/trace_update.py +39 -0
  231. opik/rest_api/types/value_entry.py +2 -0
  232. opik/rest_api/types/value_entry_compare.py +2 -0
  233. opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
  234. opik/rest_api/types/value_entry_public.py +2 -0
  235. opik/rest_api/types/video_url.py +19 -0
  236. opik/rest_api/types/video_url_public.py +19 -0
  237. opik/rest_api/types/video_url_write.py +19 -0
  238. opik/synchronization.py +5 -6
  239. opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
  240. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/METADATA +5 -4
  241. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/RECORD +246 -151
  242. opik/api_objects/prompt/chat_prompt_template.py +0 -164
  243. opik/api_objects/prompt/prompt.py +0 -131
  244. /opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
  245. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/WHEEL +0 -0
  246. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/entry_points.txt +0 -0
  247. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/licenses/LICENSE +0 -0
  248. {opik-1.9.5.dist-info → opik-1.9.39.dist-info}/top_level.txt +0 -0
@@ -36,16 +36,33 @@ def apply_model_specific_filters(
36
36
  already_warned: Set[str],
37
37
  warn: Callable[[str, Any], None],
38
38
  ) -> None:
39
- """Remove parameters known to be unsupported for specific models.
39
+ """Adjust/drop params for specific model families before calling LiteLLM.
40
40
 
41
- Currently handles the GPT-5 family which only honours temperature=1 and does not
42
- return log probabilities. Removing those eagerly avoids provider errors while the
43
- callback surfaces a one-time warning to the caller.
41
+ Currently handles:
42
+ - GPT-5: only honours temperature=1 and does not return log probabilities.
43
+ - DashScope Qwen: enforces constraints for logprobs / top_logprobs
44
44
  """
45
+ if model_name.startswith("gpt-5"):
46
+ _apply_gpt5_filters(params, already_warned, warn)
47
+ return
45
48
 
46
- if not model_name.startswith("gpt-5"):
49
+ if model_name.startswith("dashscope/"):
50
+ _apply_qwen_dashscope_filters(params, already_warned, warn)
47
51
  return
48
52
 
53
+
54
+ def _apply_gpt5_filters(
55
+ params: Dict[str, Any],
56
+ already_warned: Set[str],
57
+ warn: Callable[[str, Any], None],
58
+ ) -> None:
59
+ """Apply GPT-5 specific parameter filters.
60
+
61
+ Only honours temperature=1 and does not return log probabilities.
62
+ Removing those eagerly avoids provider errors while the callback surfaces a
63
+ one-time warning to the caller.
64
+ """
65
+
49
66
  unsupported: list[tuple[str, Any]] = []
50
67
 
51
68
  if "temperature" in params:
@@ -61,7 +78,62 @@ def apply_model_specific_filters(
61
78
  if param in params:
62
79
  unsupported.append((param, params[param]))
63
80
 
64
- for param, value in unsupported:
81
+ _drop_unsupported_params_with_warning(
82
+ params,
83
+ unsupported,
84
+ already_warned,
85
+ warn,
86
+ )
87
+
88
+
89
+ def _apply_qwen_dashscope_filters(
90
+ params: Dict[str, Any],
91
+ already_warned: Set[str],
92
+ warn: Callable[[str, Any], None],
93
+ ) -> None:
94
+ """Apply Qwen/DashScope specific parameter filters.
95
+
96
+ top_logprobs is only meaningful if logprobs is true and must be an int
97
+ in [0, 5]. When logprobs is false, drops top_logprobs; when logprobs is
98
+ true, clamps top_logprobs into [0, 5].
99
+ """
100
+
101
+ unsupported: list[tuple[str, Any]] = []
102
+
103
+ logprobs_value = params.get("logprobs")
104
+ if not logprobs_value:
105
+ if "top_logprobs" in params:
106
+ unsupported.append(("top_logprobs", params["top_logprobs"]))
107
+ else:
108
+ if "top_logprobs" in params:
109
+ raw_top_logprobs = params["top_logprobs"]
110
+ try:
111
+ top_logprobs = int(raw_top_logprobs)
112
+ except (TypeError, ValueError):
113
+ unsupported.append(("top_logprobs", raw_top_logprobs))
114
+ else:
115
+ if top_logprobs < 0:
116
+ top_logprobs = 0
117
+ elif top_logprobs > 5:
118
+ top_logprobs = 5
119
+ params["top_logprobs"] = top_logprobs
120
+
121
+ _drop_unsupported_params_with_warning(
122
+ params,
123
+ unsupported,
124
+ already_warned,
125
+ warn,
126
+ )
127
+
128
+
129
+ def _drop_unsupported_params_with_warning(
130
+ params: Dict[str, Any],
131
+ unsupported_params: list[tuple[str, Any]],
132
+ already_warned: Set[str],
133
+ warn: Callable[[str, Any], None],
134
+ ) -> None:
135
+ """Remove unsupported params and emit warnings once per param name."""
136
+ for param, value in unsupported_params:
65
137
  params.pop(param, None)
66
138
  if param in already_warned:
67
139
  continue
@@ -79,6 +79,29 @@ def vision_capability_detector(model_name: str) -> bool:
79
79
  return False
80
80
 
81
81
 
82
+ def video_capability_detector(model_name: str) -> bool:
83
+ """
84
+ Heuristically determine whether a model accepts video inputs.
85
+
86
+ Providers rarely expose structured metadata for video support, so we fall back
87
+ to naming conventions (e.g. models whose names contain ``video`` or ``qwen``
88
+ + ``vl``). When those heuristics fail we delegate to the vision detector since
89
+ current SDK integrations treat video as an extension of multimodal/vision APIs.
90
+ """
91
+ stripped = _strip_provider_prefix(model_name)
92
+ candidates = {model_name, stripped}
93
+ for candidate in candidates:
94
+ normalized = candidate.lower()
95
+ if "video" in normalized:
96
+ return True
97
+ if "qwen" in normalized and "vl" in normalized:
98
+ return True
99
+ # TODO(opik): litellm/model metadata still treats video + image inputs the same.
100
+ # Fall back to the vision heuristic so we can keep this dedicated capability
101
+ # and tighten detection once providers expose richer metadata.
102
+ return vision_capability_detector(model_name)
103
+
104
+
82
105
  class ModelCapabilitiesRegistry:
83
106
  """
84
107
  Central registry for model capability detection.
@@ -117,6 +140,12 @@ class ModelCapabilitiesRegistry:
117
140
  """
118
141
  return self.supports("vision", model_name)
119
142
 
143
+ def supports_video(self, model_name: Optional[str]) -> bool:
144
+ """
145
+ Convenience wrapper for video-capable detection.
146
+ """
147
+ return self.supports("video", model_name)
148
+
120
149
  def add_vision_model(self, model_name: str) -> None:
121
150
  # Extend the module-level registry used by vision_capability_detector
122
151
  VISION_MODEL_PREFIXES.add(self._strip_provider_prefix(model_name).lower())
@@ -141,6 +170,9 @@ MODEL_CAPABILITIES_REGISTRY = ModelCapabilitiesRegistry()
141
170
  MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
142
171
  "vision", vision_capability_detector
143
172
  )
173
+ MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
174
+ "video", video_capability_detector
175
+ )
144
176
 
145
177
  # Backwards compatibility shim for previous API which exposed a class with classmethods.
146
178
  ModelCapabilities = MODEL_CAPABILITIES_REGISTRY
@@ -151,4 +183,5 @@ __all__ = [
151
183
  "MODEL_CAPABILITIES_REGISTRY",
152
184
  "ModelCapabilities",
153
185
  "vision_capability_detector",
186
+ "video_capability_detector",
154
187
  ]
opik/evaluation/report.py CHANGED
@@ -1,10 +1,11 @@
1
1
  from collections import defaultdict
2
- from typing import Dict, List, Tuple
2
+ from typing import Dict, List, Optional, Tuple
3
3
 
4
4
  from rich import align, console, panel, table, text
5
5
 
6
6
 
7
7
  from . import test_result, evaluation_result
8
+ from .metrics import score_result
8
9
 
9
10
 
10
11
  def _format_time(seconds: float) -> str:
@@ -41,7 +42,10 @@ def _compute_average_scores(
41
42
 
42
43
 
43
44
  def display_experiment_results(
44
- dataset_name: str, total_time: float, test_results: List[test_result.TestResult]
45
+ dataset_name: str,
46
+ total_time: float,
47
+ test_results: List[test_result.TestResult],
48
+ experiment_scores: Optional[List[score_result.ScoreResult]] = None,
45
49
  ) -> None:
46
50
  average_scores, failed_scores = _compute_average_scores(test_results)
47
51
  nb_items = len(test_results)
@@ -62,6 +66,14 @@ def display_experiment_results(
62
66
  score_strings += text.Text(f" - {failed_scores[name]} failed", style="red")
63
67
  score_strings += text.Text("\n")
64
68
 
69
+ # Add experiment scores if available
70
+ if experiment_scores:
71
+ for score in experiment_scores:
72
+ score_strings += text.Text(
73
+ f"{score.name}: {score.value:.4f}", style="green bold"
74
+ )
75
+ score_strings += text.Text("\n")
76
+
65
77
  aligned_test_results = align.Align.left(score_strings)
66
78
 
67
79
  # Combine table, time text, and test results
@@ -1,11 +1,14 @@
1
+ import logging
1
2
  from typing import List, Optional
2
3
 
3
- from opik.api_objects import experiment, opik_client
4
+ from opik.api_objects import dataset, experiment, opik_client
4
5
  from opik.types import FeedbackScoreDict
5
6
  from . import test_case
6
- from .metrics import arguments_helpers, score_result
7
+ from .metrics import score_result
7
8
  from .types import ScoringKeyMappingType
8
9
 
10
+ LOGGER = logging.getLogger(__name__)
11
+
9
12
 
10
13
  def get_experiment_with_unique_name(
11
14
  client: opik_client.Opik, experiment_name: str
@@ -34,40 +37,39 @@ def get_trace_project_name(client: opik_client.Opik, trace_id: str) -> str:
34
37
 
35
38
 
36
39
  def get_experiment_test_cases(
37
- client: opik_client.Opik,
38
- experiment_id: str,
39
- dataset_id: str,
40
+ experiment_: experiment.Experiment,
41
+ dataset_: dataset.Dataset,
40
42
  scoring_key_mapping: Optional[ScoringKeyMappingType],
41
43
  ) -> List[test_case.TestCase]:
44
+ experiment_items = experiment_.get_items()
45
+
46
+ # Fetch dataset items to get input data for bulk-uploaded experiment items
47
+ dataset_items_by_id = {item["id"]: item for item in dataset_.get_items()}
48
+
42
49
  test_cases = []
43
- page = 1
50
+ for item in experiment_items:
51
+ dataset_item_data = dataset_items_by_id.get(item.dataset_item_id)
44
52
 
45
- while True:
46
- experiment_items_page = (
47
- client._rest_client.datasets.find_dataset_items_with_experiment_items(
48
- id=dataset_id, experiment_ids=f'["{experiment_id}"]', page=page
53
+ if dataset_item_data is None:
54
+ LOGGER.error(
55
+ f"Unexpected error: Dataset item with id {item.dataset_item_id} not found, skipping experiment item {item.id}"
56
+ )
57
+ continue
58
+
59
+ if item.evaluation_task_output is None:
60
+ LOGGER.error(
61
+ f"Unexpected error: Evaluation task output is None for experiment item {item.id}, skipping experiment item"
62
+ )
63
+ continue
64
+
65
+ test_cases.append(
66
+ test_case.TestCase(
67
+ trace_id=item.trace_id,
68
+ dataset_item_id=item.dataset_item_id,
69
+ task_output=item.evaluation_task_output,
70
+ dataset_item_content=dataset_item_data,
49
71
  )
50
72
  )
51
- if len(experiment_items_page.content) == 0:
52
- break
53
-
54
- for item in experiment_items_page.content:
55
- experiment_item = item.experiment_items[0]
56
-
57
- test_cases += [
58
- test_case.TestCase(
59
- trace_id=experiment_item.trace_id,
60
- dataset_item_id=experiment_item.dataset_item_id,
61
- task_output=experiment_item.output,
62
- scoring_inputs=arguments_helpers.create_scoring_inputs(
63
- dataset_item=experiment_item.input,
64
- task_output=experiment_item.output,
65
- scoring_key_mapping=scoring_key_mapping,
66
- ),
67
- )
68
- ]
69
-
70
- page += 1
71
73
 
72
74
  return test_cases
73
75
 
@@ -92,6 +94,7 @@ def log_test_result_feedback_scores(
92
94
  )
93
95
  all_trace_scores.append(trace_score)
94
96
 
95
- client.log_traces_feedback_scores(
96
- scores=all_trace_scores, project_name=project_name
97
- )
97
+ if len(all_trace_scores) > 0:
98
+ client.log_traces_feedback_scores(
99
+ scores=all_trace_scores, project_name=project_name
100
+ )
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any
1
+ from typing import Dict, Any, Optional
2
2
  import dataclasses
3
3
 
4
4
 
@@ -6,6 +6,6 @@ import dataclasses
6
6
  class TestCase:
7
7
  trace_id: str
8
8
  dataset_item_id: str
9
- scoring_inputs: Dict[str, Any]
10
9
  task_output: Dict[str, Any]
11
10
  dataset_item_content: Dict[str, Any] = dataclasses.field(default_factory=dict)
11
+ mapped_scoring_inputs: Optional[Dict[str, Any]] = None
opik/evaluation/types.py CHANGED
@@ -1,5 +1,13 @@
1
- from typing import Any, Callable, Dict, Union
1
+ from typing import Any, Callable, Dict, List, Union
2
+
3
+ from . import test_result
4
+ from .metrics import score_result
2
5
 
3
6
  LLMTask = Callable[[Dict[str, Any]], Dict[str, Any]]
4
7
 
5
8
  ScoringKeyMappingType = Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]
9
+
10
+ ExperimentScoreFunction = Callable[
11
+ [List[test_result.TestResult]],
12
+ Union[score_result.ScoreResult, List[score_result.ScoreResult]],
13
+ ]
opik/exceptions.py CHANGED
@@ -81,6 +81,23 @@ class PromptPlaceholdersDontMatchFormatArguments(OpikException):
81
81
  )
82
82
 
83
83
 
84
+ class PromptTemplateStructureMismatch(OpikException):
85
+ """Exception raised when attempting to create a prompt version with a different template structure than the existing prompt."""
86
+
87
+ def __init__(
88
+ self, prompt_name: str, existing_structure: str, attempted_structure: str
89
+ ):
90
+ self.prompt_name = prompt_name
91
+ self.existing_structure = existing_structure
92
+ self.attempted_structure = attempted_structure
93
+
94
+ def __str__(self) -> str:
95
+ return (
96
+ f"Prompt with name '{self.prompt_name}' already exists and has immutable "
97
+ f"'{self.existing_structure}' template structure, not '{self.attempted_structure}'. "
98
+ )
99
+
100
+
84
101
  class ExperimentNotFound(OpikException):
85
102
  pass
86
103
 
opik/hooks/__init__.py CHANGED
@@ -3,5 +3,21 @@ from .httpx_client_hook import (
3
3
  add_httpx_client_hook,
4
4
  register_httpx_client_hook,
5
5
  )
6
+ from .anonymizer_hook import (
7
+ has_anonymizers,
8
+ add_anonymizer,
9
+ apply_anonymizers,
10
+ get_anonymizers,
11
+ clear_anonymizers,
12
+ )
6
13
 
7
- __all__ = ("HttpxClientHook", "add_httpx_client_hook", "register_httpx_client_hook")
14
+ __all__ = (
15
+ "HttpxClientHook",
16
+ "add_httpx_client_hook",
17
+ "register_httpx_client_hook",
18
+ "add_anonymizer",
19
+ "apply_anonymizers",
20
+ "clear_anonymizers",
21
+ "get_anonymizers",
22
+ "has_anonymizers",
23
+ )
@@ -0,0 +1,36 @@
1
+ from typing import List
2
+
3
+ from opik.anonymizer import anonymizer
4
+
5
+
6
+ # holder for a global list of anonymizers
7
+ _anonymizers: List[anonymizer.Anonymizer] = []
8
+
9
+
10
+ def add_anonymizer(anonymizer_hook: anonymizer.Anonymizer) -> None:
11
+ """Register a new anonymizer to be applied to all sensitive data logged by Opik."""
12
+ _anonymizers.append(anonymizer_hook)
13
+
14
+
15
+ def clear_anonymizers() -> None:
16
+ """Clear all registered anonymizers."""
17
+ _anonymizers.clear()
18
+
19
+
20
+ def has_anonymizers() -> bool:
21
+ """Check if any anonymizers have been registered."""
22
+ return len(_anonymizers) > 0
23
+
24
+
25
+ def get_anonymizers() -> List[anonymizer.Anonymizer]:
26
+ """Get a list of all registered anonymizers."""
27
+ return _anonymizers
28
+
29
+
30
+ def apply_anonymizers(
31
+ data: anonymizer.AnonymizerDataType,
32
+ ) -> anonymizer.AnonymizerDataType:
33
+ """Apply all registered anonymizers to the given data."""
34
+ for anonymizer_ in _anonymizers:
35
+ data = anonymizer_.anonymize(data)
36
+ return data
opik/id_helpers.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from datetime import datetime
2
2
  from typing import Optional
3
+ import random
4
+ import string
3
5
  import uuid
4
6
  import uuid6
5
7
 
@@ -12,6 +14,22 @@ def generate_id(timestamp: Optional[datetime] = None) -> str:
12
14
  return str(uuid6.uuid7())
13
15
 
14
16
 
17
+ def generate_random_alphanumeric_string(length: int) -> str:
18
+ """Generate a random alphanumeric string of the specified length.
19
+
20
+ Args:
21
+ length: The length of the string to generate.
22
+
23
+ Returns:
24
+ A random string containing only alphanumeric characters (a-z, A-Z, 0-9).
25
+ """
26
+ if length < 0:
27
+ raise ValueError("Length must be non-negative")
28
+
29
+ characters = string.ascii_letters + string.digits
30
+ return "".join(random.choice(characters) for _ in range(length))
31
+
32
+
15
33
  def uuid4_to_uuid7(user_datetime: datetime, user_uuid: str) -> uuid.UUID:
16
34
  """Convert a UUID v4 into a UUID v7 following RFC draft specification."""
17
35
  # Get Unix timestamp in milliseconds
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import os
2
3
  from typing import (
3
4
  Any,
@@ -8,6 +9,8 @@ from google.adk.models import LlmResponse
8
9
  import opik.types as opik_types
9
10
  import pydantic
10
11
 
12
+ LOGGER = logging.getLogger(__name__)
13
+
11
14
 
12
15
  def convert_adk_base_model_to_dict(value: pydantic.BaseModel) -> Dict[str, Any]:
13
16
  """Most ADK objects are Pydantic Base Models"""
@@ -27,13 +30,19 @@ def get_adk_provider() -> opik_types.LLMProvider:
27
30
 
28
31
 
29
32
  def has_empty_text_part_content(llm_response: LlmResponse) -> bool:
30
- if llm_response.content is None or len(llm_response.content.parts) == 0:
31
- return True
33
+ try:
34
+ if llm_response.content is None:
35
+ return True
32
36
 
33
- # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
34
- if len(llm_response.content.parts) == 1:
35
- part = llm_response.content.parts[0]
36
- if part.text is not None and len(part.text) == 0:
37
+ if not llm_response.content.parts:
37
38
  return True
38
39
 
39
- return False
40
+ # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
41
+ if len(llm_response.content.parts) == 1:
42
+ part = llm_response.content.parts[0]
43
+ if part.text is not None and len(part.text) == 0:
44
+ return True
45
+ return False
46
+ except Exception as e:
47
+ LOGGER.warning(f"Exception in has_empty_text_part_content {e}", exc_info=True)
48
+ return True
@@ -8,6 +8,7 @@ from google.adk import models
8
8
  from google.adk.tools import base_tool
9
9
  from google.adk.tools import tool_context
10
10
 
11
+ import opik
11
12
  from opik import context_storage
12
13
  from opik.decorator import arguments_helpers, span_creation_handler
13
14
  from opik.api_objects import opik_client, span, trace
@@ -77,7 +78,8 @@ class LegacyOpikTracer:
77
78
  trace_data = self._context_storage.pop_trace_data()
78
79
  assert trace_data is not None
79
80
  trace_data.init_end_time()
80
- self._opik_client.trace(**trace_data.as_parameters)
81
+ if opik.is_tracing_active():
82
+ self._opik_client.trace(**trace_data.as_parameters)
81
83
 
82
84
  def _end_current_span(
83
85
  self,
@@ -85,20 +87,21 @@ class LegacyOpikTracer:
85
87
  span_data = self._context_storage.pop_span_data()
86
88
  assert span_data is not None
87
89
  span_data.init_end_time()
88
- self._opik_client.span(**span_data.as_parameters)
90
+ if opik.is_tracing_active():
91
+ self._opik_client.span(**span_data.as_parameters)
89
92
 
90
93
  def _start_span(self, span_data: span.SpanData) -> None:
91
94
  self._context_storage.add_span_data(span_data)
92
95
  self._opik_created_spans.add(span_data.id)
93
96
 
94
- if self._opik_client.config.log_start_trace_span:
97
+ if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
95
98
  self._opik_client.span(**span_data.as_start_parameters)
96
99
 
97
100
  def _start_trace(self, trace_data: trace.TraceData) -> None:
98
101
  self._context_storage.set_trace_data(trace_data)
99
102
  self._current_trace_created_by_opik_tracer.set(trace_data.id)
100
103
 
101
- if self._opik_client.config.log_start_trace_span:
104
+ if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
102
105
  self._opik_client.trace(**trace_data.as_start_parameters)
103
106
 
104
107
  def _set_current_context_data(self, value: SpanOrTraceData) -> None:
@@ -7,6 +7,7 @@ from google.adk import models
7
7
  from google.adk.tools import base_tool
8
8
  from google.adk.tools import tool_context
9
9
 
10
+ import opik
10
11
  from opik import context_storage
11
12
  from opik.api_objects import opik_client, span, trace
12
13
  from opik.types import DistributedTraceHeadersDict
@@ -253,7 +254,8 @@ class OpikTracer:
253
254
  current_span.init_end_time()
254
255
  # We close this span manually because otherwise ADK will close it too late,
255
256
  # and it will also add tool spans inside of it, which we want to avoid.
256
- self._opik_client.span(**current_span.as_parameters)
257
+ if opik.is_tracing_active():
258
+ self._opik_client.span(**current_span.as_parameters)
257
259
  self._last_model_output = output
258
260
 
259
261
  except Exception as e:
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import Iterator, Optional, Tuple
3
3
 
4
4
  import opentelemetry.trace
5
+ import opik
5
6
  import opik.context_storage
6
7
  from opik.api_objects import trace, span
7
8
  from opik.decorator import (
@@ -100,7 +101,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
100
101
  # so we manually finalize it here to avoid incorrect span nesting.
101
102
  opik.context_storage.pop_span_data(ensure_id=current_span_data.id)
102
103
  current_span_data.init_end_time()
103
- self.opik_client.span(**current_span_data.as_parameters)
104
+ if opik.is_tracing_active():
105
+ self.opik_client.span(**current_span_data.as_parameters)
104
106
  current_span_data = opik.context_storage.top_span_data()
105
107
 
106
108
  try:
@@ -145,7 +147,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
145
147
  trace_data = opik.context_storage.pop_trace_data(ensure_id=trace_id)
146
148
  if trace_data is not None:
147
149
  trace_data.init_end_time()
148
- self.opik_client.trace(**trace_data.as_parameters)
150
+ if opik.is_tracing_active():
151
+ self.opik_client.trace(**trace_data.as_parameters)
149
152
 
150
153
  def _ensure_span_is_finalized(self, span_id: str) -> None:
151
154
  opik.context_storage.trim_span_data_stack_to_certain_span(span_id)
@@ -153,7 +156,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
153
156
  span_data = opik.context_storage.pop_span_data(ensure_id=span_id)
154
157
  if span_data is not None:
155
158
  span_data.init_end_time()
156
- self.opik_client.span(**span_data.as_parameters)
159
+ if opik.is_tracing_active():
160
+ self.opik_client.span(**span_data.as_parameters)
157
161
 
158
162
 
159
163
  def _prepare_trace_and_span_to_be_finalized(
@@ -2,7 +2,6 @@ import types
2
2
  from typing import TypeVar, List, Any, Set
3
3
  from . import opik_tracer
4
4
  import logging
5
- from opik import _logging
6
5
 
7
6
  from google.adk.tools import agent_tool
8
7
  from google.adk import agents
@@ -120,11 +119,7 @@ def track_adk_agent_recursive(
120
119
  Returns:
121
120
  The modified root agent with tracking enabled
122
121
  """
123
- _logging.log_once_at_level(
124
- logging.INFO,
125
- "`track_adk_agent_recursive` is experimental feature. Please let us know if something is not working as expected: https://github.com/comet-ml/opik/issues",
126
- logger=LOGGER,
127
- )
122
+
128
123
  recursive_callback_injector = RecursiveCallbackInjector(tracer)
129
124
  recursive_callback_injector.inject(root_agent)
130
125
 
@@ -4,11 +4,8 @@ import logging
4
4
  import dspy
5
5
  from dspy.utils import callback as dspy_callback
6
6
 
7
- import opik.types as types
8
- import opik.opik_context as opik_context
9
- import opik.context_storage as context_storage
7
+ from opik import context_storage, opik_context, tracing_runtime_config, types
10
8
  from opik.api_objects import helpers, span, trace, opik_client
11
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
12
9
  from opik.decorator import error_info_collector
13
10
 
14
11
  from .graph import build_mermaid_graph_from_module
@@ -4,8 +4,8 @@ from typing import Any, Dict, Optional
4
4
  import haystack
5
5
  from haystack import tracing
6
6
 
7
- import opik.api_objects.opik_client as opik_client
8
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
7
+ from opik import tracing_runtime_config
8
+ from opik.api_objects import opik_client
9
9
  from . import opik_tracer
10
10
 
11
11
  LOGGER = logging.getLogger(__name__)
@@ -5,10 +5,8 @@ from typing import Any, Dict, Iterator, List, Optional, Union
5
5
 
6
6
  from haystack import tracing
7
7
 
8
- import opik.url_helpers as url_helpers
9
- import opik.decorator.tracing_runtime_config as tracing_runtime_config
10
- import opik.decorator.span_creation_handler as span_creation_handler
11
- import opik.decorator.arguments_helpers as arguments_helpers
8
+ from opik import tracing_runtime_config, url_helpers
9
+ from opik.decorator import arguments_helpers, span_creation_handler
12
10
  from opik.api_objects import opik_client
13
11
  from opik.api_objects import span as opik_span
14
12
  from opik.api_objects import trace as opik_trace