opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +42 -0
  114. opik/rest_api/datasets/client.py +321 -123
  115. opik/rest_api/datasets/raw_client.py +470 -145
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +50 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset.py +2 -0
  146. opik/rest_api/types/dataset_item.py +1 -1
  147. opik/rest_api/types/dataset_item_batch.py +4 -0
  148. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  149. opik/rest_api/types/dataset_item_compare.py +1 -1
  150. opik/rest_api/types/dataset_item_filter.py +4 -0
  151. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  152. opik/rest_api/types/dataset_item_page_public.py +0 -1
  153. opik/rest_api/types/dataset_item_public.py +1 -1
  154. opik/rest_api/types/dataset_public.py +2 -0
  155. opik/rest_api/types/dataset_version_public.py +10 -0
  156. opik/rest_api/types/dataset_version_summary.py +46 -0
  157. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  158. opik/rest_api/types/experiment.py +9 -0
  159. opik/rest_api/types/experiment_public.py +9 -0
  160. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  161. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  162. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  163. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  164. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  165. opik/rest_api/types/project.py +1 -0
  166. opik/rest_api/types/project_detailed.py +1 -0
  167. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  168. opik/rest_api/types/project_reference.py +31 -0
  169. opik/rest_api/types/project_reference_public.py +31 -0
  170. opik/rest_api/types/project_stats_summary_item.py +1 -0
  171. opik/rest_api/types/prompt_version.py +1 -0
  172. opik/rest_api/types/prompt_version_detail.py +1 -0
  173. opik/rest_api/types/prompt_version_page_public.py +5 -0
  174. opik/rest_api/types/prompt_version_public.py +1 -0
  175. opik/rest_api/types/prompt_version_update.py +33 -0
  176. opik/rest_api/types/provider_api_key.py +5 -1
  177. opik/rest_api/types/provider_api_key_provider.py +2 -1
  178. opik/rest_api/types/provider_api_key_public.py +5 -1
  179. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  180. opik/rest_api/types/service_toggles_config.py +11 -1
  181. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  182. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  183. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  184. opik/types.py +36 -0
  185. opik/validation/chat_prompt_messages.py +241 -0
  186. opik/validation/feedback_score.py +3 -3
  187. opik/validation/validator.py +28 -0
  188. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
  189. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
  190. opik/cli/export.py +0 -791
  191. opik/cli/import_command.py +0 -575
  192. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  193. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  194. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  195. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,20 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import pydantic
6
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
7
+
8
+
9
+ class SpanUserDefinedMetricPythonCodePublic(UniversalBaseModel):
10
+ metric: str
11
+ arguments: typing.Dict[str, str]
12
+
13
+ if IS_PYDANTIC_V2:
14
+ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
15
+ else:
16
+
17
+ class Config:
18
+ frozen = True
19
+ smart_union = True
20
+ extra = pydantic.Extra.allow
@@ -0,0 +1,20 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import pydantic
6
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
7
+
8
+
9
+ class SpanUserDefinedMetricPythonCodeWrite(UniversalBaseModel):
10
+ metric: str
11
+ arguments: typing.Dict[str, str]
12
+
13
+ if IS_PYDANTIC_V2:
14
+ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
15
+ else:
16
+
17
+ class Config:
18
+ frozen = True
19
+ smart_union = True
20
+ extra = pydantic.Extra.allow
opik/types.py CHANGED
@@ -2,6 +2,7 @@ import enum
2
2
  import sys
3
3
  from typing import Literal, Optional
4
4
 
5
+ from pydantic import StrictStr
5
6
  from typing_extensions import TypedDict
6
7
 
7
8
  if sys.version_info < (3, 11):
@@ -79,6 +80,41 @@ class FeedbackScoreDict(TypedDict):
79
80
  """An optional explanation or justification for the given score."""
80
81
 
81
82
 
83
class BatchFeedbackScoreDict(TypedDict):
    """
    Shape of a single feedback score passed to batch logging operations.

    Compared with a score logged for one known object, a batch entry must
    carry the ``id`` of its target and may name its own ``project_name``.
    """

    id: Required[str]
    """
    Identifier of the object the score is attached to: a trace_id, span_id
    or thread_id, depending on how the score is logged.
    Mandatory for batch operations.
    """

    name: Required[str]
    """Name of the feedback metric or criterion being scored."""

    value: Required[float]
    """Numerical value of the score."""

    project_name: NotRequired[Optional[StrictStr]]
    """
    Project this particular score belongs to.
    When omitted, the project_name argument of the method call is used,
    and failing that the default project name configured in the Opik instance.
    """

    category_name: NotRequired[Optional[str]]
    """Optional category name associated with the score."""

    reason: NotRequired[Optional[str]]
    """Optional explanation or justification for the score."""
+
117
+
82
118
  class ErrorInfoDict(TypedDict):
83
119
  """
84
120
  A TypedDict representing the information about the error occurred.
@@ -0,0 +1,241 @@
1
+ from typing import Any, List, Optional
2
+
3
+ import opik.exceptions as exceptions
4
+ from . import validator, result
5
+
6
+
7
class ChatPromptMessagesValidator(validator.RaisableValidator):
    """
    Validator for ChatPrompt messages list.

    Validates that messages is a list of dicts with:
    - exactly the keys "role" and "content"
    - "role" key with value "system", "user", or "assistant"
    - "content" key with value either string or list of dicts
    - If content is list of dicts, each dict must have "type" key
    - A "text" part must carry a string under "text"; a URL-based part
      ("image_url", "video_url", "audio_url") must carry, under the key
      named after its type, a dict holding a string "url"

    All problems found are collected (validation does not stop at the first
    one) and stored in ``validation_result``.
    """

    VALID_ROLES = {"system", "user", "assistant"}
    URL_BASED_CONTENT_TYPES = {"image_url", "video_url", "audio_url"}

    def __init__(self, messages: Any):
        self.messages = messages
        # Populated by validate(); stays None until validate() has been called.
        self.validation_result: Optional[result.ValidationResult] = None

    def validate(self) -> result.ValidationResult:
        """Run all checks, store the outcome in ``validation_result`` and return it."""
        failure_reasons: List[str] = []

        # Individual messages can only be inspected when the container is a list.
        if self._validate_messages_is_list(failure_reasons):
            for idx, message in enumerate(self.messages):
                self._validate_message(f"messages[{idx}]", message, failure_reasons)

        if failure_reasons:
            self.validation_result = result.ValidationResult(
                failed=True, failure_reasons=failure_reasons
            )
        else:
            self.validation_result = result.ValidationResult(failed=False)

        return self.validation_result

    def _validate_messages_is_list(self, failure_reasons: List[str]) -> bool:
        """Validate that messages is a list. Returns False if validation fails."""
        if isinstance(self.messages, list):
            return True

        failure_reasons.append(
            f"messages must be a list but {type(self.messages).__name__} was given"
        )
        return False

    def _validate_message(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single message structure, role, and content."""
        # Role and content are only checked once the message has the right shape.
        if not self._validate_message_structure(prefix, message, failure_reasons):
            return

        self._validate_role(prefix, message, failure_reasons)
        self._validate_content(prefix, message, failure_reasons)

    def _validate_message_structure(
        self, prefix: str, message: Any, failure_reasons: List[str]
    ) -> bool:
        """Validate that message is a dict with exactly 'role' and 'content' keys. Returns False if validation fails."""
        if not isinstance(message, dict):
            failure_reasons.append(
                f"{prefix}: must be a dict but {type(message).__name__} was given"
            )
            return False

        message_keys = set(message.keys())
        expected_keys = {"role", "content"}
        if message_keys == expected_keys:
            return True

        # Report each kind of mismatch only when it actually occurs, so that
        # a message never gets an "unexpected keys: []" or
        # "missing required keys: []" reason.
        # key=str keeps the usual order for string keys and avoids a
        # TypeError when keys of different types are mixed.
        missing_keys = expected_keys - message_keys
        if missing_keys:
            failure_reasons.append(
                f"{prefix}: missing required keys: {sorted(missing_keys, key=str)}"
            )

        extra_keys = message_keys - expected_keys
        if extra_keys:
            failure_reasons.append(
                f"{prefix}: unexpected keys: {sorted(extra_keys, key=str)}. "
                f"Expected only: {sorted(expected_keys)}"
            )

        return False

    def _validate_role(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the role field of a message."""
        role = message.get("role")
        # The isinstance check comes first: an unhashable role (e.g. a list)
        # would otherwise make the set membership test raise TypeError
        # instead of producing a validation failure.
        if isinstance(role, str) and role in self.VALID_ROLES:
            return

        valid_roles_str = ", ".join(f"'{r}'" for r in sorted(self.VALID_ROLES))
        failure_reasons.append(
            f"{prefix}.role: must be one of [{valid_roles_str}] "
            f"but {repr(role)} was given"
        )

    def _validate_content(
        self, prefix: str, message: dict, failure_reasons: List[str]
    ) -> None:
        """Validate the content field of a message."""
        content = message.get("content")
        if content is None:
            failure_reasons.append(f"{prefix}.content: must not be None")
        elif isinstance(content, list):
            self._validate_content_list(prefix, content, failure_reasons)
        elif not isinstance(content, str):
            failure_reasons.append(
                f"{prefix}.content: must be either str or list of dicts "
                f"but {type(content).__name__} was given"
            )

    def _validate_content_list(
        self, prefix: str, content: list, failure_reasons: List[str]
    ) -> None:
        """Validate content when it is a list of content parts."""
        for content_idx, content_part in enumerate(content):
            self._validate_content_part(
                f"{prefix}.content[{content_idx}]", content_part, failure_reasons
            )

    def _validate_content_part(
        self, content_prefix: str, content_part: Any, failure_reasons: List[str]
    ) -> None:
        """Validate a single content part in the content list."""
        if not isinstance(content_part, dict):
            failure_reasons.append(
                f"{content_prefix}: must be a dict "
                f"but {type(content_part).__name__} was given"
            )
            return

        if "type" not in content_part:
            failure_reasons.append(f"{content_prefix}: must have 'type' key")
            return

        # Validate type-specific requirements
        self._validate_content_type_specific(
            content_prefix, content_part.get("type"), content_part, failure_reasons
        )

    def _validate_content_type_specific(
        self,
        content_prefix: str,
        content_type: Any,
        content_part: dict,
        failure_reasons: List[str],
    ) -> None:
        """
        Validate type-specific requirements for content parts.

        Only "text" and the URL-based types have extra requirements; any
        other type value is accepted without further checks.
        """
        if not isinstance(content_type, str):
            # A non-string type cannot match any known type. Returning here
            # treats it like any other unrecognised type and avoids a
            # TypeError from the set lookup below when the value is unhashable.
            return

        if content_type in self.URL_BASED_CONTENT_TYPES:
            # For URL-based parts the payload key has the same name as the type.
            self._validate_required_url_object(
                content_prefix,
                content_part,
                content_type,
                content_type,
                failure_reasons,
            )
        elif content_type == "text":
            self._validate_required_string_key(
                content_prefix, content_part, "text", "text", failure_reasons
            )

    def _validate_required_string_key(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a string."""
        if key_name not in content_part:
            failure_reasons.append(
                f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            )
            return

        value = content_part.get(key_name)
        if not isinstance(value, str):
            failure_reasons.append(
                f"{prefix}.{key_name}: must be a string "
                f"but {type(value).__name__} was given"
            )

    def _validate_required_url_object(
        self,
        prefix: str,
        content_part: dict,
        key_name: str,
        type_name: str,
        failure_reasons: List[str],
    ) -> None:
        """Validate that a required key exists and is a dict with a 'url' key that is a string."""
        if key_name not in content_part:
            failure_reasons.append(
                f"{prefix}: must have '{key_name}' key when type is '{type_name}'"
            )
            return

        url_object = content_part.get(key_name)
        if not isinstance(url_object, dict):
            failure_reasons.append(
                f"{prefix}.{key_name}: must be a dict "
                f"but {type(url_object).__name__} was given"
            )
            return

        if "url" not in url_object:
            failure_reasons.append(f"{prefix}.{key_name}: must have 'url' key")
            return

        url = url_object.get("url")
        if not isinstance(url, str):
            failure_reasons.append(
                f"{prefix}.{key_name}.url: must be a string "
                f"but {type(url).__name__} was given"
            )

    def raise_if_validation_failed(self) -> None:
        """
        Raise ValidationError when the stored validation result has failure reasons.

        Does nothing when validate() has not been called yet or found no problems.
        """
        if (
            self.validation_result is not None
            and len(self.validation_result.failure_reasons) > 0
        ):
            raise exceptions.ValidationError(
                prefix="ChatPrompt.__init__",
                failure_reasons=self.validation_result.failure_reasons,
            )
@@ -1,16 +1,16 @@
1
1
  import pydantic
2
2
 
3
3
  from typing import Any
4
- from ..types import FeedbackScoreDict
4
+ from ..types import BatchFeedbackScoreDict
5
5
  from . import validator, result
6
6
 
7
7
 
8
8
  class PydanticWrapper(pydantic.BaseModel):
9
9
  model_config = pydantic.ConfigDict(extra="forbid")
10
- feedback_score: FeedbackScoreDict
10
+ feedback_score: BatchFeedbackScoreDict
11
11
 
12
12
 
13
- EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str]}"
13
+ EXPECTED_TYPES = "{'id': str, 'name': str, 'value': float, 'reason': NotRequired[str], 'category_name': NotRequired[str], 'project_name': NotRequired[str]}"
14
14
 
15
15
 
16
16
  class FeedbackScoreValidator(validator.Validator):
@@ -1,4 +1,5 @@
1
1
  import abc
2
+ from typing import Any
2
3
 
3
4
  from . import result
4
5
 
@@ -7,3 +8,30 @@ class Validator(abc.ABC):
7
8
  @abc.abstractmethod
8
9
  def validate(self) -> result.ValidationResult:
9
10
  pass
11
+
12
+
13
class RaisableValidator(Validator):
    """
    Abstract Validator that additionally exposes raise_if_validation_failed.

    Intended for validators that must turn a failed validation into a
    ValidationError exception, typically while a class is being initialized.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Initialize the validator; this base implementation does nothing.

        Subclasses may override it with an initialization signature of their own.
        """

    @abc.abstractmethod
    def raise_if_validation_failed(self) -> None:
        """
        Raise a ValidationError if validation failed.

        Implementations should inspect the validation result and raise an
        appropriate ValidationError exception when validation did not pass.
        """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opik
3
- Version: 1.9.39
3
+ Version: 1.9.86
4
4
  Summary: Comet tool for logging and evaluating LLM traces
5
5
  Home-page: https://www.comet.com
6
6
  Author: Comet ML Inc.
@@ -14,13 +14,12 @@ Classifier: License :: OSI Approved :: Apache Software License
14
14
  Classifier: Natural Language :: English
15
15
  Classifier: Programming Language :: Python :: 3 :: Only
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
17
  Classifier: Programming Language :: Python :: 3.10
19
18
  Classifier: Programming Language :: Python :: 3.11
20
19
  Classifier: Programming Language :: Python :: 3.12
21
20
  Classifier: Programming Language :: Python :: 3.13
22
21
  Classifier: Programming Language :: Python :: 3.14
23
- Requires-Python: >=3.9
22
+ Requires-Python: >=3.10
24
23
  Description-Content-Type: text/markdown
25
24
  License-File: LICENSE
26
25
  Requires-Dist: boto3-stubs[bedrock-runtime]>=1.34.110
@@ -70,9 +69,9 @@ Dynamic: summary
70
69
  Opik
71
70
  </div>
72
71
  </h1>
73
- <h2 align="center" style="border-bottom: none">Open-source LLM evaluation platform</h2>
72
+ <h2 align="center" style="border-bottom: none">Open-source AI Observability, Evaluation, and Optimization</h2>
74
73
  <p align="center">
75
- Opik helps you build, evaluate, and optimize LLM systems that run better, faster, and cheaper. From RAG chatbots to code assistants to complex agentic pipelines, Opik provides comprehensive tracing, evaluations, dashboards, and powerful features like <b>Opik Agent Optimizer</b> and <b>Opik Guardrails</b> to improve and secure your LLM powered applications in production.
74
+ Opik helps you build, test, and optimize generative AI applications that run better, from prototype to production. From RAG chatbots to code assistants to complex agentic systems, Opik provides comprehensive tracing, evaluation, and automatic prompt and tool optimization to take the guesswork out of AI development.
76
75
  </p>
77
76
 
78
77
  <div align="center">
@@ -215,7 +214,7 @@ For production or larger-scale self-hosted deployments, Opik can be installed on
215
214
 
216
215
  ## 💻 Opik Client SDK
217
216
 
218
- Opik provides a suite of client libraries and a REST API to interact with the Opik server. This includes SDKs for Python, TypeScript, and Ruby (via OpenTelemetry), allowing for seamless integration into your workflows. For detailed API and SDK references, see the [Opik Client Reference Documentation](apps/opik-documentation/documentation/fern/docs/reference/overview.mdx).
217
+ Opik provides a suite of client libraries and a REST API to interact with the Opik server. This includes SDKs for Python, TypeScript, and Ruby (via OpenTelemetry), allowing for seamless integration into your workflows. For detailed API and SDK references, see the [Opik Client Reference Documentation](https://www.comet.com/docs/opik/reference/overview?from=llm&utm_source=opik&utm_medium=github&utm_content=reference_link&utm_campaign=opik).
219
218
 
220
219
  ### Python SDK Quick Start
221
220
 
@@ -238,7 +237,7 @@ opik configure
238
237
  ```
239
238
 
240
239
  > [!TIP]
241
- > You can also call `opik.configure(use_local=True)` from your Python code to configure the SDK to run on a local self-hosted installation, or provide API key and workspace details directly for Comet.com. Refer to the [Python SDK documentation](apps/opik-documentation/documentation/fern/docs/reference/python-sdk/) for more configuration options.
240
+ > You can also call `opik.configure(use_local=True)` from your Python code to configure the SDK to run on a local self-hosted installation, or provide API key and workspace details directly for Comet.com. Refer to the [Python SDK documentation](https://www.comet.com/docs/opik/python-sdk-reference/?from=llm&utm_source=opik&utm_medium=github&utm_content=python_sdk_docs_link&utm_campaign=opik) for more configuration options.
242
241
 
243
242
  You are now ready to start logging traces using the [Python SDK](https://www.comet.com/docs/opik/python-sdk-reference/?from=llm&utm_source=opik&utm_medium=github&utm_content=sdk_link2&utm_campaign=opik).
244
243
 
@@ -272,6 +271,7 @@ The easiest way to log traces is to use one of our direct integrations. Opik sup
272
271
  | Groq | Log traces for Groq LLM calls | [Documentation](https://www.comet.com/docs/opik/integrations/groq?utm_source=opik&utm_medium=github&utm_content=groq_link&utm_campaign=opik) |
273
272
  | Guardrails | Log traces for Guardrails AI validations | [Documentation](https://www.comet.com/docs/opik/integrations/guardrails-ai?utm_source=opik&utm_medium=github&utm_content=guardrails_link&utm_campaign=opik) |
274
273
  | Haystack | Log traces for Haystack calls | [Documentation](https://www.comet.com/docs/opik/integrations/haystack?utm_source=opik&utm_medium=github&utm_content=haystack_link&utm_campaign=opik) |
274
+ | Harbor | Log traces for Harbor benchmark evaluation trials | [Documentation](https://www.comet.com/docs/opik/integrations/harbor?utm_source=opik&utm_medium=github&utm_content=harbor_link&utm_campaign=opik) |
275
275
  | Instructor | Log traces for LLM calls made with Instructor | [Documentation](https://www.comet.com/docs/opik/integrations/instructor?utm_source=opik&utm_medium=github&utm_content=instructor_link&utm_campaign=opik) |
276
276
  | LangChain (Python) | Log traces for LangChain LLM calls | [Documentation](https://www.comet.com/docs/opik/integrations/langchain?utm_source=opik&utm_medium=github&utm_content=langchain_link&utm_campaign=opik) |
277
277
  | LangChain (JS/TS) | Log traces for LangChain JavaScript/TypeScript calls | [Documentation](https://www.comet.com/docs/opik/integrations/langchainjs?utm_source=opik&utm_medium=github&utm_content=langchainjs_link&utm_campaign=opik) |