opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff shows the content of publicly released package versions as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions.
Files changed (192)
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +38 -0
  114. opik/rest_api/datasets/client.py +249 -148
  115. opik/rest_api/datasets/raw_client.py +356 -217
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +46 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset_item.py +1 -1
  146. opik/rest_api/types/dataset_item_batch.py +4 -0
  147. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  148. opik/rest_api/types/dataset_item_compare.py +1 -1
  149. opik/rest_api/types/dataset_item_filter.py +4 -0
  150. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  151. opik/rest_api/types/dataset_item_page_public.py +0 -1
  152. opik/rest_api/types/dataset_item_public.py +1 -1
  153. opik/rest_api/types/dataset_version_public.py +5 -0
  154. opik/rest_api/types/dataset_version_summary.py +5 -0
  155. opik/rest_api/types/dataset_version_summary_public.py +5 -0
  156. opik/rest_api/types/experiment.py +9 -0
  157. opik/rest_api/types/experiment_public.py +9 -0
  158. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  159. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  160. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  161. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  162. opik/rest_api/types/project.py +1 -0
  163. opik/rest_api/types/project_detailed.py +1 -0
  164. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  165. opik/rest_api/types/project_reference.py +31 -0
  166. opik/rest_api/types/project_reference_public.py +31 -0
  167. opik/rest_api/types/project_stats_summary_item.py +1 -0
  168. opik/rest_api/types/prompt_version.py +1 -0
  169. opik/rest_api/types/prompt_version_detail.py +1 -0
  170. opik/rest_api/types/prompt_version_page_public.py +5 -0
  171. opik/rest_api/types/prompt_version_public.py +1 -0
  172. opik/rest_api/types/prompt_version_update.py +33 -0
  173. opik/rest_api/types/provider_api_key.py +5 -1
  174. opik/rest_api/types/provider_api_key_provider.py +2 -1
  175. opik/rest_api/types/provider_api_key_public.py +5 -1
  176. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  177. opik/rest_api/types/service_toggles_config.py +11 -1
  178. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  179. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  180. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  181. opik/types.py +36 -0
  182. opik/validation/chat_prompt_messages.py +241 -0
  183. opik/validation/feedback_score.py +3 -3
  184. opik/validation/validator.py +28 -0
  185. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
  186. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
  187. opik/cli/export.py +0 -791
  188. opik/cli/import_command.py +0 -575
  189. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  190. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  191. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  192. {opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
opik/api_objects/dataset/dataset.py
@@ -1,14 +1,28 @@
  import logging
  import functools
  import time
- from typing import Optional, Any, List, Dict, Sequence, Set, TYPE_CHECKING, Callable
+ from typing import (
+     Optional,
+     Any,
+     List,
+     Dict,
+     Sequence,
+     Set,
+     TYPE_CHECKING,
+     Callable,
+     Iterator,
+ )

  from opik.api_objects import rest_stream_parser
  from opik.rest_api import client as rest_api_client
- from opik.rest_api.types import dataset_item_write as rest_dataset_item
+ from opik.rest_api.types import (
+     dataset_item_write as rest_dataset_item,
+     dataset_item as rest_dataset_item_read,
+ )
  from opik.rest_api.core.api_error import ApiError
  from opik.message_processing.batching import sequence_splitter
  from opik.rate_limit import rate_limit
+ from opik import id_helpers
  import opik.exceptions as exceptions
  import opik.config as config
  from opik.rest_client_configurator import retry_decorator
@@ -75,6 +89,7 @@ class Dataset:
          name: str,
          description: Optional[str],
          rest_client: rest_api_client.OpikApi,
+         dataset_items_count: Optional[int] = None,
      ) -> None:
          """
          A Dataset object. This object should not be created directly, instead use :meth:`opik.Opik.create_dataset` or :meth:`opik.Opik.get_dataset`.
@@ -82,6 +97,7 @@ class Dataset:
          self._name = name
          self._description = description
          self._rest_client = rest_client
+         self._dataset_items_count = dataset_items_count

          self._id_to_hash: Dict[str, str] = {}
          self._hashes: Set[str] = set()
@@ -103,13 +119,36 @@ class Dataset:
          """The description of the dataset."""
          return self._description

+     @property
+     def dataset_items_count(self) -> Optional[int]:
+         """
+         The total number of items in the dataset.
+
+         If the count is not cached locally, it will be fetched from the backend.
+         """
+         if self._dataset_items_count is None:
+             dataset_info = self._rest_client.datasets.get_dataset_by_identifier(
+                 dataset_name=self._name
+             )
+             self._dataset_items_count = dataset_info.dataset_items_count
+         return self._dataset_items_count
+
      def _insert_batch_with_retry(
-         self, batch: List[rest_dataset_item.DatasetItemWrite]
+         self,
+         batch: List[rest_dataset_item.DatasetItemWrite],
+         batch_group_id: str,
      ) -> None:
-         """Insert a batch of dataset items with automatic retry on rate limit errors."""
+         """Insert a batch of dataset items with automatic retry on rate limit errors.
+
+         Args:
+             batch: List of dataset items to insert.
+             batch_group_id: UUIDv7 identifier that groups all batches from a single
+                 user operation together. All batches sent as part of one insert/update
+                 call share the same batch_group_id.
+         """
          _ensure_rest_api_call_respecting_rate_limit(
              lambda: self._rest_client.datasets.create_or_update_dataset_items(
-                 dataset_name=self._name, items=batch
+                 dataset_name=self._name, items=batch, batch_group_id=batch_group_id
              )
          )
          LOGGER.debug("Successfully sent dataset items batch of size %d", len(batch))
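Note: the new `dataset_items_count` property above is lazily cached. A minimal usage sketch against the API shown in this hunk (the dataset name is a placeholder; `opik.Opik.get_dataset` is the accessor named in the class docstring):

    import opik

    client = opik.Opik()
    dataset = client.get_dataset(name="my-dataset")  # placeholder name

    # First access may call get_dataset_by_identifier on the backend;
    # later accesses reuse the cached value until insert/delete/clear resets it.
    print(dataset.dataset_items_count)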
@@ -150,13 +189,15 @@ class Dataset:
              max_length=constants.DATASET_ITEMS_MAX_BATCH_SIZE,
          )

+         batch_group_id = id_helpers.generate_id()
+
          for batch in batches:
              LOGGER.debug("Sending dataset items batch of size %d", len(batch))
-             self._insert_batch_with_retry(batch)
+             self._insert_batch_with_retry(batch, batch_group_id=batch_group_id)

      def insert(self, items: Sequence[Dict[str, Any]]) -> None:
          """
-         Insert new items into the dataset.
+         Insert new items into the dataset. A new dataset version will be created.

          Args:
              items: List of dicts (which will be converted to dataset items)
@@ -168,15 +209,17 @@ class Dataset:
          ]
          self.__internal_api__insert_items_as_dataclasses__(dataset_items)

+         # Invalidate the cached count so it will be fetched from backend on next access
+         self._dataset_items_count = None
+
      def __internal_api__sync_hashes__(self) -> None:
          """Updates all the hashes in the dataset"""
          LOGGER.debug("Start hash sync in dataset")
-         all_items = self.__internal_api__get_items_as_dataclasses__()

          self._id_to_hash = {}
          self._hashes = set()

-         for item in all_items:
+         for item in self.__internal_api__stream_items_as_dataclasses__():
              item_hash = item.content_hash()
              self._id_to_hash[item.id] = item_hash  # type: ignore
              self._hashes.add(item_hash)
@@ -201,9 +244,29 @@ class Dataset:

          self.insert(items)

+     def _delete_batch_with_retry(
+         self,
+         batch: List[str],
+         batch_group_id: str,
+     ) -> None:
+         """Delete a batch of dataset items with automatic retry on rate limit errors.
+
+         Args:
+             batch: List of item IDs to delete.
+             batch_group_id: UUIDv7 identifier that groups all batches from a single
+                 user operation together. All batches sent as part of one delete
+                 call share the same batch_group_id.
+         """
+         _ensure_rest_api_call_respecting_rate_limit(
+             lambda: self._rest_client.datasets.delete_dataset_items(
+                 item_ids=batch, batch_group_id=batch_group_id
+             )
+         )
+         LOGGER.debug("Successfully deleted dataset items batch of size %d", len(batch))
+
      def delete(self, items_ids: List[str]) -> None:
          """
-         Delete items from the dataset.
+         Delete items from the dataset. A new dataset version will be created.

          Args:
              items_ids: List of item ids to delete.
@@ -212,9 +275,11 @@ class Dataset:
              items_ids, max_length=constants.DATASET_ITEMS_MAX_BATCH_SIZE
          )

+         batch_group_id = id_helpers.generate_id()
+
          for batch in batches:
              LOGGER.debug("Deleting dataset items batch: %s", batch)
-             self._rest_client.datasets.delete_dataset_items(item_ids=batch)
+             self._delete_batch_with_retry(batch, batch_group_id=batch_group_id)

              for item_id in batch:
                  if item_id in self._id_to_hash:
@@ -222,12 +287,18 @@ class Dataset:
                      self._hashes.discard(hash)
                      del self._id_to_hash[item_id]

+         # Invalidate the cached count so it will be fetched from backend on next access
+         self._dataset_items_count = None
+
      def clear(self) -> None:
          """
-         Delete all items from the given dataset.
+         Delete all items from the given dataset. A new dataset version will be created.
          """
-         all_items = self.__internal_api__get_items_as_dataclasses__()
-         item_ids = [item.id for item in all_items if item.id is not None]
+         item_ids = [
+             item.id
+             for item in self.__internal_api__stream_items_as_dataclasses__()
+             if item.id is not None
+         ]

          self.delete(item_ids)

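Note: `insert`, `delete`, and `clear` now each create a new dataset version, group their REST batches under one generated `batch_group_id`, and reset the cached item count. A caller-side sketch (item contents and IDs are placeholders):

    dataset.insert([{"input": {"question": "What is Opik?"}, "expected_output": "..."}])
    dataset.delete(items_ids=["0195a3c0-..."])  # placeholder item id
    dataset.clear()  # streams all item IDs, then deletes them in batches

    # The cached count was invalidated, so this triggers a fresh backend fetch.
    print(dataset.dataset_items_count)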
@@ -240,7 +311,7 @@ class Dataset:
          Returns:
              A pandas DataFrame containing all items in the dataset.
          """
-         dataset_items = self.__internal_api__get_items_as_dataclasses__()
+         dataset_items = list(self.__internal_api__stream_items_as_dataclasses__())

          return converters.to_pandas(dataset_items, keys_mapping={})

@@ -251,7 +322,7 @@ class Dataset:
          Returns:
              A JSON string representation of all items in the dataset.
          """
-         dataset_items = self.__internal_api__get_items_as_dataclasses__()
+         dataset_items = list(self.__internal_api__stream_items_as_dataclasses__())

          return converters.to_json(dataset_items, keys_mapping={})

@@ -265,65 +336,88 @@ class Dataset:
          Returns:
              A list of dictionaries objects representing the samples.
          """
-         dataset_items_as_dataclasses = self.__internal_api__get_items_as_dataclasses__(
-             nb_samples
-         )
          dataset_items_as_dicts = [
              {"id": item.id, **item.get_content()}
-             for item in dataset_items_as_dataclasses
+             for item in self.__internal_api__stream_items_as_dataclasses__(nb_samples)
          ]

          return dataset_items_as_dicts

-     @retry_decorator.opik_rest_retry
-     def __internal_api__get_items_as_dataclasses__(
+     def __internal_api__stream_items_as_dataclasses__(
          self,
          nb_samples: Optional[int] = None,
+         batch_size: Optional[int] = None,
          dataset_item_ids: Optional[List[str]] = None,
-     ) -> List[dataset_item.DatasetItem]:
-         results: List[dataset_item.DatasetItem] = []
+     ) -> Iterator[dataset_item.DatasetItem]:
+         """
+         Stream dataset items as a generator instead of loading all at once.
+
+         This method yields dataset items one at a time, enabling evaluation to start
+         processing items before the entire dataset is downloaded. This is particularly
+         useful for large datasets with heavy payloads (images, videos, audio).
+
+         Args:
+             nb_samples: Maximum number of items to retrieve. If None, all items are streamed.
+             batch_size: Maximum number of items to fetch per batch from the backend.
+                 If None, uses the default value from constants.DATASET_STREAM_BATCH_SIZE.
+             dataset_item_ids: Optional list of specific item IDs to retrieve. If provided,
+                 only items with matching IDs will be yielded.
+
+         Yields:
+             DatasetItem objects one at a time
+         """
+         if batch_size is None:
+             batch_size = constants.DATASET_STREAM_BATCH_SIZE
+
          last_retrieved_id: Optional[str] = None
          should_retrieve_more_items = True
-
+         items_yielded = 0
          dataset_items_ids_left = set(dataset_item_ids) if dataset_item_ids else None

          while should_retrieve_more_items:
-             dataset_items = rest_stream_parser.read_and_parse_stream(
-                 stream=self._rest_client.datasets.stream_dataset_items(
-                     dataset_name=self._name,
-                     last_retrieved_id=last_retrieved_id,
-                 ),
-                 item_class=dataset_item.DatasetItem,
-                 nb_samples=nb_samples,
-             )
+             # Wrap the streaming call in retry logic so we can resume from last_retrieved_id
+             @retry_decorator.opik_rest_retry
+             def _fetch_batch() -> List[rest_dataset_item_read.DatasetItem]:
+                 return rest_stream_parser.read_and_parse_stream(
+                     stream=self._rest_client.datasets.stream_dataset_items(
+                         dataset_name=self._name,
+                         last_retrieved_id=last_retrieved_id,
+                         steam_limit=batch_size,
+                     ),
+                     item_class=rest_dataset_item_read.DatasetItem,
+                     nb_samples=nb_samples,
+                 )
+
+             dataset_items = _fetch_batch()

              if len(dataset_items) == 0:
                  should_retrieve_more_items = False
+                 break

              for item in dataset_items:
                  dataset_item_id = item.id
                  last_retrieved_id = dataset_item_id

+                 # Filter by dataset_item_ids if provided
                  if dataset_items_ids_left is not None:
                      if dataset_item_id not in dataset_items_ids_left:
                          continue
                      else:
                          dataset_items_ids_left.remove(dataset_item_id)

-                 data_item_content = item.get_content().get("data", {})
-
                  reconstructed_item = dataset_item.DatasetItem(
                      id=item.id,
                      trace_id=item.trace_id,
                      span_id=item.span_id,
                      source=item.source,
-                     **data_item_content,
+                     **item.data,
                  )

-                 results.append(reconstructed_item)
+                 yield reconstructed_item
+                 items_yielded += 1

                  # Stop retrieving if we have enough samples
-                 if nb_samples is not None and len(results) == nb_samples:
+                 if nb_samples is not None and items_yielded >= nb_samples:
                      should_retrieve_more_items = False
                      break

@@ -335,14 +429,13 @@ class Dataset:
              should_retrieve_more_items = False
              break

+         # Warn if some requested items were not found
          if dataset_items_ids_left and len(dataset_items_ids_left) > 0:
              LOGGER.warning(
                  "The following dataset items were not found in the dataset: %s",
                  dataset_items_ids_left,
              )

-         return results
-
      def insert_from_json(
          self,
          json_array: str,
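Note: `__internal_api__stream_items_as_dataclasses__` is internal, but it defines the new pull-based contract the public helpers shown above (e.g. `to_pandas`, `to_json`) build on. A minimal sketch, assuming a `Dataset` instance named `dataset`:

    # Items arrive in backend batches and are yielded one at a time, so
    # processing can start before the whole dataset has been downloaded.
    for item in dataset.__internal_api__stream_items_as_dataclasses__(
        nb_samples=100,  # stop after 100 items
        batch_size=25,   # overrides constants.DATASET_STREAM_BATCH_SIZE
    ):
        print(item.id, item.get_content())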
opik/api_objects/dataset/rest_operations.py
@@ -31,6 +31,7 @@ def get_datasets(
          name=dataset_fern.name,
          description=dataset_fern.description,
          rest_client=rest_client,
+         dataset_items_count=dataset_fern.dataset_items_count,
      )

      if sync_items:
@@ -88,6 +89,7 @@ def get_dataset_experiments(
              rest_client=rest_client,
              streamer=streamer,
              experiments_client=experiments_client,
+             tags=experiment_.tags,
          )
      )

opik/api_objects/experiment/experiment.py
@@ -26,6 +26,7 @@ class Experiment:
          streamer: streamer.Streamer,
          experiments_client: experiments_client.ExperimentsClient,
          prompts: Optional[List[base_prompt.BasePrompt]] = None,
+         tags: Optional[List[str]] = None,
      ) -> None:
          self._id = id
          self._name = name
@@ -34,6 +35,7 @@ class Experiment:
          self._prompts = prompts
          self._streamer = streamer
          self._experiments_client = experiments_client
+         self._tags = tags

      @property
      def id(self) -> str:
@@ -53,6 +55,10 @@ class Experiment:

          return name

+     @property
+     def tags(self) -> Optional[List[str]]:
+         return self._tags
+
      @functools.cached_property
      def dataset_id(self) -> str:
          return self._rest_client.datasets.get_dataset_by_identifier(
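Note: experiments now expose their tags, and `get_dataset_experiments` (previous hunk) passes them through from the REST payload. Assuming an `Experiment` instance named `experiment` obtained from the SDK:

    # `tags` is Optional[List[str]] and is None when the experiment has no tags.
    if experiment.tags:
        print(", ".join(experiment.tags))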
opik/api_objects/helpers.py
@@ -13,7 +13,7 @@ from ..rest_api.types import (
      trace_filter_public,
      trace_thread_filter,
  )
- from ..types import FeedbackScoreDict
+ from ..types import BatchFeedbackScoreDict

  LOGGER = logging.getLogger(__name__)

@@ -139,7 +139,7 @@


  def parse_feedback_score_messages(
-     scores: List[FeedbackScoreDict],
+     scores: List[BatchFeedbackScoreDict],
      project_name: str,
      parsed_item_class: Type[ScoreMessageT],
      logger: logging.Logger,
@@ -155,9 +155,13 @@

      score_messages = [
          parsed_item_class(
+             id=score_dict["id"],
+             name=score_dict["name"],
+             value=score_dict["value"],
              source=constants.FEEDBACK_SCORE_SOURCE_SDK,
-             project_name=project_name,
-             **score_dict,
+             project_name=score_dict.get("project_name") or project_name,
+             reason=score_dict.get("reason"),
+             category_name=score_dict.get("category_name"),
          )
          for score_dict in valid_scores
      ]
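Note: the parsing above implies the shape of `BatchFeedbackScoreDict`: `id`, `name`, and `value` are required (indexed directly), while `reason`, `category_name`, and a per-score `project_name` override are optional (read with `.get`). A sketch of one score dict (the id is a placeholder):

    score = {
        "id": "0195a3c0-...",            # target trace/span id (required)
        "name": "relevance",             # required
        "value": 0.9,                    # required
        "reason": "answer is on-topic",  # optional
        "category_name": None,           # optional
        "project_name": "my-project",    # optional; falls back to the call-level project_name
    }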
opik/api_objects/local_recording.py
@@ -1,9 +1,10 @@
  import contextlib
  from typing import Iterator, List
  from typing import Optional
+
  from . import opik_client
- from ..message_processing import message_processors_chain
  from ..message_processing.emulation import local_emulator_message_processor, models
+ from ..message_processing.processors import message_processors_chain


  class _LocalRecordingHandle:
@@ -53,7 +54,7 @@ def record_traces_locally(

      # Disallow nested/local concurrent recordings in the same process
      existing_local = message_processors_chain.get_local_emulator_message_processor(
-         chain=client._message_processor
+         chain=client.__internal_api__message_processor__
      )
      if existing_local is not None and existing_local.is_active():
          raise RuntimeError(
@@ -61,10 +62,10 @@ def record_traces_locally(
      )

      message_processors_chain.toggle_local_emulator_message_processor(
-         active=True, chain=client._message_processor, reset=True
+         active=True, chain=client.__internal_api__message_processor__, reset=True
      )
      local = message_processors_chain.get_local_emulator_message_processor(
-         chain=client._message_processor
+         chain=client.__internal_api__message_processor__
      )
      if local is None:
          # Should not happen given the default chain, but guard just in case
@@ -76,5 +77,5 @@ def record_traces_locally(
      finally:
          client.flush()
          message_processors_chain.toggle_local_emulator_message_processor(
-             active=False, chain=client._message_processor, reset=True
+             active=False, chain=client.__internal_api__message_processor__, reset=True
          )
opik/api_objects/observation_data.py
@@ -0,0 +1,101 @@
+ import dataclasses
+ import datetime
+ import logging
+ from typing import Any, Dict, List, Optional, TypeVar
+
+ import opik.api_objects.attachment as attachment
+ import opik.datetime_helpers as datetime_helpers
+ from opik.types import ErrorInfoDict, FeedbackScoreDict
+ from . import data_helpers
+
+ LOGGER = logging.getLogger(__name__)
+
+ ObservationDataT = TypeVar("ObservationDataT", bound="ObservationData")
+
+
+ @dataclasses.dataclass(kw_only=True)
+ class ObservationData:
+     """
+     Base class for TraceData and SpanData containing common attributes and methods.
+
+     This class uses Python 3.10's kw_only=True feature to allow optional parameters
+     to be defined in the parent class while child classes can have required parameters.
+     """
+
+     name: Optional[str] = None
+     start_time: Optional[datetime.datetime] = dataclasses.field(
+         default_factory=datetime_helpers.local_timestamp
+     )
+     end_time: Optional[datetime.datetime] = None
+     metadata: Optional[Dict[str, Any]] = None
+     input: Optional[Dict[str, Any]] = None
+     output: Optional[Dict[str, Any]] = None
+     tags: Optional[List[str]] = None
+     feedback_scores: Optional[List[FeedbackScoreDict]] = None
+     project_name: Optional[str] = None
+     error_info: Optional[ErrorInfoDict] = None
+     attachments: Optional[List[attachment.Attachment]] = None
+
+     def update(self: ObservationDataT, **new_data: Any) -> ObservationDataT:
+         """
+         Updates the attributes of the object with the provided key-value pairs. This method checks if
+         an attribute exists before updating it and merges the data appropriately for specific
+         keywords like metadata, output, input, attachments, and tags. If a key doesn't correspond
+         to an attribute of the object or the provided value is None, the update is skipped.
+
+         Args:
+             **new_data: Key-value pairs of attributes to update. Keys should match existing
+                 attributes on the object, and values that are None will not update.
+
+         Returns:
+             The updated object instance (preserves the actual subclass type).
+         """
+         for key, value in new_data.items():
+             if value is None:
+                 continue
+
+             if key not in self.__dict__ and key != "prompts":
+                 LOGGER.debug(
+                     "An attempt to update observation with parameter name it doesn't have: %s",
+                     key,
+                 )
+                 continue
+
+             if key == "metadata":
+                 self.metadata = data_helpers.merge_metadata(
+                     self.metadata, new_metadata=value
+                 )
+                 continue
+             elif key == "output":
+                 self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
+                 continue
+             elif key == "input":
+                 self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
+                 continue
+             elif key == "attachments":
+                 self._update_attachments(value)
+                 continue
+             elif key == "tags":
+                 self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
+                 continue
+             elif key == "prompts":
+                 self.metadata = data_helpers.merge_metadata(
+                     self.metadata, new_metadata=new_data.get("metadata"), prompts=value
+                 )
+                 continue
+
+             self.__dict__[key] = value
+
+         return self
+
+     def init_end_time(self: ObservationDataT) -> ObservationDataT:
+         """Initialize the end_time to the current timestamp."""
+         self.end_time = datetime_helpers.local_timestamp()
+         return self
+
+     def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
+         """Merge new attachments with existing ones."""
+         if self.attachments is None:
+             self.attachments = attachments
+         else:
+             self.attachments.extend(attachments)
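Note: `ObservationData` is the new shared base that `span_data.py` and `trace_data.py` now inherit from (hence their large deletions in the file list). A sketch of the merge semantics of `update`, instantiating the base class directly only for illustration (exact merge rules live in `data_helpers`):

    data = ObservationData(name="llm-call", input={"prompt": "hi"})

    # metadata/input/output/tags/attachments are merged rather than overwritten;
    # None values are skipped, and unknown keys are ignored with a debug log.
    data.update(output={"completion": "hello"}, tags=["prod"])
    data.update(metadata={"model": "gpt-4o"}, unknown_key=1)  # unknown_key is dropped

    data.init_end_time()  # stamps end_time with the current local timestamp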