opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +42 -0
- opik/rest_api/datasets/client.py +321 -123
- opik/rest_api/datasets/raw_client.py +470 -145
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +50 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_version_public.py +10 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
opik/api_objects/dataset/dataset.py CHANGED

@@ -1,14 +1,28 @@
 import logging
 import functools
 import time
-from typing import
+from typing import (
+    Optional,
+    Any,
+    List,
+    Dict,
+    Sequence,
+    Set,
+    TYPE_CHECKING,
+    Callable,
+    Iterator,
+)
 
 from opik.api_objects import rest_stream_parser
 from opik.rest_api import client as rest_api_client
-from opik.rest_api.types import
+from opik.rest_api.types import (
+    dataset_item_write as rest_dataset_item,
+    dataset_item as rest_dataset_item_read,
+)
 from opik.rest_api.core.api_error import ApiError
 from opik.message_processing.batching import sequence_splitter
 from opik.rate_limit import rate_limit
+from opik import id_helpers
 import opik.exceptions as exceptions
 import opik.config as config
 from opik.rest_client_configurator import retry_decorator
@@ -75,6 +89,7 @@ class Dataset:
         name: str,
         description: Optional[str],
         rest_client: rest_api_client.OpikApi,
+        dataset_items_count: Optional[int] = None,
     ) -> None:
         """
         A Dataset object. This object should not be created directly, instead use :meth:`opik.Opik.create_dataset` or :meth:`opik.Opik.get_dataset`.
@@ -82,6 +97,7 @@ class Dataset:
         self._name = name
         self._description = description
         self._rest_client = rest_client
+        self._dataset_items_count = dataset_items_count
 
         self._id_to_hash: Dict[str, str] = {}
         self._hashes: Set[str] = set()
@@ -103,13 +119,36 @@ class Dataset:
         """The description of the dataset."""
         return self._description
 
+    @property
+    def dataset_items_count(self) -> Optional[int]:
+        """
+        The total number of items in the dataset.
+
+        If the count is not cached locally, it will be fetched from the backend.
+        """
+        if self._dataset_items_count is None:
+            dataset_info = self._rest_client.datasets.get_dataset_by_identifier(
+                dataset_name=self._name
+            )
+            self._dataset_items_count = dataset_info.dataset_items_count
+        return self._dataset_items_count
+
     def _insert_batch_with_retry(
-        self,
+        self,
+        batch: List[rest_dataset_item.DatasetItemWrite],
+        batch_group_id: str,
     ) -> None:
-        """Insert a batch of dataset items with automatic retry on rate limit errors.
+        """Insert a batch of dataset items with automatic retry on rate limit errors.
+
+        Args:
+            batch: List of dataset items to insert.
+            batch_group_id: UUIDv7 identifier that groups all batches from a single
+                user operation together. All batches sent as part of one insert/update
+                call share the same batch_group_id.
+        """
         _ensure_rest_api_call_respecting_rate_limit(
             lambda: self._rest_client.datasets.create_or_update_dataset_items(
-                dataset_name=self._name, items=batch
+                dataset_name=self._name, items=batch, batch_group_id=batch_group_id
            )
        )
         LOGGER.debug("Successfully sent dataset items batch of size %d", len(batch))
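
The `dataset_items_count` property added above is lazily cached: the first access fetches the count via `get_dataset_by_identifier`, and, as later hunks show, `insert()`/`delete()` reset the cache. A minimal usage sketch — the dataset name is hypothetical; `opik.Opik` and `get_dataset` are the SDK's standard entry points:

import opik

client = opik.Opik()
dataset = client.get_dataset(name="my-dataset")  # hypothetical dataset name

# First access hits the backend; subsequent accesses reuse the cached value
# until an insert()/delete() invalidates it.
print(dataset.dataset_items_count)
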
@@ -150,13 +189,15 @@ class Dataset:
             max_length=constants.DATASET_ITEMS_MAX_BATCH_SIZE,
         )
 
+        batch_group_id = id_helpers.generate_id()
+
         for batch in batches:
             LOGGER.debug("Sending dataset items batch of size %d", len(batch))
-            self._insert_batch_with_retry(batch)
+            self._insert_batch_with_retry(batch, batch_group_id=batch_group_id)
 
     def insert(self, items: Sequence[Dict[str, Any]]) -> None:
         """
-        Insert new items into the dataset.
+        Insert new items into the dataset. A new dataset version will be created.
 
         Args:
             items: List of dicts (which will be converted to dataset items)
@@ -168,15 +209,17 @@ class Dataset:
         ]
         self.__internal_api__insert_items_as_dataclasses__(dataset_items)
 
+        # Invalidate the cached count so it will be fetched from backend on next access
+        self._dataset_items_count = None
+
     def __internal_api__sync_hashes__(self) -> None:
         """Updates all the hashes in the dataset"""
         LOGGER.debug("Start hash sync in dataset")
-        all_items = self.__internal_api__get_items_as_dataclasses__()
 
         self._id_to_hash = {}
         self._hashes = set()
 
-        for item in
+        for item in self.__internal_api__stream_items_as_dataclasses__():
             item_hash = item.content_hash()
             self._id_to_hash[item.id] = item_hash  # type: ignore
             self._hashes.add(item_hash)
@@ -201,9 +244,29 @@ class Dataset:
 
         self.insert(items)
 
+    def _delete_batch_with_retry(
+        self,
+        batch: List[str],
+        batch_group_id: str,
+    ) -> None:
+        """Delete a batch of dataset items with automatic retry on rate limit errors.
+
+        Args:
+            batch: List of item IDs to delete.
+            batch_group_id: UUIDv7 identifier that groups all batches from a single
+                user operation together. All batches sent as part of one delete
+                call share the same batch_group_id.
+        """
+        _ensure_rest_api_call_respecting_rate_limit(
+            lambda: self._rest_client.datasets.delete_dataset_items(
+                item_ids=batch, batch_group_id=batch_group_id
+            )
+        )
+        LOGGER.debug("Successfully deleted dataset items batch of size %d", len(batch))
+
     def delete(self, items_ids: List[str]) -> None:
         """
-        Delete items from the dataset.
+        Delete items from the dataset. A new dataset version will be created.
 
         Args:
             items_ids: List of item ids to delete.
@@ -212,9 +275,11 @@ class Dataset:
             items_ids, max_length=constants.DATASET_ITEMS_MAX_BATCH_SIZE
         )
 
+        batch_group_id = id_helpers.generate_id()
+
         for batch in batches:
             LOGGER.debug("Deleting dataset items batch: %s", batch)
-            self.
+            self._delete_batch_with_retry(batch, batch_group_id=batch_group_id)
 
             for item_id in batch:
                 if item_id in self._id_to_hash:
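
Both `insert` and `delete` now mint a single `batch_group_id` per user-level call and hand it to every REST batch, so the backend can attribute all batches to one logical operation. A standalone sketch of that pattern — the helper names are hypothetical stand-ins for `sequence_splitter.split_list_into_batches` and the retry-wrapped REST call; the SDK itself uses `id_helpers.generate_id()`, which produces a UUIDv7:

import uuid
from typing import Any, Callable, List, Sequence

def run_batched(
    items: Sequence[Any],
    split_into_batches: Callable[[Sequence[Any]], List[List[Any]]],  # hypothetical
    send_batch: Callable[[List[Any], str], None],  # hypothetical REST call
) -> None:
    # One id per user operation, shared by all of its batches.
    batch_group_id = str(uuid.uuid4())  # the SDK uses a UUIDv7 instead
    for batch in split_into_batches(items):
        send_batch(batch, batch_group_id)
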
@@ -222,12 +287,18 @@ class Dataset:
                     self._hashes.discard(hash)
                     del self._id_to_hash[item_id]
 
+        # Invalidate the cached count so it will be fetched from backend on next access
+        self._dataset_items_count = None
+
     def clear(self) -> None:
         """
-        Delete all items from the given dataset.
+        Delete all items from the given dataset. A new dataset version will be created.
         """
-
-
+        item_ids = [
+            item.id
+            for item in self.__internal_api__stream_items_as_dataclasses__()
+            if item.id is not None
+        ]
 
         self.delete(item_ids)
 
@@ -240,7 +311,7 @@ class Dataset:
         Returns:
             A pandas DataFrame containing all items in the dataset.
         """
-        dataset_items = self.
+        dataset_items = list(self.__internal_api__stream_items_as_dataclasses__())
 
         return converters.to_pandas(dataset_items, keys_mapping={})
 
@@ -251,7 +322,7 @@ class Dataset:
         Returns:
             A JSON string representation of all items in the dataset.
         """
-        dataset_items = self.
+        dataset_items = list(self.__internal_api__stream_items_as_dataclasses__())
 
         return converters.to_json(dataset_items, keys_mapping={})
 
@@ -265,65 +336,88 @@ class Dataset:
         Returns:
             A list of dictionaries objects representing the samples.
         """
-        dataset_items_as_dataclasses = self.__internal_api__get_items_as_dataclasses__(
-            nb_samples
-        )
         dataset_items_as_dicts = [
             {"id": item.id, **item.get_content()}
-            for item in
+            for item in self.__internal_api__stream_items_as_dataclasses__(nb_samples)
         ]
 
         return dataset_items_as_dicts
 
-
-    def __internal_api__get_items_as_dataclasses__(
+    def __internal_api__stream_items_as_dataclasses__(
         self,
         nb_samples: Optional[int] = None,
+        batch_size: Optional[int] = None,
         dataset_item_ids: Optional[List[str]] = None,
-    ) ->
-
+    ) -> Iterator[dataset_item.DatasetItem]:
+        """
+        Stream dataset items as a generator instead of loading all at once.
+
+        This method yields dataset items one at a time, enabling evaluation to start
+        processing items before the entire dataset is downloaded. This is particularly
+        useful for large datasets with heavy payloads (images, videos, audio).
+
+        Args:
+            nb_samples: Maximum number of items to retrieve. If None, all items are streamed.
+            batch_size: Maximum number of items to fetch per batch from the backend.
+                If None, uses the default value from constants.DATASET_STREAM_BATCH_SIZE.
+            dataset_item_ids: Optional list of specific item IDs to retrieve. If provided,
+                only items with matching IDs will be yielded.
+
+        Yields:
+            DatasetItem objects one at a time
+        """
+        if batch_size is None:
+            batch_size = constants.DATASET_STREAM_BATCH_SIZE
+
         last_retrieved_id: Optional[str] = None
         should_retrieve_more_items = True
-
+        items_yielded = 0
         dataset_items_ids_left = set(dataset_item_ids) if dataset_item_ids else None
 
         while should_retrieve_more_items:
-
-
-
-
-
-
-
-
+            # Wrap the streaming call in retry logic so we can resume from last_retrieved_id
+            @retry_decorator.opik_rest_retry
+            def _fetch_batch() -> List[rest_dataset_item_read.DatasetItem]:
+                return rest_stream_parser.read_and_parse_stream(
+                    stream=self._rest_client.datasets.stream_dataset_items(
+                        dataset_name=self._name,
+                        last_retrieved_id=last_retrieved_id,
+                        steam_limit=batch_size,
+                    ),
+                    item_class=rest_dataset_item_read.DatasetItem,
+                    nb_samples=nb_samples,
+                )
+
+            dataset_items = _fetch_batch()
 
             if len(dataset_items) == 0:
                 should_retrieve_more_items = False
+                break
 
             for item in dataset_items:
                 dataset_item_id = item.id
                 last_retrieved_id = dataset_item_id
 
+                # Filter by dataset_item_ids if provided
                 if dataset_items_ids_left is not None:
                     if dataset_item_id not in dataset_items_ids_left:
                         continue
                     else:
                         dataset_items_ids_left.remove(dataset_item_id)
 
-                data_item_content = item.get_content().get("data", {})
-
                 reconstructed_item = dataset_item.DatasetItem(
                     id=item.id,
                     trace_id=item.trace_id,
                     span_id=item.span_id,
                     source=item.source,
-                    **
+                    **item.data,
                 )
 
-
+                yield reconstructed_item
+                items_yielded += 1
 
             # Stop retrieving if we have enough samples
-            if nb_samples is not None and
+            if nb_samples is not None and items_yielded >= nb_samples:
                 should_retrieve_more_items = False
                 break
 
@@ -335,14 +429,13 @@ class Dataset:
                 should_retrieve_more_items = False
                 break
 
+        # Warn if some requested items were not found
         if dataset_items_ids_left and len(dataset_items_ids_left) > 0:
             LOGGER.warning(
                 "The following dataset items were not found in the dataset: %s",
                 dataset_items_ids_left,
            )
 
-        return results
-
     def insert_from_json(
         self,
         json_array: str,
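
Replacing `__internal_api__get_items_as_dataclasses__` with a generator lets callers process items while later batches are still being fetched; `to_pandas()`/`to_json()` simply materialize the stream with `list(...)`. A hedged consumption sketch — this is the internal dunder API shown in the diff above, not a public contract, and the dataset name is hypothetical:

import opik

client = opik.Opik()
dataset = client.get_dataset(name="my-dataset")

# Items arrive from the backend in batches of up to DATASET_STREAM_BATCH_SIZE
# but are yielded one at a time; nb_samples stops the stream early.
for item in dataset.__internal_api__stream_items_as_dataclasses__(nb_samples=5):
    print(item.id)
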
opik/api_objects/dataset/rest_operations.py CHANGED

@@ -31,6 +31,7 @@ def get_datasets(
             name=dataset_fern.name,
             description=dataset_fern.description,
             rest_client=rest_client,
+            dataset_items_count=dataset_fern.dataset_items_count,
         )
 
         if sync_items:
@@ -88,6 +89,7 @@ def get_dataset_experiments(
                 rest_client=rest_client,
                 streamer=streamer,
                 experiments_client=experiments_client,
+                tags=experiment_.tags,
             )
         )
 
opik/api_objects/experiment/experiment.py CHANGED

@@ -26,6 +26,7 @@ class Experiment:
         streamer: streamer.Streamer,
         experiments_client: experiments_client.ExperimentsClient,
         prompts: Optional[List[base_prompt.BasePrompt]] = None,
+        tags: Optional[List[str]] = None,
     ) -> None:
         self._id = id
         self._name = name
@@ -34,6 +35,7 @@ class Experiment:
         self._prompts = prompts
         self._streamer = streamer
         self._experiments_client = experiments_client
+        self._tags = tags
 
     @property
     def id(self) -> str:
@@ -53,6 +55,10 @@ class Experiment:
 
         return name
 
+    @property
+    def tags(self) -> Optional[List[str]]:
+        return self._tags
+
     @functools.cached_property
     def dataset_id(self) -> str:
         return self._rest_client.datasets.get_dataset_by_identifier(
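
With `tags` threaded through the constructor (see the `rest_operations.py` hunk above), experiment tags become readable from the SDK object. A small sketch, assuming the client's `get_experiment_by_name` accessor and a hypothetical experiment name:

import opik

client = opik.Opik()
experiment = client.get_experiment_by_name("my-experiment")  # assumed accessor
print(experiment.tags)  # Optional[List[str]]; None when no tags were set
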
opik/api_objects/helpers.py CHANGED

@@ -13,7 +13,7 @@ from ..rest_api.types import (
     trace_filter_public,
     trace_thread_filter,
 )
-from ..types import
+from ..types import BatchFeedbackScoreDict
 
 LOGGER = logging.getLogger(__name__)
 
@@ -139,7 +139,7 @@ def parse_search_expressions(
 
 
 def parse_feedback_score_messages(
-    scores: List[
+    scores: List[BatchFeedbackScoreDict],
     project_name: str,
     parsed_item_class: Type[ScoreMessageT],
     logger: logging.Logger,
@@ -155,9 +155,13 @@ def parse_feedback_score_messages(
 
     score_messages = [
         parsed_item_class(
+            id=score_dict["id"],
+            name=score_dict["name"],
+            value=score_dict["value"],
             source=constants.FEEDBACK_SCORE_SOURCE_SDK,
-            project_name=project_name,
-
+            project_name=score_dict.get("project_name") or project_name,
+            reason=score_dict.get("reason"),
+            category_name=score_dict.get("category_name"),
         )
         for score_dict in valid_scores
     ]
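
`parse_feedback_score_messages` now reads the full score payload from each dict instead of forwarding a single shared `project_name`. An illustrative `BatchFeedbackScoreDict`-shaped value (all field values hypothetical):

score = {
    "id": "trace-or-span-id",           # required: target entity id
    "name": "accuracy",                 # required: score name
    "value": 0.9,                       # required: numeric score
    "reason": "matched the reference",  # optional
    "category_name": "quality",         # optional
    # "project_name" is also optional and overrides the call-level default
}
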
opik/api_objects/local_recording.py CHANGED

@@ -1,9 +1,10 @@
 import contextlib
 from typing import Iterator, List
 from typing import Optional
+
 from . import opik_client
-from ..message_processing import message_processors_chain
 from ..message_processing.emulation import local_emulator_message_processor, models
+from ..message_processing.processors import message_processors_chain
 
 
 class _LocalRecordingHandle:
@@ -53,7 +54,7 @@ def record_traces_locally(
 
     # Disallow nested/local concurrent recordings in the same process
     existing_local = message_processors_chain.get_local_emulator_message_processor(
-        chain=client.
+        chain=client.__internal_api__message_processor__
     )
     if existing_local is not None and existing_local.is_active():
         raise RuntimeError(
@@ -61,10 +62,10 @@ def record_traces_locally(
     )
 
     message_processors_chain.toggle_local_emulator_message_processor(
-        active=True, chain=client.
+        active=True, chain=client.__internal_api__message_processor__, reset=True
     )
     local = message_processors_chain.get_local_emulator_message_processor(
-        chain=client.
+        chain=client.__internal_api__message_processor__
     )
     if local is None:
         # Should not happen given the default chain, but guard just in case
@@ -76,5 +77,5 @@ def record_traces_locally(
     finally:
         client.flush()
         message_processors_chain.toggle_local_emulator_message_processor(
-            active=False, chain=client.
+            active=False, chain=client.__internal_api__message_processor__, reset=True
         )
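
The import change above reflects the module moves listed at the top of this diff (`message_processors.py`, `message_processors_chain.py`, and `online_message_processor.py` into `message_processing/processors/`). Any internal code importing the chain switches paths accordingly:

# old: from opik.message_processing import message_processors_chain
from opik.message_processing.processors import message_processors_chain
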
opik/api_objects/observation_data.py ADDED

@@ -0,0 +1,101 @@
+import dataclasses
+import datetime
+import logging
+from typing import Any, Dict, List, Optional, TypeVar
+
+import opik.api_objects.attachment as attachment
+import opik.datetime_helpers as datetime_helpers
+from opik.types import ErrorInfoDict, FeedbackScoreDict
+from . import data_helpers
+
+LOGGER = logging.getLogger(__name__)
+
+ObservationDataT = TypeVar("ObservationDataT", bound="ObservationData")
+
+
+@dataclasses.dataclass(kw_only=True)
+class ObservationData:
+    """
+    Base class for TraceData and SpanData containing common attributes and methods.
+
+    This class uses Python 3.10's kw_only=True feature to allow optional parameters
+    to be defined in the parent class while child classes can have required parameters.
+    """
+
+    name: Optional[str] = None
+    start_time: Optional[datetime.datetime] = dataclasses.field(
+        default_factory=datetime_helpers.local_timestamp
+    )
+    end_time: Optional[datetime.datetime] = None
+    metadata: Optional[Dict[str, Any]] = None
+    input: Optional[Dict[str, Any]] = None
+    output: Optional[Dict[str, Any]] = None
+    tags: Optional[List[str]] = None
+    feedback_scores: Optional[List[FeedbackScoreDict]] = None
+    project_name: Optional[str] = None
+    error_info: Optional[ErrorInfoDict] = None
+    attachments: Optional[List[attachment.Attachment]] = None
+
+    def update(self: ObservationDataT, **new_data: Any) -> ObservationDataT:
+        """
+        Updates the attributes of the object with the provided key-value pairs. This method checks if
+        an attribute exists before updating it and merges the data appropriately for specific
+        keywords like metadata, output, input, attachments, and tags. If a key doesn't correspond
+        to an attribute of the object or the provided value is None, the update is skipped.
+
+        Args:
+            **new_data: Key-value pairs of attributes to update. Keys should match existing
+                attributes on the object, and values that are None will not update.
+
+        Returns:
+            The updated object instance (preserves the actual subclass type).
+        """
+        for key, value in new_data.items():
+            if value is None:
+                continue
+
+            if key not in self.__dict__ and key != "prompts":
+                LOGGER.debug(
+                    "An attempt to update observation with parameter name it doesn't have: %s",
+                    key,
+                )
+                continue
+
+            if key == "metadata":
+                self.metadata = data_helpers.merge_metadata(
+                    self.metadata, new_metadata=value
+                )
+                continue
+            elif key == "output":
+                self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
+                continue
+            elif key == "input":
+                self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
+                continue
+            elif key == "attachments":
+                self._update_attachments(value)
+                continue
+            elif key == "tags":
+                self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
+                continue
+            elif key == "prompts":
+                self.metadata = data_helpers.merge_metadata(
+                    self.metadata, new_metadata=new_data.get("metadata"), prompts=value
+                )
+                continue
+
+            self.__dict__[key] = value
+
+        return self
+
+    def init_end_time(self: ObservationDataT) -> ObservationDataT:
+        """Initialize the end_time to the current timestamp."""
+        self.end_time = datetime_helpers.local_timestamp()
+        return self
+
+    def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
+        """Merge new attachments with existing ones."""
+        if self.attachments is None:
+            self.attachments = attachments
+        else:
+            self.attachments.extend(attachments)