opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik/api_objects/attachment/attachment_context.py +36 -0
- opik/api_objects/attachment/attachments_extractor.py +153 -0
- opik/api_objects/attachment/client.py +1 -0
- opik/api_objects/attachment/converters.py +2 -0
- opik/api_objects/attachment/decoder.py +18 -0
- opik/api_objects/attachment/decoder_base64.py +83 -0
- opik/api_objects/attachment/decoder_helpers.py +137 -0
- opik/api_objects/constants.py +2 -0
- opik/api_objects/dataset/dataset.py +133 -40
- opik/api_objects/dataset/rest_operations.py +2 -0
- opik/api_objects/experiment/experiment.py +6 -0
- opik/api_objects/helpers.py +8 -4
- opik/api_objects/local_recording.py +6 -5
- opik/api_objects/observation_data.py +101 -0
- opik/api_objects/opik_client.py +78 -45
- opik/api_objects/opik_query_language.py +9 -3
- opik/api_objects/prompt/chat/chat_prompt.py +18 -1
- opik/api_objects/prompt/client.py +8 -1
- opik/api_objects/span/span_data.py +3 -88
- opik/api_objects/threads/threads_client.py +7 -4
- opik/api_objects/trace/trace_data.py +3 -74
- opik/api_objects/validation_helpers.py +3 -3
- opik/cli/exports/__init__.py +131 -0
- opik/cli/exports/dataset.py +278 -0
- opik/cli/exports/experiment.py +784 -0
- opik/cli/exports/project.py +685 -0
- opik/cli/exports/prompt.py +578 -0
- opik/cli/exports/utils.py +406 -0
- opik/cli/harbor.py +39 -0
- opik/cli/imports/__init__.py +439 -0
- opik/cli/imports/dataset.py +143 -0
- opik/cli/imports/experiment.py +1192 -0
- opik/cli/imports/project.py +262 -0
- opik/cli/imports/prompt.py +177 -0
- opik/cli/imports/utils.py +280 -0
- opik/cli/main.py +14 -12
- opik/config.py +12 -1
- opik/datetime_helpers.py +12 -0
- opik/decorator/arguments_helpers.py +4 -1
- opik/decorator/base_track_decorator.py +111 -37
- opik/decorator/context_manager/span_context_manager.py +5 -1
- opik/decorator/generator_wrappers.py +5 -4
- opik/decorator/span_creation_handler.py +13 -4
- opik/evaluation/engine/engine.py +111 -28
- opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
- opik/evaluation/evaluator.py +12 -0
- opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
- opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
- opik/evaluation/metrics/heuristics/equals.py +11 -7
- opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
- opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
- opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
- opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
- opik/evaluation/metrics/ragas_metric.py +43 -23
- opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
- opik/evaluation/models/litellm/util.py +4 -20
- opik/evaluation/models/models_factory.py +19 -5
- opik/evaluation/rest_operations.py +3 -3
- opik/evaluation/threads/helpers.py +3 -2
- opik/file_upload/file_uploader.py +13 -0
- opik/file_upload/upload_options.py +2 -0
- opik/integrations/adk/legacy_opik_tracer.py +9 -11
- opik/integrations/adk/opik_tracer.py +2 -2
- opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
- opik/integrations/dspy/callback.py +100 -14
- opik/integrations/dspy/parsers.py +168 -0
- opik/integrations/harbor/__init__.py +17 -0
- opik/integrations/harbor/experiment_service.py +269 -0
- opik/integrations/harbor/opik_tracker.py +528 -0
- opik/integrations/haystack/opik_tracer.py +2 -2
- opik/integrations/langchain/__init__.py +15 -2
- opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
- opik/integrations/langchain/opik_tracer.py +258 -160
- opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
- opik/integrations/llama_index/callback.py +43 -6
- opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
- opik/integrations/openai/opik_tracker.py +99 -4
- opik/integrations/openai/videos/__init__.py +9 -0
- opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
- opik/integrations/openai/videos/videos_create_decorator.py +159 -0
- opik/integrations/openai/videos/videos_download_decorator.py +110 -0
- opik/message_processing/batching/base_batcher.py +14 -21
- opik/message_processing/batching/batch_manager.py +22 -10
- opik/message_processing/batching/batchers.py +32 -40
- opik/message_processing/batching/flushing_thread.py +0 -3
- opik/message_processing/emulation/emulator_message_processor.py +36 -1
- opik/message_processing/emulation/models.py +21 -0
- opik/message_processing/messages.py +9 -0
- opik/message_processing/preprocessing/__init__.py +0 -0
- opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
- opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
- opik/message_processing/preprocessing/constants.py +1 -0
- opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
- opik/message_processing/preprocessing/preprocessor.py +36 -0
- opik/message_processing/processors/__init__.py +0 -0
- opik/message_processing/processors/attachments_extraction_processor.py +146 -0
- opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
- opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
- opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
- opik/message_processing/queue_consumer.py +4 -2
- opik/message_processing/streamer.py +71 -33
- opik/message_processing/streamer_constructors.py +36 -8
- opik/plugins/pytest/experiment_runner.py +1 -1
- opik/plugins/pytest/hooks.py +5 -3
- opik/rest_api/__init__.py +42 -0
- opik/rest_api/datasets/client.py +321 -123
- opik/rest_api/datasets/raw_client.py +470 -145
- opik/rest_api/experiments/client.py +26 -0
- opik/rest_api/experiments/raw_client.py +26 -0
- opik/rest_api/llm_provider_key/client.py +4 -4
- opik/rest_api/llm_provider_key/raw_client.py +4 -4
- opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
- opik/rest_api/manual_evaluation/client.py +101 -0
- opik/rest_api/manual_evaluation/raw_client.py +172 -0
- opik/rest_api/optimizations/client.py +0 -166
- opik/rest_api/optimizations/raw_client.py +0 -248
- opik/rest_api/projects/client.py +9 -0
- opik/rest_api/projects/raw_client.py +13 -0
- opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
- opik/rest_api/prompts/client.py +130 -2
- opik/rest_api/prompts/raw_client.py +175 -0
- opik/rest_api/traces/client.py +101 -0
- opik/rest_api/traces/raw_client.py +120 -0
- opik/rest_api/types/__init__.py +50 -0
- opik/rest_api/types/audio_url.py +19 -0
- opik/rest_api/types/audio_url_public.py +19 -0
- opik/rest_api/types/audio_url_write.py +19 -0
- opik/rest_api/types/automation_rule_evaluator.py +38 -2
- opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
- opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
- opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
- opik/rest_api/types/dataset.py +2 -0
- opik/rest_api/types/dataset_item.py +1 -1
- opik/rest_api/types/dataset_item_batch.py +4 -0
- opik/rest_api/types/dataset_item_changes_public.py +5 -0
- opik/rest_api/types/dataset_item_compare.py +1 -1
- opik/rest_api/types/dataset_item_filter.py +4 -0
- opik/rest_api/types/dataset_item_page_compare.py +0 -1
- opik/rest_api/types/dataset_item_page_public.py +0 -1
- opik/rest_api/types/dataset_item_public.py +1 -1
- opik/rest_api/types/dataset_public.py +2 -0
- opik/rest_api/types/dataset_version_public.py +10 -0
- opik/rest_api/types/dataset_version_summary.py +46 -0
- opik/rest_api/types/dataset_version_summary_public.py +46 -0
- opik/rest_api/types/experiment.py +9 -0
- opik/rest_api/types/experiment_public.py +9 -0
- opik/rest_api/types/group_content_with_aggregations.py +1 -0
- opik/rest_api/types/llm_as_judge_message_content.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
- opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
- opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
- opik/rest_api/types/project.py +1 -0
- opik/rest_api/types/project_detailed.py +1 -0
- opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
- opik/rest_api/types/project_reference.py +31 -0
- opik/rest_api/types/project_reference_public.py +31 -0
- opik/rest_api/types/project_stats_summary_item.py +1 -0
- opik/rest_api/types/prompt_version.py +1 -0
- opik/rest_api/types/prompt_version_detail.py +1 -0
- opik/rest_api/types/prompt_version_page_public.py +5 -0
- opik/rest_api/types/prompt_version_public.py +1 -0
- opik/rest_api/types/prompt_version_update.py +33 -0
- opik/rest_api/types/provider_api_key.py +5 -1
- opik/rest_api/types/provider_api_key_provider.py +2 -1
- opik/rest_api/types/provider_api_key_public.py +5 -1
- opik/rest_api/types/provider_api_key_public_provider.py +2 -1
- opik/rest_api/types/service_toggles_config.py +11 -1
- opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
- opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
- opik/types.py +36 -0
- opik/validation/chat_prompt_messages.py +241 -0
- opik/validation/feedback_score.py +3 -3
- opik/validation/validator.py +28 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
- opik/cli/export.py +0 -791
- opik/cli/import_command.py +0 -575
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
- {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import threading
|
|
1
2
|
from typing import Type, Dict
|
|
2
3
|
from .. import messages
|
|
3
4
|
from . import base_batcher
|
|
@@ -15,30 +16,41 @@ class BatchManager:
|
|
|
15
16
|
self._flushing_thread = flushing_thread.FlushingThread(
|
|
16
17
|
batchers=list(self._message_to_batcher_mapping.values())
|
|
17
18
|
)
|
|
19
|
+
self._lock = threading.RLock()
|
|
18
20
|
|
|
19
21
|
def start(self) -> None:
|
|
20
22
|
self._flushing_thread.start()
|
|
21
23
|
|
|
22
24
|
def stop(self) -> None:
|
|
23
|
-
self.
|
|
25
|
+
with self._lock:
|
|
26
|
+
# stop the flushing thread
|
|
27
|
+
self._flushing_thread.close()
|
|
28
|
+
# force flush all pending messages
|
|
29
|
+
self.flush()
|
|
24
30
|
|
|
25
31
|
def message_supports_batching(self, message: messages.BaseMessage) -> bool:
|
|
32
|
+
if message is None:
|
|
33
|
+
return False
|
|
34
|
+
|
|
26
35
|
if hasattr(message, "supports_batching"):
|
|
27
36
|
return message.supports_batching
|
|
28
37
|
|
|
29
38
|
return message.__class__ in self._message_to_batcher_mapping
|
|
30
39
|
|
|
31
40
|
def process_message(self, message: messages.BaseMessage) -> None:
|
|
32
|
-
self.
|
|
41
|
+
with self._lock:
|
|
42
|
+
self._message_to_batcher_mapping[type(message)].add(message)
|
|
33
43
|
|
|
34
44
|
def is_empty(self) -> bool:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
45
|
+
with self._lock:
|
|
46
|
+
return all(
|
|
47
|
+
[
|
|
48
|
+
batcher.is_empty()
|
|
49
|
+
for batcher in self._message_to_batcher_mapping.values()
|
|
50
|
+
]
|
|
51
|
+
)
|
|
41
52
|
|
|
42
53
|
def flush(self) -> None:
|
|
43
|
-
|
|
44
|
-
batcher.
|
|
54
|
+
with self._lock:
|
|
55
|
+
for batcher in self._message_to_batcher_mapping.values():
|
|
56
|
+
batcher.flush()
|
|
@@ -37,8 +37,9 @@ class CreateSpanMessageBatcher(base_batcher.BaseBatcher):
|
|
|
37
37
|
return batches
|
|
38
38
|
|
|
39
39
|
def add(self, message: messages.CreateSpanMessage) -> None: # type: ignore
|
|
40
|
-
# remove any duplicate
|
|
41
|
-
|
|
40
|
+
# remove any duplicate start span message from the batch that was already added
|
|
41
|
+
if message.end_time is not None:
|
|
42
|
+
self._remove_matching_messages(lambda x: x.span_id == message.span_id) # type: ignore
|
|
42
43
|
|
|
43
44
|
return super().add(message)
|
|
44
45
|
|
|
@@ -73,8 +74,9 @@ class CreateTraceMessageBatcher(base_batcher.BaseBatcher):
|
|
|
73
74
|
return batches
|
|
74
75
|
|
|
75
76
|
def add(self, message: messages.CreateTraceMessage) -> None: # type: ignore
|
|
76
|
-
# remove any duplicate
|
|
77
|
-
|
|
77
|
+
# remove any duplicate start trace message from the batch that was already added
|
|
78
|
+
if message.end_time is not None:
|
|
79
|
+
self._remove_matching_messages(lambda x: x.trace_id == message.trace_id) # type: ignore
|
|
78
80
|
|
|
79
81
|
return super().add(message)
|
|
80
82
|
|
|
@@ -99,26 +101,21 @@ class BaseAddFeedbackScoresBatchMessageBatcher(base_batcher.BaseBatcher):
|
|
|
99
101
|
messages.AddThreadsFeedbackScoresBatchMessage,
|
|
100
102
|
],
|
|
101
103
|
) -> None:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
n_accumulated_messages = len(self._accumulated_messages)
|
|
104
|
+
new_messages = message.batch
|
|
105
|
+
n_new_messages = len(new_messages)
|
|
106
|
+
n_accumulated_messages = len(self._accumulated_messages)
|
|
106
107
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
self._max_batch_size - n_accumulated_messages
|
|
110
|
-
)
|
|
108
|
+
if n_new_messages + n_accumulated_messages >= self._max_batch_size:
|
|
109
|
+
free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
|
|
111
110
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
free_space_in_accumulator:
|
|
115
|
-
]
|
|
111
|
+
messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
|
|
112
|
+
messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
|
|
116
113
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
114
|
+
self._accumulated_messages += messages_that_fit_in_batch
|
|
115
|
+
new_messages = messages_that_dont_fit_in_batch
|
|
116
|
+
self.flush()
|
|
120
117
|
|
|
121
|
-
|
|
118
|
+
self._accumulated_messages += new_messages
|
|
122
119
|
|
|
123
120
|
|
|
124
121
|
class AddSpanFeedbackScoresBatchMessageBatcher(
|
|
@@ -195,23 +192,18 @@ class CreateExperimentItemsBatchMessageBatcher(base_batcher.BaseBatcher):
|
|
|
195
192
|
def add( # type: ignore
|
|
196
193
|
self, message: messages.CreateExperimentItemsBatchMessage
|
|
197
194
|
) -> None:
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
self._accumulated_messages += messages_that_fit_in_batch
|
|
214
|
-
new_messages = messages_that_dont_fit_in_batch
|
|
215
|
-
self.flush()
|
|
216
|
-
|
|
217
|
-
self._accumulated_messages += new_messages
|
|
195
|
+
new_messages = message.batch
|
|
196
|
+
n_new_messages = len(new_messages)
|
|
197
|
+
n_accumulated_messages = len(self._accumulated_messages)
|
|
198
|
+
|
|
199
|
+
if n_new_messages + n_accumulated_messages >= self._max_batch_size:
|
|
200
|
+
free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
|
|
201
|
+
|
|
202
|
+
messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
|
|
203
|
+
messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
|
|
204
|
+
|
|
205
|
+
self._accumulated_messages += messages_that_fit_in_batch
|
|
206
|
+
new_messages = messages_that_dont_fit_in_batch
|
|
207
|
+
self.flush()
|
|
208
|
+
|
|
209
|
+
self._accumulated_messages += new_messages
|
|
@@ -9,7 +9,9 @@ from opik import dict_utils
|
|
|
9
9
|
from opik.rest_api.types import span_write, trace_write
|
|
10
10
|
from opik.types import ErrorInfoDict, SpanType
|
|
11
11
|
from . import models
|
|
12
|
-
from .. import
|
|
12
|
+
from .. import messages
|
|
13
|
+
from ..processors import message_processors
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
LOGGER = logging.getLogger(__name__)
|
|
15
17
|
|
|
@@ -77,6 +79,12 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
|
|
|
77
79
|
self._span_to_feedback_scores: Dict[
|
|
78
80
|
str, List[models.FeedbackScoreModel]
|
|
79
81
|
] = collections.defaultdict(list)
|
|
82
|
+
self._trace_to_attachments: Dict[str, List[models.AttachmentModel]] = (
|
|
83
|
+
collections.defaultdict(list)
|
|
84
|
+
)
|
|
85
|
+
self._span_to_attachments: Dict[str, List[models.AttachmentModel]] = (
|
|
86
|
+
collections.defaultdict(list)
|
|
87
|
+
)
|
|
80
88
|
self._experiment_items: List[models.ExperimentItemModel] = []
|
|
81
89
|
|
|
82
90
|
def is_active(self) -> bool:
|
|
@@ -111,6 +119,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
|
|
|
111
119
|
|
|
112
120
|
for trace in self._trace_trees:
|
|
113
121
|
trace.feedback_scores = self._trace_to_feedback_scores[trace.id]
|
|
122
|
+
trace.attachments = self._trace_to_attachments[trace.id] or None
|
|
114
123
|
|
|
115
124
|
self._trace_trees.sort(key=lambda x: x.start_time)
|
|
116
125
|
return self._trace_trees
|
|
@@ -176,6 +185,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
|
|
|
176
185
|
for span_id in all_span_ids:
|
|
177
186
|
span = self._span_observations[span_id]
|
|
178
187
|
span.feedback_scores = self._span_to_feedback_scores[span_id]
|
|
188
|
+
span.attachments = self._span_to_attachments[span_id] or None
|
|
179
189
|
|
|
180
190
|
self._span_trees.sort(key=lambda x: x.start_time)
|
|
181
191
|
|
|
@@ -353,6 +363,8 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
|
|
|
353
363
|
messages.CreateSpansBatchMessage: self._handle_create_spans_batch_message, # type: ignore
|
|
354
364
|
messages.CreateTraceBatchMessage: self._handle_create_traces_batch_message, # type: ignore
|
|
355
365
|
messages.CreateExperimentItemsBatchMessage: self._handle_create_experiment_items_batch_message, # type: ignore
|
|
366
|
+
messages.AttachmentSupportingMessage: self._noop_handler, # type: ignore
|
|
367
|
+
messages.CreateAttachmentMessage: self._handle_create_attachment_message, # type: ignore
|
|
356
368
|
}
|
|
357
369
|
|
|
358
370
|
def _handle_create_trace_message(
|
|
@@ -553,6 +565,29 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
|
|
|
553
565
|
)
|
|
554
566
|
self._experiment_items.append(experiment_item)
|
|
555
567
|
|
|
568
|
+
def _handle_create_attachment_message(
|
|
569
|
+
self, message: messages.CreateAttachmentMessage
|
|
570
|
+
) -> None:
|
|
571
|
+
"""Handle attachment messages by adding them to the appropriate span or trace.
|
|
572
|
+
|
|
573
|
+
Attachments are stored in temporary dictionaries and will be connected to their
|
|
574
|
+
spans/traces when the trace trees are built, similar to how feedback scores work.
|
|
575
|
+
"""
|
|
576
|
+
attachment_model = models.AttachmentModel(
|
|
577
|
+
file_path=message.file_path,
|
|
578
|
+
file_name=message.file_name,
|
|
579
|
+
content_type=message.mime_type,
|
|
580
|
+
)
|
|
581
|
+
|
|
582
|
+
if message.entity_type == "span":
|
|
583
|
+
self._span_to_attachments[message.entity_id].append(attachment_model)
|
|
584
|
+
elif message.entity_type == "trace":
|
|
585
|
+
self._trace_to_attachments[message.entity_id].append(attachment_model)
|
|
586
|
+
|
|
587
|
+
def _noop_handler(self, message: messages.BaseMessage) -> None:
|
|
588
|
+
# just ignore the message
|
|
589
|
+
pass
|
|
590
|
+
|
|
556
591
|
@property
|
|
557
592
|
def experiment_items(self) -> List[models.ExperimentItemModel]:
|
|
558
593
|
"""Returns the list of experiment items collected."""
|
|
@@ -30,6 +30,25 @@ class FeedbackScoreModel:
|
|
|
30
30
|
reason: Optional[str] = None
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
@dataclasses.dataclass
|
|
34
|
+
class AttachmentModel:
|
|
35
|
+
"""
|
|
36
|
+
Represents a model for an attachment associated with a span or trace.
|
|
37
|
+
|
|
38
|
+
This class stores metadata about files or data attached to spans or traces,
|
|
39
|
+
including the file path, name, and content type.
|
|
40
|
+
|
|
41
|
+
Attributes:
|
|
42
|
+
file_path: Path to the attached file.
|
|
43
|
+
file_name: Name of the attached file.
|
|
44
|
+
content_type: MIME type of the attached file.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
file_path: str
|
|
48
|
+
file_name: str
|
|
49
|
+
content_type: Optional[str] = None
|
|
50
|
+
|
|
51
|
+
|
|
33
52
|
@dataclasses.dataclass
|
|
34
53
|
class SpanModel:
|
|
35
54
|
"""
|
|
@@ -86,6 +105,7 @@ class SpanModel:
|
|
|
86
105
|
error_info: Optional[ErrorInfoDict] = None
|
|
87
106
|
total_cost: Optional[float] = None
|
|
88
107
|
last_updated_at: Optional[datetime.datetime] = None
|
|
108
|
+
attachments: Optional[List[AttachmentModel]] = None
|
|
89
109
|
|
|
90
110
|
|
|
91
111
|
@dataclasses.dataclass
|
|
@@ -160,3 +180,4 @@ class TraceModel:
|
|
|
160
180
|
error_info: Optional[ErrorInfoDict] = None
|
|
161
181
|
thread_id: Optional[str] = None
|
|
162
182
|
last_updated_at: Optional[datetime.datetime] = None
|
|
183
|
+
attachments: Optional[List[AttachmentModel]] = None
|
|
@@ -4,6 +4,7 @@ from dataclasses import field
|
|
|
4
4
|
from typing import Optional, Any, Dict, List, Union, Literal, Set
|
|
5
5
|
|
|
6
6
|
from . import arguments_utils
|
|
7
|
+
from .preprocessing import constants
|
|
7
8
|
from ..rest_api.types import span_write, trace_write
|
|
8
9
|
from ..types import SpanType, ErrorInfoDict, LLMProvider, AttachmentEntityType
|
|
9
10
|
|
|
@@ -21,6 +22,8 @@ class BaseMessage:
|
|
|
21
22
|
data.pop("delivery_time")
|
|
22
23
|
if "delivery_attempts" in data:
|
|
23
24
|
data.pop("delivery_attempts")
|
|
25
|
+
if constants.MARKER_ATTRIBUTE_NAME in data:
|
|
26
|
+
data.pop(constants.MARKER_ATTRIBUTE_NAME)
|
|
24
27
|
return data
|
|
25
28
|
|
|
26
29
|
|
|
@@ -285,3 +288,9 @@ class CreateAttachmentMessage(BaseMessage):
|
|
|
285
288
|
entity_id: str
|
|
286
289
|
project_name: str
|
|
287
290
|
encoded_url_override: str
|
|
291
|
+
delete_after_upload: bool = False
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@dataclasses.dataclass
|
|
295
|
+
class AttachmentSupportingMessage(BaseMessage):
|
|
296
|
+
original_message: BaseMessage
|
|
File without changes
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Optional, Union
|
|
2
|
+
|
|
3
|
+
from opik.message_processing import messages
|
|
4
|
+
|
|
5
|
+
from . import constants, preprocessor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AttachmentsPreprocessor(preprocessor.MessagePreprocessor):
|
|
9
|
+
def __init__(self, enabled: bool = True) -> None:
|
|
10
|
+
self._enabled = enabled
|
|
11
|
+
|
|
12
|
+
def preprocess(
|
|
13
|
+
self, message: Optional[messages.BaseMessage]
|
|
14
|
+
) -> Optional[messages.BaseMessage]:
|
|
15
|
+
"""
|
|
16
|
+
Processes a given message and ensures that it is converted into a specialized
|
|
17
|
+
message type if applicable. If the message is already pre-processed, it
|
|
18
|
+
returns the original message to avoid infinite recursion.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
message: The message object to be processed.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
The processed message, either in its original form
|
|
25
|
+
or converted into a message type supporting embedded attachments.
|
|
26
|
+
"""
|
|
27
|
+
if not self._enabled:
|
|
28
|
+
return message
|
|
29
|
+
|
|
30
|
+
if message is None:
|
|
31
|
+
# possibly already pre-processed by other preprocessors
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
if hasattr(message, constants.MARKER_ATTRIBUTE_NAME):
|
|
35
|
+
# already pre-processed - just return the original message to avoid infinite recursion
|
|
36
|
+
return message
|
|
37
|
+
|
|
38
|
+
if _has_potential_content_with_attachments(message):
|
|
39
|
+
return messages.AttachmentSupportingMessage(message)
|
|
40
|
+
else:
|
|
41
|
+
return message
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _has_potential_content_with_attachments(message: messages.BaseMessage) -> bool:
|
|
45
|
+
# Check if it's an Update message - always process these
|
|
46
|
+
if isinstance(message, (messages.UpdateSpanMessage, messages.UpdateTraceMessage)):
|
|
47
|
+
return _message_has_field_of_interest_set(message)
|
|
48
|
+
|
|
49
|
+
# Check if it's a Create message with end_time set - only process these
|
|
50
|
+
if isinstance(message, (messages.CreateSpanMessage, messages.CreateTraceMessage)):
|
|
51
|
+
if message.end_time is not None:
|
|
52
|
+
return _message_has_field_of_interest_set(message)
|
|
53
|
+
|
|
54
|
+
# All other message types should not be wrapped
|
|
55
|
+
return False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _message_has_field_of_interest_set(
|
|
59
|
+
message: Union[
|
|
60
|
+
messages.UpdateSpanMessage,
|
|
61
|
+
messages.UpdateTraceMessage,
|
|
62
|
+
messages.CreateSpanMessage,
|
|
63
|
+
messages.CreateTraceMessage,
|
|
64
|
+
],
|
|
65
|
+
) -> bool:
|
|
66
|
+
return (
|
|
67
|
+
message.input is not None
|
|
68
|
+
or message.output is not None
|
|
69
|
+
or message.metadata is not None
|
|
70
|
+
)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from . import preprocessor
|
|
4
|
+
from .. import messages
|
|
5
|
+
from ..batching import batch_manager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BatchingPreprocessor(preprocessor.MessagePreprocessor):
|
|
9
|
+
"""
|
|
10
|
+
Handles message batching during preprocessing.
|
|
11
|
+
|
|
12
|
+
The BatchingPreprocessor class processes messages, enabling efficient message
|
|
13
|
+
batching if a batching manager is provided. It supports starting, stopping,
|
|
14
|
+
flushing, and checking the state of the batching manager, ensuring that
|
|
15
|
+
messages are processed or delegated based on their batching capabilities.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, batching_manager: Optional[batch_manager.BatchManager]) -> None:
|
|
19
|
+
self._batch_manager = batching_manager
|
|
20
|
+
|
|
21
|
+
def preprocess(
|
|
22
|
+
self, message: Optional[messages.BaseMessage]
|
|
23
|
+
) -> Optional[messages.BaseMessage]:
|
|
24
|
+
if message is None:
|
|
25
|
+
# possibly already processed
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
if (
|
|
29
|
+
self._batch_manager is not None
|
|
30
|
+
and self._batch_manager.message_supports_batching(message)
|
|
31
|
+
):
|
|
32
|
+
self._batch_manager.process_message(message)
|
|
33
|
+
return None
|
|
34
|
+
|
|
35
|
+
return message
|
|
36
|
+
|
|
37
|
+
def start(self) -> None:
|
|
38
|
+
if self._batch_manager is not None:
|
|
39
|
+
self._batch_manager.start()
|
|
40
|
+
|
|
41
|
+
def stop(self) -> None:
|
|
42
|
+
if self._batch_manager is not None:
|
|
43
|
+
self._batch_manager.stop()
|
|
44
|
+
|
|
45
|
+
def flush(self) -> None:
|
|
46
|
+
if self._batch_manager is not None:
|
|
47
|
+
self._batch_manager.flush()
|
|
48
|
+
|
|
49
|
+
def is_empty(self) -> bool:
|
|
50
|
+
if self._batch_manager is not None:
|
|
51
|
+
return self._batch_manager.is_empty()
|
|
52
|
+
|
|
53
|
+
return True
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MARKER_ATTRIBUTE_NAME = "_preprocessed_for_attachments"
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from opik.file_upload import base_upload_manager
|
|
4
|
+
|
|
5
|
+
from . import preprocessor
|
|
6
|
+
from .. import messages
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FileUploadPreprocessor(preprocessor.MessagePreprocessor):
|
|
10
|
+
"""
|
|
11
|
+
Preprocesses messages to handle file uploads.
|
|
12
|
+
|
|
13
|
+
This class is responsible for processing messages to determine if they support
|
|
14
|
+
file uploads and delegating the upload task to a file upload manager. It also
|
|
15
|
+
provides functionality to flush pending uploads with configurable timeout and
|
|
16
|
+
sleep intervals.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self, file_upload_manager: base_upload_manager.BaseFileUploadManager
|
|
21
|
+
) -> None:
|
|
22
|
+
self.file_upload_manager = file_upload_manager
|
|
23
|
+
|
|
24
|
+
def preprocess(
|
|
25
|
+
self, message: Optional[messages.BaseMessage]
|
|
26
|
+
) -> Optional[messages.BaseMessage]:
|
|
27
|
+
if message is None:
|
|
28
|
+
# possibly already processed
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
if base_upload_manager.message_supports_upload(message):
|
|
32
|
+
self.file_upload_manager.upload(message)
|
|
33
|
+
return None
|
|
34
|
+
|
|
35
|
+
return message
|
|
36
|
+
|
|
37
|
+
def flush(self, timeout: Optional[float], sleep_time: int) -> bool:
|
|
38
|
+
return self.file_upload_manager.flush(timeout=timeout, sleep_time=sleep_time)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from opik.message_processing import messages
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MessagePreprocessor(abc.ABC):
|
|
8
|
+
"""
|
|
9
|
+
Abstract base class for message preprocessing.
|
|
10
|
+
|
|
11
|
+
This class provides a common interface for pre-processing messages, allowing
|
|
12
|
+
derived classes to implement custom preprocessing logic tailored to specific
|
|
13
|
+
requirements. Instances of this class cannot be created directly; it must be
|
|
14
|
+
subclassed with the `preprocess` method implemented.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
@abc.abstractmethod
|
|
18
|
+
def preprocess(
|
|
19
|
+
self, message: Optional[messages.BaseMessage]
|
|
20
|
+
) -> Optional[messages.BaseMessage]:
|
|
21
|
+
"""
|
|
22
|
+
Processes and preprocesses the given message to prepare it for further operations.
|
|
23
|
+
|
|
24
|
+
This is an abstract method and needs to be implemented in any concrete subclass. The
|
|
25
|
+
preprocessing step is typically used for transformations or checks on the given input
|
|
26
|
+
message before further processing.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
message: The input message to be preprocessed. This can
|
|
30
|
+
optionally be None.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
The processed message after preprocessing. Returns None if the input message is None
|
|
34
|
+
or if a message was fully consumed here and no further processing is required.
|
|
35
|
+
"""
|
|
36
|
+
pass
|
|
File without changes
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional, NamedTuple, List, Literal, cast
|
|
3
|
+
|
|
4
|
+
from opik.api_objects.attachment import (
|
|
5
|
+
attachments_extractor,
|
|
6
|
+
attachment_context,
|
|
7
|
+
converters,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from . import message_processors
|
|
11
|
+
from ..preprocessing import constants
|
|
12
|
+
from .. import messages, streamer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EntityDetails(NamedTuple):
    """Identifies the span or trace that a set of attachments belongs to."""

    # Whether the attachments were found on a span or a trace message.
    entity_type: Literal["span", "trace"]
    # Unique identifier of that span or trace.
    entity_id: str
    # Project the entity was logged under.
    project_name: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AttachmentsExtractionProcessor(message_processors.BaseMessageProcessor):
    """
    Extracts attachments from incoming messages and forwards everything downstream.

    Messages implementing ``AttachmentSupportingMessage`` are scanned (when the
    processor is active): embedded attachment payloads found in the ``input``,
    ``output`` and ``metadata`` attributes are extracted, replaced with
    references, and emitted as separate attachment-upload messages. The wrapped
    original message is always marked as already-preprocessed and re-queued on
    the streamer, whether or not extraction ran or succeeded.
    """

    def __init__(
        self,
        min_attachment_size: int,
        messages_streamer: streamer.Streamer,
        url_override: str,
        is_active: bool = True,
    ):
        """
        Build the processor.

        Args:
            min_attachment_size: Smallest payload size treated as an
                extractable attachment.
            messages_streamer: Streamer used to forward both original messages
                and the generated attachment-upload messages.
            url_override: Custom base URL embedded into attachment messages.
            is_active: When False, messages are forwarded without extraction.
                Default is True.
        """
        self._is_active = is_active
        self._url_override = url_override
        self.messages_streamer = messages_streamer
        self.extractor = attachments_extractor.AttachmentsExtractor(min_attachment_size)

        # Message attributes that may carry embedded attachment data.
        self.attachment_attributes = ["input", "output", "metadata"]

    def is_active(self) -> bool:
        """Whether attachment extraction is currently enabled."""
        return self._is_active

    def process(self, message: messages.BaseMessage) -> None:
        if not isinstance(message, messages.AttachmentSupportingMessage):
            return

        wrapped = message.original_message

        if self._is_active:
            # Extraction is best-effort: a failure here must never block the
            # original message from being forwarded.
            try:
                self._process_attachments_in_message(wrapped)
            except Exception as ex:
                LOGGER.error(
                    "Failed to process attachment support message: %s", ex, exc_info=ex
                )

        # Flag the wrapped message as preprocessed and hand it back to the
        # streamer for regular delivery.
        setattr(wrapped, constants.MARKER_ATTRIBUTE_NAME, True)
        self.messages_streamer.put(wrapped)

    def _process_attachments_in_message(self, original: messages.BaseMessage) -> None:
        # Attachments can only be attributed to a known span or trace.
        entity_details = entity_type_from_attachment_message(original)
        if entity_details is None:
            LOGGER.error(
                "Failed to extract entity details from message - %s. Skipping embedded attachments processing.",
                original.__class__.__name__,
            )
            return

        collected: List[attachment_context.AttachmentWithContext] = []
        for field_name in self.attachment_attributes:
            payload = getattr(original, field_name, None)
            if not payload:
                continue
            collected.extend(
                self.extractor.extract_and_replace(
                    data=payload,
                    entity_type=entity_details.entity_type,
                    entity_id=entity_details.entity_id,
                    project_name=entity_details.project_name,
                    context=cast(Literal["input", "output", "metadata"], field_name),
                )
            )

        if not collected:
            LOGGER.debug(
                "No attachments found in the message - %s.", original.__class__.__name__
            )
            return

        LOGGER.debug(
            "Extracted %d attachments from %s (entity: %s/%s)",
            len(collected),
            original.__class__.__name__,
            entity_details.entity_type,
            entity_details.entity_id,
        )
        self._process_attachments(collected)

    def _process_attachments(
        self, attachments: List[attachment_context.AttachmentWithContext]
    ) -> None:
        for item in attachments:
            self.messages_streamer.put(
                converters.attachment_to_message(
                    attachment_data=item.attachment_data,
                    entity_type=item.entity_type,
                    entity_id=item.entity_id,
                    project_name=item.project_name,
                    url_override=self._url_override,
                    # Delete local payloads after upload to avoid leaking
                    # disk space and data.
                    delete_after_upload=True,
                )
            )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def entity_type_from_attachment_message(
    message: messages.BaseMessage,
) -> Optional[EntityDetails]:
    """
    Resolve which span or trace entity a message refers to.

    Returns an ``EntityDetails`` for span/trace create-or-update messages,
    and ``None`` for every other message type.
    """
    if isinstance(message, (messages.CreateSpanMessage, messages.UpdateSpanMessage)):
        return EntityDetails("span", message.span_id, project_name=message.project_name)

    if isinstance(message, (messages.CreateTraceMessage, messages.UpdateTraceMessage)):
        return EntityDetails(
            "trace", message.trace_id, project_name=message.project_name
        )

    return None
|