opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. opik/api_objects/attachment/attachment_context.py +36 -0
  2. opik/api_objects/attachment/attachments_extractor.py +153 -0
  3. opik/api_objects/attachment/client.py +1 -0
  4. opik/api_objects/attachment/converters.py +2 -0
  5. opik/api_objects/attachment/decoder.py +18 -0
  6. opik/api_objects/attachment/decoder_base64.py +83 -0
  7. opik/api_objects/attachment/decoder_helpers.py +137 -0
  8. opik/api_objects/constants.py +2 -0
  9. opik/api_objects/dataset/dataset.py +133 -40
  10. opik/api_objects/dataset/rest_operations.py +2 -0
  11. opik/api_objects/experiment/experiment.py +6 -0
  12. opik/api_objects/helpers.py +8 -4
  13. opik/api_objects/local_recording.py +6 -5
  14. opik/api_objects/observation_data.py +101 -0
  15. opik/api_objects/opik_client.py +78 -45
  16. opik/api_objects/opik_query_language.py +9 -3
  17. opik/api_objects/prompt/chat/chat_prompt.py +18 -1
  18. opik/api_objects/prompt/client.py +8 -1
  19. opik/api_objects/span/span_data.py +3 -88
  20. opik/api_objects/threads/threads_client.py +7 -4
  21. opik/api_objects/trace/trace_data.py +3 -74
  22. opik/api_objects/validation_helpers.py +3 -3
  23. opik/cli/exports/__init__.py +131 -0
  24. opik/cli/exports/dataset.py +278 -0
  25. opik/cli/exports/experiment.py +784 -0
  26. opik/cli/exports/project.py +685 -0
  27. opik/cli/exports/prompt.py +578 -0
  28. opik/cli/exports/utils.py +406 -0
  29. opik/cli/harbor.py +39 -0
  30. opik/cli/imports/__init__.py +439 -0
  31. opik/cli/imports/dataset.py +143 -0
  32. opik/cli/imports/experiment.py +1192 -0
  33. opik/cli/imports/project.py +262 -0
  34. opik/cli/imports/prompt.py +177 -0
  35. opik/cli/imports/utils.py +280 -0
  36. opik/cli/main.py +14 -12
  37. opik/config.py +12 -1
  38. opik/datetime_helpers.py +12 -0
  39. opik/decorator/arguments_helpers.py +4 -1
  40. opik/decorator/base_track_decorator.py +111 -37
  41. opik/decorator/context_manager/span_context_manager.py +5 -1
  42. opik/decorator/generator_wrappers.py +5 -4
  43. opik/decorator/span_creation_handler.py +13 -4
  44. opik/evaluation/engine/engine.py +111 -28
  45. opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
  46. opik/evaluation/evaluator.py +12 -0
  47. opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
  48. opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
  49. opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
  50. opik/evaluation/metrics/heuristics/equals.py +11 -7
  51. opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
  52. opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
  53. opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
  54. opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
  55. opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
  56. opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
  57. opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
  58. opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
  59. opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
  60. opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
  61. opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
  62. opik/evaluation/metrics/ragas_metric.py +43 -23
  63. opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
  64. opik/evaluation/models/litellm/util.py +4 -20
  65. opik/evaluation/models/models_factory.py +19 -5
  66. opik/evaluation/rest_operations.py +3 -3
  67. opik/evaluation/threads/helpers.py +3 -2
  68. opik/file_upload/file_uploader.py +13 -0
  69. opik/file_upload/upload_options.py +2 -0
  70. opik/integrations/adk/legacy_opik_tracer.py +9 -11
  71. opik/integrations/adk/opik_tracer.py +2 -2
  72. opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
  73. opik/integrations/dspy/callback.py +100 -14
  74. opik/integrations/dspy/parsers.py +168 -0
  75. opik/integrations/harbor/__init__.py +17 -0
  76. opik/integrations/harbor/experiment_service.py +269 -0
  77. opik/integrations/harbor/opik_tracker.py +528 -0
  78. opik/integrations/haystack/opik_tracer.py +2 -2
  79. opik/integrations/langchain/__init__.py +15 -2
  80. opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
  81. opik/integrations/langchain/opik_tracer.py +258 -160
  82. opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
  83. opik/integrations/llama_index/callback.py +43 -6
  84. opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
  85. opik/integrations/openai/opik_tracker.py +99 -4
  86. opik/integrations/openai/videos/__init__.py +9 -0
  87. opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
  88. opik/integrations/openai/videos/videos_create_decorator.py +159 -0
  89. opik/integrations/openai/videos/videos_download_decorator.py +110 -0
  90. opik/message_processing/batching/base_batcher.py +14 -21
  91. opik/message_processing/batching/batch_manager.py +22 -10
  92. opik/message_processing/batching/batchers.py +32 -40
  93. opik/message_processing/batching/flushing_thread.py +0 -3
  94. opik/message_processing/emulation/emulator_message_processor.py +36 -1
  95. opik/message_processing/emulation/models.py +21 -0
  96. opik/message_processing/messages.py +9 -0
  97. opik/message_processing/preprocessing/__init__.py +0 -0
  98. opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
  99. opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
  100. opik/message_processing/preprocessing/constants.py +1 -0
  101. opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
  102. opik/message_processing/preprocessing/preprocessor.py +36 -0
  103. opik/message_processing/processors/__init__.py +0 -0
  104. opik/message_processing/processors/attachments_extraction_processor.py +146 -0
  105. opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
  106. opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
  107. opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
  108. opik/message_processing/queue_consumer.py +4 -2
  109. opik/message_processing/streamer.py +71 -33
  110. opik/message_processing/streamer_constructors.py +36 -8
  111. opik/plugins/pytest/experiment_runner.py +1 -1
  112. opik/plugins/pytest/hooks.py +5 -3
  113. opik/rest_api/__init__.py +42 -0
  114. opik/rest_api/datasets/client.py +321 -123
  115. opik/rest_api/datasets/raw_client.py +470 -145
  116. opik/rest_api/experiments/client.py +26 -0
  117. opik/rest_api/experiments/raw_client.py +26 -0
  118. opik/rest_api/llm_provider_key/client.py +4 -4
  119. opik/rest_api/llm_provider_key/raw_client.py +4 -4
  120. opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
  121. opik/rest_api/manual_evaluation/client.py +101 -0
  122. opik/rest_api/manual_evaluation/raw_client.py +172 -0
  123. opik/rest_api/optimizations/client.py +0 -166
  124. opik/rest_api/optimizations/raw_client.py +0 -248
  125. opik/rest_api/projects/client.py +9 -0
  126. opik/rest_api/projects/raw_client.py +13 -0
  127. opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
  128. opik/rest_api/prompts/client.py +130 -2
  129. opik/rest_api/prompts/raw_client.py +175 -0
  130. opik/rest_api/traces/client.py +101 -0
  131. opik/rest_api/traces/raw_client.py +120 -0
  132. opik/rest_api/types/__init__.py +50 -0
  133. opik/rest_api/types/audio_url.py +19 -0
  134. opik/rest_api/types/audio_url_public.py +19 -0
  135. opik/rest_api/types/audio_url_write.py +19 -0
  136. opik/rest_api/types/automation_rule_evaluator.py +38 -2
  137. opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
  138. opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
  139. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
  140. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
  141. opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
  142. opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
  143. opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
  144. opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
  145. opik/rest_api/types/dataset.py +2 -0
  146. opik/rest_api/types/dataset_item.py +1 -1
  147. opik/rest_api/types/dataset_item_batch.py +4 -0
  148. opik/rest_api/types/dataset_item_changes_public.py +5 -0
  149. opik/rest_api/types/dataset_item_compare.py +1 -1
  150. opik/rest_api/types/dataset_item_filter.py +4 -0
  151. opik/rest_api/types/dataset_item_page_compare.py +0 -1
  152. opik/rest_api/types/dataset_item_page_public.py +0 -1
  153. opik/rest_api/types/dataset_item_public.py +1 -1
  154. opik/rest_api/types/dataset_public.py +2 -0
  155. opik/rest_api/types/dataset_version_public.py +10 -0
  156. opik/rest_api/types/dataset_version_summary.py +46 -0
  157. opik/rest_api/types/dataset_version_summary_public.py +46 -0
  158. opik/rest_api/types/experiment.py +9 -0
  159. opik/rest_api/types/experiment_public.py +9 -0
  160. opik/rest_api/types/group_content_with_aggregations.py +1 -0
  161. opik/rest_api/types/llm_as_judge_message_content.py +2 -0
  162. opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
  163. opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
  164. opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
  165. opik/rest_api/types/project.py +1 -0
  166. opik/rest_api/types/project_detailed.py +1 -0
  167. opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
  168. opik/rest_api/types/project_reference.py +31 -0
  169. opik/rest_api/types/project_reference_public.py +31 -0
  170. opik/rest_api/types/project_stats_summary_item.py +1 -0
  171. opik/rest_api/types/prompt_version.py +1 -0
  172. opik/rest_api/types/prompt_version_detail.py +1 -0
  173. opik/rest_api/types/prompt_version_page_public.py +5 -0
  174. opik/rest_api/types/prompt_version_public.py +1 -0
  175. opik/rest_api/types/prompt_version_update.py +33 -0
  176. opik/rest_api/types/provider_api_key.py +5 -1
  177. opik/rest_api/types/provider_api_key_provider.py +2 -1
  178. opik/rest_api/types/provider_api_key_public.py +5 -1
  179. opik/rest_api/types/provider_api_key_public_provider.py +2 -1
  180. opik/rest_api/types/service_toggles_config.py +11 -1
  181. opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
  182. opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
  183. opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
  184. opik/types.py +36 -0
  185. opik/validation/chat_prompt_messages.py +241 -0
  186. opik/validation/feedback_score.py +3 -3
  187. opik/validation/validator.py +28 -0
  188. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
  189. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
  190. opik/cli/export.py +0 -791
  191. opik/cli/import_command.py +0 -575
  192. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
  193. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
  194. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
  195. {opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import threading
1
2
  from typing import Type, Dict
2
3
  from .. import messages
3
4
  from . import base_batcher
@@ -15,30 +16,41 @@ class BatchManager:
15
16
  self._flushing_thread = flushing_thread.FlushingThread(
16
17
  batchers=list(self._message_to_batcher_mapping.values())
17
18
  )
19
+ self._lock = threading.RLock()
18
20
 
19
21
  def start(self) -> None:
20
22
  self._flushing_thread.start()
21
23
 
22
24
  def stop(self) -> None:
23
- self._flushing_thread.close()
25
+ with self._lock:
26
+ # stop the flushing thread
27
+ self._flushing_thread.close()
28
+ # force flush all pending messages
29
+ self.flush()
24
30
 
25
31
  def message_supports_batching(self, message: messages.BaseMessage) -> bool:
32
+ if message is None:
33
+ return False
34
+
26
35
  if hasattr(message, "supports_batching"):
27
36
  return message.supports_batching
28
37
 
29
38
  return message.__class__ in self._message_to_batcher_mapping
30
39
 
31
40
  def process_message(self, message: messages.BaseMessage) -> None:
32
- self._message_to_batcher_mapping[type(message)].add(message)
41
+ with self._lock:
42
+ self._message_to_batcher_mapping[type(message)].add(message)
33
43
 
34
44
  def is_empty(self) -> bool:
35
- return all(
36
- [
37
- batcher.is_empty()
38
- for batcher in self._message_to_batcher_mapping.values()
39
- ]
40
- )
45
+ with self._lock:
46
+ return all(
47
+ [
48
+ batcher.is_empty()
49
+ for batcher in self._message_to_batcher_mapping.values()
50
+ ]
51
+ )
41
52
 
42
53
  def flush(self) -> None:
43
- for batcher in self._message_to_batcher_mapping.values():
44
- batcher.flush()
54
+ with self._lock:
55
+ for batcher in self._message_to_batcher_mapping.values():
56
+ batcher.flush()
@@ -37,8 +37,9 @@ class CreateSpanMessageBatcher(base_batcher.BaseBatcher):
37
37
  return batches
38
38
 
39
39
  def add(self, message: messages.CreateSpanMessage) -> None: # type: ignore
40
- # remove any duplicate spans from the batch that was already added
41
- self._remove_matching_messages(lambda x: x.span_id == message.span_id) # type: ignore
40
+ # remove any duplicate start span message from the batch that was already added
41
+ if message.end_time is not None:
42
+ self._remove_matching_messages(lambda x: x.span_id == message.span_id) # type: ignore
42
43
 
43
44
  return super().add(message)
44
45
 
@@ -73,8 +74,9 @@ class CreateTraceMessageBatcher(base_batcher.BaseBatcher):
73
74
  return batches
74
75
 
75
76
  def add(self, message: messages.CreateTraceMessage) -> None: # type: ignore
76
- # remove any duplicate traces from the batch that was already added
77
- self._remove_matching_messages(lambda x: x.trace_id == message.trace_id) # type: ignore
77
+ # remove any duplicate start trace message from the batch that was already added
78
+ if message.end_time is not None:
79
+ self._remove_matching_messages(lambda x: x.trace_id == message.trace_id) # type: ignore
78
80
 
79
81
  return super().add(message)
80
82
 
@@ -99,26 +101,21 @@ class BaseAddFeedbackScoresBatchMessageBatcher(base_batcher.BaseBatcher):
99
101
  messages.AddThreadsFeedbackScoresBatchMessage,
100
102
  ],
101
103
  ) -> None:
102
- with self._lock:
103
- new_messages = message.batch
104
- n_new_messages = len(new_messages)
105
- n_accumulated_messages = len(self._accumulated_messages)
104
+ new_messages = message.batch
105
+ n_new_messages = len(new_messages)
106
+ n_accumulated_messages = len(self._accumulated_messages)
106
107
 
107
- if n_new_messages + n_accumulated_messages >= self._max_batch_size:
108
- free_space_in_accumulator = (
109
- self._max_batch_size - n_accumulated_messages
110
- )
108
+ if n_new_messages + n_accumulated_messages >= self._max_batch_size:
109
+ free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
111
110
 
112
- messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
113
- messages_that_dont_fit_in_batch = new_messages[
114
- free_space_in_accumulator:
115
- ]
111
+ messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
112
+ messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
116
113
 
117
- self._accumulated_messages += messages_that_fit_in_batch
118
- new_messages = messages_that_dont_fit_in_batch
119
- self.flush()
114
+ self._accumulated_messages += messages_that_fit_in_batch
115
+ new_messages = messages_that_dont_fit_in_batch
116
+ self.flush()
120
117
 
121
- self._accumulated_messages += new_messages
118
+ self._accumulated_messages += new_messages
122
119
 
123
120
 
124
121
  class AddSpanFeedbackScoresBatchMessageBatcher(
@@ -195,23 +192,18 @@ class CreateExperimentItemsBatchMessageBatcher(base_batcher.BaseBatcher):
195
192
  def add( # type: ignore
196
193
  self, message: messages.CreateExperimentItemsBatchMessage
197
194
  ) -> None:
198
- with self._lock:
199
- new_messages = message.batch
200
- n_new_messages = len(new_messages)
201
- n_accumulated_messages = len(self._accumulated_messages)
202
-
203
- if n_new_messages + n_accumulated_messages >= self._max_batch_size:
204
- free_space_in_accumulator = (
205
- self._max_batch_size - n_accumulated_messages
206
- )
207
-
208
- messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
209
- messages_that_dont_fit_in_batch = new_messages[
210
- free_space_in_accumulator:
211
- ]
212
-
213
- self._accumulated_messages += messages_that_fit_in_batch
214
- new_messages = messages_that_dont_fit_in_batch
215
- self.flush()
216
-
217
- self._accumulated_messages += new_messages
195
+ new_messages = message.batch
196
+ n_new_messages = len(new_messages)
197
+ n_accumulated_messages = len(self._accumulated_messages)
198
+
199
+ if n_new_messages + n_accumulated_messages >= self._max_batch_size:
200
+ free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
201
+
202
+ messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
203
+ messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
204
+
205
+ self._accumulated_messages += messages_that_fit_in_batch
206
+ new_messages = messages_that_dont_fit_in_batch
207
+ self.flush()
208
+
209
+ self._accumulated_messages += new_messages
@@ -17,9 +17,6 @@ class FlushingThread(threading.Thread):
17
17
  self._closed = False
18
18
 
19
19
  def close(self) -> None:
20
- for batcher in self._batchers:
21
- batcher.flush()
22
-
23
20
  self._closed = True
24
21
 
25
22
  def run(self) -> None:
@@ -9,7 +9,9 @@ from opik import dict_utils
9
9
  from opik.rest_api.types import span_write, trace_write
10
10
  from opik.types import ErrorInfoDict, SpanType
11
11
  from . import models
12
- from .. import message_processors, messages
12
+ from .. import messages
13
+ from ..processors import message_processors
14
+
13
15
 
14
16
  LOGGER = logging.getLogger(__name__)
15
17
 
@@ -77,6 +79,12 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
77
79
  self._span_to_feedback_scores: Dict[
78
80
  str, List[models.FeedbackScoreModel]
79
81
  ] = collections.defaultdict(list)
82
+ self._trace_to_attachments: Dict[str, List[models.AttachmentModel]] = (
83
+ collections.defaultdict(list)
84
+ )
85
+ self._span_to_attachments: Dict[str, List[models.AttachmentModel]] = (
86
+ collections.defaultdict(list)
87
+ )
80
88
  self._experiment_items: List[models.ExperimentItemModel] = []
81
89
 
82
90
  def is_active(self) -> bool:
@@ -111,6 +119,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
111
119
 
112
120
  for trace in self._trace_trees:
113
121
  trace.feedback_scores = self._trace_to_feedback_scores[trace.id]
122
+ trace.attachments = self._trace_to_attachments[trace.id] or None
114
123
 
115
124
  self._trace_trees.sort(key=lambda x: x.start_time)
116
125
  return self._trace_trees
@@ -176,6 +185,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
176
185
  for span_id in all_span_ids:
177
186
  span = self._span_observations[span_id]
178
187
  span.feedback_scores = self._span_to_feedback_scores[span_id]
188
+ span.attachments = self._span_to_attachments[span_id] or None
179
189
 
180
190
  self._span_trees.sort(key=lambda x: x.start_time)
181
191
 
@@ -353,6 +363,8 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
353
363
  messages.CreateSpansBatchMessage: self._handle_create_spans_batch_message, # type: ignore
354
364
  messages.CreateTraceBatchMessage: self._handle_create_traces_batch_message, # type: ignore
355
365
  messages.CreateExperimentItemsBatchMessage: self._handle_create_experiment_items_batch_message, # type: ignore
366
+ messages.AttachmentSupportingMessage: self._noop_handler, # type: ignore
367
+ messages.CreateAttachmentMessage: self._handle_create_attachment_message, # type: ignore
356
368
  }
357
369
 
358
370
  def _handle_create_trace_message(
@@ -553,6 +565,29 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
553
565
  )
554
566
  self._experiment_items.append(experiment_item)
555
567
 
568
+ def _handle_create_attachment_message(
569
+ self, message: messages.CreateAttachmentMessage
570
+ ) -> None:
571
+ """Handle attachment messages by adding them to the appropriate span or trace.
572
+
573
+ Attachments are stored in temporary dictionaries and will be connected to their
574
+ spans/traces when the trace trees are built, similar to how feedback scores work.
575
+ """
576
+ attachment_model = models.AttachmentModel(
577
+ file_path=message.file_path,
578
+ file_name=message.file_name,
579
+ content_type=message.mime_type,
580
+ )
581
+
582
+ if message.entity_type == "span":
583
+ self._span_to_attachments[message.entity_id].append(attachment_model)
584
+ elif message.entity_type == "trace":
585
+ self._trace_to_attachments[message.entity_id].append(attachment_model)
586
+
587
+ def _noop_handler(self, message: messages.BaseMessage) -> None:
588
+ # just ignore the message
589
+ pass
590
+
556
591
  @property
557
592
  def experiment_items(self) -> List[models.ExperimentItemModel]:
558
593
  """Returns the list of experiment items collected."""
@@ -30,6 +30,25 @@ class FeedbackScoreModel:
30
30
  reason: Optional[str] = None
31
31
 
32
32
 
33
+ @dataclasses.dataclass
34
+ class AttachmentModel:
35
+ """
36
+ Represents a model for an attachment associated with a span or trace.
37
+
38
+ This class stores metadata about files or data attached to spans or traces,
39
+ including the file path, name, and content type.
40
+
41
+ Attributes:
42
+ file_path: Path to the attached file.
43
+ file_name: Name of the attached file.
44
+ content_type: MIME type of the attached file.
45
+ """
46
+
47
+ file_path: str
48
+ file_name: str
49
+ content_type: Optional[str] = None
50
+
51
+
33
52
  @dataclasses.dataclass
34
53
  class SpanModel:
35
54
  """
@@ -86,6 +105,7 @@ class SpanModel:
86
105
  error_info: Optional[ErrorInfoDict] = None
87
106
  total_cost: Optional[float] = None
88
107
  last_updated_at: Optional[datetime.datetime] = None
108
+ attachments: Optional[List[AttachmentModel]] = None
89
109
 
90
110
 
91
111
  @dataclasses.dataclass
@@ -160,3 +180,4 @@ class TraceModel:
160
180
  error_info: Optional[ErrorInfoDict] = None
161
181
  thread_id: Optional[str] = None
162
182
  last_updated_at: Optional[datetime.datetime] = None
183
+ attachments: Optional[List[AttachmentModel]] = None
@@ -4,6 +4,7 @@ from dataclasses import field
4
4
  from typing import Optional, Any, Dict, List, Union, Literal, Set
5
5
 
6
6
  from . import arguments_utils
7
+ from .preprocessing import constants
7
8
  from ..rest_api.types import span_write, trace_write
8
9
  from ..types import SpanType, ErrorInfoDict, LLMProvider, AttachmentEntityType
9
10
 
@@ -21,6 +22,8 @@ class BaseMessage:
21
22
  data.pop("delivery_time")
22
23
  if "delivery_attempts" in data:
23
24
  data.pop("delivery_attempts")
25
+ if constants.MARKER_ATTRIBUTE_NAME in data:
26
+ data.pop(constants.MARKER_ATTRIBUTE_NAME)
24
27
  return data
25
28
 
26
29
 
@@ -285,3 +288,9 @@ class CreateAttachmentMessage(BaseMessage):
285
288
  entity_id: str
286
289
  project_name: str
287
290
  encoded_url_override: str
291
+ delete_after_upload: bool = False
292
+
293
+
294
+ @dataclasses.dataclass
295
+ class AttachmentSupportingMessage(BaseMessage):
296
+ original_message: BaseMessage
File without changes
@@ -0,0 +1,70 @@
1
+ from typing import Optional, Union
2
+
3
+ from opik.message_processing import messages
4
+
5
+ from . import constants, preprocessor
6
+
7
+
8
+ class AttachmentsPreprocessor(preprocessor.MessagePreprocessor):
9
+ def __init__(self, enabled: bool = True) -> None:
10
+ self._enabled = enabled
11
+
12
+ def preprocess(
13
+ self, message: Optional[messages.BaseMessage]
14
+ ) -> Optional[messages.BaseMessage]:
15
+ """
16
+ Processes a given message and ensures that it is converted into a specialized
17
+ message type if applicable. If the message is already pre-processed, it
18
+ returns the original message to avoid infinite recursion.
19
+
20
+ Args:
21
+ message: The message object to be processed.
22
+
23
+ Returns:
24
+ The processed message, either in its original form
25
+ or converted into a message type supporting embedded attachments.
26
+ """
27
+ if not self._enabled:
28
+ return message
29
+
30
+ if message is None:
31
+ # possibly already pre-processed by other preprocessors
32
+ return None
33
+
34
+ if hasattr(message, constants.MARKER_ATTRIBUTE_NAME):
35
+ # already pre-processed - just return the original message to avoid infinite recursion
36
+ return message
37
+
38
+ if _has_potential_content_with_attachments(message):
39
+ return messages.AttachmentSupportingMessage(message)
40
+ else:
41
+ return message
42
+
43
+
44
+ def _has_potential_content_with_attachments(message: messages.BaseMessage) -> bool:
45
+ # Check if it's an Update message - always process these
46
+ if isinstance(message, (messages.UpdateSpanMessage, messages.UpdateTraceMessage)):
47
+ return _message_has_field_of_interest_set(message)
48
+
49
+ # Check if it's a Create message with end_time set - only process these
50
+ if isinstance(message, (messages.CreateSpanMessage, messages.CreateTraceMessage)):
51
+ if message.end_time is not None:
52
+ return _message_has_field_of_interest_set(message)
53
+
54
+ # All other message types should not be wrapped
55
+ return False
56
+
57
+
58
+ def _message_has_field_of_interest_set(
59
+ message: Union[
60
+ messages.UpdateSpanMessage,
61
+ messages.UpdateTraceMessage,
62
+ messages.CreateSpanMessage,
63
+ messages.CreateTraceMessage,
64
+ ],
65
+ ) -> bool:
66
+ return (
67
+ message.input is not None
68
+ or message.output is not None
69
+ or message.metadata is not None
70
+ )
@@ -0,0 +1,53 @@
1
+ from typing import Optional
2
+
3
+ from . import preprocessor
4
+ from .. import messages
5
+ from ..batching import batch_manager
6
+
7
+
8
+ class BatchingPreprocessor(preprocessor.MessagePreprocessor):
9
+ """
10
+ Handles message batching during preprocessing.
11
+
12
+ The BatchingPreprocessor class processes messages, enabling efficient message
13
+ batching if a batching manager is provided. It supports starting, stopping,
14
+ flushing, and checking the state of the batching manager, ensuring that
15
+ messages are processed or delegated based on their batching capabilities.
16
+ """
17
+
18
+ def __init__(self, batching_manager: Optional[batch_manager.BatchManager]) -> None:
19
+ self._batch_manager = batching_manager
20
+
21
+ def preprocess(
22
+ self, message: Optional[messages.BaseMessage]
23
+ ) -> Optional[messages.BaseMessage]:
24
+ if message is None:
25
+ # possibly already processed
26
+ return None
27
+
28
+ if (
29
+ self._batch_manager is not None
30
+ and self._batch_manager.message_supports_batching(message)
31
+ ):
32
+ self._batch_manager.process_message(message)
33
+ return None
34
+
35
+ return message
36
+
37
+ def start(self) -> None:
38
+ if self._batch_manager is not None:
39
+ self._batch_manager.start()
40
+
41
+ def stop(self) -> None:
42
+ if self._batch_manager is not None:
43
+ self._batch_manager.stop()
44
+
45
+ def flush(self) -> None:
46
+ if self._batch_manager is not None:
47
+ self._batch_manager.flush()
48
+
49
+ def is_empty(self) -> bool:
50
+ if self._batch_manager is not None:
51
+ return self._batch_manager.is_empty()
52
+
53
+ return True
@@ -0,0 +1 @@
1
+ MARKER_ATTRIBUTE_NAME = "_preprocessed_for_attachments"
@@ -0,0 +1,38 @@
1
+ from typing import Optional
2
+
3
+ from opik.file_upload import base_upload_manager
4
+
5
+ from . import preprocessor
6
+ from .. import messages
7
+
8
+
9
+ class FileUploadPreprocessor(preprocessor.MessagePreprocessor):
10
+ """
11
+ Preprocesses messages to handle file uploads.
12
+
13
+ This class is responsible for processing messages to determine if they support
14
+ file uploads and delegating the upload task to a file upload manager. It also
15
+ provides functionality to flush pending uploads with configurable timeout and
16
+ sleep intervals.
17
+ """
18
+
19
+ def __init__(
20
+ self, file_upload_manager: base_upload_manager.BaseFileUploadManager
21
+ ) -> None:
22
+ self.file_upload_manager = file_upload_manager
23
+
24
+ def preprocess(
25
+ self, message: Optional[messages.BaseMessage]
26
+ ) -> Optional[messages.BaseMessage]:
27
+ if message is None:
28
+ # possibly already processed
29
+ return None
30
+
31
+ if base_upload_manager.message_supports_upload(message):
32
+ self.file_upload_manager.upload(message)
33
+ return None
34
+
35
+ return message
36
+
37
+ def flush(self, timeout: Optional[float], sleep_time: int) -> bool:
38
+ return self.file_upload_manager.flush(timeout=timeout, sleep_time=sleep_time)
@@ -0,0 +1,36 @@
1
+ import abc
2
+ from typing import Optional
3
+
4
+ from opik.message_processing import messages
5
+
6
+
7
+ class MessagePreprocessor(abc.ABC):
8
+ """
9
+ Abstract base class for message preprocessing.
10
+
11
+ This class provides a common interface for pre-processing messages, allowing
12
+ derived classes to implement custom preprocessing logic tailored to specific
13
+ requirements. Instances of this class cannot be created directly; it must be
14
+ subclassed with the `preprocess` method implemented.
15
+ """
16
+
17
+ @abc.abstractmethod
18
+ def preprocess(
19
+ self, message: Optional[messages.BaseMessage]
20
+ ) -> Optional[messages.BaseMessage]:
21
+ """
22
+ Processes and preprocesses the given message to prepare it for further operations.
23
+
24
+ This is an abstract method and needs to be implemented in any concrete subclass. The
25
+ preprocessing step is typically used for transformations or checks on the given input
26
+ message before further processing.
27
+
28
+ Args:
29
+ message: The input message to be preprocessed. This can
30
+ optionally be None.
31
+
32
+ Returns:
33
+ The processed message after preprocessing. Returns None if the input message is None
34
+ or if a message was fully consumed here and no further processing is required.
35
+ """
36
+ pass
File without changes
@@ -0,0 +1,146 @@
1
+ import logging
2
+ from typing import Optional, NamedTuple, List, Literal, cast
3
+
4
+ from opik.api_objects.attachment import (
5
+ attachments_extractor,
6
+ attachment_context,
7
+ converters,
8
+ )
9
+
10
+ from . import message_processors
11
+ from ..preprocessing import constants
12
+ from .. import messages, streamer
13
+
14
+
15
# Logger named after this module; used by the processor below for diagnostics.
LOGGER: logging.Logger = logging.getLogger(__name__)
16
+
17
+
18
class EntityDetails(NamedTuple):
    """Identifies the span or trace that a message's attachments belong to."""

    # Kind of entity the message refers to.
    entity_type: Literal["span", "trace"]
    # Identifier of that span or trace.
    entity_id: str
    # Name of the project taken from the message.
    project_name: str
22
+
23
+
24
class AttachmentsExtractionProcessor(message_processors.BaseMessageProcessor):
    """
    Message processor that pulls embedded attachments out of wrapped messages.

    Only `messages.AttachmentSupportingMessage` instances are handled. While
    the processor is active, the "input", "output" and "metadata" attributes
    of the wrapped original message are handed to an `AttachmentsExtractor`;
    each extracted attachment is converted into its own message and put on the
    streamer. The original message is then tagged with the marker attribute
    and put on the streamer too, whether or not extraction ran or succeeded.
    """

    def __init__(
        self,
        min_attachment_size: int,
        messages_streamer: streamer.Streamer,
        url_override: str,
        is_active: bool = True,
    ):
        """
        Set up the extractor and remember where processed messages are sent.

        Args:
            min_attachment_size: Minimum size for an attachment to be
                extracted; passed to `AttachmentsExtractor`.
            messages_streamer: Streamer that receives both the attachment
                messages and the original message.
            url_override: URL forwarded to `converters.attachment_to_message`
                for every extracted attachment.
            is_active: When False, attachment extraction is skipped and the
                original message is only forwarded. Defaults to True.
        """
        self._is_active = is_active
        self._url_override = url_override
        self.messages_streamer = messages_streamer
        self.extractor = attachments_extractor.AttachmentsExtractor(min_attachment_size)

        # Message attributes that are scanned for embedded attachments.
        self.attachment_attributes = ["input", "output", "metadata"]

    def is_active(self) -> bool:
        """Report whether attachment extraction is enabled for this processor."""
        return self._is_active

    def process(self, message: messages.BaseMessage) -> None:
        """
        Extract attachments from a wrapped message and forward the original.

        Messages that are not `AttachmentSupportingMessage` instances are
        ignored.

        Args:
            message: The incoming message.
        """
        if not isinstance(message, messages.AttachmentSupportingMessage):
            return

        if self._is_active:
            # Extraction is best-effort: a failure is logged and the original
            # message is still forwarded below.
            try:
                self._process_attachments_in_message(message.original_message)
            except Exception as ex:
                LOGGER.error(
                    "Failed to process attachment support message: %s", ex, exc_info=ex
                )

        # Tag the original message with the marker attribute before handing it
        # back to the streamer for further processing.
        forwarded = message.original_message
        setattr(forwarded, constants.MARKER_ATTRIBUTE_NAME, True)
        self.messages_streamer.put(forwarded)

    def _process_attachments_in_message(self, original: messages.BaseMessage) -> None:
        """
        Run the extractor over every attachment-carrying attribute of a message.

        Args:
            original: The unwrapped span/trace message to scan.
        """
        message_kind = original.__class__.__name__

        details = entity_type_from_attachment_message(original)
        if details is None:
            LOGGER.error(
                "Failed to extract entity details from message - %s. Skipping embedded attachments processing.",
                message_kind,
            )
            return

        extracted: List[attachment_context.AttachmentWithContext] = []
        for attribute in self.attachment_attributes:
            payload = getattr(original, attribute, None)
            if not payload:
                # Attribute is missing or empty; nothing to extract from it.
                continue

            extracted.extend(
                self.extractor.extract_and_replace(
                    data=payload,
                    entity_type=details.entity_type,
                    entity_id=details.entity_id,
                    project_name=details.project_name,
                    context=cast(Literal["input", "output", "metadata"], attribute),
                )
            )

        if not extracted:
            LOGGER.debug("No attachments found in the message - %s.", message_kind)
            return

        LOGGER.debug(
            "Extracted %d attachments from %s (entity: %s/%s)",
            len(extracted),
            message_kind,
            details.entity_type,
            details.entity_id,
        )
        self._process_attachments(extracted)

    def _process_attachments(
        self, attachments: List[attachment_context.AttachmentWithContext]
    ) -> None:
        """
        Convert each extracted attachment to a message and put it on the streamer.

        Args:
            attachments: Attachments together with the entity they belong to.
        """
        for item in attachments:
            upload_request = converters.attachment_to_message(
                attachment_data=item.attachment_data,
                entity_type=item.entity_type,
                entity_id=item.entity_id,
                project_name=item.project_name,
                url_override=self._url_override,
                # Delete attachments after upload to avoid leaking space and data.
                delete_after_upload=True,
            )
            self.messages_streamer.put(upload_request)
132
+
133
+
134
def entity_type_from_attachment_message(
    message: messages.BaseMessage,
) -> Optional[EntityDetails]:
    """
    Work out which span or trace a message refers to.

    Args:
        message: The message to classify.

    Returns:
        `EntityDetails` with type "span" for create/update span messages and
        type "trace" for create/update trace messages; None for any other
        message type.
    """
    span_message_types = (messages.CreateSpanMessage, messages.UpdateSpanMessage)
    if isinstance(message, span_message_types):
        return EntityDetails("span", message.span_id, project_name=message.project_name)

    trace_message_types = (messages.CreateTraceMessage, messages.UpdateTraceMessage)
    if isinstance(message, trace_message_types):
        return EntityDetails(
            "trace", message.trace_id, project_name=message.project_name
        )

    return None