PyPI - opik - Versions diffs - 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl - Mend

opik 1.9.39-py3-none-any.whl → 1.9.86-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195)

opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/constants.py +2 -0
opik/api_objects/dataset/dataset.py +133 -40
opik/api_objects/dataset/rest_operations.py +2 -0
opik/api_objects/experiment/experiment.py +6 -0
opik/api_objects/helpers.py +8 -4
opik/api_objects/local_recording.py +6 -5
opik/api_objects/observation_data.py +101 -0
opik/api_objects/opik_client.py +78 -45
opik/api_objects/opik_query_language.py +9 -3
opik/api_objects/prompt/chat/chat_prompt.py +18 -1
opik/api_objects/prompt/client.py +8 -1
opik/api_objects/span/span_data.py +3 -88
opik/api_objects/threads/threads_client.py +7 -4
opik/api_objects/trace/trace_data.py +3 -74
opik/api_objects/validation_helpers.py +3 -3
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +14 -12
opik/config.py +12 -1
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +4 -1
opik/decorator/base_track_decorator.py +111 -37
opik/decorator/context_manager/span_context_manager.py +5 -1
opik/decorator/generator_wrappers.py +5 -4
opik/decorator/span_creation_handler.py +13 -4
opik/evaluation/engine/engine.py +111 -28
opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
opik/evaluation/evaluator.py +12 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
opik/evaluation/metrics/heuristics/equals.py +11 -7
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
opik/evaluation/models/litellm/util.py +4 -20
opik/evaluation/models/models_factory.py +19 -5
opik/evaluation/rest_operations.py +3 -3
opik/evaluation/threads/helpers.py +3 -2
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/integrations/adk/legacy_opik_tracer.py +9 -11
opik/integrations/adk/opik_tracer.py +2 -2
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
opik/integrations/dspy/callback.py +100 -14
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_tracer.py +2 -2
opik/integrations/langchain/__init__.py +15 -2
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_tracer.py +258 -160
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
opik/integrations/llama_index/callback.py +43 -6
opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
opik/integrations/openai/opik_tracker.py +99 -4
opik/integrations/openai/videos/__init__.py +9 -0
opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
opik/integrations/openai/videos/videos_create_decorator.py +159 -0
opik/integrations/openai/videos/videos_download_decorator.py +110 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batchers.py +32 -40
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/emulator_message_processor.py +36 -1
opik/message_processing/emulation/models.py +21 -0
opik/message_processing/messages.py +9 -0
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
opik/message_processing/queue_consumer.py +4 -2
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +36 -8
opik/plugins/pytest/experiment_runner.py +1 -1
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +42 -0
opik/rest_api/datasets/client.py +321 -123
opik/rest_api/datasets/raw_client.py +470 -145
opik/rest_api/experiments/client.py +26 -0
opik/rest_api/experiments/raw_client.py +26 -0
opik/rest_api/llm_provider_key/client.py +4 -4
opik/rest_api/llm_provider_key/raw_client.py +4 -4
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
opik/rest_api/manual_evaluation/client.py +101 -0
opik/rest_api/manual_evaluation/raw_client.py +172 -0
opik/rest_api/optimizations/client.py +0 -166
opik/rest_api/optimizations/raw_client.py +0 -248
opik/rest_api/projects/client.py +9 -0
opik/rest_api/projects/raw_client.py +13 -0
opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
opik/rest_api/prompts/client.py +130 -2
opik/rest_api/prompts/raw_client.py +175 -0
opik/rest_api/traces/client.py +101 -0
opik/rest_api/traces/raw_client.py +120 -0
opik/rest_api/types/__init__.py +50 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +38 -2
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
opik/rest_api/types/dataset.py +2 -0
opik/rest_api/types/dataset_item.py +1 -1
opik/rest_api/types/dataset_item_batch.py +4 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +1 -1
opik/rest_api/types/dataset_item_filter.py +4 -0
opik/rest_api/types/dataset_item_page_compare.py +0 -1
opik/rest_api/types/dataset_item_page_public.py +0 -1
opik/rest_api/types/dataset_item_public.py +1 -1
opik/rest_api/types/dataset_public.py +2 -0
opik/rest_api/types/dataset_version_public.py +10 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +9 -0
opik/rest_api/types/experiment_public.py +9 -0
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/llm_as_judge_message_content.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt_version.py +1 -0
opik/rest_api/types/prompt_version_detail.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +1 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +5 -1
opik/rest_api/types/provider_api_key_provider.py +2 -1
opik/rest_api/types/provider_api_key_public.py +5 -1
opik/rest_api/types/provider_api_key_public_provider.py +2 -1
opik/rest_api/types/service_toggles_config.py +11 -1
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
opik/cli/export.py +0 -791
opik/cli/import_command.py +0 -575
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0

opik/message_processing/batching/batch_manager.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import threading
 from typing import Type, Dict
 from .. import messages
 from . import base_batcher
@@ -15,30 +16,41 @@ class BatchManager:
         self._flushing_thread = flushing_thread.FlushingThread(
             batchers=list(self._message_to_batcher_mapping.values())
         )
+        self._lock = threading.RLock()
     def start(self) -> None:
         self._flushing_thread.start()
     def stop(self) -> None:
-        self._flushing_thread.close()
+        with self._lock:
+            # stop the flushing thread
+            self._flushing_thread.close()
+            # force flush all pending messages
+            self.flush()
     def message_supports_batching(self, message: messages.BaseMessage) -> bool:
+        if message is None:
+            return False
         if hasattr(message, "supports_batching"):
             return message.supports_batching
         return message.__class__ in self._message_to_batcher_mapping
     def process_message(self, message: messages.BaseMessage) -> None:
-        self._message_to_batcher_mapping[type(message)].add(message)
+        with self._lock:
+            self._message_to_batcher_mapping[type(message)].add(message)
     def is_empty(self) -> bool:
-        return all(
-            [
-                batcher.is_empty()
-                for batcher in self._message_to_batcher_mapping.values()
-            ]
-        )
+        with self._lock:
+            return all(
+                [
+                    batcher.is_empty()
+                    for batcher in self._message_to_batcher_mapping.values()
+                ]
+            )
     def flush(self) -> None:
-        for batcher in self._message_to_batcher_mapping.values():
-            batcher.flush()
+        with self._lock:
+            for batcher in self._message_to_batcher_mapping.values():
+                batcher.flush()

opik/message_processing/batching/batchers.py CHANGED Viewed

@@ -37,8 +37,9 @@ class CreateSpanMessageBatcher(base_batcher.BaseBatcher):
         return batches
     def add(self, message: messages.CreateSpanMessage) -> None:  # type: ignore
-        # remove any duplicate spans from the batch that was already added
-        self._remove_matching_messages(lambda x: x.span_id == message.span_id)  # type: ignore
+        # remove any duplicate start span message from the batch that was already added
+        if message.end_time is not None:
+            self._remove_matching_messages(lambda x: x.span_id == message.span_id)  # type: ignore
         return super().add(message)
@@ -73,8 +74,9 @@ class CreateTraceMessageBatcher(base_batcher.BaseBatcher):
         return batches
     def add(self, message: messages.CreateTraceMessage) -> None:  # type: ignore
-        # remove any duplicate traces from the batch that was already added
-        self._remove_matching_messages(lambda x: x.trace_id == message.trace_id)  # type: ignore
+        # remove any duplicate start trace message from the batch that was already added
+        if message.end_time is not None:
+            self._remove_matching_messages(lambda x: x.trace_id == message.trace_id)  # type: ignore
         return super().add(message)
@@ -99,26 +101,21 @@ class BaseAddFeedbackScoresBatchMessageBatcher(base_batcher.BaseBatcher):
             messages.AddThreadsFeedbackScoresBatchMessage,
         ],
     ) -> None:
-        with self._lock:
-            new_messages = message.batch
-            n_new_messages = len(new_messages)
-            n_accumulated_messages = len(self._accumulated_messages)
+        new_messages = message.batch
+        n_new_messages = len(new_messages)
+        n_accumulated_messages = len(self._accumulated_messages)
-            if n_new_messages + n_accumulated_messages >= self._max_batch_size:
-                free_space_in_accumulator = (
-                    self._max_batch_size - n_accumulated_messages
-                )
+        if n_new_messages + n_accumulated_messages >= self._max_batch_size:
+            free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
-                messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
-                messages_that_dont_fit_in_batch = new_messages[
-                    free_space_in_accumulator:
-                ]
+            messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
+            messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
-                self._accumulated_messages += messages_that_fit_in_batch
-                new_messages = messages_that_dont_fit_in_batch
-                self.flush()
+            self._accumulated_messages += messages_that_fit_in_batch
+            new_messages = messages_that_dont_fit_in_batch
+            self.flush()
-            self._accumulated_messages += new_messages
+        self._accumulated_messages += new_messages
 class AddSpanFeedbackScoresBatchMessageBatcher(
@@ -195,23 +192,18 @@ class CreateExperimentItemsBatchMessageBatcher(base_batcher.BaseBatcher):
     def add(  # type: ignore
         self, message: messages.CreateExperimentItemsBatchMessage
     ) -> None:
-        with self._lock:
-            new_messages = message.batch
-            n_new_messages = len(new_messages)
-            n_accumulated_messages = len(self._accumulated_messages)
-            if n_new_messages + n_accumulated_messages >= self._max_batch_size:
-                free_space_in_accumulator = (
-                    self._max_batch_size - n_accumulated_messages
-                )
-                messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
-                messages_that_dont_fit_in_batch = new_messages[
-                    free_space_in_accumulator:
-                ]
-                self._accumulated_messages += messages_that_fit_in_batch
-                new_messages = messages_that_dont_fit_in_batch
-                self.flush()
-            self._accumulated_messages += new_messages
+        new_messages = message.batch
+        n_new_messages = len(new_messages)
+        n_accumulated_messages = len(self._accumulated_messages)
+        if n_new_messages + n_accumulated_messages >= self._max_batch_size:
+            free_space_in_accumulator = self._max_batch_size - n_accumulated_messages
+            messages_that_fit_in_batch = new_messages[:free_space_in_accumulator]
+            messages_that_dont_fit_in_batch = new_messages[free_space_in_accumulator:]
+            self._accumulated_messages += messages_that_fit_in_batch
+            new_messages = messages_that_dont_fit_in_batch
+            self.flush()
+        self._accumulated_messages += new_messages

opik/message_processing/batching/flushing_thread.py CHANGED Viewed

@@ -17,9 +17,6 @@ class FlushingThread(threading.Thread):
         self._closed = False
     def close(self) -> None:
-        for batcher in self._batchers:
-            batcher.flush()
         self._closed = True
     def run(self) -> None:

opik/message_processing/emulation/emulator_message_processor.py CHANGED Viewed

@@ -9,7 +9,9 @@ from opik import dict_utils
 from opik.rest_api.types import span_write, trace_write
 from opik.types import ErrorInfoDict, SpanType
 from . import models
-from .. import message_processors, messages
+from .. import messages
+from ..processors import message_processors
 LOGGER = logging.getLogger(__name__)
@@ -77,6 +79,12 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
             self._span_to_feedback_scores: Dict[
                 str, List[models.FeedbackScoreModel]
             ] = collections.defaultdict(list)
+            self._trace_to_attachments: Dict[str, List[models.AttachmentModel]] = (
+                collections.defaultdict(list)
+            )
+            self._span_to_attachments: Dict[str, List[models.AttachmentModel]] = (
+                collections.defaultdict(list)
+            )
             self._experiment_items: List[models.ExperimentItemModel] = []
     def is_active(self) -> bool:
@@ -111,6 +119,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
             for trace in self._trace_trees:
                 trace.feedback_scores = self._trace_to_feedback_scores[trace.id]
+                trace.attachments = self._trace_to_attachments[trace.id] or None
             self._trace_trees.sort(key=lambda x: x.start_time)
             return self._trace_trees
@@ -176,6 +185,7 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
             for span_id in all_span_ids:
                 span = self._span_observations[span_id]
                 span.feedback_scores = self._span_to_feedback_scores[span_id]
+                span.attachments = self._span_to_attachments[span_id] or None
             self._span_trees.sort(key=lambda x: x.start_time)
@@ -353,6 +363,8 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
             messages.CreateSpansBatchMessage: self._handle_create_spans_batch_message,  # type: ignore
             messages.CreateTraceBatchMessage: self._handle_create_traces_batch_message,  # type: ignore
             messages.CreateExperimentItemsBatchMessage: self._handle_create_experiment_items_batch_message,  # type: ignore
+            messages.AttachmentSupportingMessage: self._noop_handler,  # type: ignore
+            messages.CreateAttachmentMessage: self._handle_create_attachment_message,  # type: ignore
         }
     def _handle_create_trace_message(
@@ -553,6 +565,29 @@ class EmulatorMessageProcessor(message_processors.BaseMessageProcessor, abc.ABC)
             )
             self._experiment_items.append(experiment_item)
+    def _handle_create_attachment_message(
+        self, message: messages.CreateAttachmentMessage
+    ) -> None:
+        """Handle attachment messages by adding them to the appropriate span or trace.
+        Attachments are stored in temporary dictionaries and will be connected to their
+        spans/traces when the trace trees are built, similar to how feedback scores work.
+        """
+        attachment_model = models.AttachmentModel(
+            file_path=message.file_path,
+            file_name=message.file_name,
+            content_type=message.mime_type,
+        )
+        if message.entity_type == "span":
+            self._span_to_attachments[message.entity_id].append(attachment_model)
+        elif message.entity_type == "trace":
+            self._trace_to_attachments[message.entity_id].append(attachment_model)
+    def _noop_handler(self, message: messages.BaseMessage) -> None:
+        # just ignore the message
+        pass
     @property
     def experiment_items(self) -> List[models.ExperimentItemModel]:
         """Returns the list of experiment items collected."""

opik/message_processing/emulation/models.py CHANGED Viewed

@@ -30,6 +30,25 @@ class FeedbackScoreModel:
     reason: Optional[str] = None
+@dataclasses.dataclass
+class AttachmentModel:
+    """
+    Represents a model for an attachment associated with a span or trace.
+    This class stores metadata about files or data attached to spans or traces,
+    including the file path, name, and content type.
+    Attributes:
+        file_path: Path to the attached file.
+        file_name: Name of the attached file.
+        content_type: MIME type of the attached file.
+    """
+    file_path: str
+    file_name: str
+    content_type: Optional[str] = None
 @dataclasses.dataclass
 class SpanModel:
     """
@@ -86,6 +105,7 @@ class SpanModel:
     error_info: Optional[ErrorInfoDict] = None
     total_cost: Optional[float] = None
     last_updated_at: Optional[datetime.datetime] = None
+    attachments: Optional[List[AttachmentModel]] = None
 @dataclasses.dataclass
@@ -160,3 +180,4 @@ class TraceModel:
     error_info: Optional[ErrorInfoDict] = None
     thread_id: Optional[str] = None
     last_updated_at: Optional[datetime.datetime] = None
+    attachments: Optional[List[AttachmentModel]] = None

opik/message_processing/messages.py CHANGED Viewed

@@ -4,6 +4,7 @@ from dataclasses import field
 from typing import Optional, Any, Dict, List, Union, Literal, Set
 from . import arguments_utils
+from .preprocessing import constants
 from ..rest_api.types import span_write, trace_write
 from ..types import SpanType, ErrorInfoDict, LLMProvider, AttachmentEntityType
@@ -21,6 +22,8 @@ class BaseMessage:
             data.pop("delivery_time")
         if "delivery_attempts" in data:
             data.pop("delivery_attempts")
+        if constants.MARKER_ATTRIBUTE_NAME in data:
+            data.pop(constants.MARKER_ATTRIBUTE_NAME)
         return data
@@ -285,3 +288,9 @@ class CreateAttachmentMessage(BaseMessage):
     entity_id: str
     project_name: str
     encoded_url_override: str
+    delete_after_upload: bool = False
+@dataclasses.dataclass
+class AttachmentSupportingMessage(BaseMessage):
+    original_message: BaseMessage

opik/message_processing/preprocessing/__init__.py ADDED Viewed

File without changes

opik/message_processing/preprocessing/attachments_preprocessor.py ADDED Viewed

@@ -0,0 +1,70 @@
+from typing import Optional, Union
+from opik.message_processing import messages
+from . import constants, preprocessor
+class AttachmentsPreprocessor(preprocessor.MessagePreprocessor):
+    def __init__(self, enabled: bool = True) -> None:
+        self._enabled = enabled
+    def preprocess(
+        self, message: Optional[messages.BaseMessage]
+    ) -> Optional[messages.BaseMessage]:
+        """
+        Processes a given message and ensures that it is converted into a specialized
+        message type if applicable. If the message is already pre-processed, it
+        returns the original message to avoid infinite recursion.
+        Args:
+            message: The message object to be processed.
+        Returns:
+            The processed message, either in its original form
+            or converted into a message type supporting embedded attachments.
+        """
+        if not self._enabled:
+            return message
+        if message is None:
+            # possibly already pre-processed by other preprocessors
+            return None
+        if hasattr(message, constants.MARKER_ATTRIBUTE_NAME):
+            # already pre-processed - just return the original message to avoid infinite recursion
+            return message
+        if _has_potential_content_with_attachments(message):
+            return messages.AttachmentSupportingMessage(message)
+        else:
+            return message
+def _has_potential_content_with_attachments(message: messages.BaseMessage) -> bool:
+    # Check if it's an Update message - always process these
+    if isinstance(message, (messages.UpdateSpanMessage, messages.UpdateTraceMessage)):
+        return _message_has_field_of_interest_set(message)
+    # Check if it's a Create message with end_time set - only process these
+    if isinstance(message, (messages.CreateSpanMessage, messages.CreateTraceMessage)):
+        if message.end_time is not None:
+            return _message_has_field_of_interest_set(message)
+    # All other message types should not be wrapped
+    return False
+def _message_has_field_of_interest_set(
+    message: Union[
+        messages.UpdateSpanMessage,
+        messages.UpdateTraceMessage,
+        messages.CreateSpanMessage,
+        messages.CreateTraceMessage,
+    ],
+) -> bool:
+    return (
+        message.input is not None
+        or message.output is not None
+        or message.metadata is not None
+    )

opik/message_processing/preprocessing/batching_preprocessor.py ADDED Viewed

@@ -0,0 +1,53 @@
+from typing import Optional
+from . import preprocessor
+from .. import messages
+from ..batching import batch_manager
+class BatchingPreprocessor(preprocessor.MessagePreprocessor):
+    """
+    Handles message batching during preprocessing.
+    The BatchingPreprocessor class processes messages, enabling efficient message
+    batching if a batching manager is provided. It supports starting, stopping,
+    flushing, and checking the state of the batching manager, ensuring that
+    messages are processed or delegated based on their batching capabilities.
+    """
+    def __init__(self, batching_manager: Optional[batch_manager.BatchManager]) -> None:
+        self._batch_manager = batching_manager
+    def preprocess(
+        self, message: Optional[messages.BaseMessage]
+    ) -> Optional[messages.BaseMessage]:
+        if message is None:
+            # possibly already processed
+            return None
+        if (
+            self._batch_manager is not None
+            and self._batch_manager.message_supports_batching(message)
+        ):
+            self._batch_manager.process_message(message)
+            return None
+        return message
+    def start(self) -> None:
+        if self._batch_manager is not None:
+            self._batch_manager.start()
+    def stop(self) -> None:
+        if self._batch_manager is not None:
+            self._batch_manager.stop()
+    def flush(self) -> None:
+        if self._batch_manager is not None:
+            self._batch_manager.flush()
+    def is_empty(self) -> bool:
+        if self._batch_manager is not None:
+            return self._batch_manager.is_empty()
+        return True

opik/message_processing/preprocessing/constants.py ADDED Viewed

@@ -0,0 +1 @@
+MARKER_ATTRIBUTE_NAME = "_preprocessed_for_attachments"

opik/message_processing/preprocessing/file_upload_preprocessor.py ADDED Viewed

@@ -0,0 +1,38 @@
+from typing import Optional
+from opik.file_upload import base_upload_manager
+from . import preprocessor
+from .. import messages
+class FileUploadPreprocessor(preprocessor.MessagePreprocessor):
+    """
+    Preprocesses messages to handle file uploads.
+    This class is responsible for processing messages to determine if they support
+    file uploads and delegating the upload task to a file upload manager. It also
+    provides functionality to flush pending uploads with configurable timeout and
+    sleep intervals.
+    """
+    def __init__(
+        self, file_upload_manager: base_upload_manager.BaseFileUploadManager
+    ) -> None:
+        self.file_upload_manager = file_upload_manager
+    def preprocess(
+        self, message: Optional[messages.BaseMessage]
+    ) -> Optional[messages.BaseMessage]:
+        if message is None:
+            # possibly already processed
+            return None
+        if base_upload_manager.message_supports_upload(message):
+            self.file_upload_manager.upload(message)
+            return None
+        return message
+    def flush(self, timeout: Optional[float], sleep_time: int) -> bool:
+        return self.file_upload_manager.flush(timeout=timeout, sleep_time=sleep_time)

opik/message_processing/preprocessing/preprocessor.py ADDED Viewed

@@ -0,0 +1,36 @@
+import abc
+from typing import Optional
+from opik.message_processing import messages
+class MessagePreprocessor(abc.ABC):
+    """
+    Abstract base class for message preprocessing.
+    This class provides a common interface for pre-processing messages, allowing
+    derived classes to implement custom preprocessing logic tailored to specific
+    requirements. Instances of this class cannot be created directly; it must be
+    subclassed with the `preprocess` method implemented.
+    """
+    @abc.abstractmethod
+    def preprocess(
+        self, message: Optional[messages.BaseMessage]
+    ) -> Optional[messages.BaseMessage]:
+        """
+        Processes and preprocesses the given message to prepare it for further operations.
+        This is an abstract method and needs to be implemented in any concrete subclass. The
+        preprocessing step is typically used for transformations or checks on the given input
+        message before further processing.
+        Args:
+            message: The input message to be preprocessed. This can
+                optionally be None.
+        Returns:
+            The processed message after preprocessing. Returns None if the input message is None
+            or if a message was fully consumed here and no further processing is required.
+        """
+        pass

opik/message_processing/processors/__init__.py ADDED Viewed

File without changes

opik/message_processing/processors/attachments_extraction_processor.py ADDED Viewed

@@ -0,0 +1,146 @@
+import logging
+from typing import Optional, NamedTuple, List, Literal, cast
+from opik.api_objects.attachment import (
+    attachments_extractor,
+    attachment_context,
+    converters,
+)
+from . import message_processors
+from ..preprocessing import constants
+from .. import messages, streamer
+LOGGER = logging.getLogger(__name__)
+class EntityDetails(NamedTuple):
+    # Identifies the span or trace that extracted attachments belong to.
+    # kind of entity: "span" for span messages, "trace" for trace messages
+    entity_type: Literal["span", "trace"]
+    # span_id or trace_id taken from the source message
+    entity_id: str
+    # project_name taken from the source message
+    project_name: str
+class AttachmentsExtractionProcessor(message_processors.BaseMessageProcessor):
+    """
+    Message processor that pulls embedded attachments out of wrapped messages.
+    Only `messages.AttachmentSupportingMessage` instances are handled; any other
+    message is ignored. While the processor is active, the non-empty input, output,
+    and metadata of the wrapped message are run through the attachments extractor
+    and every extracted attachment is queued on the streamer as a separate message.
+    The wrapped message itself is then marked and put back on the streamer, also
+    when the processor is inactive or the extraction fails.
+    """
+    def __init__(
+        self,
+        min_attachment_size: int,
+        messages_streamer: streamer.Streamer,
+        url_override: str,
+        is_active: bool = True,
+    ):
+        """
+        Sets up the extractor, the target streamer, and the activity flag.
+        Args:
+            min_attachment_size: Minimum size for an attachment to be extracted;
+                passed to the attachments extractor.
+            messages_streamer: Streamer that receives the created attachment
+                messages and the re-queued original messages.
+            url_override: URL override forwarded to every created attachment message.
+            is_active: Whether attachment extraction is performed. Default is True.
+        """
+        # message attributes that are scanned for embedded attachments
+        self.attachment_attributes = ["input", "output", "metadata"]
+        self.messages_streamer = messages_streamer
+        self._url_override = url_override
+        self._is_active = is_active
+        self.extractor = attachments_extractor.AttachmentsExtractor(min_attachment_size)
+    def is_active(self) -> bool:
+        """Returns True when attachment extraction is enabled for this processor."""
+        return self._is_active
+    def process(self, message: messages.BaseMessage) -> None:
+        """
+        Extracts attachments from a wrapped message and forwards the original one.
+        Messages that are not `messages.AttachmentSupportingMessage` are ignored.
+        Errors raised during extraction are logged and do not prevent the original
+        message from being forwarded.
+        Args:
+            message: The incoming message to handle.
+        """
+        if not isinstance(message, messages.AttachmentSupportingMessage):
+            return
+        if self._is_active:
+            # extraction is attempted only while the processor is active
+            try:
+                self._process_attachments_in_message(message.original_message)
+            except Exception as error:
+                LOGGER.error(
+                    "Failed to process attachment support message: %s",
+                    error,
+                    exc_info=error,
+                )
+        # flag the original message with the marker attribute and re-queue it
+        forwarded = message.original_message
+        setattr(forwarded, constants.MARKER_ATTRIBUTE_NAME, True)
+        self.messages_streamer.put(forwarded)
+    def _process_attachments_in_message(self, original: messages.BaseMessage) -> None:
+        """
+        Extracts attachments from the scanned attributes of the message and queues them.
+        Args:
+            original: The message whose non-empty input, output, and metadata
+                attributes are passed to the extractor.
+        """
+        message_kind = original.__class__.__name__
+        details = entity_type_from_attachment_message(original)
+        if details is None:
+            LOGGER.error(
+                "Failed to extract entity details from message - %s. Skipping embedded attachments processing.",
+                message_kind,
+            )
+            return
+        extracted: List[attachment_context.AttachmentWithContext] = []
+        for attribute in self.attachment_attributes:
+            payload = getattr(original, attribute, None)
+            if not payload:
+                # attribute is missing or empty - nothing to scan
+                continue
+            extracted.extend(
+                self.extractor.extract_and_replace(
+                    data=payload,
+                    entity_type=details.entity_type,
+                    entity_id=details.entity_id,
+                    project_name=details.project_name,
+                    context=cast(Literal["input", "output", "metadata"], attribute),
+                )
+            )
+        if not extracted:
+            LOGGER.debug("No attachments found in the message - %s.", message_kind)
+            return
+        LOGGER.debug(
+            "Extracted %d attachments from %s (entity: %s/%s)",
+            len(extracted),
+            message_kind,
+            details.entity_type,
+            details.entity_id,
+        )
+        self._process_attachments(extracted)
+    def _process_attachments(
+        self, attachments: List[attachment_context.AttachmentWithContext]
+    ) -> None:
+        """
+        Converts each extracted attachment into a message and puts it on the streamer.
+        Args:
+            attachments: Extracted attachments together with their entity context.
+        """
+        for item in attachments:
+            upload_message = converters.attachment_to_message(
+                attachment_data=item.attachment_data,
+                entity_type=item.entity_type,
+                entity_id=item.entity_id,
+                project_name=item.project_name,
+                url_override=self._url_override,
+                delete_after_upload=True,  # make sure to delete attachments after upload to avoid leaking space and data
+            )
+            self.messages_streamer.put(upload_message)
+def entity_type_from_attachment_message(
+    message: messages.BaseMessage,
+) -> Optional[EntityDetails]:
+    """
+    Derives the entity details (type, id, project name) from a span or trace message.
+    Args:
+        message: The message to inspect.
+    Returns:
+        EntityDetails for create/update span and trace messages, or None for any
+        other message type.
+    """
+    if isinstance(message, (messages.CreateSpanMessage, messages.UpdateSpanMessage)):
+        return EntityDetails(
+            entity_type="span",
+            entity_id=message.span_id,
+            project_name=message.project_name,
+        )
+    if isinstance(message, (messages.CreateTraceMessage, messages.UpdateTraceMessage)):
+        return EntityDetails(
+            entity_type="trace",
+            entity_id=message.trace_id,
+            project_name=message.project_name,
+        )
+    # neither a span nor a trace message - no entity details available
+    return None

opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

opik 1.9.39py3-none-any.whl → 1.9.86py3-none-any.whl