PyPI - opik - Versions diffs - 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl - Mend

opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192)

opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/constants.py +2 -0
opik/api_objects/dataset/dataset.py +133 -40
opik/api_objects/dataset/rest_operations.py +2 -0
opik/api_objects/experiment/experiment.py +6 -0
opik/api_objects/helpers.py +8 -4
opik/api_objects/local_recording.py +6 -5
opik/api_objects/observation_data.py +101 -0
opik/api_objects/opik_client.py +78 -45
opik/api_objects/opik_query_language.py +9 -3
opik/api_objects/prompt/chat/chat_prompt.py +18 -1
opik/api_objects/prompt/client.py +8 -1
opik/api_objects/span/span_data.py +3 -88
opik/api_objects/threads/threads_client.py +7 -4
opik/api_objects/trace/trace_data.py +3 -74
opik/api_objects/validation_helpers.py +3 -3
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +14 -12
opik/config.py +12 -1
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +4 -1
opik/decorator/base_track_decorator.py +111 -37
opik/decorator/context_manager/span_context_manager.py +5 -1
opik/decorator/generator_wrappers.py +5 -4
opik/decorator/span_creation_handler.py +13 -4
opik/evaluation/engine/engine.py +111 -28
opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
opik/evaluation/evaluator.py +12 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
opik/evaluation/metrics/heuristics/equals.py +11 -7
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
opik/evaluation/models/litellm/util.py +4 -20
opik/evaluation/models/models_factory.py +19 -5
opik/evaluation/rest_operations.py +3 -3
opik/evaluation/threads/helpers.py +3 -2
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/integrations/adk/legacy_opik_tracer.py +9 -11
opik/integrations/adk/opik_tracer.py +2 -2
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
opik/integrations/dspy/callback.py +100 -14
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_tracer.py +2 -2
opik/integrations/langchain/__init__.py +15 -2
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_tracer.py +258 -160
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
opik/integrations/llama_index/callback.py +43 -6
opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
opik/integrations/openai/opik_tracker.py +99 -4
opik/integrations/openai/videos/__init__.py +9 -0
opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
opik/integrations/openai/videos/videos_create_decorator.py +159 -0
opik/integrations/openai/videos/videos_download_decorator.py +110 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batchers.py +32 -40
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/emulator_message_processor.py +36 -1
opik/message_processing/emulation/models.py +21 -0
opik/message_processing/messages.py +9 -0
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
opik/message_processing/queue_consumer.py +4 -2
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +36 -8
opik/plugins/pytest/experiment_runner.py +1 -1
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +38 -0
opik/rest_api/datasets/client.py +249 -148
opik/rest_api/datasets/raw_client.py +356 -217
opik/rest_api/experiments/client.py +26 -0
opik/rest_api/experiments/raw_client.py +26 -0
opik/rest_api/llm_provider_key/client.py +4 -4
opik/rest_api/llm_provider_key/raw_client.py +4 -4
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
opik/rest_api/manual_evaluation/client.py +101 -0
opik/rest_api/manual_evaluation/raw_client.py +172 -0
opik/rest_api/optimizations/client.py +0 -166
opik/rest_api/optimizations/raw_client.py +0 -248
opik/rest_api/projects/client.py +9 -0
opik/rest_api/projects/raw_client.py +13 -0
opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
opik/rest_api/prompts/client.py +130 -2
opik/rest_api/prompts/raw_client.py +175 -0
opik/rest_api/traces/client.py +101 -0
opik/rest_api/traces/raw_client.py +120 -0
opik/rest_api/types/__init__.py +46 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +38 -2
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
opik/rest_api/types/dataset_item.py +1 -1
opik/rest_api/types/dataset_item_batch.py +4 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +1 -1
opik/rest_api/types/dataset_item_filter.py +4 -0
opik/rest_api/types/dataset_item_page_compare.py +0 -1
opik/rest_api/types/dataset_item_page_public.py +0 -1
opik/rest_api/types/dataset_item_public.py +1 -1
opik/rest_api/types/dataset_version_public.py +5 -0
opik/rest_api/types/dataset_version_summary.py +5 -0
opik/rest_api/types/dataset_version_summary_public.py +5 -0
opik/rest_api/types/experiment.py +9 -0
opik/rest_api/types/experiment_public.py +9 -0
opik/rest_api/types/llm_as_judge_message_content.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt_version.py +1 -0
opik/rest_api/types/prompt_version_detail.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +1 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +5 -1
opik/rest_api/types/provider_api_key_provider.py +2 -1
opik/rest_api/types/provider_api_key_public.py +5 -1
opik/rest_api/types/provider_api_key_public_provider.py +2 -1
opik/rest_api/types/service_toggles_config.py +11 -1
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/METADATA +5 -5
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/RECORD +190 -141
opik/cli/export.py +0 -791
opik/cli/import_command.py +0 -575
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
{opik-1.9.41.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0

opik/api_objects/opik_client.py CHANGED Viewed

@@ -2,7 +2,7 @@ import atexit
 import datetime
 import functools
 import logging
-from typing import Any, Dict, List, Optional, TypeVar, Union, Literal
+from typing import Any, Dict, List, Optional, TypeVar, Union, Literal, cast
 import httpx
@@ -42,9 +42,9 @@ from ..message_processing import (
     messages,
     streamer_constructors,
     message_queue,
-    message_processors_chain,
 )
 from ..message_processing.batching import sequence_splitter
+from ..message_processing.processors import message_processors_chain
 from ..rest_api import client as rest_api_client
 from ..rest_api.core.api_error import ApiError
 from ..rest_api.types import (
@@ -55,7 +55,13 @@ from ..rest_api.types import (
     span_filter_public,
     trace_filter_public,
 )
-from ..types import ErrorInfoDict, FeedbackScoreDict, LLMProvider, SpanType
+from ..types import (
+    BatchFeedbackScoreDict,
+    ErrorInfoDict,
+    FeedbackScoreDict,
+    LLMProvider,
+    SpanType,
+)
 LOGGER = logging.getLogger(__name__)
@@ -107,13 +113,7 @@ class Opik:
         self._use_batching = _use_batching
         self._initialize_streamer(
-            url_override=config_.url_override,
-            workers=config_.background_workers,
-            file_upload_worker_count=config_.file_upload_background_workers,
-            api_key=config_.api_key,
-            check_tls_certificate=config_.check_tls_certificate,
             use_batching=_use_batching,
-            enable_json_request_compression=config_.enable_json_request_compression,
         )
         atexit.register(self.end, timeout=self._flush_timeout)
@@ -152,24 +152,17 @@ class Opik:
     def _initialize_streamer(
         self,
-        url_override: str,
-        workers: int,
-        file_upload_worker_count: int,
-        api_key: Optional[str],
-        check_tls_certificate: bool,
         use_batching: bool,
-        enable_json_request_compression: bool,
     ) -> None:
-        httpx_client_ = httpx_client.get(
+        self._httpx_client = httpx_client.get(
             workspace=self._workspace,
-            api_key=api_key,
-            check_tls_certificate=check_tls_certificate,
-            compress_json_requests=enable_json_request_compression,
+            api_key=self._config.api_key,
+            check_tls_certificate=self._config.check_tls_certificate,
+            compress_json_requests=self._config.enable_json_request_compression,
         )
-        self._httpx_client = httpx_client_
         self._rest_client = rest_api_client.OpikApi(
-            base_url=url_override,
-            httpx_client=httpx_client_,
+            base_url=self._config.url_override,
+            httpx_client=self._httpx_client,
         )
         self._rest_client._client_wrapper._timeout = (
             httpx.USE_CLIENT_DEFAULT
@@ -181,19 +174,22 @@ class Opik:
             batch_factor=self._config.maximal_queue_size_batch_factor,
         )
-        self._message_processor = (
+        self.__internal_api__message_processor__ = (
             message_processors_chain.create_message_processors_chain(
                 rest_client=self._rest_client
             )
         )
         self._streamer = streamer_constructors.construct_online_streamer(
-            n_consumers=workers,
+            n_consumers=self._config.background_workers,
             rest_client=self._rest_client,
-            httpx_client=httpx_client_,
+            httpx_client=self._httpx_client,
             use_batching=use_batching,
-            file_upload_worker_count=file_upload_worker_count,
+            use_attachment_extraction=self._config.is_attachment_extraction_active,
+            min_base64_embedded_attachment_size=self._config.min_base64_embedded_attachment_size,
+            file_upload_worker_count=self._config.file_upload_background_workers,
             max_queue_size=max_queue_size,
-            message_processor=self._message_processor,
+            message_processor=self.__internal_api__message_processor__,
+            url_override=self._config.url_override,
         )
     def _display_trace_url(self, trace_id: str, project_name: str) -> None:
@@ -295,7 +291,9 @@ class Opik:
             for feedback_score in feedback_scores:
                 feedback_score["id"] = id
-            self.log_traces_feedback_scores(feedback_scores, project_name)
+            self.log_traces_feedback_scores(
+                cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
+            )
         if attachments is not None:
             for attachment_data in attachments:
@@ -470,7 +468,9 @@ class Opik:
             for feedback_score in feedback_scores:
                 feedback_score["id"] = id
-            self.log_spans_feedback_scores(feedback_scores, project_name)
+            self.log_spans_feedback_scores(
+                cast(List[BatchFeedbackScoreDict], feedback_scores), project_name
+            )
         return span.span_client.create_span(
             trace_id=trace_id,
@@ -639,23 +639,34 @@ class Opik:
         )
     def log_spans_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for spans.
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a span id via `id` key for each score is mandatory.
             project_name: The name of the project in which the spans are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
         Returns:
             None
+        Example:
+            >>> from opik import Opik
+            >>> client = Opik()
+            >>> # Batch logging across multiple projects
+            >>> scores = [
+            >>>     {"id": span1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
+            >>>     {"id": span2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
+            >>> ]
+            >>> client.log_spans_feedback_scores(scores=scores)
         """
         score_messages = helpers.parse_feedback_score_messages(
             scores=scores,
-            project_name=project_name or self._project_name,
+            project_name=project_name or self.project_name,
             parsed_item_class=messages.FeedbackScoreMessage,
             logger=LOGGER,
         )
@@ -677,23 +688,34 @@ class Opik:
             self._streamer.put(add_span_feedback_scores_batch_message)
     def log_traces_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for traces.
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a trace id via `id` key for each score is mandatory.
             project_name: The name of the project in which the traces are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
         Returns:
             None
+        Example:
+            >>> from opik import Opik
+            >>> client = Opik()
+            >>> # Batch logging across multiple projects
+            >>> scores = [
+            >>>     {"id": trace1_id, "name": "accuracy", "value": 0.95, "project_name": "project-A"},
+            >>>     {"id": trace2_id, "name": "accuracy", "value": 0.88, "project_name": "project-B"},
+            >>> ]
+            >>> client.log_traces_feedback_scores(scores=scores)
         """
         score_messages = helpers.parse_feedback_score_messages(
             scores=scores,
-            project_name=project_name or self._project_name,
+            project_name=project_name or self.project_name,
             parsed_item_class=messages.FeedbackScoreMessage,
             logger=LOGGER,
         )
@@ -716,16 +738,17 @@ class Opik:
             self._streamer.put(add_trace_feedback_scores_batch_message)
     def log_threads_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Log feedback scores for threads.
         Args:
-            scores (List[FeedbackScoreDict]): A list of feedback score dictionaries.
+            scores (List[BatchFeedbackScoreDict]): A list of feedback score dictionaries.
                 Specifying a thread id via `id` key for each score is mandatory.
             project_name: The name of the project in which the threads are logged. If not set, the project name
                 which was configured when the Opik instance was created will be used.
+                Deprecated: use `project_name` in the feedback score dictionary that's listed in the `scores` parameter.
         Returns:
             None
@@ -733,13 +756,10 @@ class Opik:
         Example:
             >>> from opik import Opik
             >>> client = Opik()
+            >>> # Batch logging across multiple projects
             >>> scores = [
-            >>>     {
-            >>>         "id": "thread_123",
-            >>>         "name": "user_satisfaction",
-            >>>         "value": 0.85,
-            >>>         "reason": "User seemed satisfied with the conversation"
-            >>>     }
+            >>>     {"id": "thread_123", "name": "user_satisfaction", "value": 0.85, "project_name": "project-A"},
+            >>>     {"id": "thread_456", "name": "user_satisfaction", "value": 0.92, "project_name": "project-B"},
             >>> ]
             >>> client.log_threads_feedback_scores(scores=scores)
         """
@@ -801,6 +821,7 @@ class Opik:
             name=name,
             description=dataset_fern.description,
             rest_client=self._rest_client,
+            dataset_items_count=dataset_fern.dataset_items_count,
         )
         dataset_.__internal_api__sync_hashes__()
@@ -886,6 +907,7 @@ class Opik:
             name=name,
             description=description,
             rest_client=self._rest_client,
+            dataset_items_count=0,
         )
         self._display_created_dataset_url(dataset_name=name, dataset_id=result.id)
@@ -921,6 +943,7 @@ class Opik:
         prompts: Optional[List[prompt_module.base_prompt.BasePrompt]] = None,
         type: Literal["regular", "trial", "mini-batch"] = "regular",
         optimization_id: Optional[str] = None,
+        tags: Optional[List[str]] = None,
     ) -> experiment.Experiment:
         """
         Creates a new experiment using the given dataset name and optional parameters.
@@ -934,6 +957,7 @@ class Opik:
             type: The type of the experiment. Can be "regular", "trial", or "mini-batch".
                 Defaults to "regular". "trial" and "mini-batch" are only relevant for prompt optimization experiments.
             optimization_id: Optional ID of the optimization associated with the experiment.
+            tags: Optional list of tags to associate with the experiment.
         Returns:
             experiment.Experiment: The newly created experiment object.
@@ -958,6 +982,7 @@ class Opik:
             prompt_versions=prompt_versions,
             type=type,
             optimization_id=optimization_id,
+            tags=tags,
         )
         experiment_ = experiment.Experiment(
@@ -968,6 +993,7 @@ class Opik:
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
             prompts=checked_prompts,
+            tags=tags,
         )
         return experiment_
@@ -1032,6 +1058,7 @@ class Opik:
             rest_client=self._rest_client,
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
+            tags=experiment_public.tags,
         )
     def get_experiments_by_name(self, name: str) -> List[experiment.Experiment]:
@@ -1058,6 +1085,7 @@ class Opik:
                 rest_client=self._rest_client,
                 streamer=self._streamer,
                 experiments_client=self.get_experiments_client(),
+                tags=public_experiment.tags,
             )
             result.append(experiment_)
@@ -1091,6 +1119,7 @@ class Opik:
             rest_client=self._rest_client,
             streamer=self._streamer,
             experiments_client=self.get_experiments_client(),
+            tags=experiment_public.tags,
         )
     def end(self, timeout: Optional[int] = None) -> None:
@@ -1155,7 +1184,7 @@ class Opik:
                 - `start_time`, `end_time`: =, >, <, >=, <=
                 - `input`, `output`: =, contains, not_contains
                 - `metadata`: =, contains, >, <
-                - `feedback_scores`: =, >, <, >=, <=
+                - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
                 - `tags`: contains (only)
                 - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
@@ -1165,6 +1194,8 @@ class Opik:
                 - `input contains "question"` - Filter by input content
                 - `usage.total_tokens > 1000` - Filter by token usage
                 - `feedback_scores.accuracy > 0.8` - Filter by feedback score
+                - `feedback_scores.my_metric is_empty` - Filter traces with empty feedback score
+                - `feedback_scores.my_metric is_not_empty` - Filter traces with non-empty feedback score
                 - `tags contains "production"` - Filter by tag
                 - `metadata.model = "gpt-4"` - Filter by metadata field
                 - `thread_id = "thread_123"` - Filter by thread ID
@@ -1247,7 +1278,7 @@ class Opik:
                 - `start_time`, `end_time`: =, >, <, >=, <=
                 - `input`, `output`: =, contains, not_contains
                 - `metadata`: =, contains, >, <
-                - `feedback_scores`: =, >, <, >=, <=
+                - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
                 - `tags`: contains (only)
                 - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
@@ -1257,6 +1288,8 @@ class Opik:
                 - `input contains "question"` - Filter by input content
                 - `usage.total_tokens > 1000` - Filter by token usage
                 - `feedback_scores.accuracy > 0.8` - Filter by feedback score
+                - `feedback_scores.my_metric is_empty` - Filter spans with empty feedback score
+                - `feedback_scores.my_metric is_not_empty` - Filter spans with non-empty feedback score
                 - `tags contains "production"` - Filter by tag
                 - `metadata.model = "gpt-4"` - Filter by metadata field
                 - `thread_id = "thread_123"` - Filter by thread ID

opik/api_objects/opik_query_language.py CHANGED Viewed

@@ -54,7 +54,7 @@ SUPPORTED_OPERATORS = {
     ],
     "output": ["=", "contains", "not_contains"],
     "metadata": ["=", "contains", ">", "<"],
-    "feedback_scores": ["=", ">", "<", ">=", "<="],
+    "feedback_scores": ["=", ">", "<", ">=", "<=", "is_empty", "is_not_empty"],
     "tags": ["contains"],
     "usage.total_tokens": ["=", "!=", ">", "<", ">=", "<="],
     "usage.prompt_tokens": ["=", "!=", ">", "<", ">=", "<="],
@@ -132,6 +132,8 @@ SUPPORTED_OPERATORS = {
     ],
 }
+OPERATORS_WITHOUT_VALUES = {"is_empty", "is_not_empty"}
 class OpikQueryLanguage:
     """
@@ -384,8 +386,12 @@ class OpikQueryLanguage:
             # Parse operators
             parsed_operator = self._parse_operator(parsed_field["field"])
-            # Parse values
-            parsed_value = self._parse_value()
+            operator_name = parsed_operator.get("operator", "")
+            if operator_name in OPERATORS_WITHOUT_VALUES:
+                # For operators without values, use empty string as value
+                parsed_value = {"value": ""}
+            else:
+                parsed_value = self._parse_value()
             expressions.append({**parsed_field, **parsed_operator, **parsed_value})

opik/api_objects/prompt/chat/chat_prompt.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import copy
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple, Type
 from typing_extensions import override
 from opik.rest_api import types as rest_api_types
+from opik.validation import chat_prompt_messages, validator
 from . import chat_prompt_template
 from .. import client as prompt_client
 from .. import types as prompt_types
@@ -16,6 +18,10 @@ class ChatPrompt(base_prompt.BasePrompt):
     Similar to Prompt but uses a list of chat messages instead of a string template.
     """
+    _parameter_validators: List[Tuple[str, Type[validator.RaisableValidator]]] = [
+        ("messages", chat_prompt_messages.ChatPromptMessagesValidator),
+    ]
     def __init__(
         self,
         name: str,
@@ -37,8 +43,12 @@ class ChatPrompt(base_prompt.BasePrompt):
         Raises:
             PromptTemplateStructureMismatch: If a text prompt with the same name already exists (template structure is immutable).
+            ValidationError: If messages structure is invalid.
         """
+        # Validate messages structure
+        self._validate_inputs(messages=messages)
         self._chat_template = chat_prompt_template.ChatPromptTemplate(
             messages=messages,
             template_type=type,
@@ -54,6 +64,13 @@ class ChatPrompt(base_prompt.BasePrompt):
         self._sync_with_backend()
+    def _validate_inputs(self, **kwargs: Any) -> None:
+        for parameter, validator_class in self._parameter_validators:
+            if parameter in kwargs:
+                validator_instance = validator_class(kwargs[parameter])
+                validator_instance.validate()
+                validator_instance.raise_if_validation_failed()
     def _sync_with_backend(self) -> None:
         from opik.api_objects import opik_client

opik/api_objects/prompt/client.py CHANGED Viewed

@@ -143,7 +143,14 @@ class PromptClient:
                 commit=commit,
             )
-            # Client-side validation for template_structure if requested
+            should_skip_validation = (
+                prompt_version.template_structure is None
+                and raise_if_not_template_structure == "text"
+            )
+            if should_skip_validation:
+                return prompt_version
+            # Client-side validation for template_structure if requested and not skipped
             if (
                 raise_if_not_template_structure is not None
                 and prompt_version.template_structure != raise_if_not_template_structure

opik/api_objects/span/span_data.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import dataclasses
 import datetime
-import logging
 from typing import Any, Dict, List, Optional, Union
 import opik.api_objects.attachment as attachment
@@ -13,20 +12,12 @@ from opik.types import (
     LLMProvider,
     SpanType,
 )
-from .. import helpers, data_helpers
+from .. import helpers
+from ..observation_data import ObservationData
-LOGGER = logging.getLogger(__name__)
-# Engineer note:
-#
-# After moving to minimal python version 3.10, a lot of common content
-# from SpanData and TraceData can be moved to ObservationData parent dataclass.
-# Before that it's impossible because of the dataclasses limitation to have optional arguments
-# strictly after positional ones (including the attributes from the parent class).
-# In python 3.10 @dataclass(kw_only=True) should help.
 @dataclasses.dataclass
-class SpanData:
+class SpanData(ObservationData):
     """
     The SpanData object is returned when calling :func:`opik.opik_context.get_current_span_data` from a tracked function.
     """
@@ -34,24 +25,11 @@ class SpanData:
     trace_id: str
     id: str = dataclasses.field(default_factory=helpers.generate_id)
     parent_span_id: Optional[str] = None
-    name: Optional[str] = None
     type: SpanType = "general"
-    start_time: Optional[datetime.datetime] = dataclasses.field(
-        default_factory=datetime_helpers.local_timestamp
-    )
-    end_time: Optional[datetime.datetime] = None
-    metadata: Optional[Dict[str, Any]] = None
-    input: Optional[Dict[str, Any]] = None
-    output: Optional[Dict[str, Any]] = None
-    tags: Optional[List[str]] = None
     usage: Optional[Union[Dict[str, Any], llm_usage.OpikUsage]] = None
-    feedback_scores: Optional[List[FeedbackScoreDict]] = None
-    project_name: Optional[str] = None
     model: Optional[str] = None
     provider: Optional[Union[str, LLMProvider]] = None
-    error_info: Optional[ErrorInfoDict] = None
     total_cost: Optional[float] = None
-    attachments: Optional[List[attachment.Attachment]] = None
     def create_child_span_data(
         self,
@@ -95,69 +73,6 @@ class SpanData:
             attachments=attachments,
         )
-    def update(self, **new_data: Any) -> "SpanData":
-        """
-        Updates the attributes of the object with the provided key-value pairs. This method checks if
-        an attribute exists before updating it and merges the data appropriately for specific
-        keywords like metadata, output, input, attachments, and tags. If a key doesn't correspond
-        to an attribute of the object or the provided value is None, the update is skipped.
-        Args:
-            **new_data: Key-value pairs of attributes to update. Keys should match existing
-                attributes on the object, and values that are None will not update.
-        Returns:
-            SpanData: The updated object instance.
-        """
-        for key, value in new_data.items():
-            if value is None:
-                continue
-            if key not in self.__dict__ and key != "prompts":
-                LOGGER.debug(
-                    "An attempt to update span with parameter name it doesn't have: %s",
-                    key,
-                )
-                continue
-            if key == "metadata":
-                self.metadata = data_helpers.merge_metadata(
-                    self.metadata, new_metadata=value
-                )
-                continue
-            elif key == "output":
-                self.output = data_helpers.merge_outputs(self.output, new_outputs=value)
-                continue
-            elif key == "input":
-                self.input = data_helpers.merge_inputs(self.input, new_inputs=value)
-                continue
-            elif key == "attachments":
-                self._update_attachments(value)
-                continue
-            elif key == "tags":
-                self.tags = data_helpers.merge_tags(self.tags, new_tags=value)
-                continue
-            elif key == "prompts":
-                self.metadata = data_helpers.merge_metadata(
-                    self.metadata, new_metadata=new_data.get("metadata"), prompts=value
-                )
-                continue
-            self.__dict__[key] = value
-        return self
-    def init_end_time(self) -> "SpanData":
-        self.end_time = datetime_helpers.local_timestamp()
-        return self
-    def _update_attachments(self, attachments: List[attachment.Attachment]) -> None:
-        if self.attachments is None:
-            self.attachments = attachments
-        else:
-            self.attachments.extend(attachments)
     @property
     def as_start_parameters(self) -> Dict[str, Any]:
         """Returns parameters of this span to be sent to the server when starting a new span."""

opik/api_objects/threads/threads_client.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import List, Optional
 import opik
 from opik.rest_api import TraceThread
-from opik.types import FeedbackScoreDict
+from opik.types import BatchFeedbackScoreDict
 from .. import helpers, rest_stream_parser, constants
 from ... import config
@@ -74,7 +74,7 @@ class ThreadsClient:
                 - `start_time`, `end_time`: =, >, <, >=, <=
                 - `input`, `output`: =, contains, not_contains
                 - `metadata`: =, contains, >, <
-                - `feedback_scores`: =, >, <, >=, <=
+                - `feedback_scores`: =, >, <, >=, <=, is_empty, is_not_empty
                 - `tags`: contains (only)
                 - `usage.total_tokens`, `usage.prompt_tokens`, `usage.completion_tokens`, `duration`, `number_of_messages`, `total_estimated_cost`: =, !=, >, <, >=, <=
@@ -84,6 +84,8 @@ class ThreadsClient:
                 - `duration > 300` - Filter by thread duration (seconds)
                 - `number_of_messages >= 5` - Filter by message count
                 - `feedback_scores.user_frustration > 0.5` - Filter by feedback score
+                - `feedback_scores.my_metric is_empty` - Filter threads with empty feedback score
+                - `feedback_scores.my_metric is_not_empty` - Filter threads with non-empty feedback score
                 - `tags contains "important"` - Filter by tag
                 If not provided, all threads in the project will be returned up to the limit.
@@ -127,7 +129,7 @@ class ThreadsClient:
         return threads
     def log_threads_feedback_scores(
-        self, scores: List[FeedbackScoreDict], project_name: Optional[str] = None
+        self, scores: List[BatchFeedbackScoreDict], project_name: Optional[str] = None
     ) -> None:
         """
         Logs feedback scores for threads in a specific project. This method processes the given
@@ -138,7 +140,8 @@ class ThreadsClient:
             scores: A list of dictionaries containing feedback scores
                 for threads to be logged. Specifying a thread id via `id` key for each score is mandatory.
             project_name: The name of the project to associate with the logged
-                scores. If not provided, the scores won't be associated with any specific project.
+                scores. If not provided, the project name configured in the Opik client will be used.
+                This parameter is used as a fallback if `project_name` is not specified in the score dictionary.
         """
         project_name = project_name or self._opik_client.project_name

opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl

opik 1.9.41__py3-none-any.whl → 1.9.86__py3-none-any.whl