PyPI - opik - Versions diffs - 1.9.5__py3-none-any.whl → 1.9.39__py3-none-any.whl - Mend

opik 1.9.5__py3-none-any.whl → 1.9.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248) hide show

opik/__init__.py +10 -3
opik/anonymizer/__init__.py +5 -0
opik/anonymizer/anonymizer.py +12 -0
opik/anonymizer/factory.py +80 -0
opik/anonymizer/recursive_anonymizer.py +64 -0
opik/anonymizer/rules.py +56 -0
opik/anonymizer/rules_anonymizer.py +35 -0
opik/api_objects/dataset/rest_operations.py +5 -0
opik/api_objects/experiment/experiment.py +46 -49
opik/api_objects/experiment/helpers.py +34 -10
opik/api_objects/local_recording.py +8 -3
opik/api_objects/opik_client.py +230 -48
opik/api_objects/opik_query_language.py +9 -0
opik/api_objects/prompt/__init__.py +11 -3
opik/api_objects/prompt/base_prompt.py +69 -0
opik/api_objects/prompt/base_prompt_template.py +29 -0
opik/api_objects/prompt/chat/__init__.py +1 -0
opik/api_objects/prompt/chat/chat_prompt.py +193 -0
opik/api_objects/prompt/chat/chat_prompt_template.py +350 -0
opik/api_objects/prompt/{chat_content_renderer_registry.py → chat/content_renderer_registry.py} +37 -35
opik/api_objects/prompt/client.py +101 -30
opik/api_objects/prompt/text/__init__.py +1 -0
opik/api_objects/prompt/text/prompt.py +174 -0
opik/api_objects/prompt/{prompt_template.py → text/prompt_template.py} +10 -6
opik/api_objects/prompt/types.py +1 -1
opik/cli/export.py +6 -2
opik/cli/usage_report/charts.py +39 -10
opik/cli/usage_report/cli.py +164 -45
opik/cli/usage_report/pdf.py +14 -1
opik/config.py +0 -5
opik/decorator/base_track_decorator.py +37 -40
opik/decorator/context_manager/span_context_manager.py +9 -0
opik/decorator/context_manager/trace_context_manager.py +5 -0
opik/dict_utils.py +3 -3
opik/evaluation/__init__.py +13 -2
opik/evaluation/engine/engine.py +195 -223
opik/evaluation/engine/helpers.py +8 -7
opik/evaluation/engine/metrics_evaluator.py +237 -0
opik/evaluation/evaluation_result.py +35 -1
opik/evaluation/evaluator.py +318 -30
opik/evaluation/models/litellm/util.py +78 -6
opik/evaluation/models/model_capabilities.py +33 -0
opik/evaluation/report.py +14 -2
opik/evaluation/rest_operations.py +36 -33
opik/evaluation/test_case.py +2 -2
opik/evaluation/types.py +9 -1
opik/exceptions.py +17 -0
opik/hooks/__init__.py +17 -1
opik/hooks/anonymizer_hook.py +36 -0
opik/id_helpers.py +18 -0
opik/integrations/adk/helpers.py +16 -7
opik/integrations/adk/legacy_opik_tracer.py +7 -4
opik/integrations/adk/opik_tracer.py +3 -1
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +7 -3
opik/integrations/adk/recursive_callback_injector.py +1 -6
opik/integrations/dspy/callback.py +1 -4
opik/integrations/haystack/opik_connector.py +2 -2
opik/integrations/haystack/opik_tracer.py +2 -4
opik/integrations/langchain/opik_tracer.py +273 -82
opik/integrations/llama_index/callback.py +110 -108
opik/integrations/openai/agents/opik_tracing_processor.py +1 -2
opik/integrations/openai/opik_tracker.py +1 -1
opik/message_processing/batching/batchers.py +11 -7
opik/message_processing/encoder_helpers.py +79 -0
opik/message_processing/messages.py +25 -1
opik/message_processing/online_message_processor.py +23 -8
opik/opik_context.py +7 -7
opik/rest_api/__init__.py +188 -12
opik/rest_api/client.py +3 -0
opik/rest_api/dashboards/__init__.py +4 -0
opik/rest_api/dashboards/client.py +462 -0
opik/rest_api/dashboards/raw_client.py +648 -0
opik/rest_api/datasets/client.py +893 -89
opik/rest_api/datasets/raw_client.py +1328 -87
opik/rest_api/experiments/client.py +30 -2
opik/rest_api/experiments/raw_client.py +26 -0
opik/rest_api/feedback_definitions/types/find_feedback_definitions_request_type.py +1 -1
opik/rest_api/optimizations/client.py +302 -0
opik/rest_api/optimizations/raw_client.py +463 -0
opik/rest_api/optimizations/types/optimization_update_status.py +3 -1
opik/rest_api/prompts/__init__.py +2 -2
opik/rest_api/prompts/client.py +34 -4
opik/rest_api/prompts/raw_client.py +32 -2
opik/rest_api/prompts/types/__init__.py +3 -1
opik/rest_api/prompts/types/create_prompt_version_detail_template_structure.py +5 -0
opik/rest_api/prompts/types/prompt_write_template_structure.py +5 -0
opik/rest_api/spans/__init__.py +0 -2
opik/rest_api/spans/client.py +148 -64
opik/rest_api/spans/raw_client.py +210 -83
opik/rest_api/spans/types/__init__.py +0 -2
opik/rest_api/traces/client.py +241 -73
opik/rest_api/traces/raw_client.py +344 -90
opik/rest_api/types/__init__.py +200 -15
opik/rest_api/types/aggregation_data.py +1 -0
opik/rest_api/types/alert_trigger_config_public_type.py +6 -1
opik/rest_api/types/alert_trigger_config_type.py +6 -1
opik/rest_api/types/alert_trigger_config_write_type.py +6 -1
opik/rest_api/types/automation_rule_evaluator.py +23 -1
opik/rest_api/types/automation_rule_evaluator_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_llm_as_judge_write.py +2 -0
opik/rest_api/types/{automation_rule_evaluator_object_public.py → automation_rule_evaluator_object_object_public.py} +32 -10
opik/rest_api/types/automation_rule_evaluator_page_public.py +2 -2
opik/rest_api/types/automation_rule_evaluator_public.py +23 -1
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_llm_as_judge_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_llm_as_judge_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_trace_thread_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update.py +23 -1
opik/rest_api/types/automation_rule_evaluator_update_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_span_llm_as_judge.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_llm_as_judge.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_trace_thread_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_update_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_public.py +2 -0
opik/rest_api/types/automation_rule_evaluator_user_defined_metric_python_write.py +2 -0
opik/rest_api/types/automation_rule_evaluator_write.py +23 -1
opik/rest_api/types/boolean_feedback_definition.py +25 -0
opik/rest_api/types/boolean_feedback_definition_create.py +20 -0
opik/rest_api/types/boolean_feedback_definition_public.py +25 -0
opik/rest_api/types/boolean_feedback_definition_update.py +20 -0
opik/rest_api/types/boolean_feedback_detail.py +29 -0
opik/rest_api/types/boolean_feedback_detail_create.py +29 -0
opik/rest_api/types/boolean_feedback_detail_public.py +29 -0
opik/rest_api/types/boolean_feedback_detail_update.py +29 -0
opik/rest_api/types/dashboard_page_public.py +24 -0
opik/rest_api/types/dashboard_public.py +30 -0
opik/rest_api/types/dataset.py +2 -0
opik/rest_api/types/dataset_item.py +2 -0
opik/rest_api/types/dataset_item_compare.py +2 -0
opik/rest_api/types/dataset_item_filter.py +23 -0
opik/rest_api/types/dataset_item_filter_operator.py +21 -0
opik/rest_api/types/dataset_item_page_compare.py +1 -0
opik/rest_api/types/dataset_item_page_public.py +1 -0
opik/rest_api/types/dataset_item_public.py +2 -0
opik/rest_api/types/dataset_item_update.py +39 -0
opik/rest_api/types/dataset_item_write.py +1 -0
opik/rest_api/types/dataset_public.py +2 -0
opik/rest_api/types/dataset_public_status.py +5 -0
opik/rest_api/types/dataset_status.py +5 -0
opik/rest_api/types/dataset_version_diff.py +22 -0
opik/rest_api/types/dataset_version_diff_stats.py +24 -0
opik/rest_api/types/dataset_version_page_public.py +23 -0
opik/rest_api/types/dataset_version_public.py +49 -0
opik/rest_api/types/experiment.py +2 -0
opik/rest_api/types/experiment_public.py +2 -0
opik/rest_api/types/experiment_score.py +20 -0
opik/rest_api/types/experiment_score_public.py +20 -0
opik/rest_api/types/experiment_score_write.py +20 -0
opik/rest_api/types/feedback.py +20 -1
opik/rest_api/types/feedback_create.py +16 -1
opik/rest_api/types/feedback_object_public.py +22 -1
opik/rest_api/types/feedback_public.py +20 -1
opik/rest_api/types/feedback_score_public.py +4 -0
opik/rest_api/types/feedback_update.py +16 -1
opik/rest_api/types/image_url.py +20 -0
opik/rest_api/types/image_url_public.py +20 -0
opik/rest_api/types/image_url_write.py +20 -0
opik/rest_api/types/llm_as_judge_message.py +5 -1
opik/rest_api/types/llm_as_judge_message_content.py +24 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +24 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +24 -0
opik/rest_api/types/llm_as_judge_message_public.py +5 -1
opik/rest_api/types/llm_as_judge_message_write.py +5 -1
opik/rest_api/types/llm_as_judge_model_parameters.py +2 -0
opik/rest_api/types/llm_as_judge_model_parameters_public.py +2 -0
opik/rest_api/types/llm_as_judge_model_parameters_write.py +2 -0
opik/rest_api/types/optimization.py +2 -0
opik/rest_api/types/optimization_public.py +2 -0
opik/rest_api/types/optimization_public_status.py +3 -1
opik/rest_api/types/optimization_status.py +3 -1
opik/rest_api/types/optimization_studio_config.py +27 -0
opik/rest_api/types/optimization_studio_config_public.py +27 -0
opik/rest_api/types/optimization_studio_config_write.py +27 -0
opik/rest_api/types/optimization_studio_log.py +22 -0
opik/rest_api/types/optimization_write.py +2 -0
opik/rest_api/types/optimization_write_status.py +3 -1
opik/rest_api/types/prompt.py +6 -0
opik/rest_api/types/prompt_detail.py +6 -0
opik/rest_api/types/prompt_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_public.py +6 -0
opik/rest_api/types/prompt_public_template_structure.py +5 -0
opik/rest_api/types/prompt_template_structure.py +5 -0
opik/rest_api/types/prompt_version.py +2 -0
opik/rest_api/types/prompt_version_detail.py +2 -0
opik/rest_api/types/prompt_version_detail_template_structure.py +5 -0
opik/rest_api/types/prompt_version_public.py +2 -0
opik/rest_api/types/prompt_version_public_template_structure.py +5 -0
opik/rest_api/types/prompt_version_template_structure.py +5 -0
opik/rest_api/types/score_name.py +1 -0
opik/rest_api/types/service_toggles_config.py +6 -0
opik/rest_api/types/span_enrichment_options.py +31 -0
opik/rest_api/types/span_filter.py +23 -0
opik/rest_api/types/span_filter_operator.py +21 -0
opik/rest_api/types/span_filter_write.py +23 -0
opik/rest_api/types/span_filter_write_operator.py +21 -0
opik/rest_api/types/span_llm_as_judge_code.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_public.py +27 -0
opik/rest_api/types/span_llm_as_judge_code_write.py +27 -0
opik/rest_api/types/span_update.py +46 -0
opik/rest_api/types/studio_evaluation.py +20 -0
opik/rest_api/types/studio_evaluation_public.py +20 -0
opik/rest_api/types/studio_evaluation_write.py +20 -0
opik/rest_api/types/studio_llm_model.py +21 -0
opik/rest_api/types/studio_llm_model_public.py +21 -0
opik/rest_api/types/studio_llm_model_write.py +21 -0
opik/rest_api/types/studio_message.py +20 -0
opik/rest_api/types/studio_message_public.py +20 -0
opik/rest_api/types/studio_message_write.py +20 -0
opik/rest_api/types/studio_metric.py +21 -0
opik/rest_api/types/studio_metric_public.py +21 -0
opik/rest_api/types/studio_metric_write.py +21 -0
opik/rest_api/types/studio_optimizer.py +21 -0
opik/rest_api/types/studio_optimizer_public.py +21 -0
opik/rest_api/types/studio_optimizer_write.py +21 -0
opik/rest_api/types/studio_prompt.py +20 -0
opik/rest_api/types/studio_prompt_public.py +20 -0
opik/rest_api/types/studio_prompt_write.py +20 -0
opik/rest_api/types/trace.py +6 -0
opik/rest_api/types/trace_public.py +6 -0
opik/rest_api/types/trace_thread_filter_write.py +23 -0
opik/rest_api/types/trace_thread_filter_write_operator.py +21 -0
opik/rest_api/types/trace_thread_update.py +19 -0
opik/rest_api/types/trace_update.py +39 -0
opik/rest_api/types/value_entry.py +2 -0
opik/rest_api/types/value_entry_compare.py +2 -0
opik/rest_api/types/value_entry_experiment_item_bulk_write_view.py +2 -0
opik/rest_api/types/value_entry_public.py +2 -0
opik/rest_api/types/video_url.py +19 -0
opik/rest_api/types/video_url_public.py +19 -0
opik/rest_api/types/video_url_write.py +19 -0
opik/synchronization.py +5 -6
opik/{decorator/tracing_runtime_config.py → tracing_runtime_config.py} +6 -7
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/METADATA +5 -4
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/RECORD +246 -151
opik/api_objects/prompt/chat_prompt_template.py +0 -164
opik/api_objects/prompt/prompt.py +0 -131
opik/rest_api/{spans/types → types}/span_update_type.py +0 -0
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/WHEEL +0 -0
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/entry_points.txt +0 -0
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/licenses/LICENSE +0 -0
{opik-1.9.5.dist-info → opik-1.9.39.dist-info}/top_level.txt +0 -0

opik/evaluation/models/litellm/util.py CHANGED Viewed

@@ -36,16 +36,33 @@ def apply_model_specific_filters(
     already_warned: Set[str],
     warn: Callable[[str, Any], None],
 ) -> None:
-    """Remove parameters known to be unsupported for specific models.
+    """Adjust/drop params for specific model families before calling LiteLLM.
-    Currently handles the GPT-5 family which only honours temperature=1 and does not
-    return log probabilities. Removing those eagerly avoids provider errors while the
-    callback surfaces a one-time warning to the caller.
+    Currently handles:
+    - GPT-5: only honours temperature=1 and does not return log probabilities.
+    - DashScope Qwen: enforces constraints for logprobs / top_logprobs
     """
+    if model_name.startswith("gpt-5"):
+        _apply_gpt5_filters(params, already_warned, warn)
+        return
-    if not model_name.startswith("gpt-5"):
+    if model_name.startswith("dashscope/"):
+        _apply_qwen_dashscope_filters(params, already_warned, warn)
         return
+def _apply_gpt5_filters(
+    params: Dict[str, Any],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Apply GPT-5 specific parameter filters.
+    Only honours temperature=1 and does not return log probabilities.
+    Removing those eagerly avoids provider errors while the callback surfaces a
+    one-time warning to the caller.
+    """
     unsupported: list[tuple[str, Any]] = []
     if "temperature" in params:
@@ -61,7 +78,62 @@ def apply_model_specific_filters(
         if param in params:
             unsupported.append((param, params[param]))
-    for param, value in unsupported:
+    _drop_unsupported_params_with_warning(
+        params,
+        unsupported,
+        already_warned,
+        warn,
+    )
+def _apply_qwen_dashscope_filters(
+    params: Dict[str, Any],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Apply Qwen/DashScope specific parameter filters.
+    top_logprobs is only meaningful if logprobs is true and must be an int
+    in [0, 5]. When logprobs is false, drops top_logprobs; when logprobs is
+    true, clamps top_logprobs into [0, 5].
+    """
+    unsupported: list[tuple[str, Any]] = []
+    logprobs_value = params.get("logprobs")
+    if not logprobs_value:
+        if "top_logprobs" in params:
+            unsupported.append(("top_logprobs", params["top_logprobs"]))
+    else:
+        if "top_logprobs" in params:
+            raw_top_logprobs = params["top_logprobs"]
+            try:
+                top_logprobs = int(raw_top_logprobs)
+            except (TypeError, ValueError):
+                unsupported.append(("top_logprobs", raw_top_logprobs))
+            else:
+                if top_logprobs < 0:
+                    top_logprobs = 0
+                elif top_logprobs > 5:
+                    top_logprobs = 5
+                params["top_logprobs"] = top_logprobs
+    _drop_unsupported_params_with_warning(
+        params,
+        unsupported,
+        already_warned,
+        warn,
+    )
+def _drop_unsupported_params_with_warning(
+    params: Dict[str, Any],
+    unsupported_params: list[tuple[str, Any]],
+    already_warned: Set[str],
+    warn: Callable[[str, Any], None],
+) -> None:
+    """Remove unsupported params and emit warnings once per param name."""
+    for param, value in unsupported_params:
         params.pop(param, None)
         if param in already_warned:
             continue

opik/evaluation/models/model_capabilities.py CHANGED Viewed

@@ -79,6 +79,29 @@ def vision_capability_detector(model_name: str) -> bool:
     return False
+def video_capability_detector(model_name: str) -> bool:
+    """
+    Heuristically determine whether a model accepts video inputs.
+    Providers rarely expose structured metadata for video support, so we fall back
+    to naming conventions (e.g. models whose names contain ``video`` or ``qwen``
+    + ``vl``). When those heuristics fail we delegate to the vision detector since
+    current SDK integrations treat video as an extension of multimodal/vision APIs.
+    """
+    stripped = _strip_provider_prefix(model_name)
+    candidates = {model_name, stripped}
+    for candidate in candidates:
+        normalized = candidate.lower()
+        if "video" in normalized:
+            return True
+        if "qwen" in normalized and "vl" in normalized:
+            return True
+    # TODO(opik): litellm/model metadata still treats video + image inputs the same.
+    # Fall back to the vision heuristic so we can keep this dedicated capability
+    # and tighten detection once providers expose richer metadata.
+    return vision_capability_detector(model_name)
 class ModelCapabilitiesRegistry:
     """
     Central registry for model capability detection.
@@ -117,6 +140,12 @@ class ModelCapabilitiesRegistry:
         """
         return self.supports("vision", model_name)
+    def supports_video(self, model_name: Optional[str]) -> bool:
+        """
+        Convenience wrapper for video-capable detection.
+        """
+        return self.supports("video", model_name)
     def add_vision_model(self, model_name: str) -> None:
         # Extend the module-level registry used by vision_capability_detector
         VISION_MODEL_PREFIXES.add(self._strip_provider_prefix(model_name).lower())
@@ -141,6 +170,9 @@ MODEL_CAPABILITIES_REGISTRY = ModelCapabilitiesRegistry()
 MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
     "vision", vision_capability_detector
 )
+MODEL_CAPABILITIES_REGISTRY.register_capability_detector(
+    "video", video_capability_detector
+)
 # Backwards compatibility shim for previous API which exposed a class with classmethods.
 ModelCapabilities = MODEL_CAPABILITIES_REGISTRY
@@ -151,4 +183,5 @@ __all__ = [
     "MODEL_CAPABILITIES_REGISTRY",
     "ModelCapabilities",
     "vision_capability_detector",
+    "video_capability_detector",
 ]

opik/evaluation/report.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from collections import defaultdict
-from typing import Dict, List, Tuple
+from typing import Dict, List, Optional, Tuple
 from rich import align, console, panel, table, text
 from . import test_result, evaluation_result
+from .metrics import score_result
 def _format_time(seconds: float) -> str:
@@ -41,7 +42,10 @@ def _compute_average_scores(
 def display_experiment_results(
-    dataset_name: str, total_time: float, test_results: List[test_result.TestResult]
+    dataset_name: str,
+    total_time: float,
+    test_results: List[test_result.TestResult],
+    experiment_scores: Optional[List[score_result.ScoreResult]] = None,
 ) -> None:
     average_scores, failed_scores = _compute_average_scores(test_results)
     nb_items = len(test_results)
@@ -62,6 +66,14 @@ def display_experiment_results(
             score_strings += text.Text(f" - {failed_scores[name]} failed", style="red")
         score_strings += text.Text("\n")
+    # Add experiment scores if available
+    if experiment_scores:
+        for score in experiment_scores:
+            score_strings += text.Text(
+                f"{score.name}: {score.value:.4f}", style="green bold"
+            )
+            score_strings += text.Text("\n")
     aligned_test_results = align.Align.left(score_strings)
     # Combine table, time text, and test results

opik/evaluation/rest_operations.py CHANGED Viewed

@@ -1,11 +1,14 @@
+import logging
 from typing import List, Optional
-from opik.api_objects import experiment, opik_client
+from opik.api_objects import dataset, experiment, opik_client
 from opik.types import FeedbackScoreDict
 from . import test_case
-from .metrics import arguments_helpers, score_result
+from .metrics import score_result
 from .types import ScoringKeyMappingType
+LOGGER = logging.getLogger(__name__)
 def get_experiment_with_unique_name(
     client: opik_client.Opik, experiment_name: str
@@ -34,40 +37,39 @@ def get_trace_project_name(client: opik_client.Opik, trace_id: str) -> str:
 def get_experiment_test_cases(
-    client: opik_client.Opik,
-    experiment_id: str,
-    dataset_id: str,
+    experiment_: experiment.Experiment,
+    dataset_: dataset.Dataset,
     scoring_key_mapping: Optional[ScoringKeyMappingType],
 ) -> List[test_case.TestCase]:
+    experiment_items = experiment_.get_items()
+    # Fetch dataset items to get input data for bulk-uploaded experiment items
+    dataset_items_by_id = {item["id"]: item for item in dataset_.get_items()}
     test_cases = []
-    page = 1
+    for item in experiment_items:
+        dataset_item_data = dataset_items_by_id.get(item.dataset_item_id)
-    while True:
-        experiment_items_page = (
-            client._rest_client.datasets.find_dataset_items_with_experiment_items(
-                id=dataset_id, experiment_ids=f'["{experiment_id}"]', page=page
+        if dataset_item_data is None:
+            LOGGER.error(
+                f"Unexpected error: Dataset item with id {item.dataset_item_id} not found, skipping experiment item {item.id}"
+            )
+            continue
+        if item.evaluation_task_output is None:
+            LOGGER.error(
+                f"Unexpected error: Evaluation task output is None for experiment item {item.id}, skipping experiment item"
+            )
+            continue
+        test_cases.append(
+            test_case.TestCase(
+                trace_id=item.trace_id,
+                dataset_item_id=item.dataset_item_id,
+                task_output=item.evaluation_task_output,
+                dataset_item_content=dataset_item_data,
             )
         )
-        if len(experiment_items_page.content) == 0:
-            break
-        for item in experiment_items_page.content:
-            experiment_item = item.experiment_items[0]
-            test_cases += [
-                test_case.TestCase(
-                    trace_id=experiment_item.trace_id,
-                    dataset_item_id=experiment_item.dataset_item_id,
-                    task_output=experiment_item.output,
-                    scoring_inputs=arguments_helpers.create_scoring_inputs(
-                        dataset_item=experiment_item.input,
-                        task_output=experiment_item.output,
-                        scoring_key_mapping=scoring_key_mapping,
-                    ),
-                )
-            ]
-        page += 1
     return test_cases
@@ -92,6 +94,7 @@ def log_test_result_feedback_scores(
         )
         all_trace_scores.append(trace_score)
-    client.log_traces_feedback_scores(
-        scores=all_trace_scores, project_name=project_name
-    )
+    if len(all_trace_scores) > 0:
+        client.log_traces_feedback_scores(
+            scores=all_trace_scores, project_name=project_name
+        )

opik/evaluation/test_case.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 import dataclasses
@@ -6,6 +6,6 @@ import dataclasses
 class TestCase:
     trace_id: str
     dataset_item_id: str
-    scoring_inputs: Dict[str, Any]
     task_output: Dict[str, Any]
     dataset_item_content: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    mapped_scoring_inputs: Optional[Dict[str, Any]] = None

opik/evaluation/types.py CHANGED Viewed

@@ -1,5 +1,13 @@
-from typing import Any, Callable, Dict, Union
+from typing import Any, Callable, Dict, List, Union
+from . import test_result
+from .metrics import score_result
 LLMTask = Callable[[Dict[str, Any]], Dict[str, Any]]
 ScoringKeyMappingType = Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]
+ExperimentScoreFunction = Callable[
+    [List[test_result.TestResult]],
+    Union[score_result.ScoreResult, List[score_result.ScoreResult]],
+]

opik/exceptions.py CHANGED Viewed

@@ -81,6 +81,23 @@ class PromptPlaceholdersDontMatchFormatArguments(OpikException):
         )
+class PromptTemplateStructureMismatch(OpikException):
+    """Exception raised when attempting to create a prompt version with a different template structure than the existing prompt."""
+    def __init__(
+        self, prompt_name: str, existing_structure: str, attempted_structure: str
+    ):
+        self.prompt_name = prompt_name
+        self.existing_structure = existing_structure
+        self.attempted_structure = attempted_structure
+    def __str__(self) -> str:
+        return (
+            f"Prompt with name '{self.prompt_name}' already exists and has immutable "
+            f"'{self.existing_structure}' template structure, not '{self.attempted_structure}'. "
+        )
 class ExperimentNotFound(OpikException):
     pass

opik/hooks/__init__.py CHANGED Viewed

@@ -3,5 +3,21 @@ from .httpx_client_hook import (
     add_httpx_client_hook,
     register_httpx_client_hook,
 )
+from .anonymizer_hook import (
+    has_anonymizers,
+    add_anonymizer,
+    apply_anonymizers,
+    get_anonymizers,
+    clear_anonymizers,
+)
-__all__ = ("HttpxClientHook", "add_httpx_client_hook", "register_httpx_client_hook")
+__all__ = (
+    "HttpxClientHook",
+    "add_httpx_client_hook",
+    "register_httpx_client_hook",
+    "add_anonymizer",
+    "apply_anonymizers",
+    "clear_anonymizers",
+    "get_anonymizers",
+    "has_anonymizers",
+)

opik/hooks/anonymizer_hook.py ADDED Viewed

@@ -0,0 +1,36 @@
+from typing import List
+from opik.anonymizer import anonymizer
+# holder for a global list of anonymizers
+_anonymizers: List[anonymizer.Anonymizer] = []
+def add_anonymizer(anonymizer_hook: anonymizer.Anonymizer) -> None:
+    """Register a new anonymizer to be applied to all sensitive data logged by Opik."""
+    _anonymizers.append(anonymizer_hook)
+def clear_anonymizers() -> None:
+    """Clear all registered anonymizers."""
+    _anonymizers.clear()
+def has_anonymizers() -> bool:
+    """Check if any anonymizers have been registered."""
+    return len(_anonymizers) > 0
+def get_anonymizers() -> List[anonymizer.Anonymizer]:
+    """Get a list of all registered anonymizers."""
+    return _anonymizers
+def apply_anonymizers(
+    data: anonymizer.AnonymizerDataType,
+) -> anonymizer.AnonymizerDataType:
+    """Apply all registered anonymizers to the given data."""
+    for anonymizer_ in _anonymizers:
+        data = anonymizer_.anonymize(data)
+    return data

opik/id_helpers.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from datetime import datetime
 from typing import Optional
+import random
+import string
 import uuid
 import uuid6
@@ -12,6 +14,22 @@ def generate_id(timestamp: Optional[datetime] = None) -> str:
     return str(uuid6.uuid7())
+def generate_random_alphanumeric_string(length: int) -> str:
+    """Generate a random alphanumeric string of the specified length.
+    Args:
+        length: The length of the string to generate.
+    Returns:
+        A random string containing only alphanumeric characters (a-z, A-Z, 0-9).
+    """
+    if length < 0:
+        raise ValueError("Length must be non-negative")
+    characters = string.ascii_letters + string.digits
+    return "".join(random.choice(characters) for _ in range(length))
 def uuid4_to_uuid7(user_datetime: datetime, user_uuid: str) -> uuid.UUID:
     """Convert a UUID v4 into a UUID v7 following RFC draft specification."""
     # Get Unix timestamp in milliseconds

opik/integrations/adk/helpers.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import (
     Any,
@@ -8,6 +9,8 @@ from google.adk.models import LlmResponse
 import opik.types as opik_types
 import pydantic
+LOGGER = logging.getLogger(__name__)
 def convert_adk_base_model_to_dict(value: pydantic.BaseModel) -> Dict[str, Any]:
     """Most ADK objects are Pydantic Base Models"""
@@ -27,13 +30,19 @@ def get_adk_provider() -> opik_types.LLMProvider:
 def has_empty_text_part_content(llm_response: LlmResponse) -> bool:
-    if llm_response.content is None or len(llm_response.content.parts) == 0:
-        return True
+    try:
+        if llm_response.content is None:
+            return True
-    # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
-    if len(llm_response.content.parts) == 1:
-        part = llm_response.content.parts[0]
-        if part.text is not None and len(part.text) == 0:
+        if not llm_response.content.parts:
             return True
-    return False
+        # to filter out something like this: {"candidates":[{"content":{"parts":[{"text":""}],"role":"model"}}],...}}
+        if len(llm_response.content.parts) == 1:
+            part = llm_response.content.parts[0]
+            if part.text is not None and len(part.text) == 0:
+                return True
+        return False
+    except Exception as e:
+        LOGGER.warning(f"Exception in has_empty_text_part_content {e}", exc_info=True)
+        return True

opik/integrations/adk/legacy_opik_tracer.py CHANGED Viewed

@@ -8,6 +8,7 @@ from google.adk import models
 from google.adk.tools import base_tool
 from google.adk.tools import tool_context
+import opik
 from opik import context_storage
 from opik.decorator import arguments_helpers, span_creation_handler
 from opik.api_objects import opik_client, span, trace
@@ -77,7 +78,8 @@ class LegacyOpikTracer:
         trace_data = self._context_storage.pop_trace_data()
         assert trace_data is not None
         trace_data.init_end_time()
-        self._opik_client.trace(**trace_data.as_parameters)
+        if opik.is_tracing_active():
+            self._opik_client.trace(**trace_data.as_parameters)
     def _end_current_span(
         self,
@@ -85,20 +87,21 @@ class LegacyOpikTracer:
         span_data = self._context_storage.pop_span_data()
         assert span_data is not None
         span_data.init_end_time()
-        self._opik_client.span(**span_data.as_parameters)
+        if opik.is_tracing_active():
+            self._opik_client.span(**span_data.as_parameters)
     def _start_span(self, span_data: span.SpanData) -> None:
         self._context_storage.add_span_data(span_data)
         self._opik_created_spans.add(span_data.id)
-        if self._opik_client.config.log_start_trace_span:
+        if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
             self._opik_client.span(**span_data.as_start_parameters)
     def _start_trace(self, trace_data: trace.TraceData) -> None:
         self._context_storage.set_trace_data(trace_data)
         self._current_trace_created_by_opik_tracer.set(trace_data.id)
-        if self._opik_client.config.log_start_trace_span:
+        if self._opik_client.config.log_start_trace_span and opik.is_tracing_active():
             self._opik_client.trace(**trace_data.as_start_parameters)
     def _set_current_context_data(self, value: SpanOrTraceData) -> None:

opik/integrations/adk/opik_tracer.py CHANGED Viewed

@@ -7,6 +7,7 @@ from google.adk import models
 from google.adk.tools import base_tool
 from google.adk.tools import tool_context
+import opik
 from opik import context_storage
 from opik.api_objects import opik_client, span, trace
 from opik.types import DistributedTraceHeadersDict
@@ -253,7 +254,8 @@ class OpikTracer:
             current_span.init_end_time()
             # We close this span manually because otherwise ADK will close it too late,
             # and it will also add tool spans inside of it, which we want to avoid.
-            self._opik_client.span(**current_span.as_parameters)
+            if opik.is_tracing_active():
+                self._opik_client.span(**current_span.as_parameters)
             self._last_model_output = output
         except Exception as e:

opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py CHANGED Viewed

@@ -2,6 +2,7 @@ import logging
 from typing import Iterator, Optional, Tuple
 import opentelemetry.trace
+import opik
 import opik.context_storage
 from opik.api_objects import trace, span
 from opik.decorator import (
@@ -100,7 +101,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
             # so we manually finalize it here to avoid incorrect span nesting.
             opik.context_storage.pop_span_data(ensure_id=current_span_data.id)
             current_span_data.init_end_time()
-            self.opik_client.span(**current_span_data.as_parameters)
+            if opik.is_tracing_active():
+                self.opik_client.span(**current_span_data.as_parameters)
             current_span_data = opik.context_storage.top_span_data()
         try:
@@ -145,7 +147,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
         trace_data = opik.context_storage.pop_trace_data(ensure_id=trace_id)
         if trace_data is not None:
             trace_data.init_end_time()
-            self.opik_client.trace(**trace_data.as_parameters)
+            if opik.is_tracing_active():
+                self.opik_client.trace(**trace_data.as_parameters)
     def _ensure_span_is_finalized(self, span_id: str) -> None:
         opik.context_storage.trim_span_data_stack_to_certain_span(span_id)
@@ -153,7 +156,8 @@ class OpikADKOtelTracer(opentelemetry.trace.NoOpTracer):
         span_data = opik.context_storage.pop_span_data(ensure_id=span_id)
         if span_data is not None:
             span_data.init_end_time()
-            self.opik_client.span(**span_data.as_parameters)
+            if opik.is_tracing_active():
+                self.opik_client.span(**span_data.as_parameters)
 def _prepare_trace_and_span_to_be_finalized(

opik/integrations/adk/recursive_callback_injector.py CHANGED Viewed

@@ -2,7 +2,6 @@ import types
 from typing import TypeVar, List, Any, Set
 from . import opik_tracer
 import logging
-from opik import _logging
 from google.adk.tools import agent_tool
 from google.adk import agents
@@ -120,11 +119,7 @@ def track_adk_agent_recursive(
     Returns:
         The modified root agent with tracking enabled
     """
-    _logging.log_once_at_level(
-        logging.INFO,
-        "`track_adk_agent_recursive` is experimental feature. Please let us know if something is not working as expected: https://github.com/comet-ml/opik/issues",
-        logger=LOGGER,
-    )
     recursive_callback_injector = RecursiveCallbackInjector(tracer)
     recursive_callback_injector.inject(root_agent)

opik/integrations/dspy/callback.py CHANGED Viewed

@@ -4,11 +4,8 @@ import logging
 import dspy
 from dspy.utils import callback as dspy_callback
-import opik.types as types
-import opik.opik_context as opik_context
-import opik.context_storage as context_storage
+from opik import context_storage, opik_context, tracing_runtime_config, types
 from opik.api_objects import helpers, span, trace, opik_client
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
 from opik.decorator import error_info_collector
 from .graph import build_mermaid_graph_from_module

opik/integrations/haystack/opik_connector.py CHANGED Viewed

@@ -4,8 +4,8 @@ from typing import Any, Dict, Optional
 import haystack
 from haystack import tracing
-import opik.api_objects.opik_client as opik_client
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
+from opik import tracing_runtime_config
+from opik.api_objects import opik_client
 from . import opik_tracer
 LOGGER = logging.getLogger(__name__)

opik/integrations/haystack/opik_tracer.py CHANGED Viewed

@@ -5,10 +5,8 @@ from typing import Any, Dict, Iterator, List, Optional, Union
 from haystack import tracing
-import opik.url_helpers as url_helpers
-import opik.decorator.tracing_runtime_config as tracing_runtime_config
-import opik.decorator.span_creation_handler as span_creation_handler
-import opik.decorator.arguments_helpers as arguments_helpers
+from opik import tracing_runtime_config, url_helpers
+from opik.decorator import arguments_helpers, span_creation_handler
 from opik.api_objects import opik_client
 from opik.api_objects import span as opik_span
 from opik.api_objects import trace as opik_trace

opik 1.9.5__py3-none-any.whl → 1.9.39__py3-none-any.whl

opik 1.9.5py3-none-any.whl → 1.9.39py3-none-any.whl