PyPI - opik - Versions diffs - 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl - Mend

opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195) hide show

opik/api_objects/attachment/attachment_context.py +36 -0
opik/api_objects/attachment/attachments_extractor.py +153 -0
opik/api_objects/attachment/client.py +1 -0
opik/api_objects/attachment/converters.py +2 -0
opik/api_objects/attachment/decoder.py +18 -0
opik/api_objects/attachment/decoder_base64.py +83 -0
opik/api_objects/attachment/decoder_helpers.py +137 -0
opik/api_objects/constants.py +2 -0
opik/api_objects/dataset/dataset.py +133 -40
opik/api_objects/dataset/rest_operations.py +2 -0
opik/api_objects/experiment/experiment.py +6 -0
opik/api_objects/helpers.py +8 -4
opik/api_objects/local_recording.py +6 -5
opik/api_objects/observation_data.py +101 -0
opik/api_objects/opik_client.py +78 -45
opik/api_objects/opik_query_language.py +9 -3
opik/api_objects/prompt/chat/chat_prompt.py +18 -1
opik/api_objects/prompt/client.py +8 -1
opik/api_objects/span/span_data.py +3 -88
opik/api_objects/threads/threads_client.py +7 -4
opik/api_objects/trace/trace_data.py +3 -74
opik/api_objects/validation_helpers.py +3 -3
opik/cli/exports/__init__.py +131 -0
opik/cli/exports/dataset.py +278 -0
opik/cli/exports/experiment.py +784 -0
opik/cli/exports/project.py +685 -0
opik/cli/exports/prompt.py +578 -0
opik/cli/exports/utils.py +406 -0
opik/cli/harbor.py +39 -0
opik/cli/imports/__init__.py +439 -0
opik/cli/imports/dataset.py +143 -0
opik/cli/imports/experiment.py +1192 -0
opik/cli/imports/project.py +262 -0
opik/cli/imports/prompt.py +177 -0
opik/cli/imports/utils.py +280 -0
opik/cli/main.py +14 -12
opik/config.py +12 -1
opik/datetime_helpers.py +12 -0
opik/decorator/arguments_helpers.py +4 -1
opik/decorator/base_track_decorator.py +111 -37
opik/decorator/context_manager/span_context_manager.py +5 -1
opik/decorator/generator_wrappers.py +5 -4
opik/decorator/span_creation_handler.py +13 -4
opik/evaluation/engine/engine.py +111 -28
opik/evaluation/engine/evaluation_tasks_executor.py +71 -19
opik/evaluation/evaluator.py +12 -0
opik/evaluation/metrics/conversation/llm_judges/conversational_coherence/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/session_completeness/metric.py +3 -1
opik/evaluation/metrics/conversation/llm_judges/user_frustration/metric.py +3 -1
opik/evaluation/metrics/heuristics/equals.py +11 -7
opik/evaluation/metrics/llm_judges/answer_relevance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_precision/metric.py +3 -1
opik/evaluation/metrics/llm_judges/context_recall/metric.py +3 -1
opik/evaluation/metrics/llm_judges/factuality/metric.py +1 -1
opik/evaluation/metrics/llm_judges/g_eval/metric.py +3 -1
opik/evaluation/metrics/llm_judges/hallucination/metric.py +3 -1
opik/evaluation/metrics/llm_judges/moderation/metric.py +3 -1
opik/evaluation/metrics/llm_judges/structure_output_compliance/metric.py +3 -1
opik/evaluation/metrics/llm_judges/syc_eval/metric.py +4 -2
opik/evaluation/metrics/llm_judges/trajectory_accuracy/metric.py +3 -1
opik/evaluation/metrics/llm_judges/usefulness/metric.py +3 -1
opik/evaluation/metrics/ragas_metric.py +43 -23
opik/evaluation/models/litellm/litellm_chat_model.py +7 -2
opik/evaluation/models/litellm/util.py +4 -20
opik/evaluation/models/models_factory.py +19 -5
opik/evaluation/rest_operations.py +3 -3
opik/evaluation/threads/helpers.py +3 -2
opik/file_upload/file_uploader.py +13 -0
opik/file_upload/upload_options.py +2 -0
opik/integrations/adk/legacy_opik_tracer.py +9 -11
opik/integrations/adk/opik_tracer.py +2 -2
opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py +2 -2
opik/integrations/dspy/callback.py +100 -14
opik/integrations/dspy/parsers.py +168 -0
opik/integrations/harbor/__init__.py +17 -0
opik/integrations/harbor/experiment_service.py +269 -0
opik/integrations/harbor/opik_tracker.py +528 -0
opik/integrations/haystack/opik_tracer.py +2 -2
opik/integrations/langchain/__init__.py +15 -2
opik/integrations/langchain/langgraph_tracer_injector.py +88 -0
opik/integrations/langchain/opik_tracer.py +258 -160
opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/helpers.py +7 -4
opik/integrations/llama_index/callback.py +43 -6
opik/integrations/openai/agents/opik_tracing_processor.py +8 -10
opik/integrations/openai/opik_tracker.py +99 -4
opik/integrations/openai/videos/__init__.py +9 -0
opik/integrations/openai/videos/binary_response_write_to_file_decorator.py +88 -0
opik/integrations/openai/videos/videos_create_decorator.py +159 -0
opik/integrations/openai/videos/videos_download_decorator.py +110 -0
opik/message_processing/batching/base_batcher.py +14 -21
opik/message_processing/batching/batch_manager.py +22 -10
opik/message_processing/batching/batchers.py +32 -40
opik/message_processing/batching/flushing_thread.py +0 -3
opik/message_processing/emulation/emulator_message_processor.py +36 -1
opik/message_processing/emulation/models.py +21 -0
opik/message_processing/messages.py +9 -0
opik/message_processing/preprocessing/__init__.py +0 -0
opik/message_processing/preprocessing/attachments_preprocessor.py +70 -0
opik/message_processing/preprocessing/batching_preprocessor.py +53 -0
opik/message_processing/preprocessing/constants.py +1 -0
opik/message_processing/preprocessing/file_upload_preprocessor.py +38 -0
opik/message_processing/preprocessing/preprocessor.py +36 -0
opik/message_processing/processors/__init__.py +0 -0
opik/message_processing/processors/attachments_extraction_processor.py +146 -0
opik/message_processing/{message_processors.py → processors/message_processors.py} +15 -1
opik/message_processing/{message_processors_chain.py → processors/message_processors_chain.py} +3 -2
opik/message_processing/{online_message_processor.py → processors/online_message_processor.py} +11 -9
opik/message_processing/queue_consumer.py +4 -2
opik/message_processing/streamer.py +71 -33
opik/message_processing/streamer_constructors.py +36 -8
opik/plugins/pytest/experiment_runner.py +1 -1
opik/plugins/pytest/hooks.py +5 -3
opik/rest_api/__init__.py +42 -0
opik/rest_api/datasets/client.py +321 -123
opik/rest_api/datasets/raw_client.py +470 -145
opik/rest_api/experiments/client.py +26 -0
opik/rest_api/experiments/raw_client.py +26 -0
opik/rest_api/llm_provider_key/client.py +4 -4
opik/rest_api/llm_provider_key/raw_client.py +4 -4
opik/rest_api/llm_provider_key/types/provider_api_key_write_provider.py +2 -1
opik/rest_api/manual_evaluation/client.py +101 -0
opik/rest_api/manual_evaluation/raw_client.py +172 -0
opik/rest_api/optimizations/client.py +0 -166
opik/rest_api/optimizations/raw_client.py +0 -248
opik/rest_api/projects/client.py +9 -0
opik/rest_api/projects/raw_client.py +13 -0
opik/rest_api/projects/types/project_metric_request_public_metric_type.py +4 -0
opik/rest_api/prompts/client.py +130 -2
opik/rest_api/prompts/raw_client.py +175 -0
opik/rest_api/traces/client.py +101 -0
opik/rest_api/traces/raw_client.py +120 -0
opik/rest_api/types/__init__.py +50 -0
opik/rest_api/types/audio_url.py +19 -0
opik/rest_api/types/audio_url_public.py +19 -0
opik/rest_api/types/audio_url_write.py +19 -0
opik/rest_api/types/automation_rule_evaluator.py +38 -2
opik/rest_api/types/automation_rule_evaluator_object_object_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_public.py +33 -2
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_public.py +22 -0
opik/rest_api/types/automation_rule_evaluator_span_user_defined_metric_python_write.py +22 -0
opik/rest_api/types/automation_rule_evaluator_update.py +27 -1
opik/rest_api/types/automation_rule_evaluator_update_span_user_defined_metric_python.py +22 -0
opik/rest_api/types/automation_rule_evaluator_write.py +27 -1
opik/rest_api/types/dataset.py +2 -0
opik/rest_api/types/dataset_item.py +1 -1
opik/rest_api/types/dataset_item_batch.py +4 -0
opik/rest_api/types/dataset_item_changes_public.py +5 -0
opik/rest_api/types/dataset_item_compare.py +1 -1
opik/rest_api/types/dataset_item_filter.py +4 -0
opik/rest_api/types/dataset_item_page_compare.py +0 -1
opik/rest_api/types/dataset_item_page_public.py +0 -1
opik/rest_api/types/dataset_item_public.py +1 -1
opik/rest_api/types/dataset_public.py +2 -0
opik/rest_api/types/dataset_version_public.py +10 -0
opik/rest_api/types/dataset_version_summary.py +46 -0
opik/rest_api/types/dataset_version_summary_public.py +46 -0
opik/rest_api/types/experiment.py +9 -0
opik/rest_api/types/experiment_public.py +9 -0
opik/rest_api/types/group_content_with_aggregations.py +1 -0
opik/rest_api/types/llm_as_judge_message_content.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_public.py +2 -0
opik/rest_api/types/llm_as_judge_message_content_write.py +2 -0
opik/rest_api/types/manual_evaluation_request_entity_type.py +1 -1
opik/rest_api/types/project.py +1 -0
opik/rest_api/types/project_detailed.py +1 -0
opik/rest_api/types/project_metric_response_public_metric_type.py +4 -0
opik/rest_api/types/project_reference.py +31 -0
opik/rest_api/types/project_reference_public.py +31 -0
opik/rest_api/types/project_stats_summary_item.py +1 -0
opik/rest_api/types/prompt_version.py +1 -0
opik/rest_api/types/prompt_version_detail.py +1 -0
opik/rest_api/types/prompt_version_page_public.py +5 -0
opik/rest_api/types/prompt_version_public.py +1 -0
opik/rest_api/types/prompt_version_update.py +33 -0
opik/rest_api/types/provider_api_key.py +5 -1
opik/rest_api/types/provider_api_key_provider.py +2 -1
opik/rest_api/types/provider_api_key_public.py +5 -1
opik/rest_api/types/provider_api_key_public_provider.py +2 -1
opik/rest_api/types/service_toggles_config.py +11 -1
opik/rest_api/types/span_user_defined_metric_python_code.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_public.py +20 -0
opik/rest_api/types/span_user_defined_metric_python_code_write.py +20 -0
opik/types.py +36 -0
opik/validation/chat_prompt_messages.py +241 -0
opik/validation/feedback_score.py +3 -3
opik/validation/validator.py +28 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/METADATA +7 -7
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/RECORD +193 -142
opik/cli/export.py +0 -791
opik/cli/import_command.py +0 -575
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/WHEEL +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/entry_points.txt +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/licenses/LICENSE +0 -0
{opik-1.9.39.dist-info → opik-1.9.86.dist-info}/top_level.txt +0 -0

opik/file_upload/file_uploader.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import logging
+import os
 from typing import Optional
 import httpx
@@ -28,6 +29,10 @@ def upload_attachment(
             httpx_client=upload_httpx_client,
             monitor=monitor,
         )
+        # delete the file after upload if requested
+        if upload_options.delete_after_upload:
+            _delete_attachment_file(upload_options.file_path)
     except Exception as e:
         LOGGER.error(
             "Failed to upload attachment: '%s' from file: [%s] with size: [%s]. Error: %s",
@@ -40,6 +45,14 @@ def upload_attachment(
         raise
+def _delete_attachment_file(file_path: str) -> None:
+    try:
+        os.unlink(file_path)
+    except OSError as e:
+        LOGGER.info(f"Failed to delete attachment file: '{file_path}'. Reason: {e}.")
+        pass
 def _do_upload_attachment(
     upload_options: file_upload_options.FileUploadOptions,
     rest_client: rest_api_client.OpikApi,

opik/file_upload/upload_options.py CHANGED Viewed

@@ -16,6 +16,7 @@ class FileUploadOptions:
     entity_id: str
     project_name: str
     encoded_url_override: str
+    delete_after_upload: bool
 def file_upload_options_from_attachment(
@@ -32,4 +33,5 @@ def file_upload_options_from_attachment(
         entity_id=attachment.entity_id,
         project_name=attachment.project_name,
         encoded_url_override=attachment.encoded_url_override,
+        delete_after_upload=attachment.delete_after_upload,
     )

opik/integrations/adk/legacy_opik_tracer.py CHANGED Viewed

@@ -158,15 +158,13 @@ class LegacyOpikTracer:
                     input=user_input,
                     type="general",
                 )
-                _, opik_span_data = (
-                    span_creation_handler.create_span_respecting_context(
-                        start_span_arguments=start_span_arguments,
-                        distributed_trace_headers=None,
-                        opik_context_storage=self._context_storage,
-                    )
+                result = span_creation_handler.create_span_respecting_context(
+                    start_span_arguments=start_span_arguments,
+                    distributed_trace_headers=None,
+                    opik_context_storage=self._context_storage,
                 )
-                self._start_span(span_data=opik_span_data)
+                self._start_span(span_data=result.span_data)
         except Exception as e:
             LOGGER.error(f"Failed during before_agent_callback(): {e}", exc_info=True)
@@ -212,7 +210,7 @@ class LegacyOpikTracer:
             if provider is None:
                 provider = adk_helpers.get_adk_provider()
-            _, span_data = span_creation_handler.create_span_respecting_context(
+            result = span_creation_handler.create_span_respecting_context(
                 start_span_arguments=arguments_helpers.StartSpanParameters(
                     name=llm_request.model,
                     project_name=self.project_name,
@@ -226,7 +224,7 @@ class LegacyOpikTracer:
                 opik_context_storage=self._context_storage,
             )
-            self._start_span(span_data=span_data)
+            self._start_span(span_data=result.span_data)
         except Exception as e:
             LOGGER.error(f"Failed during before_model_callback(): {e}", exc_info=True)
@@ -300,7 +298,7 @@ class LegacyOpikTracer:
                 **self.metadata,
             }
-            _, span_data = span_creation_handler.create_span_respecting_context(
+            result = span_creation_handler.create_span_respecting_context(
                 start_span_arguments=arguments_helpers.StartSpanParameters(
                     name=tool.name,
                     project_name=self.project_name,
@@ -312,7 +310,7 @@ class LegacyOpikTracer:
                 opik_context_storage=self._context_storage,
             )
-            self._start_span(span_data=span_data)
+            self._start_span(span_data=result.span_data)
         except Exception as e:
             LOGGER.error(f"Failed during before_tool_callback(): {e}", exc_info=True)

opik/integrations/adk/opik_tracer.py CHANGED Viewed

@@ -173,7 +173,7 @@ class OpikTracer:
             # ADK runs `before_model_callback` before running `start_as_current_span` function for the LLM call,
             # which makes it impossible to update the Opik span from this method.
             # So we create a span manually here. This flow is handled inside ADKTracerWrapper.
-            _, span_data = span_creation_handler.create_span_respecting_context(
+            result = span_creation_handler.create_span_respecting_context(
                 start_span_arguments=arguments_helpers.StartSpanParameters(
                     name=model,
                     project_name=self.project_name,
@@ -189,7 +189,7 @@ class OpikTracer:
                 distributed_trace_headers=None,
             )
-            context_storage.add_span_data(span_data)
+            context_storage.add_span_data(result.span_data)
         except Exception as e:
             LOGGER.error(f"Failed during before_model_callback(): {e}", exc_info=True)

opik/integrations/adk/patchers/adk_otel_tracer/opik_adk_otel_tracer.py CHANGED Viewed

@@ -190,11 +190,11 @@ def _prepare_trace_and_span_to_be_finalized(
             type="general",
         )
-        _, span_to_close_in_finally_block = (
+        span_to_close_in_finally_block = (
             span_creation_handler.create_span_respecting_context(
                 start_span_arguments=start_span_arguments,
                 distributed_trace_headers=None,
-            )
+            ).span_data
         )
         opik.context_storage.add_span_data(span_to_close_in_finally_block)

opik/integrations/dspy/callback.py CHANGED Viewed

@@ -1,14 +1,16 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Tuple, Union
 import logging
 import dspy
 from dspy.utils import callback as dspy_callback
-from opik import context_storage, opik_context, tracing_runtime_config, types
+from opik import context_storage, opik_context, tracing_runtime_config
+from opik import llm_usage
 from opik.api_objects import helpers, span, trace, opik_client
 from opik.decorator import error_info_collector
 from .graph import build_mermaid_graph_from_module
+from .parsers import LMHistoryInfo, extract_lm_info_from_history, get_span_type
 LOGGER = logging.getLogger(__name__)
@@ -32,6 +34,8 @@ class OpikCallback(dspy_callback.BaseCallback):
     ):
         self._map_call_id_to_span_data: Dict[str, span.SpanData] = {}
         self._map_call_id_to_trace_data: Dict[str, trace.TraceData] = {}
+        # Store (lm_instance, expected_messages) for extracting usage and verifying correct history entry
+        self._map_call_id_to_lm_info: Dict[str, Tuple[Any, Optional[Any]]] = {}
         self._origins_metadata: Dict[str, Any] = {"created_from": "dspy"}
@@ -103,7 +107,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             parent_project_name=current_span_data.project_name,
             child_project_name=self._project_name,
         )
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         span_data = span.SpanData(
             trace_id=current_span_data.trace_id,
@@ -127,7 +131,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             current_trace_data.project_name,
             self._project_name,
         )
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         span_data = span.SpanData(
             trace_id=current_trace_data.id,
@@ -198,13 +202,54 @@ class OpikCallback(dspy_callback.BaseCallback):
         call_id: str,
         outputs: Optional[Any],
         exception: Optional[Exception] = None,
+        usage: Optional[llm_usage.OpikUsage] = None,
+        extra_metadata: Optional[Dict[str, Any]] = None,
+        actual_provider: Optional[str] = None,
+        actual_model: Optional[str] = None,
+        total_cost: Optional[float] = None,
     ) -> None:
         if span_data := self._map_call_id_to_span_data.pop(call_id, None):
             if exception:
                 error_info = error_info_collector.collect(exception)
                 span_data.update(error_info=error_info)
-            span_data.update(output={"output": outputs}).init_end_time()
+            # Prepare the update dict
+            update_kwargs: Dict[str, Any] = {
+                "output": {"output": outputs},
+                "usage": usage,
+                "total_cost": total_cost,
+            }
+            # Handle LLM routers like OpenRouter that return the actual serving provider/model
+            if extra_metadata is None:
+                extra_metadata = {}
+            # Update provider if actual provider differs (e.g., OpenRouter -> Hyperbolic)
+            if (
+                actual_provider is not None
+                and span_data.provider is not None
+                and span_data.provider.lower() != actual_provider.lower()
+            ):
+                # Store the original provider (e.g., "openrouter") in metadata
+                extra_metadata["llm_router"] = span_data.provider
+                # Update to the actual provider for accurate cost tracking
+                update_kwargs["provider"] = actual_provider.lower()
+            if (
+                actual_model is not None
+                and span_data.model is not None
+                and span_data.model != actual_model
+            ):
+                # Store the original model (e.g., "@preset/qwen") in metadata
+                extra_metadata["original_model"] = span_data.model
+                # Update to the actual model for accurate cost tracking
+                update_kwargs["model"] = actual_model
+            # Only set metadata if we have something to add
+            if extra_metadata:
+                update_kwargs["metadata"] = extra_metadata
+            span_data.update(**update_kwargs).init_end_time()
             if tracing_runtime_config.is_tracing_active():
                 self._opik_client.span(**span_data.as_parameters)
@@ -231,7 +276,7 @@ class OpikCallback(dspy_callback.BaseCallback):
             trace_id = current_callback_context_data.id
             parent_span_id = None
-        span_type = self._get_span_type(instance)
+        span_type = get_span_type(instance)
         return span.SpanData(
             trace_id=trace_id,
@@ -263,6 +308,13 @@ class OpikCallback(dspy_callback.BaseCallback):
             name=f"{span_data.name}: {provider} - {model}",
         )
         self._map_call_id_to_span_data[call_id] = span_data
+        # Store LM instance and expected messages for extracting usage
+        self._map_call_id_to_lm_info[call_id] = (
+            instance,
+            inputs.get("messages"),
+        )
         self._set_current_context_data(span_data)
     def on_lm_end(
@@ -271,10 +323,22 @@ class OpikCallback(dspy_callback.BaseCallback):
         outputs: Optional[Dict[str, Any]],
         exception: Optional[Exception] = None,
     ) -> None:
+        lm_info = self._extract_lm_info_from_history(call_id)
+        # Add cache_hit to span metadata only when we have a definitive value
+        extra_metadata = (
+            {"cache_hit": lm_info.cache_hit} if lm_info.cache_hit is not None else None
+        )
         self._end_span(
             call_id=call_id,
             exception=exception,
             outputs=outputs,
+            usage=lm_info.usage,
+            extra_metadata=extra_metadata,
+            actual_provider=lm_info.actual_provider,
+            actual_model=lm_info.actual_model,
+            total_cost=lm_info.total_cost,
         )
     def on_tool_start(
@@ -316,14 +380,36 @@ class OpikCallback(dspy_callback.BaseCallback):
             return span_data
         return self._context_storage.get_trace_data()
-    def _get_span_type(self, instance: Any) -> types.SpanType:
-        if isinstance(instance, dspy.Predict):
-            return "llm"
-        elif isinstance(instance, dspy.LM):
-            return "llm"
-        elif isinstance(instance, dspy.Tool):
-            return "tool"
-        return "general"
+    def _extract_lm_info_from_history(self, call_id: str) -> LMHistoryInfo:
+        """
+        Extract token usage, cache status, actual provider, and cost from the LM's history.
+        DSPy stores usage information in the LM's history after each call.
+        We verify the history entry matches our expected messages to handle
+        potential race conditions with concurrent LM calls.
+        For routers like OpenRouter, the response contains the actual provider
+        that served the request (e.g., "Novita", "Together"), which differs from
+        the router name used in the model string (e.g., "openrouter").
+        The cost field is provided by providers like OpenRouter and includes
+        accurate pricing for all token types (reasoning, cache, multimodal).
+        Returns:
+            LMHistoryInfo containing usage, cache_hit, actual_provider, and total_cost.
+        """
+        lm_info = self._map_call_id_to_lm_info.pop(call_id, None)
+        if lm_info is None:
+            return LMHistoryInfo(
+                usage=None,
+                cache_hit=None,
+                actual_provider=None,
+                actual_model=None,
+                total_cost=None,
+            )
+        lm_instance, expected_messages = lm_info
+        return extract_lm_info_from_history(lm_instance, expected_messages)
     def _get_opik_metadata(self, instance: Any) -> Dict[str, Any]:
         graph = None

opik/integrations/dspy/parsers.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Parsers and data structures for extracting information from DSPy LM responses.
+This module contains utilities for parsing DSPy LM history entries and
+extracting relevant information like usage, provider, and cost data.
+"""
+from dataclasses import dataclass
+from typing import Any, Optional
+import logging
+import dspy
+from opik import llm_usage, types
+LOGGER = logging.getLogger(__name__)
+@dataclass
+class LMHistoryInfo:
+    """
+    Information extracted from a DSPy LM history entry.
+    This dataclass holds the parsed information from an LM call's history,
+    including usage statistics, cache status, provider information, and cost.
+    Attributes:
+        usage: Token usage information (prompt, completion, total tokens)
+        cache_hit: Whether the response was served from cache.
+            True if cached, False if not, None if unknown.
+        actual_provider: The actual provider that served the request.
+            This is useful for LLM routers like OpenRouter that may route
+            to different underlying providers (e.g., "Novita", "Together").
+        actual_model: The actual model that served the request.
+            This is useful for LLM routers like OpenRouter when using presets
+            (e.g., "@preset/qwen" resolves to "qwen/qwen3-235b-a22b-2507").
+        total_cost: The total cost of the request from the provider.
+            This includes accurate pricing for all token types.
+    """
+    usage: Optional[llm_usage.OpikUsage]
+    cache_hit: Optional[bool]
+    actual_provider: Optional[str]
+    actual_model: Optional[str]
+    total_cost: Optional[float]
+def get_span_type(instance: Any) -> types.SpanType:
+    """
+    Determine the span type based on the DSPy instance type.
+    Args:
+        instance: A DSPy module, LM, or tool instance.
+    Returns:
+        The appropriate span type: "llm" for Predict/LM, "tool" for Tool,
+        or "general" for other types.
+    """
+    if isinstance(instance, dspy.Predict):
+        return "llm"
+    elif isinstance(instance, dspy.LM):
+        return "llm"
+    elif isinstance(instance, dspy.Tool):
+        return "tool"
+    return "general"
+def extract_lm_info_from_history(
+    lm_instance: Any,
+    expected_messages: Optional[Any],
+) -> LMHistoryInfo:
+    """
+    Extract token usage, cache status, actual provider, and cost from the LM's history.
+    DSPy stores usage information in the LM's history after each call.
+    We verify the history entry matches our expected messages to handle
+    potential race conditions with concurrent LM calls.
+    For routers like OpenRouter, the response contains the actual provider
+    that served the request (e.g., "Novita", "Together"), which differs from
+    the router name used in the model string (e.g., "openrouter").
+    The cost field is provided by providers like OpenRouter and includes
+    accurate pricing for all token types (reasoning, cache, multimodal).
+    Args:
+        lm_instance: The DSPy LM instance that has the history.
+        expected_messages: The expected messages to match in the history entry.
+    Returns:
+        LMHistoryInfo containing usage, cache_hit, actual_provider, and total_cost.
+    """
+    empty_result = LMHistoryInfo(
+        usage=None,
+        cache_hit=None,
+        actual_provider=None,
+        actual_model=None,
+        total_cost=None,
+    )
+    if not hasattr(lm_instance, "history") or not lm_instance.history:
+        return empty_result
+    try:
+        last_entry = lm_instance.history[-1]
+        # Verify we have the correct history entry by checking messages match
+        if last_entry.get("messages") != expected_messages:
+            LOGGER.debug(
+                "History entry messages don't match expected messages, "
+                "skipping usage extraction (possibly due to concurrent LM calls)"
+            )
+            return empty_result
+        response = last_entry.get("response")
+        usage_dict = last_entry.get("usage")
+        # Extract actual provider and model from response (useful for routers like OpenRouter)
+        # The response is a LiteLLM ModelResponse object with 'provider' and 'model' attributes
+        # when using routers like OpenRouter
+        actual_provider: Optional[str] = None
+        actual_model: Optional[str] = None
+        if response is not None:
+            if hasattr(response, "provider"):
+                actual_provider = response.provider
+            if hasattr(response, "model"):
+                actual_model = response.model
+        # Extract cost from history entry or usage dict
+        # OpenRouter and other providers return accurate cost including all token types
+        total_cost: Optional[float] = None
+        if (cost := last_entry.get("cost") or 0) > 0:
+            total_cost = cost
+        elif usage_dict and (cost := usage_dict.get("cost") or 0) > 0:
+            total_cost = cost
+        # Get explicit cache_hit if set, otherwise infer from usage (empty = cached)
+        if response is None:
+            cache_hit = not usage_dict
+        elif hasattr(response, "cache_hit") and response.cache_hit is not None:
+            cache_hit = response.cache_hit
+        else:
+            # Fallback: infer from usage (empty = cached)
+            cache_hit = not usage_dict
+        if usage_dict:
+            usage = llm_usage.build_opik_usage_from_unknown_provider(usage_dict)
+            return LMHistoryInfo(
+                usage=usage,
+                cache_hit=cache_hit,
+                actual_provider=actual_provider,
+                actual_model=actual_model,
+                total_cost=total_cost,
+            )
+        else:
+            return LMHistoryInfo(
+                usage=None,
+                cache_hit=cache_hit,
+                actual_provider=actual_provider,
+                actual_model=actual_model,
+                total_cost=None,
+            )
+    except Exception:
+        LOGGER.debug(
+            "Failed to extract info from DSPy LM history",
+            exc_info=True,
+        )
+        return empty_result

opik/integrations/harbor/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+Opik integration for Harbor benchmark evaluation framework.
+Example:
+    >>> from opik.integrations.harbor import track_harbor
+    >>> job = Job(config)
+    >>> tracked_job = track_harbor(job)
+    >>> result = await tracked_job.run()
+Or enable tracking globally (for CLI usage):
+    >>> from opik.integrations.harbor import track_harbor
+    >>> track_harbor()
+"""
+from .opik_tracker import track_harbor, reset_harbor_tracking
+__all__ = ["track_harbor", "reset_harbor_tracking"]

opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl

opik 1.9.39__py3-none-any.whl → 1.9.86__py3-none-any.whl