PyPI - langwatch - Versions diffs - 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl - Mend

langwatch 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

langwatch/__init__.py +6 -3
langwatch/__version__.py +1 -1
langwatch/client.py +16 -0
langwatch/domain/__init__.py +3 -0
langwatch/dspy/__init__.py +67 -34
langwatch/evaluation/__init__.py +518 -17
langwatch/evaluations.py +183 -353
langwatch/experiment/__init__.py +108 -0
langwatch/{evaluation/evaluation.py → experiment/experiment.py} +44 -5
langwatch/{evaluation → experiment}/platform_run.py +40 -67
langwatch/litellm.py +7 -0
langwatch/openai.py +61 -34
langwatch/prompts/local_loader.py +12 -0
langwatch/prompts/prompt_facade.py +10 -3
langwatch/types.py +5 -0
langwatch/utils/initialization.py +12 -2
langwatch/utils/utils.py +3 -1
{langwatch-0.9.0.dist-info → langwatch-0.10.1.dist-info}/METADATA +1 -1
{langwatch-0.9.0.dist-info → langwatch-0.10.1.dist-info}/RECORD +20 -19
{langwatch-0.9.0.dist-info → langwatch-0.10.1.dist-info}/WHEEL +0 -0

langwatch/__init__.py CHANGED Viewed

@@ -17,7 +17,8 @@ from typing import TYPE_CHECKING
 # Type hints for IntelliSense (only imported for typing)
 if TYPE_CHECKING:
     import langwatch.evaluations as evaluations
-    import langwatch.evaluation as evaluation
+    import langwatch.experiment as experiment
+    import langwatch.evaluation as evaluation  # Deprecated, use experiment
     import langwatch.dataset as dataset
     import langwatch.dspy as dspy
     import langwatch.langchain as langchain
@@ -41,7 +42,8 @@ def _api_key():
 # Lazy loading configuration
 _LAZY_MODULES = {
     "evaluations": "langwatch.evaluations",
-    "evaluation": "langwatch.evaluation",
+    "experiment": "langwatch.experiment",
+    "evaluation": "langwatch.evaluation",  # Deprecated, use experiment
     "dataset": "langwatch.dataset",
     "dspy": "langwatch.dspy",  # Special handling
     "langchain": "langwatch.langchain",  # Special handling
@@ -150,7 +152,8 @@ __all__ = [
     "ensure_setup",
     "get_current_trace",
     "get_current_span",
-    "evaluation",
+    "experiment",
+    "evaluation",  # Deprecated, use experiment
     "dataset",
     "evaluations",
     "langchain",

langwatch/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information for LangWatch."""
-__version__ = "0.9.0" # x-release-please-version
+__version__ = "0.10.1" # x-release-please-version

langwatch/client.py CHANGED Viewed

@@ -50,6 +50,7 @@ class Client(LangWatchClientProtocol):
     _registered_instrumentors: ClassVar[
         dict[opentelemetry.trace.TracerProvider, set[BaseInstrumentor]]
     ] = {}
+    _prompts_path: ClassVar[Optional[str]] = None
     # Regular attributes for protocol compatibility
     base_attributes: BaseAttributes
@@ -69,6 +70,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ) -> "Client":
         """Ensure only one instance of Client exists (singleton pattern)."""
         if cls._instance is None:
@@ -88,6 +90,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ):
         """
         Initialize the LangWatch tracing client.
@@ -140,6 +143,8 @@ class Client(LangWatchClientProtocol):
                 )
             if skip_open_telemetry_setup is not None:
                 Client._skip_open_telemetry_setup = skip_open_telemetry_setup
+            if prompts_path is not None:
+                Client._prompts_path = prompts_path
             if base_attributes is not None:
                 Client._base_attributes = base_attributes
                 # Ensure required SDK attributes remain present after reconfiguration
@@ -215,6 +220,9 @@ class Client(LangWatchClientProtocol):
         if skip_open_telemetry_setup is not None:
             Client._skip_open_telemetry_setup = skip_open_telemetry_setup
+        if prompts_path is not None:
+            Client._prompts_path = prompts_path
         if base_attributes is not None:
             Client._base_attributes = base_attributes
         elif not Client._base_attributes:
@@ -284,6 +292,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ) -> "Client":
         """Create or get the singleton instance of the LangWatch client. Internal use only."""
         if cls._instance is None:
@@ -299,6 +308,7 @@ class Client(LangWatchClientProtocol):
                 span_exclude_rules=span_exclude_rules,
                 ignore_global_tracer_provider_override_warning=ignore_global_tracer_provider_override_warning,
                 skip_open_telemetry_setup=skip_open_telemetry_setup,
+                prompts_path=prompts_path,
             )
         return cls._instance
@@ -327,6 +337,7 @@ class Client(LangWatchClientProtocol):
         cls._skip_open_telemetry_setup = False
         cls._tracer_provider = None
         cls._rest_api_client = None
+        cls._prompts_path = None
         cls._registered_instrumentors.clear()
     @classmethod
@@ -416,6 +427,11 @@ class Client(LangWatchClientProtocol):
         """Get whether OpenTelemetry setup is skipped."""
         return Client._skip_open_telemetry_setup
+    @property
+    def prompts_path(self) -> Optional[str]:
+        """Get the base path for local prompt files."""
+        return Client._prompts_path
     @disable_sending.setter
     def disable_sending(self, value: bool) -> None:
         """Set whether sending is disabled. Spans are still created; the exporter conditionally drops them."""

langwatch/domain/__init__.py CHANGED Viewed

@@ -43,6 +43,7 @@ class ChatMessage(TypedDict, total=False):
     tool_calls: Optional[List[ToolCall]]
     tool_call_id: Optional[str]
     name: Optional[str]
+    reasoning_content: Optional[str]
 class TypedValueChatMessages(TypedDict):
@@ -156,6 +157,7 @@ SpanTypes = Literal[
 class SpanMetrics(TypedDict, total=False):
     prompt_tokens: Optional[int]
     completion_tokens: Optional[int]
+    reasoning_tokens: Optional[int]
     cost: Optional[float]
     first_token_ms: Optional[int]
@@ -179,6 +181,7 @@ class SpanParams(TypedDict, total=False):
     functions: Optional[List[Dict[str, Any]]]
     user: Optional[str]
     response_format: Optional[Union[Dict[str, Any], BaseModel]]
+    reasoning_effort: Optional[str]
 class BaseSpan(TypedDict):

langwatch/dspy/__init__.py CHANGED Viewed

@@ -6,7 +6,8 @@ import warnings
 import dspy
 from typing import Callable, List, Optional, Any, Type, Union
 from langwatch.utils.exceptions import better_raise_for_status
-from langwatch.utils.transformation import truncate_object_recursively
+from langwatch.utils.transformation import SerializableWithStringFallback, truncate_object_recursively
+from langwatch.utils.utils import safe_get
 from langwatch.telemetry.tracing import LangWatchTrace
 from typing_extensions import TypedDict
 import langwatch
@@ -824,6 +825,7 @@ class DSPyTracer:
                 "functions",
                 "user",
                 "response_format",
+                "reasoning_effort",
             ]
             for param in params:
                 if all_kwargs.get(param):
@@ -842,23 +844,44 @@ class DSPyTracer:
             result = self.__class__.__original_call__(self, prompt, messages, **kwargs)  # type: ignore
+            history = self.history[-1] if len(self.history) > 0 else None
             if span:
-                span.update(output=result)
+                # Capture full message from history (includes reasoning_content) instead of just result
+                choices = safe_get(history, "response", "choices")
+                if choices and len(choices) > 0:
+                    messages_output = []
+                    for choice in choices:
+                        msg = safe_get(choice, "message")
+                        if msg is not None:
+                            # Convert Pydantic model to dict if needed
+                            if hasattr(msg, "model_dump"):
+                                msg = msg.model_dump(exclude_unset=True)
+                            elif hasattr(msg, "dict"):
+                                msg = msg.dict(exclude_unset=True)
+                            messages_output.append(msg)
+                    if messages_output:
+                        span.update(output=messages_output)
+                    else:
+                        span.update(output=result)
+                else:
+                    span.update(output=result)
-            history = self.history[-1] if len(self.history) > 0 else None
-            if (
-                history
-                and "usage" in history
-                and "completion_tokens" in history["usage"]
-                and "prompt_tokens" in history["usage"]
-                and span
-            ):
-                span.update(
-                    metrics={
-                        "completion_tokens": history["usage"]["completion_tokens"],
-                        "prompt_tokens": history["usage"]["prompt_tokens"],
-                    }
+            completion_tokens = safe_get(history, "usage", "completion_tokens")
+            prompt_tokens = safe_get(history, "usage", "prompt_tokens")
+            if span and completion_tokens is not None and prompt_tokens is not None:
+                metrics = {
+                    "completion_tokens": completion_tokens,
+                    "prompt_tokens": prompt_tokens,
+                }
+                # Capture reasoning_tokens if available
+                reasoning_tokens = safe_get(
+                    history, "usage", "completion_tokens_details", "reasoning_tokens"
                 )
+                if reasoning_tokens is not None:
+                    metrics["reasoning_tokens"] = reasoning_tokens
+                span.update(metrics=metrics)
             return result
@@ -884,26 +907,36 @@ class DSPyTracer:
             result = self.__class__.__original_basic_request__(self, prompt, **kwargs)  # type: ignore
-            if (
-                span
-                and "choices" in result
-                and len(result["choices"]) == 1
-                and "message" in result["choices"][0]
-            ):
-                span.update(output=[result["choices"][0]["message"]])
-            if (
-                span
-                and "usage" in result
-                and "completion_tokens" in result["usage"]
-                and "prompt_tokens" in result["usage"]
-            ):
-                span.update(
-                    metrics={
-                        "completion_tokens": result["usage"]["completion_tokens"],
-                        "prompt_tokens": result["usage"]["prompt_tokens"],
-                    }
+            # Capture full messages from choices (includes reasoning_content)
+            choices = safe_get(result, "choices")
+            if span and choices and len(choices) > 0:
+                messages_output = []
+                for choice in choices:
+                    msg = safe_get(choice, "message")
+                    if msg is not None:
+                        # Convert Pydantic model to dict if needed
+                        if hasattr(msg, "model_dump"):
+                            msg = msg.model_dump(exclude_unset=True)
+                        elif hasattr(msg, "dict"):
+                            msg = msg.dict(exclude_unset=True)
+                        messages_output.append(msg)
+                if messages_output:
+                    span.update(output=messages_output)
+            completion_tokens = safe_get(result, "usage", "completion_tokens")
+            prompt_tokens = safe_get(result, "usage", "prompt_tokens")
+            if span and completion_tokens is not None and prompt_tokens is not None:
+                metrics = {
+                    "completion_tokens": completion_tokens,
+                    "prompt_tokens": prompt_tokens,
+                }
+                # Capture reasoning_tokens if available
+                reasoning_tokens = safe_get(
+                    result, "usage", "completion_tokens_details", "reasoning_tokens"
                 )
+                if reasoning_tokens is not None:
+                    metrics["reasoning_tokens"] = reasoning_tokens
+                span.update(metrics=metrics)
             return result

langwatch 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

langwatch 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl