PyPI - langwatch - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

langwatch 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

langwatch/__version__.py +1 -1
langwatch/batch_evaluation.py +5 -4
langwatch/dspy/__init__.py +7 -34
langwatch/evaluation/__init__.py +28 -1
langwatch/evaluation/evaluation.py +412 -22
langwatch/evaluation/platform_run.py +462 -0
langwatch/evaluations.py +3 -2
langwatch/login.py +2 -1
langwatch/telemetry/tracing.py +3 -2
langwatch/utils/exceptions.py +22 -1
{langwatch-0.8.0.dist-info → langwatch-0.9.0.dist-info}/METADATA +1 -1
{langwatch-0.8.0.dist-info → langwatch-0.9.0.dist-info}/RECORD +13 -12
{langwatch-0.8.0.dist-info → langwatch-0.9.0.dist-info}/WHEEL +0 -0

langwatch/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information for LangWatch."""
-__version__ = "0.8.0" # x-release-please-version
+__version__ = "0.9.0" # x-release-please-version

langwatch/batch_evaluation.py CHANGED Viewed

@@ -24,6 +24,7 @@ from tqdm import tqdm
 import pandas as pd
 from langwatch.types import Money
+from langwatch.utils.exceptions import better_raise_for_status
 class EvaluationResult(BaseModel):
@@ -150,7 +151,7 @@ class BatchEvaluation:
             raise ValueError(
                 "API key is not valid, please try to login again with langwatch.login()"
             )
-        response.raise_for_status()
+        better_raise_for_status(response)
         experiment_path = response.json()["path"]
         self.experiment_slug = response.json()["slug"]
@@ -368,7 +369,7 @@ class BatchEvaluation:
             json=body,
             timeout=60,
         )
-        response.raise_for_status()
+        better_raise_for_status(response)
     def wait_for_completion(self):
         async def wait_for_completion(self):
@@ -414,7 +415,7 @@ async def run_evaluation(
         async with httpx.AsyncClient(timeout=900) as client:
             response = await client.post(**request_params)
-            response.raise_for_status()
+            better_raise_for_status(response)
         result = response.json()
@@ -462,7 +463,7 @@ def get_dataset(
     with httpx.Client(timeout=300) as client:
         response = client.get(**request_params)
-        response.raise_for_status()
+        better_raise_for_status(response)
     result = response.json()

langwatch/dspy/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 import warnings
 import dspy
 from typing import Callable, List, Optional, Any, Type, Union
+from langwatch.utils.exceptions import better_raise_for_status
 from langwatch.utils.transformation import truncate_object_recursively
 from langwatch.telemetry.tracing import LangWatchTrace
 from typing_extensions import TypedDict
@@ -193,7 +194,7 @@ class LangWatchDSPy:
             raise ValueError(
                 "API key is not valid, please try to login again with langwatch.login()"
             )
-        response.raise_for_status()
+        better_raise_for_status(response)
         if optimizer and evaluator:
             raise ValueError("You can only provide an optimizer or an evaluator, not both.")
@@ -386,7 +387,7 @@ class LangWatchDSPy:
             data=json.dumps(data),  # type: ignore
             timeout=60,
         )
-        response.raise_for_status()
+        better_raise_for_status(response)
         self.steps_buffer = []
     def tracer(self, trace: LangWatchTrace):
@@ -736,10 +737,6 @@ class DSPyTracer:
             dspy.Module.__original_call__ = dspy.Module.__call__  # type: ignore
             dspy.Module.__call__ = self.patched_module_call()
-        if not hasattr(dspy.Predict, "__original_forward__"):
-            dspy.Predict.__original_forward__ = dspy.Predict.forward  # type: ignore
-            dspy.Predict.forward = self.patched_predict_forward()
         language_model_classes = dspy.LM.__subclasses__()
         for lm in language_model_classes:
             if not hasattr(lm, "__original_basic_request__") and hasattr(
@@ -775,7 +772,7 @@ class DSPyTracer:
     def patched_module_call(self):
         self_ = self
-        @langwatch.span(ignore_missing_trace_warning=True, type="module")
+        @langwatch.span(ignore_missing_trace_warning=True, type="module", capture_output=False)
         def __call__(self: dspy.Module, *args, **kwargs):
             span = self_.safe_get_current_span()
             signature = (
@@ -800,34 +797,10 @@ class DSPyTracer:
         return __call__
-    def patched_predict_forward(self):
-        self_ = self
-        @langwatch.span(ignore_missing_trace_warning=True, type="module")
-        def forward(self: dspy.Predict, **kwargs):
-            span = self_.safe_get_current_span()
-            signature = kwargs.get("signature", self.signature)
-            if span and signature and hasattr(signature, "__name__"):
-                span.update(name=f"{self.__class__.__name__}({signature.__name__})")
-            elif span:
-                span.update(name=f"{self.__class__.__name__}.forward")
-            prediction = self.__class__.__original_forward__(self, **kwargs)  # type: ignore
-            if span and isinstance(prediction, dspy.Prediction):
-                span.update(output=prediction._store)  # type: ignore
-            elif span:
-                span.update(output=prediction)  # type: ignore
-            return prediction
-        return forward
     def patched_language_model_call(self):
         self_ = self
-        @langwatch.span(ignore_missing_trace_warning=True, type="llm")
+        @langwatch.span(ignore_missing_trace_warning=True, type="llm", capture_output=False)
         def call(self: dspy.LM, prompt=None, messages=None, **kwargs):
             all_kwargs = self.kwargs | kwargs
             model = self.model
@@ -894,7 +867,7 @@ class DSPyTracer:
     def patched_legacy_language_model_request(self):
         self_ = self
-        @langwatch.span(ignore_missing_trace_warning=True, type="llm")
+        @langwatch.span(ignore_missing_trace_warning=True, type="llm", capture_output=False)
         def basic_request(self: dspy.LM, prompt, **kwargs):
             all_kwargs = self.kwargs | kwargs
             model = all_kwargs.get("model", None)
@@ -946,7 +919,7 @@ class DSPyTracer:
             ) is not getattr(dspy.Retrieve, "forward", None):
                 return self.__class__.__original_forward__(self, *args, **kwargs)  # type: ignore
-            @langwatch.span(ignore_missing_trace_warning=True, type="rag")
+            @langwatch.span(ignore_missing_trace_warning=True, type="rag", capture_output=False)
             def forward(self, *args, **kwargs):
                 result = self.__class__.__original_forward__(self, *args, **kwargs)  # type: ignore

langwatch/evaluation/__init__.py CHANGED Viewed

@@ -1,9 +1,36 @@
 from typing import Optional
 from langwatch.evaluation.evaluation import Evaluation
-from .evaluation import Evaluation
+from langwatch.evaluation.platform_run import (
+    evaluate,
+    run,  # Deprecated, kept for backwards compatibility
+    EvaluationRunResult,
+    EvaluationRunSummary,
+    EvaluationNotFoundError,
+    EvaluationTimeoutError,
+    EvaluationRunFailedError,
+    EvaluationsApiError,
+    TargetStats,
+    EvaluatorStats,
+)
 def init(name: str, *, run_id: Optional[str] = None) -> Evaluation:
     evaluation = Evaluation(name, run_id=run_id)
     evaluation.init()
     return evaluation
+__all__ = [
+    "init",
+    "evaluate",
+    "run",  # Deprecated
+    "Evaluation",
+    "EvaluationRunResult",
+    "EvaluationRunSummary",
+    "EvaluationNotFoundError",
+    "EvaluationTimeoutError",
+    "EvaluationRunFailedError",
+    "EvaluationsApiError",
+    "TargetStats",
+    "EvaluatorStats",
+]

langwatch 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

langwatch 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl