langwatch 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langwatch/__init__.py +6 -3
- langwatch/__version__.py +1 -1
- langwatch/evaluation/__init__.py +518 -17
- langwatch/evaluations.py +183 -353
- langwatch/experiment/__init__.py +108 -0
- langwatch/{evaluation/evaluation.py → experiment/experiment.py} +44 -5
- langwatch/{evaluation → experiment}/platform_run.py +40 -67
- {langwatch-0.9.0.dist-info → langwatch-0.10.0.dist-info}/METADATA +1 -1
- {langwatch-0.9.0.dist-info → langwatch-0.10.0.dist-info}/RECORD +10 -9
- {langwatch-0.9.0.dist-info → langwatch-0.10.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
langwatch.experiment - Run experiments on LangWatch platform or via SDK.
|
|
3
|
+
|
|
4
|
+
This module provides two ways to run experiments:
|
|
5
|
+
|
|
6
|
+
1. Platform experiments (CI/CD):
|
|
7
|
+
Run experiments configured in the LangWatch platform UI.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import langwatch
|
|
11
|
+
|
|
12
|
+
result = langwatch.experiment.run("my-experiment-slug")
|
|
13
|
+
result.print_summary()
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
2. SDK-defined experiments:
|
|
17
|
+
Define and run experiments programmatically.
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
import langwatch
|
|
21
|
+
|
|
22
|
+
experiment = langwatch.experiment.init("my-experiment")
|
|
23
|
+
|
|
24
|
+
for index, row in experiment.loop(df.iterrows(), threads=4):
|
|
25
|
+
async def task(index, row):
|
|
26
|
+
result = await my_agent(row["input"])
|
|
27
|
+
experiment.evaluate(
|
|
28
|
+
"langevals/exact_match",
|
|
29
|
+
index=index,
|
|
30
|
+
data={"output": result, "expected_output": row["expected"]},
|
|
31
|
+
settings={},
|
|
32
|
+
)
|
|
33
|
+
experiment.submit(task, index, row)
|
|
34
|
+
```
|
|
35
|
+
"""
|
|
36
|
+
from typing import Optional
|
|
37
|
+
|
|
38
|
+
# Re-export the Experiment class for SDK-defined experiments
|
|
39
|
+
from langwatch.experiment.experiment import Experiment
|
|
40
|
+
|
|
41
|
+
# Re-export the platform run function and related types
|
|
42
|
+
from langwatch.experiment.platform_run import (
|
|
43
|
+
run,
|
|
44
|
+
ExperimentRunResult,
|
|
45
|
+
ExperimentRunSummary,
|
|
46
|
+
ExperimentNotFoundError,
|
|
47
|
+
ExperimentTimeoutError,
|
|
48
|
+
ExperimentRunFailedError,
|
|
49
|
+
ExperimentsApiError,
|
|
50
|
+
TargetStats,
|
|
51
|
+
EvaluatorStats,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def init(name: str, *, run_id: Optional[str] = None) -> Experiment:
|
|
56
|
+
"""
|
|
57
|
+
Initialize an SDK-defined experiment.
|
|
58
|
+
|
|
59
|
+
This creates an Experiment instance that you can use to run evaluators
|
|
60
|
+
programmatically using datasets and custom logic.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
name: Name for this experiment run
|
|
64
|
+
run_id: Optional custom run ID (auto-generated if not provided)
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
Experiment instance with methods:
|
|
68
|
+
- loop(): Iterate over dataset rows with parallel execution
|
|
69
|
+
- evaluate(): Run an evaluator on the current row
|
|
70
|
+
- log(): Log custom metrics
|
|
71
|
+
- submit(): Submit async tasks
|
|
72
|
+
|
|
73
|
+
Example:
|
|
74
|
+
```python
|
|
75
|
+
import langwatch
|
|
76
|
+
|
|
77
|
+
experiment = langwatch.experiment.init("my-experiment")
|
|
78
|
+
|
|
79
|
+
for index, row in experiment.loop(df.iterrows(), threads=4):
|
|
80
|
+
async def task(index, row):
|
|
81
|
+
result = await my_agent(row["input"])
|
|
82
|
+
experiment.evaluate(
|
|
83
|
+
"langevals/exact_match",
|
|
84
|
+
index=index,
|
|
85
|
+
data={"output": result, "expected_output": row["expected"]},
|
|
86
|
+
settings={},
|
|
87
|
+
)
|
|
88
|
+
experiment.submit(task, index, row)
|
|
89
|
+
```
|
|
90
|
+
"""
|
|
91
|
+
experiment = Experiment(name, run_id=run_id)
|
|
92
|
+
experiment.init()
|
|
93
|
+
return experiment
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
__all__ = [
|
|
97
|
+
"init",
|
|
98
|
+
"run",
|
|
99
|
+
"Experiment",
|
|
100
|
+
"ExperimentRunResult",
|
|
101
|
+
"ExperimentRunSummary",
|
|
102
|
+
"ExperimentNotFoundError",
|
|
103
|
+
"ExperimentTimeoutError",
|
|
104
|
+
"ExperimentRunFailedError",
|
|
105
|
+
"ExperimentsApiError",
|
|
106
|
+
"TargetStats",
|
|
107
|
+
"EvaluatorStats",
|
|
108
|
+
]
|
|
@@ -135,7 +135,7 @@ class IterationInfo(TypedDict):
|
|
|
135
135
|
error: Optional[Exception]
|
|
136
136
|
|
|
137
137
|
|
|
138
|
-
class Evaluation:
|
|
138
|
+
class Experiment:
|
|
139
139
|
_executor: ThreadPoolExecutor
|
|
140
140
|
_futures: List[Future[Any]]
|
|
141
141
|
_current_index: int
|
|
@@ -255,7 +255,7 @@ class Evaluation:
|
|
|
255
255
|
progress_bar.close()
|
|
256
256
|
|
|
257
257
|
except Exception as e:
|
|
258
|
-
|
|
258
|
+
Experiment._log_results(
|
|
259
259
|
langwatch.get_api_key() or "",
|
|
260
260
|
{
|
|
261
261
|
"experiment_slug": self.experiment_slug,
|
|
@@ -456,7 +456,7 @@ class Evaluation:
|
|
|
456
456
|
|
|
457
457
|
# Start a new thread to send the batch
|
|
458
458
|
thread = threading.Thread(
|
|
459
|
-
target=Evaluation._log_results,
|
|
459
|
+
target=Experiment._log_results,
|
|
460
460
|
args=(langwatch.get_api_key(), body),
|
|
461
461
|
)
|
|
462
462
|
thread.start()
|
|
@@ -485,7 +485,7 @@ class Evaluation:
|
|
|
485
485
|
better_raise_for_status(response)
|
|
486
486
|
|
|
487
487
|
def _wait_for_completion(self):
|
|
488
|
-
async def wait_for_completion(self: Evaluation):
|
|
488
|
+
async def wait_for_completion(self: Experiment):
|
|
489
489
|
# Send any remaining batch
|
|
490
490
|
self._send_batch(finished=True)
|
|
491
491
|
|
|
@@ -837,7 +837,7 @@ class Evaluation:
|
|
|
837
837
|
with self.lock:
|
|
838
838
|
self.batch["evaluations"].append(eval)
|
|
839
839
|
|
|
840
|
-
def
|
|
840
|
+
def evaluate(
|
|
841
841
|
self,
|
|
842
842
|
evaluator_id: str,
|
|
843
843
|
index: Union[int, Hashable],
|
|
@@ -846,6 +846,17 @@ class Evaluation:
|
|
|
846
846
|
name: Optional[str] = None,
|
|
847
847
|
as_guardrail: bool = False,
|
|
848
848
|
):
|
|
849
|
+
"""
|
|
850
|
+
Run an evaluator on the current row.
|
|
851
|
+
|
|
852
|
+
Args:
|
|
853
|
+
evaluator_id: The evaluator type/slug (e.g., "langevals/exact_match", "ragas/faithfulness")
|
|
854
|
+
index: The row index for this evaluation
|
|
855
|
+
data: Data to pass to the evaluator (e.g., {"input": ..., "output": ..., "expected_output": ...})
|
|
856
|
+
settings: Evaluator-specific settings
|
|
857
|
+
name: Optional display name for the evaluation (defaults to evaluator_id)
|
|
858
|
+
as_guardrail: Whether to run as a guardrail (stricter pass/fail)
|
|
859
|
+
"""
|
|
849
860
|
duration: Optional[int] = None
|
|
850
861
|
|
|
851
862
|
start_time = time.time()
|
|
@@ -871,3 +882,31 @@ class Evaluation:
|
|
|
871
882
|
duration=duration,
|
|
872
883
|
cost=result.cost,
|
|
873
884
|
)
|
|
885
|
+
|
|
886
|
+
def run(
|
|
887
|
+
self,
|
|
888
|
+
evaluator_id: str,
|
|
889
|
+
index: Union[int, Hashable],
|
|
890
|
+
data: Dict[str, Any],
|
|
891
|
+
settings: Dict[str, Any],
|
|
892
|
+
name: Optional[str] = None,
|
|
893
|
+
as_guardrail: bool = False,
|
|
894
|
+
):
|
|
895
|
+
"""
|
|
896
|
+
Deprecated: Use `evaluate()` instead.
|
|
897
|
+
"""
|
|
898
|
+
import warnings
|
|
899
|
+
|
|
900
|
+
warnings.warn(
|
|
901
|
+
"evaluation.run() is deprecated, use evaluation.evaluate() instead",
|
|
902
|
+
DeprecationWarning,
|
|
903
|
+
stacklevel=2,
|
|
904
|
+
)
|
|
905
|
+
return self.evaluate(
|
|
906
|
+
evaluator_id=evaluator_id,
|
|
907
|
+
index=index,
|
|
908
|
+
data=data,
|
|
909
|
+
settings=settings,
|
|
910
|
+
name=name,
|
|
911
|
+
as_guardrail=as_guardrail,
|
|
912
|
+
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Runner for platform-configured
|
|
2
|
+
Runner for platform-configured experiments (Experiments Workbench).
|
|
3
3
|
|
|
4
4
|
This module provides the `run()` function to execute evaluations that are
|
|
5
5
|
configured in the LangWatch platform from CI/CD pipelines or scripts.
|
|
@@ -35,16 +35,16 @@ def _replace_url_domain(url: str, new_base: str) -> str:
|
|
|
35
35
|
))
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
class EvaluationNotFoundError(Exception):
|
|
39
|
-
"""Raised when
|
|
38
|
+
class ExperimentNotFoundError(Exception):
|
|
39
|
+
"""Raised when experiment slug doesn't exist."""
|
|
40
40
|
|
|
41
41
|
def __init__(self, slug: str):
|
|
42
42
|
self.slug = slug
|
|
43
43
|
super().__init__(f"Evaluation not found: {slug}")
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
class EvaluationTimeoutError(Exception):
|
|
47
|
-
"""Raised when
|
|
46
|
+
class ExperimentTimeoutError(Exception):
|
|
47
|
+
"""Raised when experiment run times out."""
|
|
48
48
|
|
|
49
49
|
def __init__(self, run_id: str, progress: int, total: int):
|
|
50
50
|
self.run_id = run_id
|
|
@@ -55,8 +55,8 @@ class EvaluationTimeoutError(Exception):
|
|
|
55
55
|
)
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
class EvaluationRunFailedError(Exception):
|
|
59
|
-
"""Raised when
|
|
58
|
+
class ExperimentRunFailedError(Exception):
|
|
59
|
+
"""Raised when experiment run fails."""
|
|
60
60
|
|
|
61
61
|
def __init__(self, run_id: str, error: str):
|
|
62
62
|
self.run_id = run_id
|
|
@@ -64,7 +64,7 @@ class EvaluationRunFailedError(Exception):
|
|
|
64
64
|
super().__init__(f"Evaluation run failed: {error}")
|
|
65
65
|
|
|
66
66
|
|
|
67
|
-
class
|
|
67
|
+
class ExperimentsApiError(Exception):
|
|
68
68
|
"""Raised for other API errors."""
|
|
69
69
|
|
|
70
70
|
def __init__(self, message: str, status_code: int):
|
|
@@ -97,8 +97,8 @@ class EvaluatorStats:
|
|
|
97
97
|
|
|
98
98
|
|
|
99
99
|
@dataclass
|
|
100
|
-
class EvaluationRunSummary:
|
|
101
|
-
"""Summary of a completed
|
|
100
|
+
class ExperimentRunSummary:
|
|
101
|
+
"""Summary of a completed experiment run."""
|
|
102
102
|
|
|
103
103
|
run_id: str
|
|
104
104
|
total_cells: int
|
|
@@ -115,7 +115,7 @@ class EvaluationRunSummary:
|
|
|
115
115
|
|
|
116
116
|
|
|
117
117
|
@dataclass
|
|
118
|
-
class EvaluationRunResult:
|
|
118
|
+
class ExperimentRunResult:
|
|
119
119
|
"""Result of running a platform evaluation."""
|
|
120
120
|
|
|
121
121
|
run_id: str
|
|
@@ -125,7 +125,7 @@ class EvaluationRunResult:
|
|
|
125
125
|
pass_rate: float
|
|
126
126
|
duration: int
|
|
127
127
|
run_url: str
|
|
128
|
-
summary: EvaluationRunSummary
|
|
128
|
+
summary: ExperimentRunSummary
|
|
129
129
|
|
|
130
130
|
def print_summary(self, exit_on_failure: Optional[bool] = None) -> None:
|
|
131
131
|
"""
|
|
@@ -161,42 +161,42 @@ def _is_notebook() -> bool:
|
|
|
161
161
|
return False
|
|
162
162
|
|
|
163
163
|
|
|
164
|
-
def evaluate(
|
|
164
|
+
def run(
|
|
165
165
|
slug: str,
|
|
166
166
|
*,
|
|
167
167
|
poll_interval: float = 2.0,
|
|
168
168
|
timeout: float = 600.0,
|
|
169
169
|
on_progress: Optional[Callable[[int, int], None]] = None,
|
|
170
170
|
api_key: Optional[str] = None,
|
|
171
|
-
) -> EvaluationRunResult:
|
|
171
|
+
) -> ExperimentRunResult:
|
|
172
172
|
"""
|
|
173
|
-
Run a platform-configured
|
|
173
|
+
Run a platform-configured experiment and wait for completion.
|
|
174
174
|
|
|
175
|
-
This runs an
|
|
176
|
-
The
|
|
175
|
+
This runs an Experiment that you have configured in the LangWatch platform.
|
|
176
|
+
The experiment will execute all targets and evaluators defined in the configuration.
|
|
177
177
|
|
|
178
178
|
Args:
|
|
179
|
-
slug: The slug of the
|
|
179
|
+
slug: The slug of the experiment to run (found in the experiment URL)
|
|
180
180
|
poll_interval: Seconds between status checks (default: 2.0)
|
|
181
181
|
timeout: Maximum seconds to wait for completion (default: 600.0 = 10 minutes)
|
|
182
182
|
on_progress: Optional callback for progress updates (completed, total)
|
|
183
183
|
api_key: Optional API key override (uses LANGWATCH_API_KEY env var by default)
|
|
184
184
|
|
|
185
185
|
Returns:
|
|
186
|
-
|
|
186
|
+
ExperimentRunResult with pass rate and summary. Call result.print_summary()
|
|
187
187
|
to display results and exit with code 1 on failure.
|
|
188
188
|
|
|
189
189
|
Raises:
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
ExperimentNotFoundError: If the experiment slug doesn't exist
|
|
191
|
+
ExperimentTimeoutError: If the experiment doesn't complete within timeout
|
|
192
|
+
ExperimentRunFailedError: If the experiment fails
|
|
193
|
+
ExperimentsApiError: For other API errors
|
|
194
194
|
|
|
195
195
|
Example:
|
|
196
196
|
```python
|
|
197
197
|
import langwatch
|
|
198
198
|
|
|
199
|
-
result = langwatch.
|
|
199
|
+
result = langwatch.experiment.run("my-experiment-slug")
|
|
200
200
|
result.print_summary()
|
|
201
201
|
```
|
|
202
202
|
"""
|
|
@@ -219,7 +219,7 @@ def evaluate(
|
|
|
219
219
|
api_run_url = start_response.get("runUrl", "")
|
|
220
220
|
run_url = _replace_url_domain(api_run_url, endpoint) if api_run_url else ""
|
|
221
221
|
|
|
222
|
-
print(f"Started
|
|
222
|
+
print(f"Started experiment run: {run_id}")
|
|
223
223
|
if run_url:
|
|
224
224
|
print(f"Follow live: {run_url}")
|
|
225
225
|
|
|
@@ -238,7 +238,7 @@ def evaluate(
|
|
|
238
238
|
if time.time() - start_time > timeout:
|
|
239
239
|
print() # Newline after progress
|
|
240
240
|
status = _get_run_status(run_id, endpoint, effective_api_key)
|
|
241
|
-
raise EvaluationTimeoutError(
|
|
241
|
+
raise ExperimentTimeoutError(
|
|
242
242
|
run_id, status.get("progress", 0), status.get("total", 0)
|
|
243
243
|
)
|
|
244
244
|
|
|
@@ -267,7 +267,7 @@ def evaluate(
|
|
|
267
267
|
|
|
268
268
|
if run_status == "failed":
|
|
269
269
|
print() # Newline after progress
|
|
270
|
-
raise EvaluationRunFailedError(
|
|
270
|
+
raise ExperimentRunFailedError(
|
|
271
271
|
run_id, status.get("error", "Unknown error")
|
|
272
272
|
)
|
|
273
273
|
|
|
@@ -278,7 +278,7 @@ def evaluate(
|
|
|
278
278
|
|
|
279
279
|
|
|
280
280
|
def _start_run(slug: str, endpoint: str, api_key: str) -> dict:
|
|
281
|
-
"""Start an
|
|
281
|
+
"""Start an experiment run."""
|
|
282
282
|
with httpx.Client(timeout=60) as client:
|
|
283
283
|
response = client.post(
|
|
284
284
|
f"{endpoint}/api/evaluations/v3/{slug}/run",
|
|
@@ -286,12 +286,12 @@ def _start_run(slug: str, endpoint: str, api_key: str) -> dict:
|
|
|
286
286
|
)
|
|
287
287
|
|
|
288
288
|
if response.status_code == 404:
|
|
289
|
-
raise
|
|
289
|
+
raise ExperimentNotFoundError(slug)
|
|
290
290
|
if response.status_code == 401:
|
|
291
|
-
raise
|
|
291
|
+
raise ExperimentsApiError("Unauthorized - check your API key", 401)
|
|
292
292
|
if not response.is_success:
|
|
293
293
|
error_body = response.json() if response.content else {}
|
|
294
|
-
raise
|
|
294
|
+
raise ExperimentsApiError(
|
|
295
295
|
error_body.get("error", f"Failed to start evaluation: {response.status_code}"),
|
|
296
296
|
response.status_code,
|
|
297
297
|
)
|
|
@@ -308,12 +308,12 @@ def _get_run_status(run_id: str, endpoint: str, api_key: str) -> dict:
|
|
|
308
308
|
)
|
|
309
309
|
|
|
310
310
|
if response.status_code == 404:
|
|
311
|
-
raise
|
|
311
|
+
raise ExperimentsApiError(f"Run not found: {run_id}", 404)
|
|
312
312
|
if response.status_code == 401:
|
|
313
|
-
raise
|
|
313
|
+
raise ExperimentsApiError("Unauthorized - check your API key", 401)
|
|
314
314
|
if not response.is_success:
|
|
315
315
|
error_body = response.json() if response.content else {}
|
|
316
|
-
raise
|
|
316
|
+
raise ExperimentsApiError(
|
|
317
317
|
error_body.get("error", f"Failed to get run status: {response.status_code}"),
|
|
318
318
|
response.status_code,
|
|
319
319
|
)
|
|
@@ -326,7 +326,7 @@ def _build_result(
|
|
|
326
326
|
status: Literal["completed", "failed", "stopped"],
|
|
327
327
|
summary_data: dict,
|
|
328
328
|
run_url: str,
|
|
329
|
-
) -> EvaluationRunResult:
|
|
329
|
+
) -> ExperimentRunResult:
|
|
330
330
|
"""Build the result object from API response."""
|
|
331
331
|
total_cells = summary_data.get("totalCells", 0)
|
|
332
332
|
completed_cells = summary_data.get("completedCells", 0)
|
|
@@ -368,7 +368,7 @@ def _build_result(
|
|
|
368
368
|
)
|
|
369
369
|
)
|
|
370
370
|
|
|
371
|
-
summary = EvaluationRunSummary(
|
|
371
|
+
summary = ExperimentRunSummary(
|
|
372
372
|
run_id=run_id,
|
|
373
373
|
total_cells=total_cells,
|
|
374
374
|
completed_cells=completed_cells,
|
|
@@ -383,7 +383,7 @@ def _build_result(
|
|
|
383
383
|
total_cost=summary_data.get("totalCost", 0),
|
|
384
384
|
)
|
|
385
385
|
|
|
386
|
-
return EvaluationRunResult(
|
|
386
|
+
return ExperimentRunResult(
|
|
387
387
|
run_id=run_id,
|
|
388
388
|
status=status,
|
|
389
389
|
passed=total_passed,
|
|
@@ -395,12 +395,12 @@ def _build_result(
|
|
|
395
395
|
)
|
|
396
396
|
|
|
397
397
|
|
|
398
|
-
def _print_summary(result: EvaluationRunResult) -> None:
|
|
399
|
-
"""Print a CI-friendly summary of the
|
|
398
|
+
def _print_summary(result: ExperimentRunResult) -> None:
|
|
399
|
+
"""Print a CI-friendly summary of the experiment results."""
|
|
400
400
|
summary = result.summary
|
|
401
401
|
|
|
402
402
|
print("\n" + "═" * 60)
|
|
403
|
-
print("
|
|
403
|
+
print(" EXPERIMENT RESULTS")
|
|
404
404
|
print("═" * 60)
|
|
405
405
|
print(f" Run ID: {result.run_id}")
|
|
406
406
|
print(f" Status: {result.status.upper()}")
|
|
@@ -433,30 +433,3 @@ def _print_summary(result: EvaluationRunResult) -> None:
|
|
|
433
433
|
print("═" * 60 + "\n")
|
|
434
434
|
|
|
435
435
|
|
|
436
|
-
def run(
|
|
437
|
-
slug: str,
|
|
438
|
-
*,
|
|
439
|
-
poll_interval: float = 2.0,
|
|
440
|
-
timeout: float = 600.0,
|
|
441
|
-
on_progress: Optional[Callable[[int, int], None]] = None,
|
|
442
|
-
api_key: Optional[str] = None,
|
|
443
|
-
) -> EvaluationRunResult:
|
|
444
|
-
"""
|
|
445
|
-
Deprecated: Use `evaluate()` instead.
|
|
446
|
-
|
|
447
|
-
Run a platform-configured evaluation and wait for completion.
|
|
448
|
-
"""
|
|
449
|
-
import warnings
|
|
450
|
-
|
|
451
|
-
warnings.warn(
|
|
452
|
-
"langwatch.evaluation.run() is deprecated, use langwatch.evaluation.evaluate() instead",
|
|
453
|
-
DeprecationWarning,
|
|
454
|
-
stacklevel=2,
|
|
455
|
-
)
|
|
456
|
-
return evaluate(
|
|
457
|
-
slug,
|
|
458
|
-
poll_interval=poll_interval,
|
|
459
|
-
timeout=timeout,
|
|
460
|
-
on_progress=on_progress,
|
|
461
|
-
api_key=api_key,
|
|
462
|
-
)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
langwatch/__init__.py,sha256=
|
|
2
|
-
langwatch/__version__.py,sha256=
|
|
1
|
+
langwatch/__init__.py,sha256=VGkLDw3h8hOtzyaTMObWupGTQosn4E17Dk5zcfDmy7g,4462
|
|
2
|
+
langwatch/__version__.py,sha256=o5DKyCSbKMRVAiPvoV2UuvLgN1NaTqfqaz2GumN3K3A,92
|
|
3
3
|
langwatch/attributes.py,sha256=nXdI_G85wQQCAdAcwjCiLYdEYj3wATmfgCmhlf6dVIk,3910
|
|
4
4
|
langwatch/batch_evaluation.py,sha256=Y_S3teXpHV07U-vvJYyV1PB6d0CgyFM_rTzPp6GnEBo,16165
|
|
5
5
|
langwatch/client.py,sha256=WTNcYSik7kZ2kH-qGDnhbMTosc8e_Xhab_lZlfh5TC8,25559
|
|
6
|
-
langwatch/evaluations.py,sha256
|
|
6
|
+
langwatch/evaluations.py,sha256=-rvlpw8J3-0lMn9tdnte1Z3qHpuE60DGB3gmI8VMexQ,8983
|
|
7
7
|
langwatch/guardrails.py,sha256=4d320HyklXPUVszF34aWsDKGzuvPggcDM_f45_eJTnc,1352
|
|
8
8
|
langwatch/langchain.py,sha256=HjbBBIDwwt688g0W4K0EsZGuGBbo1Mv5LQ-7Mkn56iQ,18726
|
|
9
9
|
langwatch/litellm.py,sha256=mPcw5cLykt0SQf9bTNSoT7elMx4gj-wZ_K2PC14Bw50,11998
|
|
@@ -16,9 +16,10 @@ langwatch/types.py,sha256=h6r3tNTzWqENx-9j_JPmOMZfFoKq9SNpEtxpAACk2G0,3114
|
|
|
16
16
|
langwatch/dataset/__init__.py,sha256=hZBcbjXuBO2qE5osJtd9wIE9f45F6-jpNTrne5nk4eE,2606
|
|
17
17
|
langwatch/domain/__init__.py,sha256=gSCOV3WkRhp_--9D1vxw7BYpnMRbpGh-2NbsXd4KZC0,6074
|
|
18
18
|
langwatch/dspy/__init__.py,sha256=wp8AmobV8XGVWOI8MQFmXPHu-8Wq3wvjB6YiHQm9Fdg,33007
|
|
19
|
-
langwatch/evaluation/__init__.py,sha256=
|
|
20
|
-
langwatch/
|
|
21
|
-
langwatch/
|
|
19
|
+
langwatch/evaluation/__init__.py,sha256=8SOSZZbSzXa1jL-9Zlyt0f9u5sOA_TrO1J61ueASBLI,16980
|
|
20
|
+
langwatch/experiment/__init__.py,sha256=nv2OfoNMMZwUA9KfozW2ZNaR1-J1LCmU4NykjGfe9is,3001
|
|
21
|
+
langwatch/experiment/experiment.py,sha256=5xj58FKVC0y_LxgfwjJZP9lDp7tZ9FUUbERBtui_nC8,33026
|
|
22
|
+
langwatch/experiment/platform_run.py,sha256=qiy_bwp786TbkH4HIlZVlJPmCtQlStAq9vUdG4-3VdU,13850
|
|
22
23
|
langwatch/exporters/filterable_batch_span_exporter.py,sha256=MlhZjui56XD6p2sa8kEGyr-Hb3wqudknngmemnB4Twg,2142
|
|
23
24
|
langwatch/generated/langwatch_rest_api_client/__init__.py,sha256=8r-9pAj7fK7vnVX3mT0y_zS4B9ZRqD6RZiBo5fPra60,156
|
|
24
25
|
langwatch/generated/langwatch_rest_api_client/client.py,sha256=o_mdLqyBCQstu5tS1WZFwqIEbGwkvWQ7eQjuCJw_5VY,12419
|
|
@@ -416,6 +417,6 @@ langwatch/utils/initialization.py,sha256=1KoZmkHOvGEVF0j-4t4xRQdA_2C_SPiF7qFXqEG
|
|
|
416
417
|
langwatch/utils/module.py,sha256=KLBNOK3mA9gCSifCcQX_lOtU48BJQDWvFKtF6NMvwVA,688
|
|
417
418
|
langwatch/utils/transformation.py,sha256=76MGXyrYTxM0Yri36NJqLK-XxL4BBYdmKWAXXlw3D4Q,7690
|
|
418
419
|
langwatch/utils/utils.py,sha256=ZCOSie4o9LdJ7odshNfCNjmgwgQ27ojc5ENqt1rXuSs,596
|
|
419
|
-
langwatch-0.
|
|
420
|
-
langwatch-0.
|
|
421
|
-
langwatch-0.
|
|
420
|
+
langwatch-0.10.0.dist-info/METADATA,sha256=ZiqO5NpMuwsb60yAVPverSp8GXlMyIctxaDgO3DSRoA,13193
|
|
421
|
+
langwatch-0.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
422
|
+
langwatch-0.10.0.dist-info/RECORD,,
|
|
File without changes
|