langwatch 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
+ """
+ langwatch.experiment - Run experiments on LangWatch platform or via SDK.
+
+ This module provides two ways to run experiments:
+
+ 1. Platform experiments (CI/CD):
+    Run experiments configured in the LangWatch platform UI.
+
+    ```python
+    import langwatch
+
+    result = langwatch.experiment.run("my-experiment-slug")
+    result.print_summary()
+    ```
+
+ 2. SDK-defined experiments:
+    Define and run experiments programmatically.
+
+    ```python
+    import langwatch
+
+    experiment = langwatch.experiment.init("my-experiment")
+
+    for index, row in experiment.loop(df.iterrows(), threads=4):
+        async def task(index, row):
+            result = await my_agent(row["input"])
+            experiment.evaluate(
+                "langevals/exact_match",
+                index=index,
+                data={"output": result, "expected_output": row["expected"]},
+                settings={},
+            )
+        experiment.submit(task, index, row)
+    ```
+ """
+ from typing import Optional
+
+ # Re-export the Experiment class for SDK-defined experiments
+ from langwatch.experiment.experiment import Experiment
+
+ # Re-export the platform run function and related types
+ from langwatch.experiment.platform_run import (
+     run,
+     ExperimentRunResult,
+     ExperimentRunSummary,
+     ExperimentNotFoundError,
+     ExperimentTimeoutError,
+     ExperimentRunFailedError,
+     ExperimentsApiError,
+     TargetStats,
+     EvaluatorStats,
+ )
+
+
+ def init(name: str, *, run_id: Optional[str] = None) -> Experiment:
+     """
+     Initialize an SDK-defined experiment.
+
+     This creates an Experiment instance that you can use to run evaluators
+     programmatically using datasets and custom logic.
+
+     Args:
+         name: Name for this experiment run
+         run_id: Optional custom run ID (auto-generated if not provided)
+
+     Returns:
+         Experiment instance with methods:
+         - loop(): Iterate over dataset rows with parallel execution
+         - evaluate(): Run an evaluator on the current row
+         - log(): Log custom metrics
+         - submit(): Submit async tasks
+
+     Example:
+         ```python
+         import langwatch
+
+         experiment = langwatch.experiment.init("my-experiment")
+
+         for index, row in experiment.loop(df.iterrows(), threads=4):
+             async def task(index, row):
+                 result = await my_agent(row["input"])
+                 experiment.evaluate(
+                     "langevals/exact_match",
+                     index=index,
+                     data={"output": result, "expected_output": row["expected"]},
+                     settings={},
+                 )
+             experiment.submit(task, index, row)
+         ```
+     """
+     experiment = Experiment(name, run_id=run_id)
+     experiment.init()
+     return experiment
+
+
+ __all__ = [
+     "init",
+     "run",
+     "Experiment",
+     "ExperimentRunResult",
+     "ExperimentRunSummary",
+     "ExperimentNotFoundError",
+     "ExperimentTimeoutError",
+     "ExperimentRunFailedError",
+     "ExperimentsApiError",
+     "TargetStats",
+     "EvaluatorStats",
+ ]
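The new module re-exports `run`, `ExperimentRunResult`, and several error types (`ExperimentNotFoundError`, `ExperimentTimeoutError`, `ExperimentRunFailedError`, `ExperimentsApiError`) for platform-triggered runs. Below is a minimal sketch of how those exports might be wired into a CI script, assuming `run()` raises these exceptions on failure; the slug, messages, and exit codes are illustrative and not part of this diff. Only `langwatch.experiment.run("my-experiment-slug")` and `result.print_summary()` appear in the package's own docstring.

```python
# Sketch only: CI-style wrapper around langwatch.experiment.run().
# Assumption: run() raises the re-exported error types on failure;
# this diff exports them but does not show when they are raised.
import sys

import langwatch
from langwatch.experiment import (
    ExperimentNotFoundError,
    ExperimentRunFailedError,
    ExperimentTimeoutError,
    ExperimentsApiError,
)


def main() -> int:
    try:
        # Slug is a placeholder taken from the module docstring example.
        result = langwatch.experiment.run("my-experiment-slug")
    except ExperimentNotFoundError:
        print("Experiment slug not found on the LangWatch platform")
        return 2
    except ExperimentTimeoutError:
        print("Experiment run timed out")
        return 3
    except (ExperimentRunFailedError, ExperimentsApiError) as err:
        print(f"Experiment run failed: {err}")
        return 1

    result.print_summary()
    return 0


if __name__ == "__main__":
    sys.exit(main())
```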