loopgain 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loopgain-0.2.0 → loopgain-0.4.0}/PKG-INFO +84 -21
- {loopgain-0.2.0 → loopgain-0.4.0}/README.md +83 -20
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/__init__.py +10 -0
- loopgain-0.4.0/loopgain/__main__.py +8 -0
- loopgain-0.4.0/loopgain/_version.py +10 -0
- loopgain-0.4.0/loopgain/classifier.py +357 -0
- loopgain-0.4.0/loopgain/cli.py +109 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/core.py +110 -9
- loopgain-0.4.0/loopgain/funnel.py +572 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/autogen.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/claude_agent_sdk.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/crewai.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/langchain.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/langgraph.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/openai_agents.py +4 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain.egg-info/PKG-INFO +84 -21
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain.egg-info/SOURCES.txt +10 -1
- loopgain-0.4.0/loopgain.egg-info/entry_points.txt +2 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/pyproject.toml +4 -1
- loopgain-0.4.0/tests/test_classifier_mock_validation.py +269 -0
- loopgain-0.4.0/tests/test_classifier_synthetic.py +330 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/tests/test_core.py +15 -5
- loopgain-0.4.0/tests/test_funnel.py +366 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/tests/test_stress.py +26 -12
- loopgain-0.4.0/tests/test_termination_safety.py +115 -0
- loopgain-0.2.0/loopgain/_version.py +0 -9
- {loopgain-0.2.0 → loopgain-0.4.0}/LICENSE +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/integrations/__init__.py +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain/telemetry.py +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain.egg-info/dependency_links.txt +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain.egg-info/requires.txt +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/loopgain.egg-info/top_level.txt +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/setup.cfg +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/tests/test_integrations.py +0 -0
- {loopgain-0.2.0 → loopgain-0.4.0}/tests/test_telemetry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgain
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
|
|
5
5
|
Author-email: Dave Fitzsimmons <hello@loopgain.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -51,12 +51,12 @@ Dynamic: license-file
|
|
|
51
51
|
|
|
52
52
|
**Barkhausen stability monitor for AI agent loops.**
|
|
53
53
|
|
|
54
|
-
Replace `max_iterations=5` with a real-time loop
|
|
54
|
+
Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
|
|
55
55
|
|
|
56
56
|
[](https://pypi.org/project/loopgain/)
|
|
57
57
|
[](https://pypi.org/project/loopgain/)
|
|
58
58
|
[](LICENSE)
|
|
59
|
-
[](tests/)
|
|
60
60
|
|
|
61
61
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
62
62
|
|
|
@@ -97,7 +97,7 @@ while lg.should_continue():
|
|
|
97
97
|
output = reviser.revise(output, errors)
|
|
98
98
|
|
|
99
99
|
result = lg.result
|
|
100
|
-
print(result.outcome) # "converged" | "oscillating" | "diverged" | "max_iterations"
|
|
100
|
+
print(result.outcome) # "converged" | "oscillating" | "diverged" | "stalled" | "max_iterations"
|
|
101
101
|
print(result.best_output) # the lowest-error iteration's output
|
|
102
102
|
print(result.iterations_used)
|
|
103
103
|
print(result.gain_margin) # 1 / max(Aβ_smooth)
|
|
@@ -108,30 +108,56 @@ print(result.savings_vs_fixed_cap)
|
|
|
108
108
|
|
|
109
109
|
---
|
|
110
110
|
|
|
111
|
+
## Defining your error signal
|
|
112
|
+
|
|
113
|
+
The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
|
|
114
|
+
|
|
115
|
+
Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
|
|
116
|
+
|
|
117
|
+
| Loop | Error signal = |
|
|
118
|
+
| --- | --- |
|
|
119
|
+
| Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
|
|
120
|
+
| JSON / structured extraction | number of **schema violations** |
|
|
121
|
+
| RAG with self-correction | number of **required facts still missing** |
|
|
122
|
+
| Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
|
|
123
|
+
| Lint / format loop | **lint error count** |
|
|
124
|
+
|
|
125
|
+
The only rules: non-negative, and **smaller as the output gets better**. Passing the raw list of problems to `observe()` works directly — it uses the list's length as the magnitude (e.g. hand it the list of failing tests).
|
|
126
|
+
|
|
127
|
+
If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
|
|
128
|
+
|
|
129
|
+
Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
111
133
|
## How it works
|
|
112
134
|
|
|
113
|
-
LoopGain measures empirical loop gain at every iteration
|
|
135
|
+
LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
|
|
114
136
|
|
|
115
137
|
```
|
|
116
|
-
|
|
117
|
-
|
|
138
|
+
E_ratio = E_current / E_first # cumulative reduction
|
|
139
|
+
slope_log = OLS slope of log10(E) # geometric trend direction
|
|
140
|
+
slope_p = t-test p-value of slope # statistical significance
|
|
141
|
+
osc_std = std of detrended log10(E) # oscillation magnitude
|
|
118
142
|
```
|
|
119
143
|
|
|
120
|
-
It
|
|
144
|
+
It routes the trajectory into one of five named states:
|
|
121
145
|
|
|
122
|
-
|
|
|
146
|
+
| State | Condition | Action |
|
|
123
147
|
| --- | --- | --- |
|
|
124
|
-
|
|
|
125
|
-
| `
|
|
126
|
-
| `
|
|
127
|
-
| `
|
|
128
|
-
|
|
|
148
|
+
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
|
|
149
|
+
| `CONVERGING` | negative slope with `p < 0.05`, OR cumulative reduction to ≤ 50% of E_first | Continue, watch for upward drift |
|
|
150
|
+
| `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
|
|
151
|
+
| `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
|
|
152
|
+
| `DIVERGING` | positive slope with `p < 0.05` AND error grown to > 110% of E_first | Abort — roll back to best-so-far |
|
|
129
153
|
|
|
130
154
|
Plus a short-circuit: if observed error drops at or below `target_error`, the loop stops immediately with state `TARGET_MET`. The default `target_error=0.0` short-circuits on exactly zero error — the natural completion signal for verifier-driven loops. Pass `target_error=None` to disable the short-circuit and rely on stability detection alone.
|
|
131
155
|
|
|
132
|
-
The
|
|
156
|
+
The decision is **conservative by design**: requiring both statistical significance and meaningful cumulative motion before terminating prevents false-positive aborts on noisy real-LLM error series. The classifier was validated at 98.8% macro-averaged accuracy across 5 regimes on N=1000 deterministic-mock trajectories (see `RESULTS_v2_classifier.md`). The ~94% accuracy ceiling on STALLING comes from the t-test's irreducible 5% type-I error rate, not from a classifier weakness.
|
|
133
157
|
|
|
134
|
-
|
|
158
|
+
**Recommended minimum: 6 iterations** for reliable trend significance. At n≤4 the t-test is severely underpowered (df=2 requires |t|>4.3 for p<0.05) — the classifier conservatively falls back to STALLING when evidence is thin. The thresholds are derived analytically (control theory + statistical convention), not fitted; tune them per domain via the `TrajectoryThresholds` argument once you have production traces.
|
|
159
|
+
|
|
160
|
+
**Legacy single-feature classifier:** the original v0.1 single-Aβ-band classifier (thresholds 0.3 / 0.85 / 0.95 / 1.05) is still available via `LoopGain(classifier='legacy_bands')` for callers that have empirically tuned the bands to a specific workload.
|
|
135
161
|
|
|
136
162
|
---
|
|
137
163
|
|
|
@@ -161,16 +187,27 @@ This transforms divergence detection from "abort with garbage" into "abort with
|
|
|
161
187
|
|
|
162
188
|
---
|
|
163
189
|
|
|
190
|
+
## What LoopGain does and doesn't guarantee
|
|
191
|
+
|
|
192
|
+
LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
|
|
193
|
+
|
|
194
|
+
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
195
|
+
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
164
199
|
## API reference
|
|
165
200
|
|
|
166
|
-
### `LoopGain(target_error=0.0, max_iterations=
|
|
201
|
+
### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
|
|
167
202
|
|
|
168
203
|
Construct the monitor.
|
|
169
204
|
|
|
170
205
|
- `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
|
|
171
|
-
- `max_iterations` — Hard safety
|
|
172
|
-
- `thresholds` — Custom `ThresholdBands`
|
|
173
|
-
- `
|
|
206
|
+
- `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
|
|
207
|
+
- `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
|
|
208
|
+
- `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
|
|
209
|
+
- `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
|
|
210
|
+
- `smoothing_window` — EMA window for the smoothed Aβ series (always maintained for visualization, regardless of classifier choice). Default 3.
|
|
174
211
|
- `assumed_fixed_cap` — Used to compute `savings_vs_fixed_cap`. Default 10.
|
|
175
212
|
|
|
176
213
|
### `lg.observe(errors, output=None) -> str`
|
|
@@ -183,7 +220,7 @@ Returns `False` once a terminal state fires.
|
|
|
183
220
|
|
|
184
221
|
### `lg.state -> str`
|
|
185
222
|
|
|
186
|
-
Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`.
|
|
223
|
+
Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`. The corresponding `result.outcome` values are `converged`, `oscillating`, `diverged`, `stalled` (v0.2 trajectory mode only — STALLING terminating after 2 consecutive readings), and `max_iterations` once a terminal state has fired, or `in_progress` while the loop is still running.
|
|
187
224
|
|
|
188
225
|
### `lg.eta -> int | None`
|
|
189
226
|
|
|
@@ -233,6 +270,32 @@ What is sent: state transitions, Aβ summary (min/max/median), gain margin, roll
|
|
|
233
270
|
|
|
234
271
|
The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. The [receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are both open-source — self-host to keep telemetry fully under your control.
|
|
235
272
|
|
|
273
|
+
> **This is not the same as anonymous usage telemetry.** `send_telemetry` sends *your* loop data to *your* dashboard, and only when you call it. There's a separate, opt-in **funnel** telemetry described below. The two never share data or code.
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## Anonymous funnel telemetry (opt-in, off by default)
|
|
278
|
+
|
|
279
|
+
LoopGain can report **anonymous usage counts** so a solo maintainer can tell whether the library is actually being used — install → first `observe()` → recurring use. **It is opt-in and default-decline: nothing is sent unless you explicitly turn it on.**
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
loopgain telemetry --show # status + exactly what would be sent
|
|
283
|
+
loopgain telemetry --enable # opt in (or: export LOOPGAIN_TELEMETRY=1)
|
|
284
|
+
loopgain telemetry --disable # opt out (or: export LOOPGAIN_TELEMETRY=0)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
`DO_NOT_TRACK=1` is honored as a hard opt-out, and CI environments are auto-detected and declined silently. When enabled, payloads carry only a locally-generated random id (not derived from your machine), hour-bucketed timestamps, library/Python/OS versions, the adapter in use, and a coarse outcome count. **Prompts, outputs, error contents, keys, paths, and IPs are never collected.** Delivery is batched, async, https-only, and fail-silent — it can never break your loop. Full details and the privacy contract: **[TELEMETRY.md](TELEMETRY.md)**.
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## Command-line interface
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
loopgain --version # or: loopgain version
|
|
295
|
+
loopgain telemetry --show # inspect / control anonymous funnel telemetry
|
|
296
|
+
python -m loopgain telemetry --show # equivalent, without the console script
|
|
297
|
+
```
|
|
298
|
+
|
|
236
299
|
---
|
|
237
300
|
|
|
238
301
|
## Framework adapters
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
**Barkhausen stability monitor for AI agent loops.**
|
|
4
4
|
|
|
5
|
-
Replace `max_iterations=5` with a real-time loop
|
|
5
|
+
Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
|
|
6
6
|
|
|
7
7
|
[](https://pypi.org/project/loopgain/)
|
|
8
8
|
[](https://pypi.org/project/loopgain/)
|
|
9
9
|
[](LICENSE)
|
|
10
|
-
[](tests/)
|
|
11
11
|
|
|
12
12
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
13
13
|
|
|
@@ -48,7 +48,7 @@ while lg.should_continue():
|
|
|
48
48
|
output = reviser.revise(output, errors)
|
|
49
49
|
|
|
50
50
|
result = lg.result
|
|
51
|
-
print(result.outcome) # "converged" | "oscillating" | "diverged" | "max_iterations"
|
|
51
|
+
print(result.outcome) # "converged" | "oscillating" | "diverged" | "stalled" | "max_iterations"
|
|
52
52
|
print(result.best_output) # the lowest-error iteration's output
|
|
53
53
|
print(result.iterations_used)
|
|
54
54
|
print(result.gain_margin) # 1 / max(Aβ_smooth)
|
|
@@ -59,30 +59,56 @@ print(result.savings_vs_fixed_cap)
|
|
|
59
59
|
|
|
60
60
|
---
|
|
61
61
|
|
|
62
|
+
## Defining your error signal
|
|
63
|
+
|
|
64
|
+
The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
|
|
65
|
+
|
|
66
|
+
Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
|
|
67
|
+
|
|
68
|
+
| Loop | Error signal = |
|
|
69
|
+
| --- | --- |
|
|
70
|
+
| Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
|
|
71
|
+
| JSON / structured extraction | number of **schema violations** |
|
|
72
|
+
| RAG with self-correction | number of **required facts still missing** |
|
|
73
|
+
| Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
|
|
74
|
+
| Lint / format loop | **lint error count** |
|
|
75
|
+
|
|
76
|
+
The only rules: non-negative, and **smaller as the output gets better**. Passing the raw list of problems to `observe()` works directly — it uses the list's length as the magnitude (e.g. hand it the list of failing tests).
|
|
77
|
+
|
|
78
|
+
If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
|
|
79
|
+
|
|
80
|
+
Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
62
84
|
## How it works
|
|
63
85
|
|
|
64
|
-
LoopGain measures empirical loop gain at every iteration
|
|
86
|
+
LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
|
|
65
87
|
|
|
66
88
|
```
|
|
67
|
-
|
|
68
|
-
|
|
89
|
+
E_ratio = E_current / E_first # cumulative reduction
|
|
90
|
+
slope_log = OLS slope of log10(E) # geometric trend direction
|
|
91
|
+
slope_p = t-test p-value of slope # statistical significance
|
|
92
|
+
osc_std = std of detrended log10(E) # oscillation magnitude
|
|
69
93
|
```
|
|
70
94
|
|
|
71
|
-
It
|
|
95
|
+
It routes the trajectory into one of five named states:
|
|
72
96
|
|
|
73
|
-
|
|
|
97
|
+
| State | Condition | Action |
|
|
74
98
|
| --- | --- | --- |
|
|
75
|
-
|
|
|
76
|
-
| `
|
|
77
|
-
| `
|
|
78
|
-
| `
|
|
79
|
-
|
|
|
99
|
+
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
|
|
100
|
+
| `CONVERGING` | negative slope with `p < 0.05`, OR cumulative reduction to ≤ 50% of E_first | Continue, watch for upward drift |
|
|
101
|
+
| `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
|
|
102
|
+
| `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
|
|
103
|
+
| `DIVERGING` | positive slope with `p < 0.05` AND error grown to > 110% of E_first | Abort — roll back to best-so-far |
|
|
80
104
|
|
|
81
105
|
Plus a short-circuit: if observed error drops at or below `target_error`, the loop stops immediately with state `TARGET_MET`. The default `target_error=0.0` short-circuits on exactly zero error — the natural completion signal for verifier-driven loops. Pass `target_error=None` to disable the short-circuit and rely on stability detection alone.
|
|
82
106
|
|
|
83
|
-
The
|
|
107
|
+
The decision is **conservative by design**: requiring both statistical significance and meaningful cumulative motion before terminating prevents false-positive aborts on noisy real-LLM error series. The classifier was validated at 98.8% macro-averaged accuracy across 5 regimes on N=1000 deterministic-mock trajectories (see `RESULTS_v2_classifier.md`). The ~94% accuracy ceiling on STALLING comes from the t-test's irreducible 5% type-I error rate, not from a classifier weakness.
|
|
84
108
|
|
|
85
|
-
|
|
109
|
+
**Recommended minimum: 6 iterations** for reliable trend significance. At n≤4 the t-test is severely underpowered (df=2 requires |t|>4.3 for p<0.05) — the classifier conservatively falls back to STALLING when evidence is thin. The thresholds are derived analytically (control theory + statistical convention), not fitted; tune them per domain via the `TrajectoryThresholds` argument once you have production traces.
|
|
110
|
+
|
|
111
|
+
**Legacy single-feature classifier:** the original v0.1 single-Aβ-band classifier (thresholds 0.3 / 0.85 / 0.95 / 1.05) is still available via `LoopGain(classifier='legacy_bands')` for callers that have empirically tuned the bands to a specific workload.
|
|
86
112
|
|
|
87
113
|
---
|
|
88
114
|
|
|
@@ -112,16 +138,27 @@ This transforms divergence detection from "abort with garbage" into "abort with
|
|
|
112
138
|
|
|
113
139
|
---
|
|
114
140
|
|
|
141
|
+
## What LoopGain does and doesn't guarantee
|
|
142
|
+
|
|
143
|
+
LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
|
|
144
|
+
|
|
145
|
+
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
146
|
+
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
115
150
|
## API reference
|
|
116
151
|
|
|
117
|
-
### `LoopGain(target_error=0.0, max_iterations=
|
|
152
|
+
### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
|
|
118
153
|
|
|
119
154
|
Construct the monitor.
|
|
120
155
|
|
|
121
156
|
- `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
|
|
122
|
-
- `max_iterations` — Hard safety
|
|
123
|
-
- `thresholds` — Custom `ThresholdBands`
|
|
124
|
-
- `
|
|
157
|
+
- `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
|
|
158
|
+
- `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
|
|
159
|
+
- `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
|
|
160
|
+
- `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
|
|
161
|
+
- `smoothing_window` — EMA window for the smoothed Aβ series (always maintained for visualization, regardless of classifier choice). Default 3.
|
|
125
162
|
- `assumed_fixed_cap` — Used to compute `savings_vs_fixed_cap`. Default 10.
|
|
126
163
|
|
|
127
164
|
### `lg.observe(errors, output=None) -> str`
|
|
@@ -134,7 +171,7 @@ Returns `False` once a terminal state fires.
|
|
|
134
171
|
|
|
135
172
|
### `lg.state -> str`
|
|
136
173
|
|
|
137
|
-
Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`.
|
|
174
|
+
Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`. The corresponding `result.outcome` values are `converged`, `oscillating`, `diverged`, `stalled` (v0.2 trajectory mode only — STALLING terminating after 2 consecutive readings), and `max_iterations` once a terminal state has fired, or `in_progress` while the loop is still running.
|
|
138
175
|
|
|
139
176
|
### `lg.eta -> int | None`
|
|
140
177
|
|
|
@@ -184,6 +221,32 @@ What is sent: state transitions, Aβ summary (min/max/median), gain margin, roll
|
|
|
184
221
|
|
|
185
222
|
The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. The [receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are both open-source — self-host to keep telemetry fully under your control.
|
|
186
223
|
|
|
224
|
+
> **This is not the same as anonymous usage telemetry.** `send_telemetry` sends *your* loop data to *your* dashboard, and only when you call it. There's a separate, opt-in **funnel** telemetry described below. The two never share data or code.
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Anonymous funnel telemetry (opt-in, off by default)
|
|
229
|
+
|
|
230
|
+
LoopGain can report **anonymous usage counts** so a solo maintainer can tell whether the library is actually being used — install → first `observe()` → recurring use. **It is opt-in and default-decline: nothing is sent unless you explicitly turn it on.**
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
loopgain telemetry --show # status + exactly what would be sent
|
|
234
|
+
loopgain telemetry --enable # opt in (or: export LOOPGAIN_TELEMETRY=1)
|
|
235
|
+
loopgain telemetry --disable # opt out (or: export LOOPGAIN_TELEMETRY=0)
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
`DO_NOT_TRACK=1` is honored as a hard opt-out, and CI environments are auto-detected and declined silently. When enabled, payloads carry only a locally-generated random id (not derived from your machine), hour-bucketed timestamps, library/Python/OS versions, the adapter in use, and a coarse outcome count. **Prompts, outputs, error contents, keys, paths, and IPs are never collected.** Delivery is batched, async, https-only, and fail-silent — it can never break your loop. Full details and the privacy contract: **[TELEMETRY.md](TELEMETRY.md)**.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Command-line interface
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
loopgain --version # or: loopgain version
|
|
246
|
+
loopgain telemetry --show # inspect / control anonymous funnel telemetry
|
|
247
|
+
python -m loopgain telemetry --show # equivalent, without the console script
|
|
248
|
+
```
|
|
249
|
+
|
|
187
250
|
---
|
|
188
251
|
|
|
189
252
|
## Framework adapters
|
|
@@ -10,6 +10,12 @@ Public API:
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
from loopgain._version import __version__
|
|
13
|
+
from loopgain.classifier import (
|
|
14
|
+
TrajectoryFeatures,
|
|
15
|
+
TrajectoryThresholds,
|
|
16
|
+
classify_trajectory,
|
|
17
|
+
extract_features,
|
|
18
|
+
)
|
|
13
19
|
from loopgain.core import (
|
|
14
20
|
LoopGain,
|
|
15
21
|
LoopGainResult,
|
|
@@ -29,6 +35,10 @@ __all__ = [
|
|
|
29
35
|
"LoopGain",
|
|
30
36
|
"LoopGainResult",
|
|
31
37
|
"ThresholdBands",
|
|
38
|
+
"TrajectoryThresholds",
|
|
39
|
+
"TrajectoryFeatures",
|
|
40
|
+
"classify_trajectory",
|
|
41
|
+
"extract_features",
|
|
32
42
|
"INIT",
|
|
33
43
|
"FAST_CONVERGE",
|
|
34
44
|
"CONVERGING",
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Single source of truth for the package version.
|
|
2
|
+
|
|
3
|
+
``loopgain/__init__.py``, ``loopgain/telemetry.py`` (product receiver), and
|
|
4
|
+
``loopgain/funnel.py`` (opt-in funnel telemetry) all import ``__version__``
|
|
5
|
+
from here so the value never drifts between ``__version__`` and the
|
|
6
|
+
``library_version`` field on any telemetry payload. Update this file (and
|
|
7
|
+
``pyproject.toml``) for each release.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__version__ = "0.4.0"
|