loopgain 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loopgain-0.3.0 → loopgain-0.4.1}/PKG-INFO +43 -22
- {loopgain-0.3.0 → loopgain-0.4.1}/README.md +41 -20
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/_version.py +1 -1
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/classifier.py +37 -3
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/core.py +16 -3
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/PKG-INFO +43 -22
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/SOURCES.txt +2 -1
- {loopgain-0.3.0 → loopgain-0.4.1}/pyproject.toml +9 -2
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_classifier_synthetic.py +14 -4
- loopgain-0.4.1/tests/test_termination_safety.py +115 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/LICENSE +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/__init__.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/__main__.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/cli.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/funnel.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/__init__.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/autogen.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/claude_agent_sdk.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/crewai.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/langchain.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/langgraph.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/openai_agents.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/telemetry.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/dependency_links.txt +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/entry_points.txt +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/requires.txt +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/top_level.txt +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/setup.cfg +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_classifier_mock_validation.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_core.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_funnel.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_integrations.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_stress.py +0 -0
- {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_telemetry.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgain
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
|
|
5
5
|
Author-email: Dave Fitzsimmons <hello@loopgain.ai>
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Project-URL: Homepage, https://loopgain.ai
|
|
@@ -49,14 +49,16 @@ Dynamic: license-file
|
|
|
49
49
|
|
|
50
50
|
# LoopGain
|
|
51
51
|
|
|
52
|
-
**
|
|
52
|
+
**An open-source cost controller for AI agent loops.**
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
|
|
55
|
+
|
|
56
|
+
> **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
|
|
55
57
|
|
|
56
58
|
[](https://pypi.org/project/loopgain/)
|
|
57
59
|
[](https://pypi.org/project/loopgain/)
|
|
58
60
|
[](LICENSE)
|
|
59
|
-
[](tests/)
|
|
60
62
|
|
|
61
63
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
62
64
|
|
|
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
|
|
|
68
70
|
|
|
69
71
|
## Why
|
|
70
72
|
|
|
71
|
-
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic
|
|
73
|
+
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
|
|
72
74
|
|
|
73
75
|
---
|
|
74
76
|
|
|
@@ -108,6 +110,28 @@ print(result.savings_vs_fixed_cap)
|
|
|
108
110
|
|
|
109
111
|
---
|
|
110
112
|
|
|
113
|
+
## Defining your error signal
|
|
114
|
+
|
|
115
|
+
The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
|
|
116
|
+
|
|
117
|
+
Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
|
|
118
|
+
|
|
119
|
+
| Loop | Error signal = |
|
|
120
|
+
| --- | --- |
|
|
121
|
+
| Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
|
|
122
|
+
| JSON / structured extraction | number of **schema violations** |
|
|
123
|
+
| RAG with self-correction | number of **required facts still missing** |
|
|
124
|
+
| Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
|
|
125
|
+
| Lint / format loop | **lint error count** |
|
|
126
|
+
|
|
127
|
+
The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
|
|
128
|
+
|
|
129
|
+
If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
|
|
130
|
+
|
|
131
|
+
Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
111
135
|
## How it works
|
|
112
136
|
|
|
113
137
|
LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
|
|
@@ -123,7 +147,7 @@ It routes the trajectory into one of five named states:
|
|
|
123
147
|
|
|
124
148
|
| State | Condition | Action |
|
|
125
149
|
| --- | --- | --- |
|
|
126
|
-
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue
|
|
150
|
+
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first, with fewer than `stall_patience` (default 3) iterations since the last new best error | Continue |
|
|
127
151
|
| `CONVERGING` | negative slope with either `p < 0.05` or cumulative ≤ 50%, and fewer than `stall_patience` (default 3) iterations since the last new best error | Continue, watch for upward drift |
|
|
128
152
|
| `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
|
|
129
153
|
| `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
|
|
@@ -139,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
|
|
|
139
163
|
|
|
140
164
|
---
|
|
141
165
|
|
|
142
|
-
## ETA prediction
|
|
143
|
-
|
|
144
|
-
When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
|
|
145
|
-
|
|
146
|
-
```
|
|
147
|
-
n_remaining = log(E_target / E_current) / log(Aβ_smooth)
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
|
|
151
|
-
|
|
152
|
-
---
|
|
153
|
-
|
|
154
166
|
## Best-so-far rollback
|
|
155
167
|
|
|
156
168
|
LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
|
|
@@ -165,14 +177,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
|
|
|
165
177
|
|
|
166
178
|
---
|
|
167
179
|
|
|
180
|
+
## What LoopGain does and doesn't guarantee
|
|
181
|
+
|
|
182
|
+
LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% cut in total API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
|
|
183
|
+
|
|
184
|
+
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
185
|
+
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
168
189
|
## API reference
|
|
169
190
|
|
|
170
|
-
### `LoopGain(target_error=0.0, max_iterations=
|
|
191
|
+
### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
|
|
171
192
|
|
|
172
193
|
Construct the monitor.
|
|
173
194
|
|
|
174
195
|
- `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
|
|
175
|
-
- `max_iterations` — Hard safety
|
|
196
|
+
- `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
|
|
176
197
|
- `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
|
|
177
198
|
- `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
|
|
178
199
|
- `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
|
|
@@ -193,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
|
|
|
193
214
|
|
|
194
215
|
### `lg.eta -> int | None`
|
|
195
216
|
|
|
196
|
-
|
|
217
|
+
Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
|
|
197
218
|
|
|
198
219
|
### `lg.gain_margin -> float | None`
|
|
199
220
|
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# LoopGain
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**An open-source cost controller for AI agent loops.**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
|
|
6
|
+
|
|
7
|
+
> **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
|
|
6
8
|
|
|
7
9
|
[](https://pypi.org/project/loopgain/)
|
|
8
10
|
[](https://pypi.org/project/loopgain/)
|
|
9
11
|
[](LICENSE)
|
|
10
|
-
[](tests/)
|
|
11
13
|
|
|
12
14
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
13
15
|
|
|
@@ -19,7 +21,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
|
|
|
19
21
|
|
|
20
22
|
## Why
|
|
21
23
|
|
|
22
|
-
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic
|
|
24
|
+
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
|
|
23
25
|
|
|
24
26
|
---
|
|
25
27
|
|
|
@@ -59,6 +61,28 @@ print(result.savings_vs_fixed_cap)
|
|
|
59
61
|
|
|
60
62
|
---
|
|
61
63
|
|
|
64
|
+
## Defining your error signal
|
|
65
|
+
|
|
66
|
+
The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
|
|
67
|
+
|
|
68
|
+
Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
|
|
69
|
+
|
|
70
|
+
| Loop | Error signal = |
|
|
71
|
+
| --- | --- |
|
|
72
|
+
| Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
|
|
73
|
+
| JSON / structured extraction | number of **schema violations** |
|
|
74
|
+
| RAG with self-correction | number of **required facts still missing** |
|
|
75
|
+
| Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
|
|
76
|
+
| Lint / format loop | **lint error count** |
|
|
77
|
+
|
|
78
|
+
The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
|
|
79
|
+
|
|
80
|
+
If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
|
|
81
|
+
|
|
82
|
+
Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
62
86
|
## How it works
|
|
63
87
|
|
|
64
88
|
LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
|
|
@@ -74,7 +98,7 @@ It routes the trajectory into one of five named states:
|
|
|
74
98
|
|
|
75
99
|
| State | Condition | Action |
|
|
76
100
|
| --- | --- | --- |
|
|
77
|
-
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue
|
|
101
|
+
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first, with fewer than `stall_patience` (default 3) iterations since the last new best error | Continue |
|
|
78
102
|
| `CONVERGING` | negative slope with either `p < 0.05` or cumulative ≤ 50%, and fewer than `stall_patience` (default 3) iterations since the last new best error | Continue, watch for upward drift |
|
|
79
103
|
| `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
|
|
80
104
|
| `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
|
|
@@ -90,18 +114,6 @@ The decision is **conservative by design**: requiring both statistical significa
|
|
|
90
114
|
|
|
91
115
|
---
|
|
92
116
|
|
|
93
|
-
## ETA prediction
|
|
94
|
-
|
|
95
|
-
When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
|
|
96
|
-
|
|
97
|
-
```
|
|
98
|
-
n_remaining = log(E_target / E_current) / log(Aβ_smooth)
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
|
|
102
|
-
|
|
103
|
-
---
|
|
104
|
-
|
|
105
117
|
## Best-so-far rollback
|
|
106
118
|
|
|
107
119
|
LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
|
|
@@ -116,14 +128,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
|
|
|
116
128
|
|
|
117
129
|
---
|
|
118
130
|
|
|
131
|
+
## What LoopGain does and doesn't guarantee
|
|
132
|
+
|
|
133
|
+
LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% cut in total API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
|
|
134
|
+
|
|
135
|
+
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
136
|
+
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
119
140
|
## API reference
|
|
120
141
|
|
|
121
|
-
### `LoopGain(target_error=0.0, max_iterations=
|
|
142
|
+
### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
|
|
122
143
|
|
|
123
144
|
Construct the monitor.
|
|
124
145
|
|
|
125
146
|
- `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
|
|
126
|
-
- `max_iterations` — Hard safety
|
|
147
|
+
- `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
|
|
127
148
|
- `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
|
|
128
149
|
- `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
|
|
129
150
|
- `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
|
|
@@ -144,7 +165,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
|
|
|
144
165
|
|
|
145
166
|
### `lg.eta -> int | None`
|
|
146
167
|
|
|
147
|
-
|
|
168
|
+
Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
|
|
148
169
|
|
|
149
170
|
### `lg.gain_margin -> float | None`
|
|
150
171
|
|
|
@@ -66,6 +66,20 @@ DEFAULT_OSC_STD_THRESHOLD = 0.30
|
|
|
66
66
|
# for the oscillation gate.
|
|
67
67
|
DEFAULT_SLOPE_TOL = 0.05
|
|
68
68
|
|
|
69
|
+
# Liveness gate: number of iterations a loop may go without achieving a new
|
|
70
|
+
# best (lowest) error before its "continue" verdicts (FAST_CONVERGE /
|
|
71
|
+
# CONVERGING) are withdrawn so it can reach STALLING / OSCILLATING and
|
|
72
|
+
# terminate. Without this, a loop that drops a lot and then plateaus or
|
|
73
|
+
# oscillates *below* the cumulative thresholds keeps its historical win
|
|
74
|
+
# forever and never terminates. Derivation: the continue-states are claims
|
|
75
|
+
# about *ongoing* progress; cumulative reduction (E_current/E_first) and a
|
|
76
|
+
# whole-history slope are claims about the *past* and do not expire. We treat
|
|
77
|
+
# "no new low in N steps" as the loop having stopped improving. N is small
|
|
78
|
+
# (3) so a sustained plateau is caught quickly, but the consecutive-STALLING
|
|
79
|
+
# termination rule (2 readings) still protects a loop that briefly stalls and
|
|
80
|
+
# then resumes hitting new lows.
|
|
81
|
+
DEFAULT_STALL_PATIENCE = 3
|
|
82
|
+
|
|
69
83
|
# Numerical floor to avoid log(0).
|
|
70
84
|
_EPS = 1e-12
|
|
71
85
|
|
|
@@ -85,6 +99,7 @@ class TrajectoryThresholds:
|
|
|
85
99
|
div_margin: float = DEFAULT_DIV_MARGIN
|
|
86
100
|
osc_std_threshold: float = DEFAULT_OSC_STD_THRESHOLD
|
|
87
101
|
slope_tol: float = DEFAULT_SLOPE_TOL
|
|
102
|
+
stall_patience: int = DEFAULT_STALL_PATIENCE
|
|
88
103
|
|
|
89
104
|
|
|
90
105
|
@dataclass(frozen=True)
|
|
@@ -276,6 +291,18 @@ def classify_trajectory(
|
|
|
276
291
|
|
|
277
292
|
f = extract_features(error_history)
|
|
278
293
|
|
|
294
|
+
# Liveness signal: how many iterations since the loop last achieved a new
|
|
295
|
+
# best (lowest) error. A genuinely converging loop keeps hitting new lows,
|
|
296
|
+
# so this stays small; a loop that dropped a lot and then plateaued (or is
|
|
297
|
+
# oscillating below the cumulative thresholds) has a large value. We use it
|
|
298
|
+
# to withdraw the "continue" verdicts (FAST_CONVERGE / CONVERGING) once a
|
|
299
|
+
# loop has stopped improving, so it can reach STALLING / OSCILLATING and
|
|
300
|
+
# terminate instead of riding its historical cumulative win forever. See
|
|
301
|
+
# DEFAULT_STALL_PATIENCE.
|
|
302
|
+
hist = list(error_history)
|
|
303
|
+
iters_since_best = (n - 1) - hist.index(min(hist))
|
|
304
|
+
still_improving = iters_since_best < th.stall_patience
|
|
305
|
+
|
|
279
306
|
# n == 2 special case: with two observations, the slope is well defined
|
|
280
307
|
# but its p-value is not (zero residual degrees of freedom). Fall back to
|
|
281
308
|
# the sign of the change. This is the same conservatism as a Wilcoxon
|
|
@@ -291,13 +318,20 @@ def classify_trajectory(
|
|
|
291
318
|
return STALLING
|
|
292
319
|
|
|
293
320
|
# Order matters: FAST_CONVERGE precedes CONVERGING; both precede the
|
|
294
|
-
# remaining gates.
|
|
295
|
-
|
|
321
|
+
# remaining gates. Both continue-verdicts are gated on `still_improving`:
|
|
322
|
+
# a loop that has stopped hitting new lows is no longer "converging" no
|
|
323
|
+
# matter how large its historical cumulative reduction was, and must be
|
|
324
|
+
# allowed to fall through to STALLING / OSCILLATING so it can terminate.
|
|
325
|
+
if f.e_ratio <= th.e_ratio_fast and still_improving:
|
|
296
326
|
return FAST_CONVERGE
|
|
297
327
|
|
|
298
328
|
slope_significant = f.slope_p < th.p_sig
|
|
299
329
|
|
|
300
|
-
if
|
|
330
|
+
if (
|
|
331
|
+
f.slope_log < 0
|
|
332
|
+
and still_improving
|
|
333
|
+
and (slope_significant or f.e_ratio <= th.e_ratio_conv)
|
|
334
|
+
):
|
|
301
335
|
return CONVERGING
|
|
302
336
|
|
|
303
337
|
if f.slope_log > 0 and slope_significant and f.e_ratio > 1.0 + th.div_margin:
|
|
@@ -40,6 +40,16 @@ DEFAULT_STALLING = 0.95
|
|
|
40
40
|
DEFAULT_OSCILLATING_UPPER = 1.05
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
# Bounded-by-default safety backstop. The loop should normally terminate on a
|
|
44
|
+
# stability verdict (target met / oscillating / diverging / stalled) long
|
|
45
|
+
# before this; it exists only so the library can never run truly unbounded if
|
|
46
|
+
# a loop never converges and never stalls (e.g. infinitesimal-but-real progress
|
|
47
|
+
# with target_error=None). Generous relative to typical loop lengths (the
|
|
48
|
+
# bench capped at 20). Pass max_iterations=None to opt into a fully unbounded
|
|
49
|
+
# loop, or a smaller integer to cap tighter.
|
|
50
|
+
DEFAULT_MAX_ITERATIONS = 50
|
|
51
|
+
|
|
52
|
+
|
|
43
53
|
# State names. Exported for use in switch/case in user code.
|
|
44
54
|
INIT = "INIT"
|
|
45
55
|
FAST_CONVERGE = "FAST_CONVERGE"
|
|
@@ -165,8 +175,11 @@ class LoopGain:
|
|
|
165
175
|
tests, no validation errors, etc.). Pass ``None`` to disable
|
|
166
176
|
the short-circuit entirely and rely only on stability
|
|
167
177
|
detection and ``max_iterations``.
|
|
168
|
-
max_iterations: Hard safety
|
|
169
|
-
|
|
178
|
+
max_iterations: Hard safety backstop. Default
|
|
179
|
+
``DEFAULT_MAX_ITERATIONS`` (50) so the loop can never run
|
|
180
|
+
unbounded; normally a stability verdict terminates it long
|
|
181
|
+
before this. Pass ``None`` to opt into a fully unbounded loop,
|
|
182
|
+
or a smaller integer to cap tighter.
|
|
170
183
|
thresholds: Custom ``ThresholdBands`` (legacy single-feature
|
|
171
184
|
classifier only). Default is the canonical 0.3 / 0.85 / 0.95 /
|
|
172
185
|
1.05. Ignored when ``classifier='trajectory'``.
|
|
@@ -190,7 +203,7 @@ class LoopGain:
|
|
|
190
203
|
def __init__(
|
|
191
204
|
self,
|
|
192
205
|
target_error: Optional[float] = 0.0,
|
|
193
|
-
max_iterations: Optional[int] =
|
|
206
|
+
max_iterations: Optional[int] = DEFAULT_MAX_ITERATIONS,
|
|
194
207
|
thresholds: Optional[ThresholdBands] = None,
|
|
195
208
|
trajectory_thresholds: Optional[TrajectoryThresholds] = None,
|
|
196
209
|
classifier: str = "trajectory",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgain
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
|
|
5
5
|
Author-email: Dave Fitzsimmons <hello@loopgain.ai>
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Project-URL: Homepage, https://loopgain.ai
|
|
@@ -49,14 +49,16 @@ Dynamic: license-file
|
|
|
49
49
|
|
|
50
50
|
# LoopGain
|
|
51
51
|
|
|
52
|
-
**
|
|
52
|
+
**An open-source cost controller for AI agent loops.**
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
|
|
55
|
+
|
|
56
|
+
> **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
|
|
55
57
|
|
|
56
58
|
[](https://pypi.org/project/loopgain/)
|
|
57
59
|
[](https://pypi.org/project/loopgain/)
|
|
58
60
|
[](LICENSE)
|
|
59
|
-
[](tests/)
|
|
60
62
|
|
|
61
63
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
62
64
|
|
|
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
|
|
|
68
70
|
|
|
69
71
|
## Why
|
|
70
72
|
|
|
71
|
-
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic
|
|
73
|
+
Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
|
|
72
74
|
|
|
73
75
|
---
|
|
74
76
|
|
|
@@ -108,6 +110,28 @@ print(result.savings_vs_fixed_cap)
|
|
|
108
110
|
|
|
109
111
|
---
|
|
110
112
|
|
|
113
|
+
## Defining your error signal
|
|
114
|
+
|
|
115
|
+
The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
|
|
116
|
+
|
|
117
|
+
Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
|
|
118
|
+
|
|
119
|
+
| Loop | Error signal = |
|
|
120
|
+
| --- | --- |
|
|
121
|
+
| Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
|
|
122
|
+
| JSON / structured extraction | number of **schema violations** |
|
|
123
|
+
| RAG with self-correction | number of **required facts still missing** |
|
|
124
|
+
| Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
|
|
125
|
+
| Lint / format loop | **lint error count** |
|
|
126
|
+
|
|
127
|
+
The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
|
|
128
|
+
|
|
129
|
+
If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
|
|
130
|
+
|
|
131
|
+
Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
111
135
|
## How it works
|
|
112
136
|
|
|
113
137
|
LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
|
|
@@ -123,7 +147,7 @@ It routes the trajectory into one of five named states:
|
|
|
123
147
|
|
|
124
148
|
| State | Condition | Action |
|
|
125
149
|
| --- | --- | --- |
|
|
126
|
-
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue
|
|
150
|
+
| `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
|
|
127
151
|
| `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
|
|
128
152
|
| `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
|
|
129
153
|
| `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
|
|
@@ -139,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
|
|
|
139
163
|
|
|
140
164
|
---
|
|
141
165
|
|
|
142
|
-
## ETA prediction
|
|
143
|
-
|
|
144
|
-
When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
|
|
145
|
-
|
|
146
|
-
```
|
|
147
|
-
n_remaining = log(E_target / E_current) / log(Aβ_smooth)
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
|
|
151
|
-
|
|
152
|
-
---
|
|
153
|
-
|
|
154
166
|
## Best-so-far rollback
|
|
155
167
|
|
|
156
168
|
LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
|
|
@@ -165,14 +177,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
|
|
|
165
177
|
|
|
166
178
|
---
|
|
167
179
|
|
|
180
|
+
## What LoopGain does and doesn't guarantee
|
|
181
|
+
|
|
182
|
+
LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
|
|
183
|
+
|
|
184
|
+
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
185
|
+
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
168
189
|
## API reference
|
|
169
190
|
|
|
170
|
-
### `LoopGain(target_error=0.0, max_iterations=
|
|
191
|
+
### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
|
|
171
192
|
|
|
172
193
|
Construct the monitor.
|
|
173
194
|
|
|
174
195
|
- `target_error` — Stop when an observed error drops to or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
|
|
175
|
-
- `max_iterations` — Hard safety
|
|
196
|
+
- `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to set a tighter cap.
|
|
176
197
|
- `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
|
|
177
198
|
- `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
|
|
178
199
|
- `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
|
|
@@ -193,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
|
|
|
193
214
|
|
|
194
215
|
### `lg.eta -> int | None`
|
|
195
216
|
|
|
196
|
-
|
|
217
|
+
Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
|
|
197
218
|
|
|
198
219
|
### `lg.gain_margin -> float | None`
|
|
199
220
|
|
|
@@ -4,8 +4,10 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "loopgain"
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
# Single source of truth: loopgain/_version.py (read dynamically below).
|
|
8
|
+
# Bump the version in that one file for each release; pyproject.toml no longer duplicates it.
|
|
9
|
+
dynamic = ["version"]
|
|
10
|
+
description = "An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback."
|
|
9
11
|
authors = [{name = "Dave Fitzsimmons", email = "hello@loopgain.ai"}]
|
|
10
12
|
readme = "README.md"
|
|
11
13
|
license = {text = "Apache-2.0"}
|
|
@@ -100,6 +102,11 @@ all = [
|
|
|
100
102
|
# zero-dep. Install with `pip install 'loopgain[examples]'`.
|
|
101
103
|
examples = ["anthropic>=0.40.0"]
|
|
102
104
|
|
|
105
|
+
[tool.setuptools.dynamic]
|
|
106
|
+
# Reads the literal ``__version__ = "x.y.z"`` from loopgain/_version.py via AST
|
|
107
|
+
# (no import), so pyproject.toml never duplicates the version string.
|
|
108
|
+
version = {attr = "loopgain._version.__version__"}
|
|
109
|
+
|
|
103
110
|
[tool.setuptools.packages.find]
|
|
104
111
|
where = ["."]
|
|
105
112
|
include = ["loopgain*"]
|
|
@@ -158,12 +158,22 @@ def test_pure_stall_no_trend():
|
|
|
158
158
|
)
|
|
159
159
|
|
|
160
160
|
|
|
161
|
-
def
|
|
162
|
-
"""
|
|
163
|
-
|
|
161
|
+
def test_floor_convergence_already_flat_at_floor_stalls():
|
|
162
|
+
"""A loop already pinned at the numerical floor from iteration 0, flat,
|
|
163
|
+
classifies as STALLING — not FAST_CONVERGE.
|
|
164
|
+
|
|
165
|
+
Updated 2026-06 with the liveness-gate fix (see DEFAULT_STALL_PATIENCE).
|
|
166
|
+
Previously this returned FAST_CONVERGE on the strength of cumulative
|
|
167
|
+
reduction alone — but FAST_CONVERGE is a *continue* verdict, so an
|
|
168
|
+
at-floor flat loop would have continued (and, with no max_iterations,
|
|
169
|
+
run unbounded) instead of stopping. STALLING is the correct verdict: the
|
|
170
|
+
loop has made no progress for `stall_patience` iterations, so it
|
|
171
|
+
terminates via the consecutive-stall rule and returns best-so-far (the
|
|
172
|
+
floor value — a fine answer). In real use the `target_error`
|
|
173
|
+
short-circuit (next test) handles the at-target case directly."""
|
|
164
174
|
trajectory = [1e-15] * 5
|
|
165
175
|
state = classify_trajectory(trajectory)
|
|
166
|
-
assert state ==
|
|
176
|
+
assert state == STALLING
|
|
167
177
|
|
|
168
178
|
|
|
169
179
|
def test_target_met_short_circuit():
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Termination-safety tests: a loop must not run unbounded.
|
|
2
|
+
|
|
3
|
+
Regression coverage for the FAST_CONVERGE/CONVERGING liveness bug (2026-06):
|
|
4
|
+
the trajectory classifier used *cumulative* reduction (E_current/E_first) and a
|
|
5
|
+
*whole-history* slope to emit the "continue" verdicts FAST_CONVERGE and
|
|
6
|
+
CONVERGING. A loop that reduced its error and then plateaued (or oscillated)
|
|
7
|
+
*below* the cumulative thresholds kept its historical win forever — it was
|
|
8
|
+
pinned in a continue-state, never reached STALLING/OSCILLATING, and with the
|
|
9
|
+
(then-default) max_iterations=None it ran forever.
|
|
10
|
+
|
|
11
|
+
The fix has two independent layers, each tested here:
|
|
12
|
+
1. A liveness gate on the continue-verdicts: a loop that has not achieved a
|
|
13
|
+
new best error in `stall_patience` iterations is no longer treated as
|
|
14
|
+
"improving", so it can reach STALLING/OSCILLATING and terminate.
|
|
15
|
+
2. A bounded default max_iterations backstop, so the library can never run
|
|
16
|
+
truly unbounded even if a future classifier path regresses.
|
|
17
|
+
|
|
18
|
+
Output quality was never at risk (best-so-far rollback held the good answer);
|
|
19
|
+
the bug was a *liveness* failure — the loop never returned to hand it back.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import pytest
|
|
25
|
+
|
|
26
|
+
from loopgain import CONVERGING, FAST_CONVERGE, LoopGain, classify_trajectory
|
|
27
|
+
|
|
28
|
+
# Hard test guard: large enough that a *correctly* terminating loop never hits
|
|
29
|
+
# it, small enough that a regression (unbounded loop) fails fast instead of
|
|
30
|
+
# hanging the suite.
|
|
31
|
+
GUARD = 500
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _run_to_termination(lg: LoopGain, errors, guard: int = GUARD):
|
|
35
|
+
"""Drive a loop, plateauing/repeating the last error, until it terminates
|
|
36
|
+
or hits the guard. Returns (iterations_run, hit_guard)."""
|
|
37
|
+
i = 0
|
|
38
|
+
while lg.should_continue():
|
|
39
|
+
e = errors[i] if i < len(errors) else errors[-1]
|
|
40
|
+
lg.observe(e, output=f"o{i}")
|
|
41
|
+
i += 1
|
|
42
|
+
if i >= guard:
|
|
43
|
+
return i, True
|
|
44
|
+
return i, False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ----- Layer 1: classifier liveness gate -----
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_plateau_below_fast_floor_terminates_without_max_iter():
|
|
51
|
+
"""Error drops to 8% of initial then plateaus. e_ratio<=0.1 used to pin
|
|
52
|
+
FAST_CONVERGE forever. Must now terminate via STALLING."""
|
|
53
|
+
lg = LoopGain(max_iterations=None, target_error=None)
|
|
54
|
+
n, hit_guard = _run_to_termination(lg, [100, 8, 8, 8, 8, 8, 8, 8])
|
|
55
|
+
assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
|
|
56
|
+
assert not lg.should_continue()
|
|
57
|
+
assert lg.result.best_error == 8.0 # best-so-far still returned
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_plateau_above_fast_floor_terminates_without_max_iter():
|
|
61
|
+
"""Error drops to 30% of initial (below E_RATIO_CONV=0.5) then plateaus.
|
|
62
|
+
e_ratio<=0.5 with a whole-history negative slope used to pin CONVERGING
|
|
63
|
+
forever. Must now terminate."""
|
|
64
|
+
lg = LoopGain(max_iterations=None, target_error=None)
|
|
65
|
+
n, hit_guard = _run_to_termination(lg, [100, 30, 30, 30, 30, 30, 30, 30])
|
|
66
|
+
assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
|
|
67
|
+
assert not lg.should_continue()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_oscillation_below_floor_terminates_without_max_iter():
|
|
71
|
+
"""Oscillation entirely below the 10% cumulative floor used to be shadowed
|
|
72
|
+
by FAST_CONVERGE. Must now terminate (OSCILLATING or STALLING)."""
|
|
73
|
+
lg = LoopGain(max_iterations=None, target_error=None)
|
|
74
|
+
n, hit_guard = _run_to_termination(lg, [100, 5, 8, 5, 8, 5, 8, 5, 8])
|
|
75
|
+
assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
|
|
76
|
+
assert not lg.should_continue()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_classifier_flags_plateau_after_big_drop_as_terminable():
|
|
80
|
+
"""Direct classifier check: a big drop followed by a flat tail must NOT be
|
|
81
|
+
reported as a continue-state (FAST_CONVERGE/CONVERGING)."""
|
|
82
|
+
plateau_low = [100, 8, 8, 8, 8, 8]
|
|
83
|
+
plateau_mid = [100, 30, 30, 30, 30, 30]
|
|
84
|
+
assert classify_trajectory(plateau_low) not in (FAST_CONVERGE, CONVERGING)
|
|
85
|
+
assert classify_trajectory(plateau_mid) not in (FAST_CONVERGE, CONVERGING)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_genuine_fast_converge_still_continues():
|
|
89
|
+
"""Guard against over-correction: a monotone steep decline that keeps
|
|
90
|
+
hitting new lows must still read FAST_CONVERGE (continue), not be
|
|
91
|
+
prematurely stalled."""
|
|
92
|
+
monotone = [100, 25, 6, 1.5, 0.4, 0.1] # new low every step
|
|
93
|
+
assert classify_trajectory(monotone) == FAST_CONVERGE
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_genuine_converging_still_continues():
|
|
97
|
+
"""A steady decline landing between the two cumulative thresholds must
|
|
98
|
+
still read CONVERGING while it is still hitting new lows."""
|
|
99
|
+
converging = [10.0, 8.0, 6.4, 5.1, 4.1, 3.3] # ~0.8x/step, new low every step
|
|
100
|
+
assert classify_trajectory(converging) == CONVERGING
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ----- Layer 2: bounded default backstop -----
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_default_max_iterations_is_a_bounded_backstop():
|
|
107
|
+
"""The default config must not be able to run unbounded. A never-improving
|
|
108
|
+
loop under all-default construction must terminate at the backstop."""
|
|
109
|
+
lg = LoopGain() # all defaults
|
|
110
|
+
assert lg.max_iterations is not None, "default max_iterations must be bounded"
|
|
111
|
+
# A strictly increasing error never converges/stalls into best-so-far early
|
|
112
|
+
# under any classifier path; the backstop must still stop it.
|
|
113
|
+
i, hit_guard = _run_to_termination(lg, list(range(1, GUARD + 5)))
|
|
114
|
+
assert not hit_guard, "default backstop failed to bound the loop"
|
|
115
|
+
assert not lg.should_continue()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|