loopgain 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {loopgain-0.3.0 → loopgain-0.4.1}/PKG-INFO +43 -22
  2. {loopgain-0.3.0 → loopgain-0.4.1}/README.md +41 -20
  3. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/_version.py +1 -1
  4. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/classifier.py +37 -3
  5. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/core.py +16 -3
  6. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/PKG-INFO +43 -22
  7. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/SOURCES.txt +2 -1
  8. {loopgain-0.3.0 → loopgain-0.4.1}/pyproject.toml +9 -2
  9. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_classifier_synthetic.py +14 -4
  10. loopgain-0.4.1/tests/test_termination_safety.py +115 -0
  11. {loopgain-0.3.0 → loopgain-0.4.1}/LICENSE +0 -0
  12. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/__init__.py +0 -0
  13. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/__main__.py +0 -0
  14. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/cli.py +0 -0
  15. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/funnel.py +0 -0
  16. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/__init__.py +0 -0
  17. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/autogen.py +0 -0
  18. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/claude_agent_sdk.py +0 -0
  19. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/crewai.py +0 -0
  20. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/langchain.py +0 -0
  21. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/langgraph.py +0 -0
  22. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/integrations/openai_agents.py +0 -0
  23. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain/telemetry.py +0 -0
  24. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/dependency_links.txt +0 -0
  25. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/entry_points.txt +0 -0
  26. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/requires.txt +0 -0
  27. {loopgain-0.3.0 → loopgain-0.4.1}/loopgain.egg-info/top_level.txt +0 -0
  28. {loopgain-0.3.0 → loopgain-0.4.1}/setup.cfg +0 -0
  29. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_classifier_mock_validation.py +0 -0
  30. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_core.py +0 -0
  31. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_funnel.py +0 -0
  32. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_integrations.py +0 -0
  33. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_stress.py +0 -0
  34. {loopgain-0.3.0 → loopgain-0.4.1}/tests/test_telemetry.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.3.0
4
- Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
3
+ Version: 0.4.1
4
+ Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
7
7
  Project-URL: Homepage, https://loopgain.ai
@@ -49,14 +49,16 @@ Dynamic: license-file
49
49
 
50
50
  # LoopGain
51
51
 
52
- **Barkhausen stability monitor for AI agent loops.**
52
+ **An open-source cost controller for AI agent loops.**
53
53
 
54
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
54
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
55
+
56
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
55
57
 
56
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
57
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
58
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
59
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
60
62
 
61
63
  **Home:** [loopgain.ai](https://loopgain.ai)
62
64
 
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
68
70
 
69
71
  ## Why
70
72
 
71
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
73
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
72
74
 
73
75
  ---
74
76
 
@@ -108,6 +110,28 @@ print(result.savings_vs_fixed_cap)
108
110
 
109
111
  ---
110
112
 
113
+ ## Defining your error signal
114
+
115
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
116
+
117
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
118
+
119
+ | Loop | Error signal = |
120
+ | --- | --- |
121
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
122
+ | JSON / structured extraction | number of **schema violations** |
123
+ | RAG with self-correction | number of **required facts still missing** |
124
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
125
+ | Lint / format loop | **lint error count** |
126
+
127
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
128
+
129
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
130
+
131
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
132
+
133
+ ---
134
+
111
135
  ## How it works
112
136
 
113
137
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -123,7 +147,7 @@ It routes the trajectory into one of five named states:
123
147
 
124
148
  | State | Condition | Action |
125
149
  | --- | --- | --- |
126
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
150
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
127
151
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
128
152
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
129
153
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -139,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
139
163
 
140
164
  ---
141
165
 
142
- ## ETA prediction
143
-
144
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
145
-
146
- ```
147
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
148
- ```
149
-
150
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
151
-
152
- ---
153
-
154
166
  ## Best-so-far rollback
155
167
 
156
168
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -165,14 +177,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
165
177
 
166
178
  ---
167
179
 
180
+ ## What LoopGain does and doesn't guarantee
181
+
182
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
183
+
184
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
186
+
187
+ ---
188
+
168
189
  ## API reference
169
190
 
170
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
191
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
171
192
 
172
193
  Construct the monitor.
173
194
 
174
195
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
175
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
196
+ - `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
176
197
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
177
198
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
178
199
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -193,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
193
214
 
194
215
  ### `lg.eta -> int | None`
195
216
 
196
- Predicted iterations to reach target. `None` when not well-defined.
217
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
197
218
 
198
219
  ### `lg.gain_margin -> float | None`
199
220
 
@@ -1,13 +1,15 @@
1
1
  # LoopGain
2
2
 
3
- **Barkhausen stability monitor for AI agent loops.**
3
+ **An open-source cost controller for AI agent loops.**
4
4
 
5
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
5
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
6
+
7
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
6
8
 
7
9
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
8
10
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
9
11
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
10
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
12
+ [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
11
13
 
12
14
  **Home:** [loopgain.ai](https://loopgain.ai)
13
15
 
@@ -19,7 +21,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
19
21
 
20
22
  ## Why
21
23
 
22
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
24
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
23
25
 
24
26
  ---
25
27
 
@@ -59,6 +61,28 @@ print(result.savings_vs_fixed_cap)
59
61
 
60
62
  ---
61
63
 
64
+ ## Defining your error signal
65
+
66
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
67
+
68
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
69
+
70
+ | Loop | Error signal = |
71
+ | --- | --- |
72
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
73
+ | JSON / structured extraction | number of **schema violations** |
74
+ | RAG with self-correction | number of **required facts still missing** |
75
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
76
+ | Lint / format loop | **lint error count** |
77
+
78
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
79
+
80
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
81
+
82
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
83
+
84
+ ---
85
+
62
86
  ## How it works
63
87
 
64
88
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -74,7 +98,7 @@ It routes the trajectory into one of five named states:
74
98
 
75
99
  | State | Condition | Action |
76
100
  | --- | --- | --- |
77
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
101
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
78
102
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
79
103
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
80
104
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -90,18 +114,6 @@ The decision is **conservative by design**: requiring both statistical significa
90
114
 
91
115
  ---
92
116
 
93
- ## ETA prediction
94
-
95
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
96
-
97
- ```
98
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
99
- ```
100
-
101
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
102
-
103
- ---
104
-
105
117
  ## Best-so-far rollback
106
118
 
107
119
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -116,14 +128,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
116
128
 
117
129
  ---
118
130
 
131
+ ## What LoopGain does and doesn't guarantee
132
+
133
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
134
+
135
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
136
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
137
+
138
+ ---
139
+
119
140
  ## API reference
120
141
 
121
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
142
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
122
143
 
123
144
  Construct the monitor.
124
145
 
125
146
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
126
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
147
+ - `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
127
148
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
128
149
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
129
150
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -144,7 +165,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
144
165
 
145
166
  ### `lg.eta -> int | None`
146
167
 
147
- Predicted iterations to reach target. `None` when not well-defined.
168
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
148
169
 
149
170
  ### `lg.gain_margin -> float | None`
150
171
 
@@ -7,4 +7,4 @@ from here so the value never drifts between ``__version__`` and the
7
7
  ``pyproject.toml``) for each release.
8
8
  """
9
9
 
10
- __version__ = "0.3.0"
10
+ __version__ = "0.4.1"
@@ -66,6 +66,20 @@ DEFAULT_OSC_STD_THRESHOLD = 0.30
66
66
  # for the oscillation gate.
67
67
  DEFAULT_SLOPE_TOL = 0.05
68
68
 
69
+ # Liveness gate: number of iterations a loop may go without achieving a new
70
+ # best (lowest) error before its "continue" verdicts (FAST_CONVERGE /
71
+ # CONVERGING) are withdrawn so it can reach STALLING / OSCILLATING and
72
+ # terminate. Without this, a loop that drops a lot and then plateaus or
73
+ # oscillates *below* the cumulative thresholds keeps its historical win
74
+ # forever and never terminates. Derivation: the continue-states are claims
75
+ # about *ongoing* progress; cumulative reduction (E_current/E_first) and a
76
+ # whole-history slope are claims about the *past* and do not expire. We treat
77
+ # "no new low in N steps" as the loop having stopped improving. N is small
78
+ # (3) so a sustained plateau is caught quickly, but the consecutive-STALLING
79
+ # termination rule (2 readings) still protects a loop that briefly stalls and
80
+ # then resumes hitting new lows.
81
+ DEFAULT_STALL_PATIENCE = 3
82
+
69
83
  # Numerical floor to avoid log(0).
70
84
  _EPS = 1e-12
71
85
 
@@ -85,6 +99,7 @@ class TrajectoryThresholds:
85
99
  div_margin: float = DEFAULT_DIV_MARGIN
86
100
  osc_std_threshold: float = DEFAULT_OSC_STD_THRESHOLD
87
101
  slope_tol: float = DEFAULT_SLOPE_TOL
102
+ stall_patience: int = DEFAULT_STALL_PATIENCE
88
103
 
89
104
 
90
105
  @dataclass(frozen=True)
@@ -276,6 +291,18 @@ def classify_trajectory(
276
291
 
277
292
  f = extract_features(error_history)
278
293
 
294
+ # Liveness signal: how many iterations since the loop last achieved a new
295
+ # best (lowest) error. A genuinely converging loop keeps hitting new lows,
296
+ # so this stays small; a loop that dropped a lot and then plateaued (or is
297
+ # oscillating below the cumulative thresholds) has a large value. We use it
298
+ # to withdraw the "continue" verdicts (FAST_CONVERGE / CONVERGING) once a
299
+ # loop has stopped improving, so it can reach STALLING / OSCILLATING and
300
+ # terminate instead of riding its historical cumulative win forever. See
301
+ # DEFAULT_STALL_PATIENCE.
302
+ hist = list(error_history)
303
+ iters_since_best = (n - 1) - hist.index(min(hist))
304
+ still_improving = iters_since_best < th.stall_patience
305
+
279
306
  # n == 2 special case: with two observations, the slope is well defined
280
307
  # but its p-value is not (zero residual degrees of freedom). Fall back to
281
308
  # the sign of the change. This is the same conservatism as a Wilcoxon
@@ -291,13 +318,20 @@ def classify_trajectory(
291
318
  return STALLING
292
319
 
293
320
  # Order matters: FAST_CONVERGE precedes CONVERGING; both precede the
294
- # remaining gates.
295
- if f.e_ratio <= th.e_ratio_fast:
321
+ # remaining gates. Both continue-verdicts are gated on `still_improving`:
322
+ # a loop that has stopped hitting new lows is no longer "converging" no
323
+ # matter how large its historical cumulative reduction was, and must be
324
+ # allowed to fall through to STALLING / OSCILLATING so it can terminate.
325
+ if f.e_ratio <= th.e_ratio_fast and still_improving:
296
326
  return FAST_CONVERGE
297
327
 
298
328
  slope_significant = f.slope_p < th.p_sig
299
329
 
300
- if f.slope_log < 0 and (slope_significant or f.e_ratio <= th.e_ratio_conv):
330
+ if (
331
+ f.slope_log < 0
332
+ and still_improving
333
+ and (slope_significant or f.e_ratio <= th.e_ratio_conv)
334
+ ):
301
335
  return CONVERGING
302
336
 
303
337
  if f.slope_log > 0 and slope_significant and f.e_ratio > 1.0 + th.div_margin:
@@ -40,6 +40,16 @@ DEFAULT_STALLING = 0.95
40
40
  DEFAULT_OSCILLATING_UPPER = 1.05
41
41
 
42
42
 
43
+ # Bounded-by-default safety backstop. The loop should normally terminate on a
44
+ # stability verdict (target met / oscillating / diverging / stalled) long
45
+ # before this; it exists only so the library can never run truly unbounded if
46
+ # a loop never converges and never stalls (e.g. infinitesimal-but-real progress
47
+ # with target_error=None). Generous relative to typical loop lengths (the
48
+ # bench capped at 20). Pass max_iterations=None to opt into a fully unbounded
49
+ # loop, or a smaller integer to cap tighter.
50
+ DEFAULT_MAX_ITERATIONS = 50
51
+
52
+
43
53
  # State names. Exported for use in switch/case in user code.
44
54
  INIT = "INIT"
45
55
  FAST_CONVERGE = "FAST_CONVERGE"
@@ -165,8 +175,11 @@ class LoopGain:
165
175
  tests, no validation errors, etc.). Pass ``None`` to disable
166
176
  the short-circuit entirely and rely only on stability
167
177
  detection and ``max_iterations``.
168
- max_iterations: Hard safety cap. Default ``None`` (rely on
169
- stability detection). Recommended ~20-50 for production.
178
+ max_iterations: Hard safety backstop. Default
179
+ ``DEFAULT_MAX_ITERATIONS`` (50) so the loop can never run
180
+ unbounded; normally a stability verdict terminates it long
181
+ before this. Pass ``None`` to opt into a fully unbounded loop,
182
+ or a smaller integer to cap tighter.
170
183
  thresholds: Custom ``ThresholdBands`` (legacy single-feature
171
184
  classifier only). Default is the canonical 0.3 / 0.85 / 0.95 /
172
185
  1.05. Ignored when ``classifier='trajectory'``.
@@ -190,7 +203,7 @@ class LoopGain:
190
203
  def __init__(
191
204
  self,
192
205
  target_error: Optional[float] = 0.0,
193
- max_iterations: Optional[int] = None,
206
+ max_iterations: Optional[int] = DEFAULT_MAX_ITERATIONS,
194
207
  thresholds: Optional[ThresholdBands] = None,
195
208
  trajectory_thresholds: Optional[TrajectoryThresholds] = None,
196
209
  classifier: str = "trajectory",
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.3.0
4
- Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
3
+ Version: 0.4.1
4
+ Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
7
7
  Project-URL: Homepage, https://loopgain.ai
@@ -49,14 +49,16 @@ Dynamic: license-file
49
49
 
50
50
  # LoopGain
51
51
 
52
- **Barkhausen stability monitor for AI agent loops.**
52
+ **An open-source cost controller for AI agent loops.**
53
53
 
54
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
54
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades — instead of running to a fixed `max_iterations` cap.
55
+
56
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
55
57
 
56
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
57
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
58
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
59
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
60
62
 
61
63
  **Home:** [loopgain.ai](https://loopgain.ai)
62
64
 
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
68
70
 
69
71
  ## Why
70
72
 
71
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
73
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
72
74
 
73
75
  ---
74
76
 
@@ -108,6 +110,28 @@ print(result.savings_vs_fixed_cap)
108
110
 
109
111
  ---
110
112
 
113
+ ## Defining your error signal
114
+
115
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
116
+
117
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
118
+
119
+ | Loop | Error signal = |
120
+ | --- | --- |
121
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
122
+ | JSON / structured extraction | number of **schema violations** |
123
+ | RAG with self-correction | number of **required facts still missing** |
124
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
125
+ | Lint / format loop | **lint error count** |
126
+
127
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
128
+
129
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
130
+
131
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
132
+
133
+ ---
134
+
111
135
  ## How it works
112
136
 
113
137
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -123,7 +147,7 @@ It routes the trajectory into one of five named states:
123
147
 
124
148
  | State | Condition | Action |
125
149
  | --- | --- | --- |
126
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
150
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
127
151
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
128
152
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
129
153
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -139,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
139
163
 
140
164
  ---
141
165
 
142
- ## ETA prediction
143
-
144
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
145
-
146
- ```
147
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
148
- ```
149
-
150
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
151
-
152
- ---
153
-
154
166
  ## Best-so-far rollback
155
167
 
156
168
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -165,14 +177,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
165
177
 
166
178
  ---
167
179
 
180
+ ## What LoopGain does and doesn't guarantee
181
+
182
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% cut in total API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
183
+
184
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
186
+
187
+ ---
188
+
168
189
  ## API reference
169
190
 
170
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
191
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
171
192
 
172
193
  Construct the monitor.
173
194
 
174
195
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
175
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
196
+ - `max_iterations` — Hard safety backstop. Default `50` so the loop can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
176
197
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
177
198
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
178
199
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -193,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
193
214
 
194
215
  ### `lg.eta -> int | None`
195
216
 
196
- Predicted iterations to reach target. `None` when not well-defined.
217
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
197
218
 
198
219
  ### `lg.gain_margin -> float | None`
199
220
 
@@ -28,4 +28,5 @@ tests/test_core.py
28
28
  tests/test_funnel.py
29
29
  tests/test_integrations.py
30
30
  tests/test_stress.py
31
- tests/test_telemetry.py
31
+ tests/test_telemetry.py
32
+ tests/test_termination_safety.py
@@ -4,8 +4,10 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "loopgain"
7
- version = "0.3.0"
8
- description = "Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction."
7
+ # Single source of truth: loopgain/_version.py (read dynamically below).
8
+ # Bump the version in that one file per release; this no longer duplicates it.
9
+ dynamic = ["version"]
10
+ description = "An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback."
9
11
  authors = [{name = "Dave Fitzsimmons", email = "hello@loopgain.ai"}]
10
12
  readme = "README.md"
11
13
  license = {text = "Apache-2.0"}
@@ -100,6 +102,11 @@ all = [
100
102
  # zero-dep. Install with `pip install 'loopgain[examples]'`.
101
103
  examples = ["anthropic>=0.40.0"]
102
104
 
105
+ [tool.setuptools.dynamic]
106
+ # Reads the literal ``__version__ = "x.y.z"`` from loopgain/_version.py via AST
107
+ # (no import), so pyproject.toml never duplicates the version string.
108
+ version = {attr = "loopgain._version.__version__"}
109
+
103
110
  [tool.setuptools.packages.find]
104
111
  where = ["."]
105
112
  include = ["loopgain*"]
@@ -158,12 +158,22 @@ def test_pure_stall_no_trend():
158
158
  )
159
159
 
160
160
 
161
- def test_floor_convergence_already_at_target():
162
- """If error is already 0 at observation 1, classifier returns
163
- FAST_CONVERGE (cumulative reduction to floor)."""
161
+ def test_floor_convergence_already_flat_at_floor_stalls():
162
+ """A loop already pinned at the numerical floor from iteration 0, flat,
163
+ classifies as STALLING not FAST_CONVERGE.
164
+
165
+ Updated 2026-06 with the liveness-gate fix (see DEFAULT_STALL_PATIENCE).
166
+ Previously this returned FAST_CONVERGE on the strength of cumulative
167
+ reduction alone — but FAST_CONVERGE is a *continue* verdict, so an
168
+ at-floor flat loop would have continued (and, with no max_iterations,
169
+ run unbounded) instead of stopping. STALLING is the correct verdict: the
170
+ loop has made no progress for `stall_patience` iterations, so it
171
+ terminates via the consecutive-stall rule and returns best-so-far (the
172
+ floor value — a fine answer). In real use the `target_error`
173
+ short-circuit (next test) handles the at-target case directly."""
164
174
  trajectory = [1e-15] * 5
165
175
  state = classify_trajectory(trajectory)
166
- assert state == FAST_CONVERGE
176
+ assert state == STALLING
167
177
 
168
178
 
169
179
  def test_target_met_short_circuit():
@@ -0,0 +1,115 @@
1
+ """Termination-safety tests: a loop must not run unbounded.
2
+
3
+ Regression coverage for the FAST_CONVERGE/CONVERGING liveness bug (2026-06):
4
+ the trajectory classifier used *cumulative* reduction (E_current/E_first) and a
5
+ *whole-history* slope to emit the "continue" verdicts FAST_CONVERGE and
6
+ CONVERGING. A loop that reduced its error and then plateaued (or oscillated)
7
+ *below* the cumulative thresholds kept its historical win forever — it was
8
+ pinned in a continue-state, never reached STALLING/OSCILLATING, and with the
9
+ (then-default) max_iterations=None it ran forever.
10
+
11
+ The fix has two independent layers, each tested here:
12
+ 1. A liveness gate on the continue-verdicts: a loop that has not achieved a
13
+ new best error in `stall_patience` iterations is no longer treated as
14
+ "improving", so it can reach STALLING/OSCILLATING and terminate.
15
+ 2. A bounded default max_iterations backstop, so the library can never run
16
+ truly unbounded even if a future classifier path regresses.
17
+
18
+ Output quality was never at risk (best-so-far rollback held the good answer);
19
+ the bug was a *liveness* failure — the loop never returned to hand it back.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import pytest
25
+
26
+ from loopgain import CONVERGING, FAST_CONVERGE, LoopGain, classify_trajectory
27
+
28
+ # Hard test guard: large enough that a *correctly* terminating loop never hits
29
+ # it, small enough that a regression (unbounded loop) fails fast instead of
30
+ # hanging the suite.
31
+ GUARD = 500
32
+
33
+
34
+ def _run_to_termination(lg: LoopGain, errors, guard: int = GUARD):
35
+ """Drive a loop, plateauing/repeating the last error, until it terminates
36
+ or hits the guard. Returns (iterations_run, hit_guard)."""
37
+ i = 0
38
+ while lg.should_continue():
39
+ e = errors[i] if i < len(errors) else errors[-1]
40
+ lg.observe(e, output=f"o{i}")
41
+ i += 1
42
+ if i >= guard:
43
+ return i, True
44
+ return i, False
45
+
46
+
47
+ # ----- Layer 1: classifier liveness gate -----
48
+
49
+
50
+ def test_plateau_below_fast_floor_terminates_without_max_iter():
51
+ """Error drops to 8% of initial then plateaus. e_ratio<=0.1 used to pin
52
+ FAST_CONVERGE forever. Must now terminate via STALLING."""
53
+ lg = LoopGain(max_iterations=None, target_error=None)
54
+ n, hit_guard = _run_to_termination(lg, [100, 8, 8, 8, 8, 8, 8, 8])
55
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
56
+ assert not lg.should_continue()
57
+ assert lg.result.best_error == 8.0 # best-so-far still returned
58
+
59
+
60
+ def test_plateau_above_fast_floor_terminates_without_max_iter():
61
+ """Error drops to 30% of initial (below E_RATIO_CONV=0.5) then plateaus.
62
+ e_ratio<=0.5 with a whole-history negative slope used to pin CONVERGING
63
+ forever. Must now terminate."""
64
+ lg = LoopGain(max_iterations=None, target_error=None)
65
+ n, hit_guard = _run_to_termination(lg, [100, 30, 30, 30, 30, 30, 30, 30])
66
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
67
+ assert not lg.should_continue()
68
+
69
+
70
+ def test_oscillation_below_floor_terminates_without_max_iter():
71
+ """Oscillation entirely below the 10% cumulative floor used to be shadowed
72
+ by FAST_CONVERGE. Must now terminate (OSCILLATING or STALLING)."""
73
+ lg = LoopGain(max_iterations=None, target_error=None)
74
+ n, hit_guard = _run_to_termination(lg, [100, 5, 8, 5, 8, 5, 8, 5, 8])
75
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
76
+ assert not lg.should_continue()
77
+
78
+
79
+ def test_classifier_flags_plateau_after_big_drop_as_terminable():
80
+ """Direct classifier check: a big drop followed by a flat tail must NOT be
81
+ reported as a continue-state (FAST_CONVERGE/CONVERGING)."""
82
+ plateau_low = [100, 8, 8, 8, 8, 8]
83
+ plateau_mid = [100, 30, 30, 30, 30, 30]
84
+ assert classify_trajectory(plateau_low) not in (FAST_CONVERGE, CONVERGING)
85
+ assert classify_trajectory(plateau_mid) not in (FAST_CONVERGE, CONVERGING)
86
+
87
+
88
+ def test_genuine_fast_converge_still_continues():
89
+ """Guard against over-correction: a monotone steep decline that keeps
90
+ hitting new lows must still read FAST_CONVERGE (continue), not be
91
+ prematurely stalled."""
92
+ monotone = [100, 25, 6, 1.5, 0.4, 0.1] # new low every step
93
+ assert classify_trajectory(monotone) == FAST_CONVERGE
94
+
95
+
96
+ def test_genuine_converging_still_continues():
97
+ """A steady decline landing between the two cumulative thresholds must
98
+ still read CONVERGING while it is still hitting new lows."""
99
+ converging = [10.0, 8.0, 6.4, 5.1, 4.1, 3.3] # ~0.8x/step, new low every step
100
+ assert classify_trajectory(converging) == CONVERGING
101
+
102
+
103
+ # ----- Layer 2: bounded default backstop -----
104
+
105
+
106
+ def test_default_max_iterations_is_a_bounded_backstop():
107
+ """The default config must not be able to run unbounded. A never-improving
108
+ loop under all-default construction must terminate at the backstop."""
109
+ lg = LoopGain() # all defaults
110
+ assert lg.max_iterations is not None, "default max_iterations must be bounded"
111
+ # A strictly increasing error never converges, and not every classifier path
112
+ # is guaranteed to stop it early; the backstop must still bound the loop.
113
+ i, hit_guard = _run_to_termination(lg, list(range(1, GUARD + 5)))
114
+ assert not hit_guard, "default backstop failed to bound the loop"
115
+ assert not lg.should_continue()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes