loopgain 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {loopgain-0.3.0 → loopgain-0.4.0}/PKG-INFO +34 -3
  2. {loopgain-0.3.0 → loopgain-0.4.0}/README.md +33 -2
  3. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/_version.py +1 -1
  4. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/classifier.py +37 -3
  5. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/core.py +16 -3
  6. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/PKG-INFO +34 -3
  7. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/SOURCES.txt +2 -1
  8. {loopgain-0.3.0 → loopgain-0.4.0}/pyproject.toml +1 -1
  9. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_classifier_synthetic.py +14 -4
  10. loopgain-0.4.0/tests/test_termination_safety.py +115 -0
  11. {loopgain-0.3.0 → loopgain-0.4.0}/LICENSE +0 -0
  12. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/__init__.py +0 -0
  13. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/__main__.py +0 -0
  14. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/cli.py +0 -0
  15. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/funnel.py +0 -0
  16. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/__init__.py +0 -0
  17. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/autogen.py +0 -0
  18. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/claude_agent_sdk.py +0 -0
  19. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/crewai.py +0 -0
  20. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/langchain.py +0 -0
  21. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/langgraph.py +0 -0
  22. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/integrations/openai_agents.py +0 -0
  23. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain/telemetry.py +0 -0
  24. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/dependency_links.txt +0 -0
  25. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/entry_points.txt +0 -0
  26. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/requires.txt +0 -0
  27. {loopgain-0.3.0 → loopgain-0.4.0}/loopgain.egg-info/top_level.txt +0 -0
  28. {loopgain-0.3.0 → loopgain-0.4.0}/setup.cfg +0 -0
  29. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_classifier_mock_validation.py +0 -0
  30. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_core.py +0 -0
  31. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_funnel.py +0 -0
  32. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_integrations.py +0 -0
  33. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_stress.py +0 -0
  34. {loopgain-0.3.0 → loopgain-0.4.0}/tests/test_telemetry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
@@ -108,6 +108,28 @@ print(result.savings_vs_fixed_cap)
108
108
 
109
109
  ---
110
110
 
111
+ ## Defining your error signal
112
+
113
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
114
+
115
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
116
+
117
+ | Loop | Error signal = |
118
+ | --- | --- |
119
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
120
+ | JSON / structured extraction | number of **schema violations** |
121
+ | RAG with self-correction | number of **required facts still missing** |
122
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
123
+ | Lint / format loop | **lint error count** |
124
+
125
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
126
+
127
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
128
+
129
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
130
+
131
+ ---
132
+
111
133
  ## How it works
112
134
 
113
135
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -165,14 +187,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
165
187
 
166
188
  ---
167
189
 
190
+ ## What LoopGain does and doesn't guarantee
191
+
192
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
193
+
194
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
195
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
196
+
197
+ ---
198
+
168
199
  ## API reference
169
200
 
170
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
201
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
171
202
 
172
203
  Construct the monitor.
173
204
 
174
205
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
175
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
206
+ - `max_iterations` — Hard safety backstop. Default `50`, so a loop built with the defaults can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
176
207
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
177
208
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
178
209
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -59,6 +59,28 @@ print(result.savings_vs_fixed_cap)
59
59
 
60
60
  ---
61
61
 
62
+ ## Defining your error signal
63
+
64
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
65
+
66
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
67
+
68
+ | Loop | Error signal = |
69
+ | --- | --- |
70
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
71
+ | JSON / structured extraction | number of **schema violations** |
72
+ | RAG with self-correction | number of **required facts still missing** |
73
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
74
+ | Lint / format loop | **lint error count** |
75
+
76
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
77
+
78
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
79
+
80
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
81
+
82
+ ---
83
+
62
84
  ## How it works
63
85
 
64
86
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -116,14 +138,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
116
138
 
117
139
  ---
118
140
 
141
+ ## What LoopGain does and doesn't guarantee
142
+
143
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
144
+
145
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
146
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
147
+
148
+ ---
149
+
119
150
  ## API reference
120
151
 
121
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
152
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
122
153
 
123
154
  Construct the monitor.
124
155
 
125
156
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
126
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
157
+ - `max_iterations` — Hard safety backstop. Default `50`, so a loop built with the defaults can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
127
158
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
128
159
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
129
160
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -7,4 +7,4 @@ from here so the value never drifts between ``__version__`` and the
7
7
  ``pyproject.toml``) for each release.
8
8
  """
9
9
 
10
- __version__ = "0.3.0"
10
+ __version__ = "0.4.0"
@@ -66,6 +66,20 @@ DEFAULT_OSC_STD_THRESHOLD = 0.30
66
66
  # for the oscillation gate.
67
67
  DEFAULT_SLOPE_TOL = 0.05
68
68
 
69
+ # Liveness gate: number of iterations a loop may go without achieving a new
70
+ # best (lowest) error before its "continue" verdicts (FAST_CONVERGE /
71
+ # CONVERGING) are withdrawn so it can reach STALLING / OSCILLATING and
72
+ # terminate. Without this, a loop that drops a lot and then plateaus or
73
+ # oscillates *below* the cumulative thresholds keeps its historical win
74
+ # forever and never terminates. Derivation: the continue-states are claims
75
+ # about *ongoing* progress; cumulative reduction (E_current/E_first) and a
76
+ # whole-history slope are claims about the *past* and do not expire. We treat
77
+ # "no new low in N steps" as the loop having stopped improving. N is small
78
+ # (3) so a sustained plateau is caught quickly, but the consecutive-STALLING
79
+ # termination rule (2 readings) still protects a loop that briefly stalls and
80
+ # then resumes hitting new lows.
81
+ DEFAULT_STALL_PATIENCE = 3
82
+
69
83
  # Numerical floor to avoid log(0).
70
84
  _EPS = 1e-12
71
85
 
@@ -85,6 +99,7 @@ class TrajectoryThresholds:
85
99
  div_margin: float = DEFAULT_DIV_MARGIN
86
100
  osc_std_threshold: float = DEFAULT_OSC_STD_THRESHOLD
87
101
  slope_tol: float = DEFAULT_SLOPE_TOL
102
+ stall_patience: int = DEFAULT_STALL_PATIENCE
88
103
 
89
104
 
90
105
  @dataclass(frozen=True)
@@ -276,6 +291,18 @@ def classify_trajectory(
276
291
 
277
292
  f = extract_features(error_history)
278
293
 
294
+ # Liveness signal: how many iterations since the loop last achieved a new
295
+ # best (lowest) error. A genuinely converging loop keeps hitting new lows,
296
+ # so this stays small; a loop that dropped a lot and then plateaued (or is
297
+ # oscillating below the cumulative thresholds) has a large value. We use it
298
+ # to withdraw the "continue" verdicts (FAST_CONVERGE / CONVERGING) once a
299
+ # loop has stopped improving, so it can reach STALLING / OSCILLATING and
300
+ # terminate instead of riding its historical cumulative win forever. See
301
+ # DEFAULT_STALL_PATIENCE.
302
+ hist = list(error_history)
303
+ iters_since_best = (n - 1) - hist.index(min(hist))
304
+ still_improving = iters_since_best < th.stall_patience
305
+
279
306
  # n == 2 special case: with two observations, the slope is well defined
280
307
  # but its p-value is not (zero residual degrees of freedom). Fall back to
281
308
  # the sign of the change. This is the same conservatism as a Wilcoxon
@@ -291,13 +318,20 @@ def classify_trajectory(
291
318
  return STALLING
292
319
 
293
320
  # Order matters: FAST_CONVERGE precedes CONVERGING; both precede the
294
- # remaining gates.
295
- if f.e_ratio <= th.e_ratio_fast:
321
+ # remaining gates. Both continue-verdicts are gated on `still_improving`:
322
+ # a loop that has stopped hitting new lows is no longer "converging" no
323
+ # matter how large its historical cumulative reduction was, and must be
324
+ # allowed to fall through to STALLING / OSCILLATING so it can terminate.
325
+ if f.e_ratio <= th.e_ratio_fast and still_improving:
296
326
  return FAST_CONVERGE
297
327
 
298
328
  slope_significant = f.slope_p < th.p_sig
299
329
 
300
- if f.slope_log < 0 and (slope_significant or f.e_ratio <= th.e_ratio_conv):
330
+ if (
331
+ f.slope_log < 0
332
+ and still_improving
333
+ and (slope_significant or f.e_ratio <= th.e_ratio_conv)
334
+ ):
301
335
  return CONVERGING
302
336
 
303
337
  if f.slope_log > 0 and slope_significant and f.e_ratio > 1.0 + th.div_margin:
@@ -40,6 +40,16 @@ DEFAULT_STALLING = 0.95
40
40
  DEFAULT_OSCILLATING_UPPER = 1.05
41
41
 
42
42
 
43
+ # Bounded-by-default safety backstop. The loop should normally terminate on a
44
+ # stability verdict (target met / oscillating / diverging / stalled) long
45
+ # before this; it exists only so the library can never run truly unbounded if
46
+ # a loop never converges and never stalls (e.g. infinitesimal-but-real progress
47
+ # with target_error=None). Generous relative to typical loop lengths (the
48
+ # bench capped at 20). Pass max_iterations=None to opt into a fully unbounded
49
+ # loop, or a smaller integer to cap tighter.
50
+ DEFAULT_MAX_ITERATIONS = 50
51
+
52
+
43
53
  # State names. Exported for use in switch/case in user code.
44
54
  INIT = "INIT"
45
55
  FAST_CONVERGE = "FAST_CONVERGE"
@@ -165,8 +175,11 @@ class LoopGain:
165
175
  tests, no validation errors, etc.). Pass ``None`` to disable
166
176
  the short-circuit entirely and rely only on stability
167
177
  detection and ``max_iterations``.
168
- max_iterations: Hard safety cap. Default ``None`` (rely on
169
- stability detection). Recommended ~20-50 for production.
178
+ max_iterations: Hard safety backstop. Default
179
+ ``DEFAULT_MAX_ITERATIONS`` (50) so the loop can never run
180
+ unbounded; normally a stability verdict terminates it long
181
+ before this. Pass ``None`` to opt into a fully unbounded loop,
182
+ or a smaller integer to cap tighter.
170
183
  thresholds: Custom ``ThresholdBands`` (legacy single-feature
171
184
  classifier only). Default is the canonical 0.3 / 0.85 / 0.95 /
172
185
  1.05. Ignored when ``classifier='trajectory'``.
@@ -190,7 +203,7 @@ class LoopGain:
190
203
  def __init__(
191
204
  self,
192
205
  target_error: Optional[float] = 0.0,
193
- max_iterations: Optional[int] = None,
206
+ max_iterations: Optional[int] = DEFAULT_MAX_ITERATIONS,
194
207
  thresholds: Optional[ThresholdBands] = None,
195
208
  trajectory_thresholds: Optional[TrajectoryThresholds] = None,
196
209
  classifier: str = "trajectory",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
@@ -108,6 +108,28 @@ print(result.savings_vs_fixed_cap)
108
108
 
109
109
  ---
110
110
 
111
+ ## Defining your error signal
112
+
113
+ The one thing you provide is the **error signal**: a single non-negative number, every iteration, that says how wrong the current output is. **Lower is better; zero means done.** LoopGain doesn't know what your loop does — it just watches that number's trajectory and decides whether to keep going, stop, or roll back.
114
+
115
+ Your loop already has some way of knowing the output isn't good yet (or it wouldn't keep revising). Turn that into a number:
116
+
117
+ | Loop | Error signal = |
118
+ | --- | --- |
119
+ | Agentic coding (write code → run tests) | number of **failing tests** (10 → 3 → 0) |
120
+ | JSON / structured extraction | number of **schema violations** |
121
+ | RAG with self-correction | number of **required facts still missing** |
122
+ | Self-refinement with an LLM judge | judge's **gap to target** (e.g. `10 − quality_score`) |
123
+ | Lint / format loop | **lint error count** |
124
+
125
+ The only rules: non-negative, and **smaller as the output gets better**. Returning the raw list of problems works directly — `observe()` uses its length as the magnitude (e.g. hand it the list of failing tests).
126
+
127
+ If your quality is fuzzy and has no natural "zero," run with `target_error=None`: LoopGain then stops when the number **stops improving**, wherever that plateau is, instead of waiting for an exact target.
128
+
129
+ Every stop/continue decision is made from this one number, so **LoopGain is only as good as the error signal you give it** — pick one that genuinely tracks output quality.
130
+
131
+ ---
132
+
111
133
  ## How it works
112
134
 
113
135
  LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
@@ -165,14 +187,23 @@ This transforms divergence detection from "abort with garbage" into "abort with
165
187
 
166
188
  ---
167
189
 
190
+ ## What LoopGain does and doesn't guarantee
191
+
192
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
193
+
194
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
195
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
196
+
197
+ ---
198
+
168
199
  ## API reference
169
200
 
170
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
201
+ ### `LoopGain(target_error=0.0, max_iterations=50, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
171
202
 
172
203
  Construct the monitor.
173
204
 
174
205
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
175
- - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
206
+ - `max_iterations` — Hard safety backstop. Default `50`, so a loop built with the defaults can never run unbounded; a stability verdict normally terminates it well before this. Pass `None` to opt into a fully unbounded loop (only safe if your loop is guaranteed to reach `target_error` or a stop-state), or a smaller integer to cap tighter.
176
207
  - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
177
208
  - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
178
209
  - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
@@ -28,4 +28,5 @@ tests/test_core.py
28
28
  tests/test_funnel.py
29
29
  tests/test_integrations.py
30
30
  tests/test_stress.py
31
- tests/test_telemetry.py
31
+ tests/test_telemetry.py
32
+ tests/test_termination_safety.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "loopgain"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction."
9
9
  authors = [{name = "Dave Fitzsimmons", email = "hello@loopgain.ai"}]
10
10
  readme = "README.md"
@@ -158,12 +158,22 @@ def test_pure_stall_no_trend():
158
158
  )
159
159
 
160
160
 
161
- def test_floor_convergence_already_at_target():
162
- """If error is already 0 at observation 1, classifier returns
163
- FAST_CONVERGE (cumulative reduction to floor)."""
161
+ def test_floor_convergence_already_flat_at_floor_stalls():
162
+ """A loop already pinned at the numerical floor from iteration 0, flat,
163
+ classifies as STALLING not FAST_CONVERGE.
164
+
165
+ Updated 2026-06 with the liveness-gate fix (see DEFAULT_STALL_PATIENCE).
166
+ Previously this returned FAST_CONVERGE on the strength of cumulative
167
+ reduction alone — but FAST_CONVERGE is a *continue* verdict, so an
168
+ at-floor flat loop would have continued (and, with no max_iterations,
169
+ run unbounded) instead of stopping. STALLING is the correct verdict: the
170
+ loop has made no progress for `stall_patience` iterations, so it
171
+ terminates via the consecutive-stall rule and returns best-so-far (the
172
+ floor value — a fine answer). In real use the `target_error`
173
+ short-circuit (next test) handles the at-target case directly."""
164
174
  trajectory = [1e-15] * 5
165
175
  state = classify_trajectory(trajectory)
166
- assert state == FAST_CONVERGE
176
+ assert state == STALLING
167
177
 
168
178
 
169
179
  def test_target_met_short_circuit():
@@ -0,0 +1,115 @@
1
+ """Termination-safety tests: a loop must not run unbounded.
2
+
3
+ Regression coverage for the FAST_CONVERGE/CONVERGING liveness bug (2026-06):
4
+ the trajectory classifier used *cumulative* reduction (E_current/E_first) and a
5
+ *whole-history* slope to emit the "continue" verdicts FAST_CONVERGE and
6
+ CONVERGING. A loop that reduced its error and then plateaued (or oscillated)
7
+ *below* the cumulative thresholds kept its historical win forever — it was
8
+ pinned in a continue-state, never reached STALLING/OSCILLATING, and with the
9
+ (then-default) max_iterations=None it ran forever.
10
+
11
+ The fix has two independent layers, each tested here:
12
+ 1. A liveness gate on the continue-verdicts: a loop that has not achieved a
13
+ new best error in `stall_patience` iterations is no longer treated as
14
+ "improving", so it can reach STALLING/OSCILLATING and terminate.
15
+ 2. A bounded default max_iterations backstop, so the library can never run
16
+ truly unbounded even if a future classifier path regresses.
17
+
18
+ Output quality was never at risk (best-so-far rollback held the good answer);
19
+ the bug was a *liveness* failure — the loop never returned to hand it back.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import pytest
25
+
26
+ from loopgain import CONVERGING, FAST_CONVERGE, LoopGain, classify_trajectory
27
+
28
+ # Hard test guard: large enough that a *correctly* terminating loop never hits
29
+ # it, small enough that a regression (unbounded loop) fails fast instead of
30
+ # hanging the suite.
31
+ GUARD = 500
32
+
33
+
34
+ def _run_to_termination(lg: LoopGain, errors, guard: int = GUARD):
35
+ """Drive a loop, plateauing/repeating the last error, until it terminates
36
+ or hits the guard. Returns (iterations_run, hit_guard)."""
37
+ i = 0
38
+ while lg.should_continue():
39
+ e = errors[i] if i < len(errors) else errors[-1]
40
+ lg.observe(e, output=f"o{i}")
41
+ i += 1
42
+ if i >= guard:
43
+ return i, True
44
+ return i, False
45
+
46
+
47
+ # ----- Layer 1: classifier liveness gate -----
48
+
49
+
50
+ def test_plateau_below_fast_floor_terminates_without_max_iter():
51
+ """Error drops to 8% of initial then plateaus. e_ratio<=0.1 used to pin
52
+ FAST_CONVERGE forever. Must now terminate via STALLING."""
53
+ lg = LoopGain(max_iterations=None, target_error=None)
54
+ n, hit_guard = _run_to_termination(lg, [100, 8, 8, 8, 8, 8, 8, 8])
55
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
56
+ assert not lg.should_continue()
57
+ assert lg.result.best_error == 8.0 # best-so-far still returned
58
+
59
+
60
+ def test_plateau_above_fast_floor_terminates_without_max_iter():
61
+ """Error drops to 30% of initial (below E_RATIO_CONV=0.5) then plateaus.
62
+ e_ratio<=0.5 with a whole-history negative slope used to pin CONVERGING
63
+ forever. Must now terminate."""
64
+ lg = LoopGain(max_iterations=None, target_error=None)
65
+ n, hit_guard = _run_to_termination(lg, [100, 30, 30, 30, 30, 30, 30, 30])
66
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
67
+ assert not lg.should_continue()
68
+
69
+
70
+ def test_oscillation_below_floor_terminates_without_max_iter():
71
+ """Oscillation entirely below the 10% cumulative floor used to be shadowed
72
+ by FAST_CONVERGE. Must now terminate (OSCILLATING or STALLING)."""
73
+ lg = LoopGain(max_iterations=None, target_error=None)
74
+ n, hit_guard = _run_to_termination(lg, [100, 5, 8, 5, 8, 5, 8, 5, 8])
75
+ assert not hit_guard, f"loop did not terminate within {GUARD} iters (unbounded)"
76
+ assert not lg.should_continue()
77
+
78
+
79
+ def test_classifier_flags_plateau_after_big_drop_as_terminable():
80
+ """Direct classifier check: a big drop followed by a flat tail must NOT be
81
+ reported as a continue-state (FAST_CONVERGE/CONVERGING)."""
82
+ plateau_low = [100, 8, 8, 8, 8, 8]
83
+ plateau_mid = [100, 30, 30, 30, 30, 30]
84
+ assert classify_trajectory(plateau_low) not in (FAST_CONVERGE, CONVERGING)
85
+ assert classify_trajectory(plateau_mid) not in (FAST_CONVERGE, CONVERGING)
86
+
87
+
88
+ def test_genuine_fast_converge_still_continues():
89
+ """Guard against over-correction: a monotone steep decline that keeps
90
+ hitting new lows must still read FAST_CONVERGE (continue), not be
91
+ prematurely stalled."""
92
+ monotone = [100, 25, 6, 1.5, 0.4, 0.1] # new low every step
93
+ assert classify_trajectory(monotone) == FAST_CONVERGE
94
+
95
+
96
+ def test_genuine_converging_still_continues():
97
+ """A steady decline landing between the two cumulative thresholds must
98
+ still read CONVERGING while it is still hitting new lows."""
99
+ converging = [10.0, 8.0, 6.4, 5.1, 4.1, 3.3] # ~0.8x/step, new low every step
100
+ assert classify_trajectory(converging) == CONVERGING
101
+
102
+
103
+ # ----- Layer 2: bounded default backstop -----
104
+
105
+
106
+ def test_default_max_iterations_is_a_bounded_backstop():
107
+ """The default config must not be able to run unbounded. A never-improving
108
+ loop under all-default construction must terminate at or before the backstop."""
109
+ lg = LoopGain() # all defaults
110
+ assert lg.max_iterations is not None, "default max_iterations must be bounded"
111
+ # A strictly increasing error never converges or stalls into an early
112
+ # best-so-far return under any classifier path; the backstop must still stop it.
113
+ i, hit_guard = _run_to_termination(lg, list(range(1, GUARD + 5)))
114
+ assert not hit_guard, "default backstop failed to bound the loop"
115
+ assert not lg.should_continue()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes