loopgain 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {loopgain-0.4.0 → loopgain-0.4.2}/PKG-INFO +13 -23
  2. {loopgain-0.4.0 → loopgain-0.4.2}/README.md +11 -21
  3. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/_version.py +1 -1
  4. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/classifier.py +8 -3
  5. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/telemetry.py +5 -0
  6. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/PKG-INFO +13 -23
  7. {loopgain-0.4.0 → loopgain-0.4.2}/pyproject.toml +9 -2
  8. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_classifier_mock_validation.py +7 -5
  9. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_classifier_synthetic.py +36 -1
  10. {loopgain-0.4.0 → loopgain-0.4.2}/LICENSE +0 -0
  11. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/__init__.py +0 -0
  12. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/__main__.py +0 -0
  13. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/cli.py +0 -0
  14. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/core.py +0 -0
  15. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/funnel.py +0 -0
  16. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/__init__.py +0 -0
  17. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/autogen.py +0 -0
  18. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/claude_agent_sdk.py +0 -0
  19. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/crewai.py +0 -0
  20. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/langchain.py +0 -0
  21. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/langgraph.py +0 -0
  22. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain/integrations/openai_agents.py +0 -0
  23. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/SOURCES.txt +0 -0
  24. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/dependency_links.txt +0 -0
  25. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/entry_points.txt +0 -0
  26. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/requires.txt +0 -0
  27. {loopgain-0.4.0 → loopgain-0.4.2}/loopgain.egg-info/top_level.txt +0 -0
  28. {loopgain-0.4.0 → loopgain-0.4.2}/setup.cfg +0 -0
  29. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_core.py +0 -0
  30. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_funnel.py +0 -0
  31. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_integrations.py +0 -0
  32. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_stress.py +0 -0
  33. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_telemetry.py +0 -0
  34. {loopgain-0.4.0 → loopgain-0.4.2}/tests/test_termination_safety.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.4.0
4
- Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
3
+ Version: 0.4.2
4
+ Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
7
7
  Project-URL: Homepage, https://loopgain.ai
@@ -49,14 +49,16 @@ Dynamic: license-file
49
49
 
50
50
  # LoopGain
51
51
 
52
- **Barkhausen stability monitor for AI agent loops.**
52
+ **An open-source cost controller for AI agent loops.**
53
53
 
54
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
54
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades instead of running to a fixed `max_iterations` cap.
55
+
56
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
55
57
 
56
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
57
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
58
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
59
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
60
62
 
61
63
  **Home:** [loopgain.ai](https://loopgain.ai)
62
64
 
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
68
70
 
69
71
  ## Why
70
72
 
71
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
73
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
72
74
 
73
75
  ---
74
76
 
@@ -145,7 +147,7 @@ It routes the trajectory into one of five named states:
145
147
 
146
148
  | State | Condition | Action |
147
149
  | --- | --- | --- |
148
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
150
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
149
151
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
150
152
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
151
153
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -161,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
161
163
 
162
164
  ---
163
165
 
164
- ## ETA prediction
165
-
166
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
167
-
168
- ```
169
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
170
- ```
171
-
172
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
173
-
174
- ---
175
-
176
166
  ## Best-so-far rollback
177
167
 
178
168
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -189,10 +179,10 @@ This transforms divergence detection from "abort with garbage" into "abort with
189
179
 
190
180
  ## What LoopGain does and doesn't guarantee
191
181
 
192
- LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
182
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
193
183
 
194
- - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
195
- - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
184
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
196
186
 
197
187
  ---
198
188
 
@@ -224,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
224
214
 
225
215
  ### `lg.eta -> int | None`
226
216
 
227
- Predicted iterations to reach target. `None` when not well-defined.
217
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
228
218
 
229
219
  ### `lg.gain_margin -> float | None`
230
220
 
@@ -1,13 +1,15 @@
1
1
  # LoopGain
2
2
 
3
- **Barkhausen stability monitor for AI agent loops.**
3
+ **An open-source cost controller for AI agent loops.**
4
4
 
5
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
5
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades instead of running to a fixed `max_iterations` cap.
6
+
7
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
6
8
 
7
9
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
8
10
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
9
11
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
10
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
12
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
11
13
 
12
14
  **Home:** [loopgain.ai](https://loopgain.ai)
13
15
 
@@ -19,7 +21,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
19
21
 
20
22
  ## Why
21
23
 
22
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
24
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
23
25
 
24
26
  ---
25
27
 
@@ -96,7 +98,7 @@ It routes the trajectory into one of five named states:
96
98
 
97
99
  | State | Condition | Action |
98
100
  | --- | --- | --- |
99
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
101
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
100
102
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
101
103
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
102
104
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -112,18 +114,6 @@ The decision is **conservative by design**: requiring both statistical significa
112
114
 
113
115
  ---
114
116
 
115
- ## ETA prediction
116
-
117
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
118
-
119
- ```
120
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
121
- ```
122
-
123
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
124
-
125
- ---
126
-
127
117
  ## Best-so-far rollback
128
118
 
129
119
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -140,10 +130,10 @@ This transforms divergence detection from "abort with garbage" into "abort with
140
130
 
141
131
  ## What LoopGain does and doesn't guarantee
142
132
 
143
- LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
133
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
144
134
 
145
- - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
146
- - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
135
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
136
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
147
137
 
148
138
  ---
149
139
 
@@ -175,7 +165,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
175
165
 
176
166
  ### `lg.eta -> int | None`
177
167
 
178
- Predicted iterations to reach target. `None` when not well-defined.
168
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
179
169
 
180
170
  ### `lg.gain_margin -> float | None`
181
171
 
@@ -7,4 +7,4 @@ from here so the value never drifts between ``__version__`` and the
7
7
  ``pyproject.toml``) for each release.
8
8
  """
9
9
 
10
- __version__ = "0.4.0"
10
+ __version__ = "0.4.2"
@@ -184,9 +184,14 @@ def _two_sided_t_p(t_abs: float, df: int) -> float:
184
184
  # exact: cdf_t(t,1) = 0.5 + arctan(t)/pi
185
185
  return 2.0 * (0.5 - math.atan(t_abs) / math.pi)
186
186
  if df == 2:
187
- # exact one-sided survival: 1 - (1 + t²/2)^(-1) doubled
188
- return min(1.0, 2.0 * (1.0 - t_abs / math.sqrt(2.0 + t_abs * t_abs) / 1.0) * 0.5
189
- + 2.0 * (0.5 - 0.5 * t_abs / math.sqrt(2.0 + t_abs * t_abs)))
187
+ # Exact two-sided p-value for Student-t with df=2. The df=2 CDF is
188
+ # F(t) = 1/2 + t / (2·√(2 + t²)), so the one-sided survival is
189
+ # P(T > t) = 1/2 − t / (2·√(2 + t²)) and the two-sided p is
190
+ # 2·P(T > |t|) = 1 − |t| / √(2 + t²).
191
+ # (The previous implementation returned twice this — it required
192
+ # |t| > 6.21 for p<0.05 instead of the correct |t| > 4.30, making
193
+ # the n=4 classifier far too conservative. See
+ # test_two_sided_t_p_df2_exact in tests/test_classifier_synthetic.py.)
194
+ return max(0.0, 1.0 - t_abs / math.sqrt(2.0 + t_abs * t_abs))
190
195
  # Wilson-Hilferty: transform t² ~ F(1, df), then F → chi-square via
191
196
  # cube-root approximation. For our purposes the simpler normal-approx
192
197
  # to the t with the Hill / Abramowitz adjustment is enough.
@@ -178,6 +178,11 @@ def build_payload(
178
178
  "savings_vs_fixed_cap": result.savings_vs_fixed_cap,
179
179
  "convergence_profile_summary": profile_summary,
180
180
  "rollback_triggered": result.outcome in ("oscillating", "diverged"),
181
+ # Index (0-based) of the lowest-error iteration. Lets the receiver
182
+ # derive iterations-to-best (best_index+1) and iterations-past-best
183
+ # (iterations_used-1-best_index) — the "Iteration Waste" view.
184
+ # Privacy-safe: an integer position, no output/error content.
185
+ "best_index": result.best_index,
181
186
  # v2: first computable eta snapshot, for ETA calibration dashboard.
182
187
  # Predicted total iterations = first_eta_at_iteration +
183
188
  # first_eta_prediction; compare to iterations_used to compute the
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.4.0
4
- Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
3
+ Version: 0.4.2
4
+ Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
7
7
  Project-URL: Homepage, https://loopgain.ai
@@ -49,14 +49,16 @@ Dynamic: license-file
49
49
 
50
50
  # LoopGain
51
51
 
52
- **Barkhausen stability monitor for AI agent loops.**
52
+ **An open-source cost controller for AI agent loops.**
53
53
 
54
- Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
54
+ AI agent loops waste time and money when they don't know when to stop. LoopGain measures the loop in real time and stops it the moment it has actually converged — and rolls back before it degrades instead of running to a fixed `max_iterations` cap.
55
+
56
+ > **Across 2,000 paired trials over 10 cells**, LoopGain reduced total API spend by **92.8%** vs `max_iter=20`, dropped median wall-clock latency from 30.9s to 2.1s (**~15×**), preserved output quality on natural-distribution workloads (W1–W4: judge winrate 0.50–0.63, CI excluding null on most cells), and improved output quality on engineered-failure workloads (W5: winrate 0.92–0.95 across three adapters). Weighted-average pairwise preference for LG vs B20 across 1,800 judge comparisons: **0.678**. Zero of six kill criteria fired.
55
57
 
56
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
57
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
58
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
59
- [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
60
62
 
61
63
  **Home:** [loopgain.ai](https://loopgain.ai)
62
64
 
@@ -68,7 +70,7 @@ Works for **any iterative AI workflow with a measurable error signal** — verif
68
70
 
69
71
  ## Why
70
72
 
71
- Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stability monitor based on the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
73
+ Production agent loops universally use `max_iterations=N` as their termination policy. It's the embarrassing default of agentic AI: you either waste compute (loop stops too late) or ship bad output (loop stops too early). LoopGain replaces it with a control-theoretic stop-and-rollback policy grounded in the **Barkhausen criterion** — a foundational result from electrical-engineering feedback-oscillator analysis (1921).
72
74
 
73
75
  ---
74
76
 
@@ -145,7 +147,7 @@ It routes the trajectory into one of five named states:
145
147
 
146
148
  | State | Condition | Action |
147
149
  | --- | --- | --- |
148
- | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
150
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue |
149
151
  | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
150
152
  | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
151
153
  | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
@@ -161,18 +163,6 @@ The decision is **conservative by design**: requiring both statistical significa
161
163
 
162
164
  ---
163
165
 
164
- ## ETA prediction
165
-
166
- When the loop is converging (`Aβ_smooth < 1`), LoopGain produces a closed-form prediction of iterations remaining:
167
-
168
- ```
169
- n_remaining = log(E_target / E_current) / log(Aβ_smooth)
170
- ```
171
-
172
- Available as `lg.eta` mid-loop. Returns `None` when the prediction isn't well-defined (no Aβ yet, target zero, or non-converging gain).
173
-
174
- ---
175
-
176
166
  ## Best-so-far rollback
177
167
 
178
168
  LoopGain keeps a buffer of all observed outputs paired with their error scores. On termination it returns `argmin(error)`, not the last iteration:
@@ -189,10 +179,10 @@ This transforms divergence detection from "abort with garbage" into "abort with
189
179
 
190
180
  ## What LoopGain does and doesn't guarantee
191
181
 
192
- LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **93.5% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
182
+ LoopGain saves money by stopping a loop once it stops improving — fewer iterations, fewer tokens. In our [public benchmark](https://github.com/loopgain-ai/loopgain-bench), that was a **92.8% median cut in API spend** vs `max_iterations=20`, with output quality preserved. Two honest limits:
193
183
 
194
- - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
195
- - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤3.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
184
+ - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
+ - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
196
186
 
197
187
  ---
198
188
 
@@ -224,7 +214,7 @@ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `O
224
214
 
225
215
  ### `lg.eta -> int | None`
226
216
 
227
- Predicted iterations to reach target. `None` when not well-defined.
217
+ Best-effort closed-form estimate of iterations remaining, exposed for instrumentation. Returns `None` whenever it isn't well-defined — which is most of the time on real, jump-dominated loops, so don't depend on it for control.
228
218
 
229
219
  ### `lg.gain_margin -> float | None`
230
220
 
@@ -4,8 +4,10 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "loopgain"
7
- version = "0.4.0"
8
- description = "Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction."
7
+ # Single source of truth: loopgain/_version.py (read dynamically below).
8
+ # Bump the version in that one file per release; this no longer duplicates it.
9
+ dynamic = ["version"]
10
+ description = "An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback."
9
11
  authors = [{name = "Dave Fitzsimmons", email = "hello@loopgain.ai"}]
10
12
  readme = "README.md"
11
13
  license = {text = "Apache-2.0"}
@@ -100,6 +102,11 @@ all = [
100
102
  # zero-dep. Install with `pip install 'loopgain[examples]'`.
101
103
  examples = ["anthropic>=0.40.0"]
102
104
 
105
+ [tool.setuptools.dynamic]
106
+ # Reads the literal ``__version__ = "x.y.z"`` from loopgain/_version.py via AST
107
+ # (no import), so pyproject.toml never duplicates the version string.
108
+ version = {attr = "loopgain._version.__version__"}
109
+
103
110
  [tool.setuptools.packages.find]
104
111
  where = ["."]
105
112
  include = ["loopgain*"]
@@ -223,11 +223,13 @@ def test_loop_length_robustness():
223
223
  - n=8 (df=6): ≥ 90% (the default real-loop length)
224
224
  - n=12 (df=10): ≥ 95%
225
225
  """
226
- # n=4 is intentionally excluded: with df=2 the t-test requires |t|>4.3
227
- # for p<0.05, which is a fundamental statistical-power floor. The
228
- # classifier correctly falls back to STALLING (insufficient evidence)
229
- # for most convergent trajectories at n=4. Documented as a
230
- # min-recommended-iterations limit, not a bug.
226
+ # n=4 is intentionally excluded from the high-accuracy thresholds below:
227
+ # with df=2 the t-test correctly requires |t|>4.30 for p<0.05 (see
228
+ # test_two_sided_t_p_df2_exact), a fundamental statistical-power floor at
229
+ # this length. The classifier falls back to cumulative E_ratio when the
230
+ # slope test is underpowered. This is a min-recommended-iterations limit,
231
+ # not a bug. (Historically the df=2 p-value was computed at 2x its true
232
+ # value, requiring |t|>6.21 and worsening this floor — now fixed.)
231
233
  LEN_THRESHOLDS = {6: 0.80, 8: 0.90, 12: 0.95}
232
234
  for n, threshold in LEN_THRESHOLDS.items():
233
235
  for gen, expected in [
@@ -33,7 +33,42 @@ from loopgain import (
33
33
  classify_trajectory,
34
34
  extract_features,
35
35
  )
36
- from loopgain.classifier import _ols_slope_and_p
36
+ from loopgain.classifier import _ols_slope_and_p, _two_sided_t_p
37
+
38
+
39
+ # ----- Two-sided t p-value closed forms -----
40
+
41
+
42
+ def test_two_sided_t_p_df1_exact():
43
+ """df=1 is the Cauchy distribution: two-sided p = 1 - 2·atan(t)/pi."""
44
+ for t in (0.0, 0.5, 1.0, 2.0, 5.0, 12.706):
45
+ expected = 1.0 - 2.0 * math.atan(t) / math.pi
46
+ assert _two_sided_t_p(t, 1) == pytest.approx(expected, abs=1e-9)
47
+ # t=1 is the median of |T| for df=1 → two-sided p = 0.5.
48
+ assert _two_sided_t_p(1.0, 1) == pytest.approx(0.5, abs=1e-9)
49
+
50
+
51
+ def test_two_sided_t_p_df2_exact():
52
+ """df=2 closed form: two-sided p = 1 - |t|/sqrt(2 + t^2).
53
+
54
+ Regression guard for the doubled-p bug: the critical value for p=0.05
55
+ at df=2 is t=4.302653. The previous implementation returned ~0.10 here
56
+ (2x too large), which forced |t|>6.21 for significance and made the n=4
57
+ classifier far too conservative.
58
+ """
59
+ for t in (0.0, 0.5, 1.0, 2.0, 5.0):
60
+ expected = 1.0 - t / math.sqrt(2.0 + t * t)
61
+ assert _two_sided_t_p(t, 2) == pytest.approx(expected, abs=1e-9)
62
+ # The exact 5% two-sided critical value for df=2.
63
+ assert _two_sided_t_p(4.302653, 2) == pytest.approx(0.05, abs=1e-4)
64
+ # p is a probability: monotone non-increasing in t, bounded to [0, 1].
65
+ assert _two_sided_t_p(0.0, 2) == pytest.approx(1.0, abs=1e-9)
66
+ prev = 1.1
67
+ for t in (0.0, 0.5, 1.0, 2.0, 4.0, 8.0, 50.0):
68
+ p = _two_sided_t_p(t, 2)
69
+ assert 0.0 <= p <= 1.0
70
+ assert p <= prev + 1e-12
71
+ prev = p
37
72
 
38
73
 
39
74
  # ----- OLS slope / p-value building blocks -----
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes