loopgain 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loopgain-0.4.1 → loopgain-0.4.3}/PKG-INFO +3 -2
- {loopgain-0.4.1 → loopgain-0.4.3}/README.md +2 -1
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/_version.py +1 -1
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/classifier.py +8 -3
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/core.py +15 -1
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/telemetry.py +67 -20
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/PKG-INFO +3 -2
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_classifier_mock_validation.py +7 -5
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_classifier_synthetic.py +36 -1
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_telemetry.py +114 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/LICENSE +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/__init__.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/__main__.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/cli.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/funnel.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/__init__.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/autogen.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/claude_agent_sdk.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/crewai.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/langchain.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/langgraph.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/openai_agents.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/SOURCES.txt +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/dependency_links.txt +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/entry_points.txt +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/requires.txt +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/top_level.txt +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/pyproject.toml +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/setup.cfg +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_core.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_funnel.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_integrations.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_stress.py +0 -0
- {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_termination_safety.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgain
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
|
|
5
5
|
Author-email: Dave Fitzsimmons <hello@loopgain.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -58,7 +58,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
|
|
|
58
58
|
[](https://pypi.org/project/loopgain/)
|
|
59
59
|
[](https://pypi.org/project/loopgain/)
|
|
60
60
|
[](LICENSE)
|
|
61
|
-
[](tests/)
|
|
62
62
|
|
|
63
63
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
64
64
|
|
|
@@ -183,6 +183,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
|
|
|
183
183
|
|
|
184
184
|
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
185
185
|
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
186
|
+
- **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
|
|
186
187
|
|
|
187
188
|
---
|
|
188
189
|
|
|
@@ -9,7 +9,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
|
|
|
9
9
|
[](https://pypi.org/project/loopgain/)
|
|
10
10
|
[](https://pypi.org/project/loopgain/)
|
|
11
11
|
[](LICENSE)
|
|
12
|
-
[](tests/)
|
|
13
13
|
|
|
14
14
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
15
15
|
|
|
@@ -134,6 +134,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
|
|
|
134
134
|
|
|
135
135
|
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
136
136
|
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
137
|
+
- **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
|
|
137
138
|
|
|
138
139
|
---
|
|
139
140
|
|
|
@@ -184,9 +184,14 @@ def _two_sided_t_p(t_abs: float, df: int) -> float:
|
|
|
184
184
|
# exact: cdf_t(t,1) = 0.5 + arctan(t)/pi
|
|
185
185
|
return 2.0 * (0.5 - math.atan(t_abs) / math.pi)
|
|
186
186
|
if df == 2:
|
|
187
|
-
#
|
|
188
|
-
|
|
189
|
-
|
|
187
|
+
# Exact two-sided p-value for Student-t with df=2. The df=2 CDF is
|
|
188
|
+
# F(t) = 1/2 + t / (2·√(2 + t²)), so the one-sided survival is
|
|
189
|
+
# P(T > t) = 1/2 − t / (2·√(2 + t²)) and the two-sided p is
|
|
190
|
+
# 2·P(T > |t|) = 1 − |t| / √(2 + t²).
|
|
191
|
+
# (The previous implementation returned twice this — it required
|
|
192
|
+
# |t| > 6.21 for p<0.05 instead of the correct |t| > 4.30, making
|
|
193
|
+
# the n=4 classifier far too conservative. See test_classifier.)
|
|
194
|
+
return max(0.0, 1.0 - t_abs / math.sqrt(2.0 + t_abs * t_abs))
|
|
190
195
|
# Wilson-Hilferty: transform t² ~ F(1, df), then F → chi-square via
|
|
191
196
|
# cube-root approximation. For our purposes the simpler normal-approx
|
|
192
197
|
# to the t with the Hill / Abramowitz adjustment is enough.
|
|
@@ -514,6 +514,8 @@ class LoopGain:
|
|
|
514
514
|
loop_type: Optional[str] = None,
|
|
515
515
|
team: Optional[str] = None,
|
|
516
516
|
include_per_iteration: bool = True,
|
|
517
|
+
retries: int = 2,
|
|
518
|
+
retry_backoff: float = 0.25,
|
|
517
519
|
) -> bool:
|
|
518
520
|
"""Send anonymized telemetry to a receiver endpoint.
|
|
519
521
|
|
|
@@ -544,6 +546,12 @@ class LoopGain:
|
|
|
544
546
|
per-iteration Aβ + error trajectories (capped) so the
|
|
545
547
|
dashboard's Loop Detail scrubber works. Set ``False`` to
|
|
546
548
|
send only aggregate summary stats.
|
|
549
|
+
retries: Additional attempts if a send fails *transiently*
|
|
550
|
+
(timeout, connection error, 5xx/429). Default 2 (up to 3
|
|
551
|
+
attempts). Set to 0 for single-shot. Deterministic failures
|
|
552
|
+
(bad token, etc.) are never retried.
|
|
553
|
+
retry_backoff: Base seconds between attempts; the nth retry waits
|
|
554
|
+
``retry_backoff * n``. Default 0.25.
|
|
547
555
|
|
|
548
556
|
Returns:
|
|
549
557
|
``True`` on 2xx response, ``False`` otherwise.
|
|
@@ -572,5 +580,11 @@ class LoopGain:
|
|
|
572
580
|
include_per_iteration=include_per_iteration,
|
|
573
581
|
)
|
|
574
582
|
return send_payload(
|
|
575
|
-
endpoint,
|
|
583
|
+
endpoint,
|
|
584
|
+
token,
|
|
585
|
+
payload,
|
|
586
|
+
timeout=timeout,
|
|
587
|
+
allow_insecure=allow_insecure,
|
|
588
|
+
retries=retries,
|
|
589
|
+
retry_backoff=retry_backoff,
|
|
576
590
|
)
|
|
@@ -22,7 +22,9 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
import json
|
|
24
24
|
import math
|
|
25
|
+
import socket
|
|
25
26
|
import statistics
|
|
27
|
+
import time
|
|
26
28
|
import urllib.error
|
|
27
29
|
import urllib.request
|
|
28
30
|
from datetime import datetime, timezone
|
|
@@ -178,6 +180,11 @@ def build_payload(
|
|
|
178
180
|
"savings_vs_fixed_cap": result.savings_vs_fixed_cap,
|
|
179
181
|
"convergence_profile_summary": profile_summary,
|
|
180
182
|
"rollback_triggered": result.outcome in ("oscillating", "diverged"),
|
|
183
|
+
# Index (0-based) of the lowest-error iteration. Lets the receiver
|
|
184
|
+
# derive iterations-to-best (best_index+1) and iterations-past-best
|
|
185
|
+
# (iterations_used-1-best_index) — the "Iteration Waste" view.
|
|
186
|
+
# Privacy-safe: an integer position, no output/error content.
|
|
187
|
+
"best_index": result.best_index,
|
|
181
188
|
# v2: first computable eta snapshot, for ETA calibration dashboard.
|
|
182
189
|
# Predicted total iterations = first_eta_at_iteration +
|
|
183
190
|
# first_eta_prediction; compare to iterations_used to compute the
|
|
@@ -213,18 +220,43 @@ def build_payload(
|
|
|
213
220
|
return payload
|
|
214
221
|
|
|
215
222
|
|
|
223
|
+
def _is_transient(exc: BaseException) -> bool:
|
|
224
|
+
"""Is this send failure worth retrying?
|
|
225
|
+
|
|
226
|
+
Transient = timeout, connection/DNS error, or a 5xx/429 from the server —
|
|
227
|
+
a later attempt might succeed. Deterministic failures (4xx other than 429,
|
|
228
|
+
a refused redirect) will never succeed on retry, so they are *not*
|
|
229
|
+
transient and we give up immediately.
|
|
230
|
+
"""
|
|
231
|
+
if isinstance(exc, urllib.error.HTTPError): # subclass of URLError — check first
|
|
232
|
+
return exc.code >= 500 or exc.code == 429
|
|
233
|
+
return isinstance(exc, (TimeoutError, socket.timeout, urllib.error.URLError, OSError))
|
|
234
|
+
|
|
235
|
+
|
|
216
236
|
def send_payload(
|
|
217
237
|
endpoint: str,
|
|
218
238
|
token: str,
|
|
219
239
|
payload: dict[str, Any],
|
|
220
240
|
timeout: float = 2.0,
|
|
221
241
|
allow_insecure: bool = False,
|
|
242
|
+
retries: int = 2,
|
|
243
|
+
retry_backoff: float = 0.25,
|
|
222
244
|
) -> bool:
|
|
223
245
|
"""POST a telemetry payload to the given endpoint.
|
|
224
246
|
|
|
225
247
|
Best-effort: errors are swallowed; never raises. Returns ``True`` if
|
|
226
248
|
the server returned a 2xx status, ``False`` otherwise.
|
|
227
249
|
|
|
250
|
+
A single send is one HTTP POST with a ``timeout``-second deadline. The
|
|
251
|
+
warm round-trip to the hosted receiver is ~150 ms, so the default 2 s
|
|
252
|
+
timeout has wide headroom; the failure mode in practice is a *transient*
|
|
253
|
+
outlier (a cold database first-write, a momentary network blip) that
|
|
254
|
+
blows past it. Because a low-frequency caller may send only one aggregate
|
|
255
|
+
per run, a single dropped send loses that whole run's data — so a transient
|
|
256
|
+
failure is retried up to ``retries`` times with a short linear backoff.
|
|
257
|
+
Deterministic failures (bad token, malformed payload, refused redirect)
|
|
258
|
+
are *not* retried. Still best-effort throughout: the loop never raises.
|
|
259
|
+
|
|
228
260
|
Args:
|
|
229
261
|
endpoint: Telemetry receiver URL (e.g.,
|
|
230
262
|
``https://telemetry.loopgain.ai/v1/aggregate``). Must use
|
|
@@ -235,13 +267,18 @@ def send_payload(
|
|
|
235
267
|
token: Bearer token issued by the receiver. Identifies the customer
|
|
236
268
|
account; rotatable; not linked to any production secrets.
|
|
237
269
|
payload: Dict from ``build_payload``.
|
|
238
|
-
timeout: Per-
|
|
270
|
+
timeout: Per-attempt timeout in seconds. Default 2.0.
|
|
239
271
|
allow_insecure: If ``True``, permit ``http://`` endpoints. Intended
|
|
240
272
|
for local development against a self-hosted receiver on
|
|
241
273
|
``http://localhost``. Default ``False``.
|
|
274
|
+
retries: Number of *additional* attempts after the first if the send
|
|
275
|
+
fails transiently. Default 2 (so up to 3 attempts total). Set to
|
|
276
|
+
0 to restore single-shot behavior.
|
|
277
|
+
retry_backoff: Base seconds to sleep between attempts; the nth retry
|
|
278
|
+
waits ``retry_backoff * n`` (0.25 s, 0.50 s, …). Default 0.25.
|
|
242
279
|
|
|
243
280
|
Returns:
|
|
244
|
-
``True`` on 2xx response, ``False`` otherwise.
|
|
281
|
+
``True`` on a 2xx response, ``False`` otherwise.
|
|
245
282
|
"""
|
|
246
283
|
# Refuse to attach the bearer token to anything but http(s); silently
|
|
247
284
|
# best-effort so a misconfigured endpoint can't break the user's loop.
|
|
@@ -258,23 +295,33 @@ def send_payload(
|
|
|
258
295
|
|
|
259
296
|
try:
|
|
260
297
|
body = json.dumps(payload).encode("utf-8")
|
|
261
|
-
req = urllib.request.Request(
|
|
262
|
-
endpoint,
|
|
263
|
-
data=body,
|
|
264
|
-
method="POST",
|
|
265
|
-
headers={
|
|
266
|
-
"Content-Type": "application/json",
|
|
267
|
-
"Authorization": f"Bearer {token}",
|
|
268
|
-
"User-Agent": f"loopgain/{LIBRARY_VERSION}",
|
|
269
|
-
},
|
|
270
|
-
)
|
|
271
|
-
# Use the no-redirect seam so a malicious or misconfigured
|
|
272
|
-
# endpoint can't 302 the bearer token to a different host.
|
|
273
|
-
with _open_request(req, timeout) as resp:
|
|
274
|
-
return 200 <= resp.status < 300
|
|
275
298
|
except Exception:
|
|
276
|
-
#
|
|
277
|
-
# Catches URLError, HTTPError, TimeoutError, OSError, plus the
|
|
278
|
-
# ValueError that urllib raises for malformed URLs (e.g., missing scheme),
|
|
279
|
-
# plus any JSON-encoding edge case in the payload.
|
|
299
|
+
# A payload that won't JSON-encode will never send — don't retry.
|
|
280
300
|
return False
|
|
301
|
+
|
|
302
|
+
req = urllib.request.Request(
|
|
303
|
+
endpoint,
|
|
304
|
+
data=body,
|
|
305
|
+
method="POST",
|
|
306
|
+
headers={
|
|
307
|
+
"Content-Type": "application/json",
|
|
308
|
+
"Authorization": f"Bearer {token}",
|
|
309
|
+
"User-Agent": f"loopgain/{LIBRARY_VERSION}",
|
|
310
|
+
},
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
attempts = max(1, retries + 1)
|
|
314
|
+
for i in range(attempts):
|
|
315
|
+
try:
|
|
316
|
+
# Use the no-redirect seam so a malicious or misconfigured
|
|
317
|
+
# endpoint can't 302 the bearer token to a different host.
|
|
318
|
+
with _open_request(req, timeout) as resp:
|
|
319
|
+
return 200 <= resp.status < 300
|
|
320
|
+
except Exception as exc:
|
|
321
|
+
# Best-effort: never break the user's loop because telemetry failed.
|
|
322
|
+
# Retry only transient failures, and only if attempts remain.
|
|
323
|
+
last = i == attempts - 1
|
|
324
|
+
if last or not _is_transient(exc):
|
|
325
|
+
return False
|
|
326
|
+
time.sleep(retry_backoff * (i + 1))
|
|
327
|
+
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgain
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
|
|
5
5
|
Author-email: Dave Fitzsimmons <hello@loopgain.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -58,7 +58,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
|
|
|
58
58
|
[](https://pypi.org/project/loopgain/)
|
|
59
59
|
[](https://pypi.org/project/loopgain/)
|
|
60
60
|
[](LICENSE)
|
|
61
|
-
[](tests/)
|
|
62
62
|
|
|
63
63
|
**Home:** [loopgain.ai](https://loopgain.ai)
|
|
64
64
|
|
|
@@ -183,6 +183,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
|
|
|
183
183
|
|
|
184
184
|
- **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
|
|
185
185
|
- **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
|
|
186
|
+
- **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
|
|
186
187
|
|
|
187
188
|
---
|
|
188
189
|
|
|
@@ -223,11 +223,13 @@ def test_loop_length_robustness():
|
|
|
223
223
|
- n=8 (df=6): ≥ 90% (the default real-loop length)
|
|
224
224
|
- n=12 (df=10): ≥ 95%
|
|
225
225
|
"""
|
|
226
|
-
# n=4 is intentionally excluded
|
|
227
|
-
#
|
|
228
|
-
#
|
|
229
|
-
#
|
|
230
|
-
# min-recommended-iterations limit,
|
|
226
|
+
# n=4 is intentionally excluded from the high-accuracy thresholds below:
|
|
227
|
+
# with df=2 the t-test correctly requires |t|>4.30 for p<0.05 (see
|
|
228
|
+
# test_two_sided_t_p_df2_exact), a fundamental statistical-power floor at
|
|
229
|
+
# this length. The classifier falls back to cumulative E_ratio when the
|
|
230
|
+
# slope test is underpowered. This is a min-recommended-iterations limit,
|
|
231
|
+
# not a bug. (Historically the df=2 p-value was computed at 2x its true
|
|
232
|
+
# value, requiring |t|>6.21 and worsening this floor — now fixed.)
|
|
231
233
|
LEN_THRESHOLDS = {6: 0.80, 8: 0.90, 12: 0.95}
|
|
232
234
|
for n, threshold in LEN_THRESHOLDS.items():
|
|
233
235
|
for gen, expected in [
|
|
@@ -33,7 +33,42 @@ from loopgain import (
|
|
|
33
33
|
classify_trajectory,
|
|
34
34
|
extract_features,
|
|
35
35
|
)
|
|
36
|
-
from loopgain.classifier import _ols_slope_and_p
|
|
36
|
+
from loopgain.classifier import _ols_slope_and_p, _two_sided_t_p
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ----- Two-sided t p-value closed forms -----
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_two_sided_t_p_df1_exact():
|
|
43
|
+
"""df=1 is the Cauchy distribution: two-sided p = 1 - 2·atan(t)/pi."""
|
|
44
|
+
for t in (0.0, 0.5, 1.0, 2.0, 5.0, 12.706):
|
|
45
|
+
expected = 1.0 - 2.0 * math.atan(t) / math.pi
|
|
46
|
+
assert _two_sided_t_p(t, 1) == pytest.approx(expected, abs=1e-9)
|
|
47
|
+
# t=1 is the median of |T| for df=1 → two-sided p = 0.5.
|
|
48
|
+
assert _two_sided_t_p(1.0, 1) == pytest.approx(0.5, abs=1e-9)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_two_sided_t_p_df2_exact():
|
|
52
|
+
"""df=2 closed form: two-sided p = 1 - |t|/sqrt(2 + t^2).
|
|
53
|
+
|
|
54
|
+
Regression guard for the doubled-p bug: the critical value for p=0.05
|
|
55
|
+
at df=2 is t=4.302653. The previous implementation returned ~0.10 here
|
|
56
|
+
(2x too large), which forced |t|>6.21 for significance and made the n=4
|
|
57
|
+
classifier far too conservative.
|
|
58
|
+
"""
|
|
59
|
+
for t in (0.0, 0.5, 1.0, 2.0, 5.0):
|
|
60
|
+
expected = 1.0 - t / math.sqrt(2.0 + t * t)
|
|
61
|
+
assert _two_sided_t_p(t, 2) == pytest.approx(expected, abs=1e-9)
|
|
62
|
+
# The exact 5% two-sided critical value for df=2.
|
|
63
|
+
assert _two_sided_t_p(4.302653, 2) == pytest.approx(0.05, abs=1e-4)
|
|
64
|
+
# p is a probability: monotone non-increasing in t, bounded to [0, 1].
|
|
65
|
+
assert _two_sided_t_p(0.0, 2) == pytest.approx(1.0, abs=1e-9)
|
|
66
|
+
prev = 1.1
|
|
67
|
+
for t in (0.0, 0.5, 1.0, 2.0, 4.0, 8.0, 50.0):
|
|
68
|
+
p = _two_sided_t_p(t, 2)
|
|
69
|
+
assert 0.0 <= p <= 1.0
|
|
70
|
+
assert p <= prev + 1e-12
|
|
71
|
+
prev = p
|
|
37
72
|
|
|
38
73
|
|
|
39
74
|
# ----- OLS slope / p-value building blocks -----
|
|
@@ -660,3 +660,117 @@ def test_send_payload_refuses_redirects():
|
|
|
660
660
|
req = urllib.request.Request("https://example.com/")
|
|
661
661
|
with pytest.raises(urllib.error.HTTPError):
|
|
662
662
|
method(req, io.BytesIO(b""), 302, "Found", {})
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
# ----- send_payload retry behavior (transient failures) -----
|
|
666
|
+
|
|
667
|
+
import socket as _socket
|
|
668
|
+
import urllib.error as _uerr
|
|
669
|
+
|
|
670
|
+
from loopgain import telemetry as _tele
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
class _OkResp:
|
|
674
|
+
status = 202
|
|
675
|
+
|
|
676
|
+
def __enter__(self):
|
|
677
|
+
return self
|
|
678
|
+
|
|
679
|
+
def __exit__(self, *args):
|
|
680
|
+
pass
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def _retry_payload():
|
|
684
|
+
return build_payload(_make_terminated_loop(), workload_id="retry-test")
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def test_send_payload_retries_transient_then_succeeds(monkeypatch):
|
|
688
|
+
"""A transient failure (timeout) is retried; a later success returns True."""
|
|
689
|
+
calls = {"n": 0}
|
|
690
|
+
|
|
691
|
+
def flaky(req, timeout=None):
|
|
692
|
+
calls["n"] += 1
|
|
693
|
+
if calls["n"] < 3:
|
|
694
|
+
raise _socket.timeout("slow first attempts")
|
|
695
|
+
return _OkResp()
|
|
696
|
+
|
|
697
|
+
sleeps: list[float] = []
|
|
698
|
+
monkeypatch.setattr("loopgain.telemetry._open_request", flaky)
|
|
699
|
+
monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: sleeps.append(s))
|
|
700
|
+
|
|
701
|
+
ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload())
|
|
702
|
+
assert ok is True
|
|
703
|
+
assert calls["n"] == 3 # two transient failures, third succeeds
|
|
704
|
+
assert sleeps == [0.25, 0.5] # linear backoff between attempts
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def test_send_payload_gives_up_after_retries_on_persistent_5xx(monkeypatch):
|
|
708
|
+
"""A persistent transient (503) exhausts retries and returns False."""
|
|
709
|
+
calls = {"n": 0}
|
|
710
|
+
|
|
711
|
+
def always_503(req, timeout=None):
|
|
712
|
+
calls["n"] += 1
|
|
713
|
+
raise _uerr.HTTPError("https://t.example", 503, "unavailable", {}, None)
|
|
714
|
+
|
|
715
|
+
monkeypatch.setattr("loopgain.telemetry._open_request", always_503)
|
|
716
|
+
monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
|
|
717
|
+
|
|
718
|
+
ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload(), retries=2)
|
|
719
|
+
assert ok is False
|
|
720
|
+
assert calls["n"] == 3 # 1 initial + 2 retries
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def test_send_payload_does_not_retry_deterministic_4xx(monkeypatch):
|
|
724
|
+
"""A 401 will never succeed on retry — fail fast, no backoff."""
|
|
725
|
+
calls = {"n": 0}
|
|
726
|
+
slept = {"n": 0}
|
|
727
|
+
|
|
728
|
+
def unauthorized(req, timeout=None):
|
|
729
|
+
calls["n"] += 1
|
|
730
|
+
raise _uerr.HTTPError("https://t.example", 401, "unauthorized", {}, None)
|
|
731
|
+
|
|
732
|
+
monkeypatch.setattr("loopgain.telemetry._open_request", unauthorized)
|
|
733
|
+
monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: slept.__setitem__("n", slept["n"] + 1))
|
|
734
|
+
|
|
735
|
+
ok = send_payload("https://t.example/v1/aggregate", token="bad", payload=_retry_payload())
|
|
736
|
+
assert ok is False
|
|
737
|
+
assert calls["n"] == 1 # no retry on a deterministic 4xx
|
|
738
|
+
assert slept["n"] == 0
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def test_send_payload_retries_zero_is_single_shot(monkeypatch):
|
|
742
|
+
"""retries=0 restores the original single-attempt behavior."""
|
|
743
|
+
calls = {"n": 0}
|
|
744
|
+
|
|
745
|
+
def timeout(req, timeout=None):
|
|
746
|
+
calls["n"] += 1
|
|
747
|
+
raise TimeoutError()
|
|
748
|
+
|
|
749
|
+
monkeypatch.setattr("loopgain.telemetry._open_request", timeout)
|
|
750
|
+
monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
|
|
751
|
+
|
|
752
|
+
ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload(), retries=0)
|
|
753
|
+
assert ok is False
|
|
754
|
+
assert calls["n"] == 1
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def test_send_payload_never_raises_on_unexpected_error(monkeypatch):
|
|
758
|
+
"""A non-transient, unexpected error is swallowed (best-effort), no retry."""
|
|
759
|
+
def boom(req, timeout=None):
|
|
760
|
+
raise RuntimeError("unexpected")
|
|
761
|
+
|
|
762
|
+
monkeypatch.setattr("loopgain.telemetry._open_request", boom)
|
|
763
|
+
monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
|
|
764
|
+
|
|
765
|
+
assert send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload()) is False
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
def test_is_transient_classification():
|
|
769
|
+
assert _tele._is_transient(TimeoutError()) is True
|
|
770
|
+
assert _tele._is_transient(_socket.timeout()) is True
|
|
771
|
+
assert _tele._is_transient(_uerr.URLError("dns")) is True
|
|
772
|
+
assert _tele._is_transient(_uerr.HTTPError("u", 503, "x", {}, None)) is True
|
|
773
|
+
assert _tele._is_transient(_uerr.HTTPError("u", 429, "x", {}, None)) is True
|
|
774
|
+
assert _tele._is_transient(_uerr.HTTPError("u", 400, "x", {}, None)) is False
|
|
775
|
+
assert _tele._is_transient(_uerr.HTTPError("u", 401, "x", {}, None)) is False
|
|
776
|
+
assert _tele._is_transient(RuntimeError("x")) is False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|