loopgain 0.4.1__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {loopgain-0.4.1 → loopgain-0.4.3}/PKG-INFO +3 -2
  2. {loopgain-0.4.1 → loopgain-0.4.3}/README.md +2 -1
  3. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/_version.py +1 -1
  4. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/classifier.py +8 -3
  5. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/core.py +15 -1
  6. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/telemetry.py +67 -20
  7. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/PKG-INFO +3 -2
  8. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_classifier_mock_validation.py +7 -5
  9. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_classifier_synthetic.py +36 -1
  10. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_telemetry.py +114 -0
  11. {loopgain-0.4.1 → loopgain-0.4.3}/LICENSE +0 -0
  12. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/__init__.py +0 -0
  13. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/__main__.py +0 -0
  14. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/cli.py +0 -0
  15. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/funnel.py +0 -0
  16. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/__init__.py +0 -0
  17. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/autogen.py +0 -0
  18. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/claude_agent_sdk.py +0 -0
  19. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/crewai.py +0 -0
  20. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/langchain.py +0 -0
  21. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/langgraph.py +0 -0
  22. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain/integrations/openai_agents.py +0 -0
  23. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/SOURCES.txt +0 -0
  24. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/dependency_links.txt +0 -0
  25. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/entry_points.txt +0 -0
  26. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/requires.txt +0 -0
  27. {loopgain-0.4.1 → loopgain-0.4.3}/loopgain.egg-info/top_level.txt +0 -0
  28. {loopgain-0.4.1 → loopgain-0.4.3}/pyproject.toml +0 -0
  29. {loopgain-0.4.1 → loopgain-0.4.3}/setup.cfg +0 -0
  30. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_core.py +0 -0
  31. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_funnel.py +0 -0
  32. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_integrations.py +0 -0
  33. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_stress.py +0 -0
  34. {loopgain-0.4.1 → loopgain-0.4.3}/tests/test_termination_safety.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
@@ -58,7 +58,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
58
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
59
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
60
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
61
- [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
62
62
 
63
63
  **Home:** [loopgain.ai](https://loopgain.ai)
64
64
 
@@ -183,6 +183,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
183
183
 
184
184
  - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
185
  - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
186
+ - **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
186
187
 
187
188
  ---
188
189
 
@@ -9,7 +9,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
9
9
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
10
10
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
11
11
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
12
- [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
12
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
13
13
 
14
14
  **Home:** [loopgain.ai](https://loopgain.ai)
15
15
 
@@ -134,6 +134,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
134
134
 
135
135
  - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
136
136
  - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
137
+ - **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
137
138
 
138
139
  ---
139
140
 
@@ -7,4 +7,4 @@ from here so the value never drifts between ``__version__`` and the
7
7
  ``pyproject.toml``) for each release.
8
8
  """
9
9
 
10
- __version__ = "0.4.1"
10
+ __version__ = "0.4.3"
@@ -184,9 +184,14 @@ def _two_sided_t_p(t_abs: float, df: int) -> float:
184
184
  # exact: cdf_t(t,1) = 0.5 + arctan(t)/pi
185
185
  return 2.0 * (0.5 - math.atan(t_abs) / math.pi)
186
186
  if df == 2:
187
- # exact one-sided survival: 1 - (1 + t²/2)^(-1) doubled
188
- return min(1.0, 2.0 * (1.0 - t_abs / math.sqrt(2.0 + t_abs * t_abs) / 1.0) * 0.5
189
- + 2.0 * (0.5 - 0.5 * t_abs / math.sqrt(2.0 + t_abs * t_abs)))
187
+ # Exact two-sided p-value for Student-t with df=2. The df=2 CDF is
188
+ # F(t) = 1/2 + t / (2·√(2 + t²)), so the one-sided survival is
189
+ # P(T > t) = 1/2 − t / (2·√(2 + t²)) and the two-sided p is
190
+ # 2·P(T > |t|) = 1 − |t| / √(2 + t²).
191
+ # (The previous implementation returned twice this — it required
192
+ # |t| > 6.21 for p<0.05 instead of the correct |t| > 4.30, making
193
+ # the n=4 classifier far too conservative. See test_classifier.)
194
+ return max(0.0, 1.0 - t_abs / math.sqrt(2.0 + t_abs * t_abs))
190
195
  # Wilson-Hilferty: transform t² ~ F(1, df), then F → chi-square via
191
196
  # cube-root approximation. For our purposes the simpler normal-approx
192
197
  # to the t with the Hill / Abramowitz adjustment is enough.
@@ -514,6 +514,8 @@ class LoopGain:
514
514
  loop_type: Optional[str] = None,
515
515
  team: Optional[str] = None,
516
516
  include_per_iteration: bool = True,
517
+ retries: int = 2,
518
+ retry_backoff: float = 0.25,
517
519
  ) -> bool:
518
520
  """Send anonymized telemetry to a receiver endpoint.
519
521
 
@@ -544,6 +546,12 @@ class LoopGain:
544
546
  per-iteration Aβ + error trajectories (capped) so the
545
547
  dashboard's Loop Detail scrubber works. Set ``False`` to
546
548
  send only aggregate summary stats.
549
+ retries: Additional attempts if a send fails *transiently*
550
+ (timeout, connection error, 5xx/429). Default 2 (up to 3
551
+ attempts). Set to 0 for single-shot. Deterministic failures
552
+ (bad token, etc.) are never retried.
553
+ retry_backoff: Base seconds between attempts; the nth retry waits
554
+ ``retry_backoff * n``. Default 0.25.
547
555
 
548
556
  Returns:
549
557
  ``True`` on 2xx response, ``False`` otherwise.
@@ -572,5 +580,11 @@ class LoopGain:
572
580
  include_per_iteration=include_per_iteration,
573
581
  )
574
582
  return send_payload(
575
- endpoint, token, payload, timeout=timeout, allow_insecure=allow_insecure
583
+ endpoint,
584
+ token,
585
+ payload,
586
+ timeout=timeout,
587
+ allow_insecure=allow_insecure,
588
+ retries=retries,
589
+ retry_backoff=retry_backoff,
576
590
  )
@@ -22,7 +22,9 @@ from __future__ import annotations
22
22
 
23
23
  import json
24
24
  import math
25
+ import socket
25
26
  import statistics
27
+ import time
26
28
  import urllib.error
27
29
  import urllib.request
28
30
  from datetime import datetime, timezone
@@ -178,6 +180,11 @@ def build_payload(
178
180
  "savings_vs_fixed_cap": result.savings_vs_fixed_cap,
179
181
  "convergence_profile_summary": profile_summary,
180
182
  "rollback_triggered": result.outcome in ("oscillating", "diverged"),
183
+ # Index (0-based) of the lowest-error iteration. Lets the receiver
184
+ # derive iterations-to-best (best_index+1) and iterations-past-best
185
+ # (iterations_used-1-best_index) — the "Iteration Waste" view.
186
+ # Privacy-safe: an integer position, no output/error content.
187
+ "best_index": result.best_index,
181
188
  # v2: first computable eta snapshot, for ETA calibration dashboard.
182
189
  # Predicted total iterations = first_eta_at_iteration +
183
190
  # first_eta_prediction; compare to iterations_used to compute the
@@ -213,18 +220,43 @@ def build_payload(
213
220
  return payload
214
221
 
215
222
 
223
+ def _is_transient(exc: BaseException) -> bool:
224
+ """Is this send failure worth retrying?
225
+
226
+ Transient = timeout, connection/DNS error, or a 5xx/429 from the server —
227
+ a later attempt might succeed. Deterministic failures (4xx other than 429,
228
+ a refused redirect) will never succeed on retry, so they are *not*
229
+ transient and we give up immediately.
230
+ """
231
+ if isinstance(exc, urllib.error.HTTPError): # subclass of URLError — check first
232
+ return exc.code >= 500 or exc.code == 429
233
+ return isinstance(exc, (TimeoutError, socket.timeout, urllib.error.URLError, OSError))
234
+
235
+
216
236
  def send_payload(
217
237
  endpoint: str,
218
238
  token: str,
219
239
  payload: dict[str, Any],
220
240
  timeout: float = 2.0,
221
241
  allow_insecure: bool = False,
242
+ retries: int = 2,
243
+ retry_backoff: float = 0.25,
222
244
  ) -> bool:
223
245
  """POST a telemetry payload to the given endpoint.
224
246
 
225
247
  Best-effort: errors are swallowed; never raises. Returns ``True`` if
226
248
  the server returned a 2xx status, ``False`` otherwise.
227
249
 
250
+ A single send is one HTTP POST with a ``timeout``-second deadline. The
251
+ warm round-trip to the hosted receiver is ~150 ms, so the default 2 s
252
+ timeout has wide headroom; the failure mode in practice is a *transient*
253
+ outlier (a cold database first-write, a momentary network blip) that
254
+ blows past it. Because a low-frequency caller may send only one aggregate
255
+ per run, a single dropped send loses that whole run's data — so a transient
256
+ failure is retried up to ``retries`` times with a short linear backoff.
257
+ Deterministic failures (bad token, malformed payload, refused redirect)
258
+ are *not* retried. Still best-effort throughout: the loop never raises.
259
+
228
260
  Args:
229
261
  endpoint: Telemetry receiver URL (e.g.,
230
262
  ``https://telemetry.loopgain.ai/v1/aggregate``). Must use
@@ -235,13 +267,18 @@ def send_payload(
235
267
  token: Bearer token issued by the receiver. Identifies the customer
236
268
  account; rotatable; not linked to any production secrets.
237
269
  payload: Dict from ``build_payload``.
238
- timeout: Per-request timeout in seconds. Default 2.0.
270
+ timeout: Per-attempt timeout in seconds. Default 2.0.
239
271
  allow_insecure: If ``True``, permit ``http://`` endpoints. Intended
240
272
  for local development against a self-hosted receiver on
241
273
  ``http://localhost``. Default ``False``.
274
+ retries: Number of *additional* attempts after the first if the send
275
+ fails transiently. Default 2 (so up to 3 attempts total). Set to
276
+ 0 to restore single-shot behavior.
277
+ retry_backoff: Base seconds to sleep between attempts; the nth retry
278
+ waits ``retry_backoff * n`` (0.25 s, 0.50 s, …). Default 0.25.
242
279
 
243
280
  Returns:
244
- ``True`` on 2xx response, ``False`` otherwise.
281
+ ``True`` on a 2xx response, ``False`` otherwise.
245
282
  """
246
283
  # Refuse to attach the bearer token to anything but http(s); silently
247
284
  # best-effort so a misconfigured endpoint can't break the user's loop.
@@ -258,23 +295,33 @@ def send_payload(
258
295
 
259
296
  try:
260
297
  body = json.dumps(payload).encode("utf-8")
261
- req = urllib.request.Request(
262
- endpoint,
263
- data=body,
264
- method="POST",
265
- headers={
266
- "Content-Type": "application/json",
267
- "Authorization": f"Bearer {token}",
268
- "User-Agent": f"loopgain/{LIBRARY_VERSION}",
269
- },
270
- )
271
- # Use the no-redirect seam so a malicious or misconfigured
272
- # endpoint can't 302 the bearer token to a different host.
273
- with _open_request(req, timeout) as resp:
274
- return 200 <= resp.status < 300
275
298
  except Exception:
276
- # Best-effort: never break the user's loop because telemetry failed.
277
- # Catches URLError, HTTPError, TimeoutError, OSError, plus the
278
- # ValueError that urllib raises for malformed URLs (e.g., missing scheme),
279
- # plus any JSON-encoding edge case in the payload.
299
+ # A payload that won't JSON-encode will never send — don't retry.
280
300
  return False
301
+
302
+ req = urllib.request.Request(
303
+ endpoint,
304
+ data=body,
305
+ method="POST",
306
+ headers={
307
+ "Content-Type": "application/json",
308
+ "Authorization": f"Bearer {token}",
309
+ "User-Agent": f"loopgain/{LIBRARY_VERSION}",
310
+ },
311
+ )
312
+
313
+ attempts = max(1, retries + 1)
314
+ for i in range(attempts):
315
+ try:
316
+ # Use the no-redirect seam so a malicious or misconfigured
317
+ # endpoint can't 302 the bearer token to a different host.
318
+ with _open_request(req, timeout) as resp:
319
+ return 200 <= resp.status < 300
320
+ except Exception as exc:
321
+ # Best-effort: never break the user's loop because telemetry failed.
322
+ # Retry only transient failures, and only if attempts remain.
323
+ last = i == attempts - 1
324
+ if last or not _is_transient(exc):
325
+ return False
326
+ time.sleep(retry_backoff * (i + 1))
327
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: An open-source cost controller for AI agent loops. Stops a loop when it has actually converged and rolls back before it degrades — replacing the max_iterations guess with a real-time loop-gain (Aβ) monitor with five named threshold bands and best-so-far rollback.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
@@ -58,7 +58,7 @@ AI agent loops waste time and money when they don't know when to stop. LoopGain
58
58
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
59
59
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
60
60
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
61
- [![Tests](https://img.shields.io/badge/tests-202_passing-brightgreen.svg)](tests/)
61
+ [![Tests](https://img.shields.io/badge/tests-200%2B_passing-brightgreen.svg)](tests/)
62
62
 
63
63
  **Home:** [loopgain.ai](https://loopgain.ai)
64
64
 
@@ -183,6 +183,7 @@ LoopGain saves money by stopping a loop once it stops improving — fewer iterat
183
183
 
184
184
  - **Savings depend on your workload.** Loops that usually succeed fast save the most (~96%); adversarial, failure-prone loops save less (~78–84%). The headline is a blend — run the benchmark on your own loops before quoting a number.
185
185
  - **LoopGain detects convergence, not correctness.** It stops when your error signal stops improving — which means more iterations won't help, *not* that the loop succeeded. On the benchmark this preserved quality (it rarely stopped early on a worse output; false-stop rate ≤4.5%), but a loop can stall with the error still above zero — a plateau at, say, 2 failing tests. So check `result.best_error` (or your own pass/fail) before you trust the output: if it plateaued short of your target, that's a quality gap LoopGain can't see, and a false stop that forces a rerun is the one way it eats into the savings. LoopGain decides *when to stop*; you decide *whether the answer is good enough*.
186
+ - **LoopGain is only as right as your verifier.** It acts on the error signal you give it. If your verifier reports zero errors, LoopGain trusts that and stops — so a verifier with blind spots can report success on an answer that is still wrong, and LoopGain will confidently stop there. This is not the plateau case above: the error reads zero and the loop looks like a clean success, so neither LoopGain nor its convergence signal can flag it. The quality of the stop is bounded by the quality of the check behind your error signal. Pair LoopGain with the strongest verifier you can afford at the stop — executable tests over a sampled subset, a schema or type check over a vibe, a held-out check the loop didn't optimize against.
186
187
 
187
188
  ---
188
189
 
@@ -223,11 +223,13 @@ def test_loop_length_robustness():
223
223
  - n=8 (df=6): ≥ 90% (the default real-loop length)
224
224
  - n=12 (df=10): ≥ 95%
225
225
  """
226
- # n=4 is intentionally excluded: with df=2 the t-test requires |t|>4.3
227
- # for p<0.05, which is a fundamental statistical-power floor. The
228
- # classifier correctly falls back to STALLING (insufficient evidence)
229
- # for most convergent trajectories at n=4. Documented as a
230
- # min-recommended-iterations limit, not a bug.
226
+ # n=4 is intentionally excluded from the high-accuracy thresholds below:
227
+ # with df=2 the t-test correctly requires |t|>4.30 for p<0.05 (see
228
+ # test_two_sided_t_p_df2_exact), a fundamental statistical-power floor at
229
+ # this length. The classifier falls back to cumulative E_ratio when the
230
+ # slope test is underpowered. This is a min-recommended-iterations limit,
231
+ # not a bug. (Historically the df=2 p-value was computed at 2x its true
232
+ # value, requiring |t|>6.21 and worsening this floor — now fixed.)
231
233
  LEN_THRESHOLDS = {6: 0.80, 8: 0.90, 12: 0.95}
232
234
  for n, threshold in LEN_THRESHOLDS.items():
233
235
  for gen, expected in [
@@ -33,7 +33,42 @@ from loopgain import (
33
33
  classify_trajectory,
34
34
  extract_features,
35
35
  )
36
- from loopgain.classifier import _ols_slope_and_p
36
+ from loopgain.classifier import _ols_slope_and_p, _two_sided_t_p
37
+
38
+
39
+ # ----- Two-sided t p-value closed forms -----
40
+
41
+
42
+ def test_two_sided_t_p_df1_exact():
43
+ """df=1 is the Cauchy distribution: two-sided p = 1 - 2·atan(t)/pi."""
44
+ for t in (0.0, 0.5, 1.0, 2.0, 5.0, 12.706):
45
+ expected = 1.0 - 2.0 * math.atan(t) / math.pi
46
+ assert _two_sided_t_p(t, 1) == pytest.approx(expected, abs=1e-9)
47
+ # t=1 is the median of |T| for df=1 → two-sided p = 0.5.
48
+ assert _two_sided_t_p(1.0, 1) == pytest.approx(0.5, abs=1e-9)
49
+
50
+
51
+ def test_two_sided_t_p_df2_exact():
52
+ """df=2 closed form: two-sided p = 1 - |t|/sqrt(2 + t^2).
53
+
54
+ Regression guard for the doubled-p bug: the critical value for p=0.05
55
+ at df=2 is t=4.302653. The previous implementation returned ~0.10 here
56
+ (2x too large), which forced |t|>6.21 for significance and made the n=4
57
+ classifier far too conservative.
58
+ """
59
+ for t in (0.0, 0.5, 1.0, 2.0, 5.0):
60
+ expected = 1.0 - t / math.sqrt(2.0 + t * t)
61
+ assert _two_sided_t_p(t, 2) == pytest.approx(expected, abs=1e-9)
62
+ # The exact 5% two-sided critical value for df=2.
63
+ assert _two_sided_t_p(4.302653, 2) == pytest.approx(0.05, abs=1e-4)
64
+ # p is a probability: monotone non-increasing in t, bounded to [0, 1].
65
+ assert _two_sided_t_p(0.0, 2) == pytest.approx(1.0, abs=1e-9)
66
+ prev = 1.1
67
+ for t in (0.0, 0.5, 1.0, 2.0, 4.0, 8.0, 50.0):
68
+ p = _two_sided_t_p(t, 2)
69
+ assert 0.0 <= p <= 1.0
70
+ assert p <= prev + 1e-12
71
+ prev = p
37
72
 
38
73
 
39
74
  # ----- OLS slope / p-value building blocks -----
@@ -660,3 +660,117 @@ def test_send_payload_refuses_redirects():
660
660
  req = urllib.request.Request("https://example.com/")
661
661
  with pytest.raises(urllib.error.HTTPError):
662
662
  method(req, io.BytesIO(b""), 302, "Found", {})
663
+
664
+
665
+ # ----- send_payload retry behavior (transient failures) -----
666
+
667
+ import socket as _socket
668
+ import urllib.error as _uerr
669
+
670
+ from loopgain import telemetry as _tele
671
+
672
+
673
+ class _OkResp:
674
+ status = 202
675
+
676
+ def __enter__(self):
677
+ return self
678
+
679
+ def __exit__(self, *args):
680
+ pass
681
+
682
+
683
+ def _retry_payload():
684
+ return build_payload(_make_terminated_loop(), workload_id="retry-test")
685
+
686
+
687
+ def test_send_payload_retries_transient_then_succeeds(monkeypatch):
688
+ """A transient failure (timeout) is retried; a later success returns True."""
689
+ calls = {"n": 0}
690
+
691
+ def flaky(req, timeout=None):
692
+ calls["n"] += 1
693
+ if calls["n"] < 3:
694
+ raise _socket.timeout("slow first attempts")
695
+ return _OkResp()
696
+
697
+ sleeps: list[float] = []
698
+ monkeypatch.setattr("loopgain.telemetry._open_request", flaky)
699
+ monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: sleeps.append(s))
700
+
701
+ ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload())
702
+ assert ok is True
703
+ assert calls["n"] == 3 # two transient failures, third succeeds
704
+ assert sleeps == [0.25, 0.5] # linear backoff between attempts
705
+
706
+
707
+ def test_send_payload_gives_up_after_retries_on_persistent_5xx(monkeypatch):
708
+ """A persistent transient (503) exhausts retries and returns False."""
709
+ calls = {"n": 0}
710
+
711
+ def always_503(req, timeout=None):
712
+ calls["n"] += 1
713
+ raise _uerr.HTTPError("https://t.example", 503, "unavailable", {}, None)
714
+
715
+ monkeypatch.setattr("loopgain.telemetry._open_request", always_503)
716
+ monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
717
+
718
+ ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload(), retries=2)
719
+ assert ok is False
720
+ assert calls["n"] == 3 # 1 initial + 2 retries
721
+
722
+
723
+ def test_send_payload_does_not_retry_deterministic_4xx(monkeypatch):
724
+ """A 401 will never succeed on retry — fail fast, no backoff."""
725
+ calls = {"n": 0}
726
+ slept = {"n": 0}
727
+
728
+ def unauthorized(req, timeout=None):
729
+ calls["n"] += 1
730
+ raise _uerr.HTTPError("https://t.example", 401, "unauthorized", {}, None)
731
+
732
+ monkeypatch.setattr("loopgain.telemetry._open_request", unauthorized)
733
+ monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: slept.__setitem__("n", slept["n"] + 1))
734
+
735
+ ok = send_payload("https://t.example/v1/aggregate", token="bad", payload=_retry_payload())
736
+ assert ok is False
737
+ assert calls["n"] == 1 # no retry on a deterministic 4xx
738
+ assert slept["n"] == 0
739
+
740
+
741
+ def test_send_payload_retries_zero_is_single_shot(monkeypatch):
742
+ """retries=0 restores the original single-attempt behavior."""
743
+ calls = {"n": 0}
744
+
745
+ def timeout(req, timeout=None):
746
+ calls["n"] += 1
747
+ raise TimeoutError()
748
+
749
+ monkeypatch.setattr("loopgain.telemetry._open_request", timeout)
750
+ monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
751
+
752
+ ok = send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload(), retries=0)
753
+ assert ok is False
754
+ assert calls["n"] == 1
755
+
756
+
757
+ def test_send_payload_never_raises_on_unexpected_error(monkeypatch):
758
+ """A non-transient, unexpected error is swallowed (best-effort), no retry."""
759
+ def boom(req, timeout=None):
760
+ raise RuntimeError("unexpected")
761
+
762
+ monkeypatch.setattr("loopgain.telemetry._open_request", boom)
763
+ monkeypatch.setattr("loopgain.telemetry.time.sleep", lambda s: None)
764
+
765
+ assert send_payload("https://t.example/v1/aggregate", token="t", payload=_retry_payload()) is False
766
+
767
+
768
+ def test_is_transient_classification():
769
+ assert _tele._is_transient(TimeoutError()) is True
770
+ assert _tele._is_transient(_socket.timeout()) is True
771
+ assert _tele._is_transient(_uerr.URLError("dns")) is True
772
+ assert _tele._is_transient(_uerr.HTTPError("u", 503, "x", {}, None)) is True
773
+ assert _tele._is_transient(_uerr.HTTPError("u", 429, "x", {}, None)) is True
774
+ assert _tele._is_transient(_uerr.HTTPError("u", 400, "x", {}, None)) is False
775
+ assert _tele._is_transient(_uerr.HTTPError("u", 401, "x", {}, None)) is False
776
+ assert _tele._is_transient(RuntimeError("x")) is False
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes