loopgain 0.1.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {loopgain-0.1.9 → loopgain-0.3.0}/PKG-INFO +177 -28
  2. {loopgain-0.1.9 → loopgain-0.3.0}/README.md +166 -26
  3. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/__init__.py +10 -0
  4. loopgain-0.3.0/loopgain/__main__.py +8 -0
  5. loopgain-0.3.0/loopgain/_version.py +10 -0
  6. loopgain-0.3.0/loopgain/classifier.py +323 -0
  7. loopgain-0.3.0/loopgain/cli.py +109 -0
  8. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/core.py +94 -6
  9. loopgain-0.3.0/loopgain/funnel.py +572 -0
  10. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/integrations/__init__.py +31 -12
  11. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/integrations/autogen.py +4 -0
  12. loopgain-0.3.0/loopgain/integrations/claude_agent_sdk.py +210 -0
  13. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/integrations/crewai.py +4 -0
  14. loopgain-0.3.0/loopgain/integrations/langchain.py +191 -0
  15. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/integrations/langgraph.py +4 -0
  16. loopgain-0.3.0/loopgain/integrations/openai_agents.py +201 -0
  17. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain.egg-info/PKG-INFO +177 -28
  18. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain.egg-info/SOURCES.txt +11 -0
  19. loopgain-0.3.0/loopgain.egg-info/entry_points.txt +2 -0
  20. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain.egg-info/requires.txt +12 -0
  21. {loopgain-0.1.9 → loopgain-0.3.0}/pyproject.toml +20 -3
  22. loopgain-0.3.0/tests/test_classifier_mock_validation.py +269 -0
  23. loopgain-0.3.0/tests/test_classifier_synthetic.py +320 -0
  24. {loopgain-0.1.9 → loopgain-0.3.0}/tests/test_core.py +15 -5
  25. loopgain-0.3.0/tests/test_funnel.py +366 -0
  26. {loopgain-0.1.9 → loopgain-0.3.0}/tests/test_stress.py +26 -12
  27. loopgain-0.1.9/loopgain/_version.py +0 -9
  28. {loopgain-0.1.9 → loopgain-0.3.0}/LICENSE +0 -0
  29. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain/telemetry.py +0 -0
  30. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain.egg-info/dependency_links.txt +0 -0
  31. {loopgain-0.1.9 → loopgain-0.3.0}/loopgain.egg-info/top_level.txt +0 -0
  32. {loopgain-0.1.9 → loopgain-0.3.0}/setup.cfg +0 -0
  33. {loopgain-0.1.9 → loopgain-0.3.0}/tests/test_integrations.py +0 -0
  34. {loopgain-0.1.9 → loopgain-0.3.0}/tests/test_telemetry.py +0 -0
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loopgain
3
- Version: 0.1.9
3
+ Version: 0.3.0
4
4
  Summary: Barkhausen stability monitor for AI agent loops. Real-time loop-gain (Aβ) monitoring with five named threshold bands, best-so-far rollback, and ETA prediction.
5
5
  Author-email: Dave Fitzsimmons <hello@loopgain.ai>
6
6
  License: Apache-2.0
7
7
  Project-URL: Homepage, https://loopgain.ai
8
8
  Project-URL: Repository, https://github.com/loopgain-ai/loopgain
9
9
  Project-URL: Issues, https://github.com/loopgain-ai/loopgain/issues
10
- Keywords: ai,agent,ai-agent,ai-agents,agentic,agentic-ai,llm,llm-agent,llm-orchestration,agent-orchestration,agent-loop,verify-revise,verify-revise-loop,gvr,generator-verifier-reviser,convergence,divergence-detection,infinite-loop,infinite-loop-detection,loop-detection,loop-stability,stability-monitor,early-stopping,max-iterations,barkhausen,barkhausen-criterion,control-theory,feedback-loop,feedback-loop-stability,loop-gain,rollback,best-so-far,langgraph,crewai,autogen,claude,anthropic,openai
10
+ Keywords: ai,agent,ai-agent,ai-agents,agentic,agentic-ai,llm,llm-agent,llm-orchestration,agent-orchestration,agent-loop,verify-revise,verify-revise-loop,gvr,generator-verifier-reviser,convergence,divergence-detection,infinite-loop,infinite-loop-detection,loop-detection,loop-stability,stability-monitor,early-stopping,max-iterations,barkhausen,barkhausen-criterion,control-theory,feedback-loop,feedback-loop-stability,loop-gain,rollback,best-so-far,langgraph,crewai,autogen,langchain,openai-agents,openai-agents-sdk,claude-agent-sdk,claude,anthropic,openai
11
11
  Classifier: Development Status :: 3 - Alpha
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: License :: OSI Approved :: Apache Software License
@@ -30,10 +30,19 @@ Provides-Extra: crewai
30
30
  Requires-Dist: crewai>=0.30; extra == "crewai"
31
31
  Provides-Extra: autogen
32
32
  Requires-Dist: autogen-agentchat>=0.4; extra == "autogen"
33
+ Provides-Extra: langchain
34
+ Requires-Dist: langchain>=1.0; extra == "langchain"
35
+ Provides-Extra: openai-agents
36
+ Requires-Dist: openai-agents>=0.1; extra == "openai-agents"
37
+ Provides-Extra: claude-agent-sdk
38
+ Requires-Dist: claude-agent-sdk>=0.2; extra == "claude-agent-sdk"
33
39
  Provides-Extra: all
34
40
  Requires-Dist: langgraph>=0.2; extra == "all"
35
41
  Requires-Dist: crewai>=0.30; extra == "all"
36
42
  Requires-Dist: autogen-agentchat>=0.4; extra == "all"
43
+ Requires-Dist: langchain>=1.0; extra == "all"
44
+ Requires-Dist: openai-agents>=0.1; extra == "all"
45
+ Requires-Dist: claude-agent-sdk>=0.2; extra == "all"
37
46
  Provides-Extra: examples
38
47
  Requires-Dist: anthropic>=0.40.0; extra == "examples"
39
48
  Dynamic: license-file
@@ -42,16 +51,16 @@ Dynamic: license-file
42
51
 
43
52
  **Barkhausen stability monitor for AI agent loops.**
44
53
 
45
- Replace `max_iterations=5` with a real-time loop-gain (`Aβ`) monitor that knows whether your agent loop is converging, stalling, oscillating, or diverging — and what to do in each case.
54
+ Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
46
55
 
47
56
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
48
57
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
49
58
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
50
- [![Tests](https://img.shields.io/badge/tests-119_passing-brightgreen.svg)](tests/)
59
+ [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
51
60
 
52
61
  **Home:** [loopgain.ai](https://loopgain.ai)
53
62
 
54
- Works for **any iterative AI workflow with a measurable error signal** — verify-revise loops, refinement passes, tool-use retry chains, RAG with self-correction, code-gen with linter feedback, multi-step reasoning loops. **Pre-built adapters for [LangGraph](#langgraph), [CrewAI](#crewai), and [AutoGen](#autogen-v04)**; drop-in via the raw API for **Claude Agent SDK** and any custom stack. Pure Python, no runtime dependencies.
63
+ Works for **any iterative AI workflow with a measurable error signal** — verify-revise loops, refinement passes, tool-use retry chains, RAG with self-correction, code-gen with linter feedback, multi-step reasoning loops. **Pre-built adapters for [LangGraph](#langgraph), [CrewAI](#crewai), [AutoGen](#autogen-v04), [LangChain](#langchain), [OpenAI Agents SDK](#openai-agents-sdk), and [Claude Agent SDK](#claude-agent-sdk)**; drop-in via the raw API for any custom stack. Pure Python, no runtime dependencies.
55
64
 
56
65
  **Keywords:** AI agent loops · agentic AI · infinite loop detection · divergence detection · early stopping · convergence · agent orchestration · LLM stability · generator-verifier-reviser · feedback-loop control.
57
66
 
@@ -88,7 +97,7 @@ while lg.should_continue():
88
97
  output = reviser.revise(output, errors)
89
98
 
90
99
  result = lg.result
91
- print(result.outcome) # "converged" | "oscillating" | "diverged" | "max_iterations"
100
+ print(result.outcome) # "converged" | "oscillating" | "diverged" | "stalled" | "max_iterations"
92
101
  print(result.best_output) # the lowest-error iteration's output
93
102
  print(result.iterations_used)
94
103
  print(result.gain_margin) # 1 / max(Aβ_smooth)
@@ -101,28 +110,32 @@ print(result.savings_vs_fixed_cap)
101
110
 
102
111
  ## How it works
103
112
 
104
- LoopGain measures empirical loop gain at every iteration, then smooths it with an EMA:
113
+ LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
105
114
 
106
115
  ```
107
- Aβ(n) = E(n) / E(n-1)
108
- Aβ_smooth = EMA(Aβ, w=3)
116
+ E_ratio = E_current / E_first # cumulative reduction
117
+ slope_log = OLS slope of log10(E) # geometric trend direction
118
+ slope_p = t-test p-value of slope # statistical significance
119
+ osc_std = std of detrended log10(E) # oscillation magnitude
109
120
  ```
110
121
 
111
- It classifies `Aβ_smooth` into five named bands:
122
+ It routes the trajectory into one of five named states:
112
123
 
113
- | `Aβ_smooth` range | State | Action |
124
+ | State | Condition | Action |
114
125
  | --- | --- | --- |
115
- | `< 0.3` | `FAST_CONVERGE` | Continue, predict ETA |
116
- | `0.3 – < 0.85` | `CONVERGING` | Continue, watch for upward drift |
117
- | `0.85 – < 0.95` | `STALLING` | Warn — diminishing returns |
118
- | `0.95 – 1.05` | `OSCILLATING` | Break — return best-so-far |
119
- | `> 1.05` | `DIVERGING` | Abort — roll back to best-so-far |
126
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
127
+ | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
128
+ | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
129
+ | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
130
+ | `DIVERGING` | positive slope with `p < 0.05` AND cumulative > 110% | Abort — roll back to best-so-far |
120
131
 
121
132
  Plus a short-circuit: if observed error drops at or below `target_error`, the loop stops immediately with state `TARGET_MET`. The default `target_error=0.0` short-circuits on exactly zero error — the natural completion signal for verifier-driven loops. Pass `target_error=None` to disable the short-circuit and rely on stability detection alone.
122
133
 
123
- The `±0.05` noise band around `Aβ=1` absorbs stochastic jitter from agent outputs without triggering false-positive aborts. The `0.85` `STALLING` boundary is an early warning — by the time `Aβ` crosses `1.0`, you've already wasted iterations.
134
+ The decision is **conservative by design**: requiring both statistical significance and meaningful cumulative motion before terminating prevents false-positive aborts on noisy real-LLM error series. Validated at 98.8% macro-averaged accuracy across 5 regimes on N=1000 deterministic-mock trajectories (see `RESULTS_v2_classifier.md`). The STALLING ceiling of ~94% is the t-test's irreducible 5% type-I error rate, not a classifier weakness.
124
135
 
125
- These threshold defaults are derived from the Barkhausen-stability analysis and serve as reasonable starting points. Tune them per domain (via the `ThresholdBands` argument) once you have production traces.
136
+ **Recommended minimum: 6 iterations** for reliable trend significance. At n≤4 the t-test is severely underpowered (df=2 requires |t|>4.3 for p<0.05) — the classifier conservatively falls back to STALLING when evidence is thin. The thresholds are derived analytically (control theory + statistical convention), not fitted; tune them per domain via the `TrajectoryThresholds` argument once you have production traces.
137
+
138
+ **Legacy single-feature classifier:** the original v0.1 single-Aβ-band classifier (thresholds 0.3 / 0.85 / 0.95 / 1.05) is still available via `LoopGain(classifier='legacy_bands')` for callers that have empirically tuned the bands to a specific workload.
126
139
 
127
140
  ---
128
141
 
@@ -154,14 +167,16 @@ This transforms divergence detection from "abort with garbage" into "abort with
154
167
 
155
168
  ## API reference
156
169
 
157
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, smoothing_window=3, assumed_fixed_cap=10)`
170
+ ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
158
171
 
159
172
  Construct the monitor.
160
173
 
161
174
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
162
175
  - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
163
- - `thresholds` — Custom `ThresholdBands` if defaults don't fit your domain.
164
- - `smoothing_window` — EMA window for the smoothed Aβ. Default 3.
176
+ - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
177
+ - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
178
+ - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
179
+ - `smoothing_window` — EMA window for the smoothed Aβ series (always maintained for visualization, regardless of classifier choice). Default 3.
165
180
  - `assumed_fixed_cap` — Used to compute `savings_vs_fixed_cap`. Default 10.
166
181
 
167
182
  ### `lg.observe(errors, output=None) -> str`
@@ -174,7 +189,7 @@ Returns `False` once a terminal state fires.
174
189
 
175
190
  ### `lg.state -> str`
176
191
 
177
- Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`.
192
+ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`. The corresponding terminal `result.outcome` values are `converged`, `oscillating`, `diverged`, `stalled` (v0.2 trajectory mode only — STALLING terminating after 2 consecutive readings), `max_iterations`, or `in_progress`.
178
193
 
179
194
  ### `lg.eta -> int | None`
180
195
 
@@ -224,6 +239,32 @@ What is sent: state transitions, Aβ summary (min/max/median), gain margin, roll
224
239
 
225
240
  The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. The [receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are both open-source — self-host to keep telemetry fully under your control.
226
241
 
242
+ > **This is not the same as anonymous usage telemetry.** `send_telemetry` sends *your* loop data to *your* dashboard, and only when you call it. There's a separate, opt-in **funnel** telemetry described below. The two never share data or code.
243
+
244
+ ---
245
+
246
+ ## Anonymous funnel telemetry (opt-in, off by default)
247
+
248
+ LoopGain can report **anonymous usage counts** so a solo maintainer can tell whether the library is actually being used — install → first `observe()` → recurring use. **It is opt-in and default-decline: nothing is sent unless you explicitly turn it on.**
249
+
250
+ ```bash
251
+ loopgain telemetry --show # status + exactly what would be sent
252
+ loopgain telemetry --enable # opt in (or: export LOOPGAIN_TELEMETRY=1)
253
+ loopgain telemetry --disable # opt out (or: export LOOPGAIN_TELEMETRY=0)
254
+ ```
255
+
256
+ `DO_NOT_TRACK=1` is honored as a hard opt-out, and CI environments are auto-detected and declined silently. When enabled, payloads carry only a locally-generated random id (not derived from your machine), hour-bucketed timestamps, library/Python/OS versions, the adapter in use, and a coarse outcome count. **Prompts, outputs, error contents, keys, paths, and IPs are never collected.** Delivery is batched, async, https-only, and fail-silent — it can never break your loop. Full details and the privacy contract: **[TELEMETRY.md](TELEMETRY.md)**.
257
+
258
+ ---
259
+
260
+ ## Command-line interface
261
+
262
+ ```bash
263
+ loopgain --version # or: loopgain version
264
+ loopgain telemetry --show # inspect / control anonymous funnel telemetry
265
+ python -m loopgain telemetry --show # equivalent, without the console script
266
+ ```
267
+
227
268
  ---
228
269
 
229
270
  ## Framework adapters
@@ -231,10 +272,13 @@ The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. Th
231
272
  Thin wrappers under `loopgain.integrations` drive each major agent framework's iteration with a `LoopGain` monitor and auto-stamp `framework="<name>"` on telemetry. The frameworks themselves are **optional dependencies** — install the extra you need:
232
273
 
233
274
  ```bash
234
- pip install 'loopgain[langgraph]' # LangGraph
235
- pip install 'loopgain[crewai]' # CrewAI
236
- pip install 'loopgain[autogen]' # AutoGen v0.4+
237
- pip install 'loopgain[all]' # all three
275
+ pip install 'loopgain[langgraph]' # LangGraph
276
+ pip install 'loopgain[crewai]' # CrewAI
277
+ pip install 'loopgain[autogen]' # AutoGen v0.4+
278
+ pip install 'loopgain[langchain]' # LangChain (create_agent / AgentExecutor)
279
+ pip install 'loopgain[openai-agents]' # OpenAI Agents SDK
280
+ pip install 'loopgain[claude-agent-sdk]' # Anthropic Claude Agent SDK
281
+ pip install 'loopgain[all]' # all six
238
282
  ```
239
283
 
240
284
  All adapters take a `LoopGain` instance plus an `error_fn` you provide — the framework doesn't know what your error signal is, so the adapter doesn't either. `error_fn` returns a non-negative number (or `None` to skip an iteration).
@@ -321,15 +365,120 @@ lg.send_telemetry(
321
365
 
322
366
  Pass a `cancellation_token` to `adapter.run(...)` and the adapter will cancel it when LoopGain reaches a terminal state (target met, oscillation, divergence). The legacy v0.2 `ConversableAgent.initiate_chat` API is **not** supported — use the v0.4 event-driven runtime.
323
367
 
368
+ ### LangChain
369
+
370
+ Duck-types against any LangChain agent that exposes `.stream(input, **kwargs)` / `.astream(input, **kwargs)` — both the current `langchain.agents.create_agent()` (v1+) and the legacy `AgentExecutor`. The adapter forwards `**stream_kwargs` verbatim, so the chunk shape your `error_fn` sees is the one your agent emits.
371
+
372
+ ```python
373
+ from langchain.agents import create_agent
374
+ from loopgain import LoopGain
375
+ from loopgain.integrations import LangChainAdapter
376
+
377
+ agent = create_agent(model="gpt-5-nano", tools=[get_weather])
378
+ lg = LoopGain(target_error=0.0, max_iterations=20)
379
+
380
+ def error_fn(chunk):
381
+ if chunk.get("type") != "updates":
382
+ return None
383
+ # Count unresolved tool calls; drops to 0 once the agent stops calling tools.
384
+ return sum(
385
+ 1 for _, update in chunk["data"].items()
386
+ if getattr(update.get("messages", [None])[-1], "tool_calls", None)
387
+ )
388
+
389
+ adapter = LangChainAdapter(lg=lg, error_fn=error_fn)
390
+ final = adapter.run(
391
+ agent,
392
+ {"messages": [{"role": "user", "content": "What's the weather?"}]},
393
+ stream_mode="updates",
394
+ version="v2",
395
+ )
396
+
397
+ lg.send_telemetry(
398
+ endpoint=...,
399
+ token=...,
400
+ framework=adapter.framework_name, # "langchain"
401
+ )
402
+ ```
403
+
404
+ For legacy `AgentExecutor`: just drop the `stream_mode` / `version` kwargs; each yielded chunk is an `AddableDict` per step (parse `intermediate_steps` or the terminal `output` key in your `error_fn`).
405
+
406
+ ### OpenAI Agents SDK
407
+
408
+ Wraps `Runner.run_streamed(agent, input).stream_events()`. The SDK is async-first; the adapter mirrors that. A `run_sync` helper wraps the async path with `asyncio.run` for synchronous callers.
409
+
410
+ ```python
411
+ from agents import Agent, function_tool
412
+ from loopgain import LoopGain
413
+ from loopgain.integrations import OpenAIAgentsAdapter
414
+
415
+ agent = Agent(name="Reviser", instructions="...", tools=[...])
416
+
417
+ lg = LoopGain(target_error=0.0, max_iterations=20)
418
+
419
+ def error_fn(event):
420
+ # Default observes only run_item_stream_event; pull the verifier's
421
+ # reported failure count off tool outputs.
422
+ if event.item.type == "tool_call_output_item":
423
+ return float(event.item.output.get("failures", 0))
424
+ return None
425
+
426
+ adapter = OpenAIAgentsAdapter(lg=lg, error_fn=error_fn)
427
+ result = await adapter.run(agent, input="Fix the bug.")
428
+ print(result.final_output)
429
+
430
+ lg.send_telemetry(
431
+ endpoint=...,
432
+ token=...,
433
+ framework=adapter.framework_name, # "openai-agents"
434
+ )
435
+ ```
436
+
437
+ By default the adapter only forwards `run_item_stream_event` to `error_fn` — pass `observe_event_types=None` to see every event (including raw token deltas and agent-handoff notifications). When LoopGain reaches a terminal state, the adapter best-effort calls `.cancel()` on the underlying `RunResultStreaming`.
438
+
439
+ ### Claude Agent SDK
440
+
441
+ Wraps Anthropic's `claude_agent_sdk.query(prompt=..., options=...)` async iterator. By default observes only `AssistantMessage` (skips `UserMessage` / `SystemMessage` / `ResultMessage`); override with `observe_message_types=None` or a custom tuple.
442
+
443
+ ```python
444
+ from claude_agent_sdk import ClaudeAgentOptions, TextBlock
445
+ from loopgain import LoopGain
446
+ from loopgain.integrations import ClaudeAgentSDKAdapter
447
+
448
+ def error_fn(message):
449
+ # Count `FAIL:` markers a self-verifying persona emits.
450
+ for block in getattr(message, "content", []):
451
+ if isinstance(block, TextBlock):
452
+ return float(block.text.count("FAIL:"))
453
+ return None
454
+
455
+ lg = LoopGain(target_error=0.0, max_iterations=20)
456
+ adapter = ClaudeAgentSDKAdapter(lg=lg, error_fn=error_fn)
457
+
458
+ options = ClaudeAgentOptions(system_prompt="Self-verify each draft.")
459
+ result = await adapter.run(
460
+ prompt="Write a haiku about feedback loops.",
461
+ options=options,
462
+ )
463
+
464
+ lg.send_telemetry(
465
+ endpoint=...,
466
+ token=...,
467
+ framework=adapter.framework_name, # "claude-agent-sdk"
468
+ )
469
+ ```
470
+
471
+ For the bidirectional `ClaudeSDKClient` use case, pass `message_iterator=client.receive_messages()` instead of `prompt=...`.
472
+
324
473
  ### Custom integrations
325
474
 
326
- For frameworks without an adapter, the raw `LoopGain.observe()` API works against any iterable. The adapters are 100-200 lines each — copy one of `loopgain/integrations/{langgraph,crewai,autogen}.py` as a starting point.
475
+ For frameworks without an adapter, the raw `LoopGain.observe()` API works against any iterable. The adapters are 100-200 lines each — copy one of `loopgain/integrations/{langgraph,crewai,autogen,langchain,openai_agents,claude_agent_sdk}.py` as a starting point.
327
476
 
328
477
  ---
329
478
 
330
479
  ## Status
331
480
 
332
- **Initial public release.** Core library shipped (current version: see the PyPI badge at the top). Framework adapters (LangGraph, CrewAI, AutoGen) are installable as optional extras. The cloud-aggregator [telemetry receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are live as separate open-source repos. The math and the API surface are stable.
481
+ **Initial public release.** Core library shipped (current version: see the PyPI badge at the top). Framework adapters (LangGraph, CrewAI, AutoGen, LangChain, OpenAI Agents SDK, Claude Agent SDK) are installable as optional extras. The cloud-aggregator [telemetry receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are live as separate open-source repos. The math and the API surface are stable.
333
482
 
334
483
  This is alpha software. The API may break before 1.0 if production usage surfaces design issues; pin the version.
335
484
 
@@ -2,16 +2,16 @@
2
2
 
3
3
  **Barkhausen stability monitor for AI agent loops.**
4
4
 
5
- Replace `max_iterations=5` with a real-time loop-gain (`Aβ`) monitor that knows whether your agent loop is converging, stalling, oscillating, or diverging — and what to do in each case.
5
+ Replace `max_iterations=5` with a real-time trajectory classifier that reads four features off the loop's error series and routes it into one of five named states — knowing whether your agent loop is converging, stalling, oscillating, or diverging, and what to do in each case.
6
6
 
7
7
  [![PyPI](https://img.shields.io/pypi/v/loopgain.svg)](https://pypi.org/project/loopgain/)
8
8
  [![Python](https://img.shields.io/pypi/pyversions/loopgain.svg)](https://pypi.org/project/loopgain/)
9
9
  [![License](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE)
10
- [![Tests](https://img.shields.io/badge/tests-119_passing-brightgreen.svg)](tests/)
10
+ [![Tests](https://img.shields.io/badge/tests-157_passing-brightgreen.svg)](tests/)
11
11
 
12
12
  **Home:** [loopgain.ai](https://loopgain.ai)
13
13
 
14
- Works for **any iterative AI workflow with a measurable error signal** — verify-revise loops, refinement passes, tool-use retry chains, RAG with self-correction, code-gen with linter feedback, multi-step reasoning loops. **Pre-built adapters for [LangGraph](#langgraph), [CrewAI](#crewai), and [AutoGen](#autogen-v04)**; drop-in via the raw API for **Claude Agent SDK** and any custom stack. Pure Python, no runtime dependencies.
14
+ Works for **any iterative AI workflow with a measurable error signal** — verify-revise loops, refinement passes, tool-use retry chains, RAG with self-correction, code-gen with linter feedback, multi-step reasoning loops. **Pre-built adapters for [LangGraph](#langgraph), [CrewAI](#crewai), [AutoGen](#autogen-v04), [LangChain](#langchain), [OpenAI Agents SDK](#openai-agents-sdk), and [Claude Agent SDK](#claude-agent-sdk)**; drop-in via the raw API for any custom stack. Pure Python, no runtime dependencies.
15
15
 
16
16
  **Keywords:** AI agent loops · agentic AI · infinite loop detection · divergence detection · early stopping · convergence · agent orchestration · LLM stability · generator-verifier-reviser · feedback-loop control.
17
17
 
@@ -48,7 +48,7 @@ while lg.should_continue():
48
48
  output = reviser.revise(output, errors)
49
49
 
50
50
  result = lg.result
51
- print(result.outcome) # "converged" | "oscillating" | "diverged" | "max_iterations"
51
+ print(result.outcome) # "converged" | "oscillating" | "diverged" | "stalled" | "max_iterations"
52
52
  print(result.best_output) # the lowest-error iteration's output
53
53
  print(result.iterations_used)
54
54
  print(result.gain_margin) # 1 / max(Aβ_smooth)
@@ -61,28 +61,32 @@ print(result.savings_vs_fixed_cap)
61
61
 
62
62
  ## How it works
63
63
 
64
- LoopGain measures empirical loop gain at every iteration, then smooths it with an EMA:
64
+ LoopGain measures empirical loop gain (`Aβ = E(n) / E(n-1)`) at every iteration and exposes it as a smoothed time series for visualization. The decision engine, however, classifies the **full error trajectory** using four features:
65
65
 
66
66
  ```
67
- Aβ(n) = E(n) / E(n-1)
68
- Aβ_smooth = EMA(Aβ, w=3)
67
+ E_ratio = E_current / E_first # cumulative reduction
68
+ slope_log = OLS slope of log10(E) # geometric trend direction
69
+ slope_p = t-test p-value of slope # statistical significance
70
+ osc_std = std of detrended log10(E) # oscillation magnitude
69
71
  ```
70
72
 
71
- It classifies `Aβ_smooth` into five named bands:
73
+ It routes the trajectory into one of five named states:
72
74
 
73
- | `Aβ_smooth` range | State | Action |
75
+ | State | Condition | Action |
74
76
  | --- | --- | --- |
75
- | `< 0.3` | `FAST_CONVERGE` | Continue, predict ETA |
76
- | `0.3 – < 0.85` | `CONVERGING` | Continue, watch for upward drift |
77
- | `0.85 – < 0.95` | `STALLING` | Warn — diminishing returns |
78
- | `0.95 – 1.05` | `OSCILLATING` | Break — return best-so-far |
79
- | `> 1.05` | `DIVERGING` | Abort — roll back to best-so-far |
77
+ | `FAST_CONVERGE` | cumulative reduction to ≤ 10% of E_first | Continue, predict ETA |
78
+ | `CONVERGING` | negative slope with `p < 0.05`, OR cumulative ≤ 50% | Continue, watch for upward drift |
79
+ | `STALLING` | no significant slope, no detectable oscillation | Stop after 2 consecutive readings — return best-so-far |
80
+ | `OSCILLATING` | high residual variance with flat trend | Stop — return best-so-far |
81
+ | `DIVERGING` | positive slope with `p < 0.05` AND cumulative > 110% | Abort — roll back to best-so-far |
80
82
 
81
83
  Plus a short-circuit: if observed error drops at or below `target_error`, the loop stops immediately with state `TARGET_MET`. The default `target_error=0.0` short-circuits on exactly zero error — the natural completion signal for verifier-driven loops. Pass `target_error=None` to disable the short-circuit and rely on stability detection alone.
82
84
 
83
- The `±0.05` noise band around `Aβ=1` absorbs stochastic jitter from agent outputs without triggering false-positive aborts. The `0.85` `STALLING` boundary is an early warning — by the time `Aβ` crosses `1.0`, you've already wasted iterations.
85
+ The decision is **conservative by design**: requiring both statistical significance and meaningful cumulative motion before terminating prevents false-positive aborts on noisy real-LLM error series. Validated at 98.8% macro-averaged accuracy across 5 regimes on N=1000 deterministic-mock trajectories (see `RESULTS_v2_classifier.md`). The STALLING ceiling of ~94% is the t-test's irreducible 5% type-I error rate, not a classifier weakness.
84
86
 
85
- These threshold defaults are derived from the Barkhausen-stability analysis and serve as reasonable starting points. Tune them per domain (via the `ThresholdBands` argument) once you have production traces.
87
+ **Recommended minimum: 6 iterations** for reliable trend significance. At n≤4 the t-test is severely underpowered (df=2 requires |t|>4.3 for p<0.05) — the classifier conservatively falls back to STALLING when evidence is thin. The thresholds are derived analytically (control theory + statistical convention), not fitted; tune them per domain via the `TrajectoryThresholds` argument once you have production traces.
88
+
89
+ **Legacy single-feature classifier:** the original v0.1 single-Aβ-band classifier (thresholds 0.3 / 0.85 / 0.95 / 1.05) is still available via `LoopGain(classifier='legacy_bands')` for callers that have empirically tuned the bands to a specific workload.
86
90
 
87
91
  ---
88
92
 
@@ -114,14 +118,16 @@ This transforms divergence detection from "abort with garbage" into "abort with
114
118
 
115
119
  ## API reference
116
120
 
117
- ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, smoothing_window=3, assumed_fixed_cap=10)`
121
+ ### `LoopGain(target_error=0.0, max_iterations=None, thresholds=None, trajectory_thresholds=None, classifier='trajectory', smoothing_window=3, assumed_fixed_cap=10)`
118
122
 
119
123
  Construct the monitor.
120
124
 
121
125
  - `target_error` — Stop when an observed error drops at or below this. Default `0.0` short-circuits on exactly zero error (the natural completion signal for verifier-driven loops). Pass `None` to disable the short-circuit entirely.
122
126
  - `max_iterations` — Hard safety cap. Default `None` (rely on stability detection). Recommended ~20–50 for production.
123
- - `thresholds` — Custom `ThresholdBands` if defaults don't fit your domain.
124
- - `smoothing_window` — EMA window for the smoothed Aβ. Default 3.
127
+ - `thresholds` — Custom `ThresholdBands` for the legacy single-Aβ-band classifier. Ignored when `classifier='trajectory'`.
128
+ - `trajectory_thresholds` — Custom `TrajectoryThresholds` for the multi-feature classifier (the default). Override only with workload-specific evidence.
129
+ - `classifier` — `'trajectory'` (default, v0.2 multi-feature classifier) or `'legacy_bands'` (v0.1 single-Aβ-band classifier).
130
+ - `smoothing_window` — EMA window for the smoothed Aβ series (always maintained for visualization, regardless of classifier choice). Default 3.
125
131
  - `assumed_fixed_cap` — Used to compute `savings_vs_fixed_cap`. Default 10.
126
132
 
127
133
  ### `lg.observe(errors, output=None) -> str`
@@ -134,7 +140,7 @@ Returns `False` once a terminal state fires.
134
140
 
135
141
  ### `lg.state -> str`
136
142
 
137
- Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`.
143
+ Current state name. One of `INIT`, `FAST_CONVERGE`, `CONVERGING`, `STALLING`, `OSCILLATING`, `DIVERGING`, `TARGET_MET`, `MAX_ITERATIONS`. The corresponding `result.outcome` values are `converged`, `oscillating`, `diverged`, `stalled` (v0.2 trajectory mode only — STALLING terminates the loop after 2 consecutive readings), `max_iterations` (all terminal), or `in_progress`.
138
144
 
139
145
  ### `lg.eta -> int | None`
140
146
 
@@ -184,6 +190,32 @@ What is sent: state transitions, Aβ summary (min/max/median), gain margin, roll
184
190
 
185
191
  The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. The [receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are both open-source — self-host to keep telemetry fully under your control.
186
192
 
193
+ > **This is not the same as anonymous usage telemetry.** `send_telemetry` sends *your* loop data to *your* dashboard, and only when you call it. The separate, opt-in **funnel** telemetry is described below. The two never share data or code.
194
+
195
+ ---
196
+
197
+ ## Anonymous funnel telemetry (opt-in, off by default)
198
+
199
+ LoopGain can report **anonymous usage counts** so a solo maintainer can tell whether the library is actually being used — install → first `observe()` → recurring use. **It is opt-in and default-decline: nothing is sent unless you explicitly turn it on.**
200
+
201
+ ```bash
202
+ loopgain telemetry --show # status + exactly what would be sent
203
+ loopgain telemetry --enable # opt in (or: export LOOPGAIN_TELEMETRY=1)
204
+ loopgain telemetry --disable # opt out (or: export LOOPGAIN_TELEMETRY=0)
205
+ ```
206
+
207
+ `DO_NOT_TRACK=1` is honored as a hard opt-out, and CI environments are auto-detected and declined silently. When enabled, payloads carry only a locally-generated random id (not derived from your machine), hour-bucketed timestamps, library/Python/OS versions, the adapter in use, and a coarse outcome count. **Prompts, outputs, error contents, keys, paths, and IPs are never collected.** Delivery is batched, async, https-only, and fail-silent — it can never break your loop. Full details and the privacy contract: **[TELEMETRY.md](TELEMETRY.md)**.
208
+
209
+ ---
210
+
211
+ ## Command-line interface
212
+
213
+ ```bash
214
+ loopgain --version # or: loopgain version
215
+ loopgain telemetry --show # inspect / control anonymous funnel telemetry
216
+ python -m loopgain telemetry --show # equivalent, without the console script
217
+ ```
218
+
187
219
  ---
188
220
 
189
221
  ## Framework adapters
@@ -191,10 +223,13 @@ The hosted endpoint at `telemetry.loopgain.ai` is one acceptable destination. Th
191
223
  Thin wrappers under `loopgain.integrations` drive each major agent framework's iteration with a `LoopGain` monitor and auto-stamp `framework="<name>"` on telemetry. The frameworks themselves are **optional dependencies** — install the extra you need:
192
224
 
193
225
  ```bash
194
- pip install 'loopgain[langgraph]' # LangGraph
195
- pip install 'loopgain[crewai]' # CrewAI
196
- pip install 'loopgain[autogen]' # AutoGen v0.4+
197
- pip install 'loopgain[all]' # all three
226
+ pip install 'loopgain[langgraph]' # LangGraph
227
+ pip install 'loopgain[crewai]' # CrewAI
228
+ pip install 'loopgain[autogen]' # AutoGen v0.4+
229
+ pip install 'loopgain[langchain]' # LangChain (create_agent / AgentExecutor)
230
+ pip install 'loopgain[openai-agents]' # OpenAI Agents SDK
231
+ pip install 'loopgain[claude-agent-sdk]' # Anthropic Claude Agent SDK
232
+ pip install 'loopgain[all]' # all six
198
233
  ```
199
234
 
200
235
  All adapters take a `LoopGain` instance plus an `error_fn` you provide — the framework doesn't know what your error signal is, so the adapter doesn't either. `error_fn` returns a non-negative number (or `None` to skip an iteration).
@@ -281,15 +316,120 @@ lg.send_telemetry(
281
316
 
282
317
  Pass a `cancellation_token` to `adapter.run(...)` and the adapter will cancel it when LoopGain reaches a terminal state (target met, oscillation, divergence). The legacy v0.2 `ConversableAgent.initiate_chat` API is **not** supported — use the v0.4 event-driven runtime.
283
318
 
319
+ ### LangChain
320
+
321
+ Duck-types against any LangChain agent that exposes `.stream(input, **kwargs)` / `.astream(input, **kwargs)` — both the current `langchain.agents.create_agent()` (v1+) and the legacy `AgentExecutor`. The adapter forwards `**stream_kwargs` verbatim, so the chunk shape your `error_fn` sees is the one your agent emits.
322
+
323
+ ```python
324
+ from langchain.agents import create_agent
325
+ from loopgain import LoopGain
326
+ from loopgain.integrations import LangChainAdapter
327
+
328
+ agent = create_agent(model="gpt-5-nano", tools=[get_weather])
329
+ lg = LoopGain(target_error=0.0, max_iterations=20)
330
+
331
+ def error_fn(chunk):
332
+ if chunk.get("type") != "updates":
333
+ return None
334
+ # Count unresolved tool calls; drops to 0 once the agent stops calling tools.
335
+ return sum(
336
+ 1 for _, update in chunk["data"].items()
337
+ if getattr(update.get("messages", [None])[-1], "tool_calls", None)
338
+ )
339
+
340
+ adapter = LangChainAdapter(lg=lg, error_fn=error_fn)
341
+ final = adapter.run(
342
+ agent,
343
+ {"messages": [{"role": "user", "content": "What's the weather?"}]},
344
+ stream_mode="updates",
345
+ version="v2",
346
+ )
347
+
348
+ lg.send_telemetry(
349
+ endpoint=...,
350
+ token=...,
351
+ framework=adapter.framework_name, # "langchain"
352
+ )
353
+ ```
354
+
355
+ For the legacy `AgentExecutor`, drop the `stream_mode` / `version` kwargs; each yielded chunk is then an `AddableDict` per step (parse `intermediate_steps` or the terminal `output` key in your `error_fn`).
356
+
357
+ ### OpenAI Agents SDK
358
+
359
+ Wraps `Runner.run_streamed(agent, input).stream_events()`. The SDK is async-first; the adapter mirrors that. A `run_sync` helper wraps the async path with `asyncio.run` for synchronous callers.
360
+
361
+ ```python
362
+ from agents import Agent, function_tool
363
+ from loopgain import LoopGain
364
+ from loopgain.integrations import OpenAIAgentsAdapter
365
+
366
+ agent = Agent(name="Reviser", instructions="...", tools=[...])
367
+
368
+ lg = LoopGain(target_error=0.0, max_iterations=20)
369
+
370
+ def error_fn(event):
371
+ # Default observes only run_item_stream_event; pull the verifier's
372
+ # reported failure count off tool outputs.
373
+ if event.item.type == "tool_call_output_item":
374
+ return float(event.item.output.get("failures", 0))
375
+ return None
376
+
377
+ adapter = OpenAIAgentsAdapter(lg=lg, error_fn=error_fn)
378
+ result = await adapter.run(agent, input="Fix the bug.")
379
+ print(result.final_output)
380
+
381
+ lg.send_telemetry(
382
+ endpoint=...,
383
+ token=...,
384
+ framework=adapter.framework_name, # "openai-agents"
385
+ )
386
+ ```
387
+
388
+ By default the adapter only forwards `run_item_stream_event` to `error_fn` — pass `observe_event_types=None` to see every event (including raw token deltas and agent-handoff notifications). When LoopGain reaches a terminal state, the adapter makes a best-effort call to `.cancel()` on the underlying `RunResultStreaming`.
389
+
390
+ ### Claude Agent SDK
391
+
392
+ Wraps Anthropic's `claude_agent_sdk.query(prompt=..., options=...)` async iterator. By default the adapter observes only `AssistantMessage` (it skips `UserMessage` / `SystemMessage` / `ResultMessage`); override this with `observe_message_types=None` or a custom tuple.
393
+
394
+ ```python
395
+ from claude_agent_sdk import ClaudeAgentOptions, TextBlock
396
+ from loopgain import LoopGain
397
+ from loopgain.integrations import ClaudeAgentSDKAdapter
398
+
399
+ def error_fn(message):
400
+ # Count `FAIL:` markers a self-verifying persona emits.
401
+ for block in getattr(message, "content", []):
402
+ if isinstance(block, TextBlock):
403
+ return float(block.text.count("FAIL:"))
404
+ return None
405
+
406
+ lg = LoopGain(target_error=0.0, max_iterations=20)
407
+ adapter = ClaudeAgentSDKAdapter(lg=lg, error_fn=error_fn)
408
+
409
+ options = ClaudeAgentOptions(system_prompt="Self-verify each draft.")
410
+ result = await adapter.run(
411
+ prompt="Write a haiku about feedback loops.",
412
+ options=options,
413
+ )
414
+
415
+ lg.send_telemetry(
416
+ endpoint=...,
417
+ token=...,
418
+ framework=adapter.framework_name, # "claude-agent-sdk"
419
+ )
420
+ ```
421
+
422
+ For the bidirectional `ClaudeSDKClient` use case, pass `message_iterator=client.receive_messages()` instead of `prompt=...`.
423
+
284
424
  ### Custom integrations
285
425
 
286
- For frameworks without an adapter, the raw `LoopGain.observe()` API works against any iterable. The adapters are 100-200 lines each — copy one of `loopgain/integrations/{langgraph,crewai,autogen}.py` as a starting point.
426
+ For frameworks without an adapter, the raw `LoopGain.observe()` API works against any iterable. The adapters are 100-200 lines each — copy one of `loopgain/integrations/{langgraph,crewai,autogen,langchain,openai_agents,claude_agent_sdk}.py` as a starting point.
287
427
 
288
428
  ---
289
429
 
290
430
  ## Status
291
431
 
292
- **Initial public release.** Core library shipped (current version: see the PyPI badge at the top). Framework adapters (LangGraph, CrewAI, AutoGen) are installable as optional extras. The cloud-aggregator [telemetry receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are live as separate open-source repos. The math and the API surface are stable.
432
+ **Initial public release.** Core library shipped (current version: see the PyPI badge at the top). Framework adapters (LangGraph, CrewAI, AutoGen, LangChain, OpenAI Agents SDK, Claude Agent SDK) are installable as optional extras. The cloud-aggregator [telemetry receiver](https://github.com/loopgain-ai/telemetry-receiver) and [dashboard](https://github.com/loopgain-ai/dashboard) are live as separate open-source repos. The math and the API surface are stable.
293
433
 
294
434
  This is alpha software. The API may break before 1.0 if production usage surfaces design issues; pin the version.
295
435
 
@@ -10,6 +10,12 @@ Public API:
10
10
  """
11
11
 
12
12
  from loopgain._version import __version__
13
+ from loopgain.classifier import (
14
+ TrajectoryFeatures,
15
+ TrajectoryThresholds,
16
+ classify_trajectory,
17
+ extract_features,
18
+ )
13
19
  from loopgain.core import (
14
20
  LoopGain,
15
21
  LoopGainResult,
@@ -29,6 +35,10 @@ __all__ = [
29
35
  "LoopGain",
30
36
  "LoopGainResult",
31
37
  "ThresholdBands",
38
+ "TrajectoryThresholds",
39
+ "TrajectoryFeatures",
40
+ "classify_trajectory",
41
+ "extract_features",
32
42
  "INIT",
33
43
  "FAST_CONVERGE",
34
44
  "CONVERGING",
@@ -0,0 +1,8 @@
1
+ """Enable ``python -m loopgain`` to invoke the CLI."""
2
+
3
+ import sys
4
+
5
+ from loopgain.cli import main
6
+
7
+ if __name__ == "__main__":
8
+ sys.exit(main())
@@ -0,0 +1,10 @@
1
+ """Single source of truth for the package version.
2
+
3
+ ``loopgain/__init__.py``, ``loopgain/telemetry.py`` (product receiver), and
4
+ ``loopgain/funnel.py`` (opt-in funnel telemetry) all import ``__version__``
5
+ from here so the value never drifts between ``__version__`` and the
6
+ ``library_version`` field on any telemetry payload. Update this file (and
7
+ ``pyproject.toml``) for each release.
8
+ """
9
+
10
+ __version__ = "0.3.0"