agentspend-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. agentspend_sdk-0.1.0.dist-info/METADATA +131 -0
  2. agentspend_sdk-0.1.0.dist-info/RECORD +44 -0
  3. agentspend_sdk-0.1.0.dist-info/WHEEL +4 -0
  4. agentspend_sdk-0.1.0.dist-info/entry_points.txt +3 -0
  5. token_aud/__init__.py +3 -0
  6. token_aud/agent/__init__.py +41 -0
  7. token_aud/agent/adaptive.py +228 -0
  8. token_aud/agent/loop_guard.py +102 -0
  9. token_aud/agent/policy.py +163 -0
  10. token_aud/agent/router.py +253 -0
  11. token_aud/agent/runtime.py +358 -0
  12. token_aud/agent/step_classifier.py +61 -0
  13. token_aud/agent/telemetry.py +155 -0
  14. token_aud/api/__init__.py +0 -0
  15. token_aud/api/app.py +149 -0
  16. token_aud/api/routes/__init__.py +0 -0
  17. token_aud/api/serve.py +12 -0
  18. token_aud/cli/__init__.py +0 -0
  19. token_aud/cli/analyze.py +213 -0
  20. token_aud/cli/configure.py +113 -0
  21. token_aud/cli/main.py +24 -0
  22. token_aud/config.py +42 -0
  23. token_aud/core/__init__.py +0 -0
  24. token_aud/core/auditor.py +325 -0
  25. token_aud/core/judge.py +204 -0
  26. token_aud/core/pricing.py +139 -0
  27. token_aud/core/sampler.py +312 -0
  28. token_aud/core/savings.py +286 -0
  29. token_aud/data/__init__.py +0 -0
  30. token_aud/data/default_routing_policy.yaml +185 -0
  31. token_aud/data/pricing.json +349 -0
  32. token_aud/db/__init__.py +0 -0
  33. token_aud/db/session.py +31 -0
  34. token_aud/models/__init__.py +0 -0
  35. token_aud/models/db.py +77 -0
  36. token_aud/models/schemas.py +99 -0
  37. token_aud/parsers/__init__.py +10 -0
  38. token_aud/parsers/anthropic.py +50 -0
  39. token_aud/parsers/base.py +198 -0
  40. token_aud/parsers/generic_csv.py +43 -0
  41. token_aud/parsers/openai.py +50 -0
  42. token_aud/reports/__init__.py +6 -0
  43. token_aud/reports/html.py +219 -0
  44. token_aud/reports/terminal.py +118 -0
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentspend-sdk
3
+ Version: 0.1.0
4
+ Summary: AgentSpend — runtime cost optimizer for AI agents. Route LLM calls to the cheapest capable model with fallbacks, loop guards, and telemetry.
5
+ Requires-Python: <3.14,>=3.12
6
+ Requires-Dist: aiosqlite>=0.20
7
+ Requires-Dist: alembic>=1.14
8
+ Requires-Dist: fastapi>=0.115
9
+ Requires-Dist: google-auth>=2.48.0
10
+ Requires-Dist: google-cloud-aiplatform>=1.139.0
11
+ Requires-Dist: litellm>=1.50
12
+ Requires-Dist: pandas>=2.2
13
+ Requires-Dist: pydantic-settings>=2.0
14
+ Requires-Dist: pydantic>=2.0
15
+ Requires-Dist: python-multipart>=0.0.9
16
+ Requires-Dist: pyyaml>=6.0
17
+ Requires-Dist: rich>=13.0
18
+ Requires-Dist: sqlalchemy>=2.0
19
+ Requires-Dist: typer>=0.15
20
+ Requires-Dist: uvicorn[standard]>=0.32
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=9.0; extra == 'dev'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # token-aud
26
+
27
+ AI cost optimization toolkit for LLM workloads.
28
+
29
+ `token-aud` now includes two complementary workflows:
30
+
31
+ - **Audit mode (CLI/API):** Analyze historical usage logs and estimate savings opportunities with Student-Teacher-Judge sampling.
32
+ - **AgentSpend SDK:** Route live agent steps (`plan`, `reason`, `tool`, `verify`, `draft`, `summarize`) to cost/quality-appropriate models with fallbacks, loop guards, and telemetry.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ uv sync --no-editable
38
+ ```
39
+
40
+ For local development tooling (tests):
41
+
42
+ ```bash
43
+ uv sync --no-editable --extra dev
44
+ ```
45
+
46
+ ## Quick Start (AgentSpend)
47
+
48
+ Run these three commands from repo root:
49
+
50
+ ```bash
51
+ uv sync --no-editable --extra dev
52
+ uv run --no-sync python -m pytest tests/agent -q
53
+ uv run --no-sync python examples/agent_routing_demo.py
54
+ ```
55
+
56
+ Expected results:
57
+
58
+ - Agent tests pass.
59
+ - Demo prints routed step decisions, per-step telemetry, and total run cost.
60
+ - `agent_telemetry.jsonl` is generated locally.
61
+
62
+ ## AgentSpend Usage
63
+
64
+ ### 1) Default policy
65
+
66
+ ```python
67
+ from token_aud.agent import AgentSpend
68
+
69
+ agent = AgentSpend.default()
70
+ result = agent.route_call(
71
+ step="plan",
72
+ messages=[{"role": "user", "content": "Break this task into a plan"}],
73
+ )
74
+
75
+ print(result.model_used, result.cost_usd, result.content)
76
+ ```
77
+
78
+ ### 2) Custom policy YAML
79
+
80
+ ```python
81
+ from token_aud.agent import AgentSpend
82
+
83
+ agent = AgentSpend.from_yaml("routing_policy.yaml")
84
+ result = agent.route_call(
85
+ step="reason",
86
+ messages=[{"role": "user", "content": "Compare two architectures"}],
87
+ )
88
+
89
+ print(result.model_used, result.fallbacks_tried)
90
+ ```
91
+
92
+ Built-in default policy path:
93
+
94
+ - `src/token_aud/data/default_routing_policy.yaml`
95
+
96
+ ## AgentSpend Core Components
97
+
98
+ - `src/token_aud/agent/policy.py`: Pydantic policy schema + YAML loading
99
+ - `src/token_aud/agent/router.py`: deterministic model selection
100
+ - `src/token_aud/agent/runtime.py`: `route_call()` execution + fallbacks
101
+ - `src/token_aud/agent/loop_guard.py`: repeated-turn loop detection
102
+ - `src/token_aud/agent/telemetry.py`: JSONL/HTTP telemetry sinks
103
+ - `src/token_aud/agent/adaptive.py`: optional adaptive routing layer
104
+
105
+ ## AgentSpend Examples
106
+
107
+ - `examples/agent_routing_demo.py`: end-to-end routed run with telemetry
108
+ - `examples/custom_policy_demo.py`: loop escalation and hard-stop behavior
109
+ - `examples/framework_agnostic_integration.py`: generic agent-loop integration with explicit success feedback
110
+ - `scripts/summarize_telemetry.py`: convert `agent_telemetry.jsonl` into cost/fallback/latency summary
111
+
112
+ ```bash
113
+ uv run --no-sync python scripts/summarize_telemetry.py agent_telemetry.jsonl
114
+ ```
115
+
116
+ ## Audit CLI (legacy + still supported)
117
+
118
+ ```bash
119
+ uv run --no-sync token-aud --help
120
+ uv run --no-sync token-aud analyze sample_data.csv --dry-run
121
+ ```
122
+
123
+ ## Environment Variables
124
+
125
+ Common provider credentials:
126
+
127
+ - `OPENAI_API_KEY`
128
+ - `ANTHROPIC_API_KEY`
129
+ - `GEMINI_API_KEY` or `GOOGLE_API_KEY` (depending on provider path)
130
+
131
+ For Google Vertex flows, ensure ADC is configured (`gcloud auth application-default login`).
@@ -0,0 +1,44 @@
1
+ token_aud/__init__.py,sha256=OG88Yz6qUP3fEmFr1pMe9azFeKICUErBif0uPcZ32is,89
2
+ token_aud/config.py,sha256=qjtn77ezAgllzGig-eP-YT9HX8XrIyKN8hqngDI9LBU,1425
3
+ token_aud/agent/__init__.py,sha256=upJj-JKnQ0Kds4ISsS8056MGh7i-ZVDaLNw9FQrMjns,1105
4
+ token_aud/agent/adaptive.py,sha256=dwne4Tb_fnj-vr8pw-KAzJiyC-7F3MMRJts337Lnd-I,7575
5
+ token_aud/agent/loop_guard.py,sha256=3fFUcEEq8HcPDm93JXN3oRMpHI7OcDRTTMaT9Uvuww0,3246
6
+ token_aud/agent/policy.py,sha256=Jcf8LsDHPGp9GwgbByGGmb_ZIhmdJ8LZdTkNAliFEEA,5120
7
+ token_aud/agent/router.py,sha256=Nzf1-Buj3YjkbUs5dY882hfh-ElGHOtYEKhsL2nAZb0,8570
8
+ token_aud/agent/runtime.py,sha256=alDlarqPj0UpXULbAib3QeuT0sz199Xm48ajfINf3hA,12883
9
+ token_aud/agent/step_classifier.py,sha256=c7GOZR28-W4xM-jbJ7qcos012phn5b2_WCGQqeq8JCg,2101
10
+ token_aud/agent/telemetry.py,sha256=JrW7rM8ykYbrBprQXLODYx5cWyhtOmEleInXM-wQhcA,4308
11
+ token_aud/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ token_aud/api/app.py,sha256=VgFvLysKxJdPd2Awxo-t3sVTP5ag06o5TYW6IKe06c0,4830
13
+ token_aud/api/serve.py,sha256=t20bOSJ-eYm0s7x6ctuJ7if6GpL0vvJp4E8Oxsw3hTg,194
14
+ token_aud/api/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ token_aud/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ token_aud/cli/analyze.py,sha256=RAtTxtq0t1hQpbK6R2IQuNme34inaSV_DQOfVHDD7Aw,7802
17
+ token_aud/cli/configure.py,sha256=q88mVw7LU2Vv1_asJrDyOTF_caDQYyjnwR306Ls4Ev4,3598
18
+ token_aud/cli/main.py,sha256=pOxnOZypVp882Mf3zUW1nz0dv6EkxWIwHlP0bVuGKrw,692
19
+ token_aud/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ token_aud/core/auditor.py,sha256=3ptT4GadRVgXa2lYYh1YM9x2NfAZ5tFJ8HOpKVZ2POA,11309
21
+ token_aud/core/judge.py,sha256=Ew3bpEnw3q9bYctRleVpO5VkhtOIo1D7Azh9fn29R-8,6763
22
+ token_aud/core/pricing.py,sha256=bIPlpEBymdKG0YEkUfp4QHTjS4yLkI4znnk8N8H2bA4,5108
23
+ token_aud/core/sampler.py,sha256=Gujbb8emZWV0I1DWJ9jfR7m3HFRDntS1U4foBaa3nGY,11126
24
+ token_aud/core/savings.py,sha256=LVdhbzT7glp2xDVKUfFaLx_ObdhJUkB7Gbgssw6OXB0,9546
25
+ token_aud/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ token_aud/data/default_routing_policy.yaml,sha256=M6Rpm0bGp4MujPj73mzZMaftFuAvnAOYgfL0EPC62r8,4852
27
+ token_aud/data/pricing.json,sha256=4xsTV_h2zFwXREgD9P4s8lEaQPf_xvJGpKIR_Y80kbM,11218
28
+ token_aud/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
+ token_aud/db/session.py,sha256=0Tpm5I5eu1r5KEtlsGUmmve79uIIGhY6eirSzT8fDQQ,959
30
+ token_aud/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ token_aud/models/db.py,sha256=U7r-oe1tr3KMGRbcjmhf3vkUD4yhMWVHrrs2OwQvE8I,3972
32
+ token_aud/models/schemas.py,sha256=JHrH9nqwQsl0njCzd8GkS3Nta7PBr6XlvCcqs9Td-gs,4582
33
+ token_aud/parsers/__init__.py,sha256=9EoHC62-G5nvkFUMW7vL7OHUlRUy2QL1hxV1Dj6bMo0,264
34
+ token_aud/parsers/anthropic.py,sha256=3jgW3uykihEABLy2a-21kDEshcgh2ITIBunmelqLmxE,1527
35
+ token_aud/parsers/base.py,sha256=KRJy_CVXpsYyFDQse_GUXch4afKFjOMQJcI7FFx2vJk,7481
36
+ token_aud/parsers/generic_csv.py,sha256=6s7BzHPyx49tHi9GzYWGcFmr3jwaTzZuv9hCV5CQQ4Y,1409
37
+ token_aud/parsers/openai.py,sha256=T-O6JbRM4XcNIXv4XYQqfIvvRioMkBqbe8hwwNhDPjI,1505
38
+ token_aud/reports/__init__.py,sha256=wMW8GxhQBrN6fwfQbOEE1LBsQSH_Lgcdk_ra_dvAdV8,203
39
+ token_aud/reports/html.py,sha256=iVGq9ecvE0VAkKriZEbJSaxd7T4LZepj2iDOTTIZgVM,9198
40
+ token_aud/reports/terminal.py,sha256=xnWzZb8Uhv_I35Z1Htcjhg3xhItmeiL5wDOMLRvG56M,3987
41
+ agentspend_sdk-0.1.0.dist-info/METADATA,sha256=rHvsCL2yHS7zUJ5jLcjdp1-kti87c2fSjAI3wMi3ccY,3811
42
+ agentspend_sdk-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
43
+ agentspend_sdk-0.1.0.dist-info/entry_points.txt,sha256=Gu8y3XoWEBEcGVYCOfzRVyBKdUjjP5WzLFy1-hCn8Xg,96
44
+ agentspend_sdk-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ token-aud = token_aud.cli.main:app
3
+ token-aud-serve = token_aud.api.serve:main
token_aud/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """token-aud: AI cost optimization through model benchmarking."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,41 @@
1
+ """AgentSpend SDK — agent-native dynamic model routing.
2
+
3
+ Public API:
4
+ from token_aud.agent import AgentSpend, RoutingPolicy, RouteContext
5
+
6
+ agent = AgentSpend.from_yaml("routing_policy.yaml")
7
+ result = agent.route_call(step="plan", messages=[...])
8
+ """
9
+
10
+ from token_aud.agent.adaptive import AdaptiveRouter, AdaptiveSuggestion
11
+ from token_aud.agent.loop_guard import LoopGuard
12
+ from token_aud.agent.policy import RoutingPolicy, StepType
13
+ from token_aud.agent.router import RouteContext, RouteDecision, decide_model
14
+ from token_aud.agent.runtime import AgentSpend, RouteResult
15
+ from token_aud.agent.step_classifier import classify_step
16
+ from token_aud.agent.telemetry import (
17
+ CallbackSink,
18
+ HttpSink,
19
+ JsonlSink,
20
+ TelemetryEmitter,
21
+ TelemetryEvent,
22
+ )
23
+
24
+ __all__ = [
25
+ "AdaptiveRouter",
26
+ "AdaptiveSuggestion",
27
+ "AgentSpend",
28
+ "CallbackSink",
29
+ "HttpSink",
30
+ "JsonlSink",
31
+ "LoopGuard",
32
+ "RouteContext",
33
+ "RouteDecision",
34
+ "RouteResult",
35
+ "RoutingPolicy",
36
+ "StepType",
37
+ "TelemetryEmitter",
38
+ "TelemetryEvent",
39
+ "classify_step",
40
+ "decide_model",
41
+ ]
@@ -0,0 +1,228 @@
1
+ """Adaptive routing — learns from telemetry outcomes to adjust model selection.
2
+
3
+ This module provides an optional layer on top of the deterministic router.
4
+ It tracks per-step success rates and costs, then suggests cheaper models
5
+ when confidence is high enough.
6
+
7
+ Key design constraints:
8
+ - Deterministic mode is always the default and the safe fallback.
9
+ - Adaptive mode requires a minimum sample size before overriding.
10
+ - A rollback switch instantly disables adaptation.
11
+ - All state is local and serializable (no external DB needed).
12
+
13
+ Usage:
14
+ from token_aud.agent.adaptive import AdaptiveRouter
15
+
16
+ adaptive = AdaptiveRouter(min_samples=20, confidence_threshold=0.85)
17
+ adaptive.record_outcome(step="plan", model="gpt-4o-mini", success=True, cost=0.001)
18
+ suggestion = adaptive.suggest(step="plan", current_model="gpt-4o")
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ from collections import defaultdict
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+
28
+
29
@dataclass
class ModelStats:
    """Running totals for one model on one step type.

    Only raw sums are stored; the rate and the averages are derived on demand
    and report ``0.0`` while no calls have been recorded.
    """

    total_calls: int = 0
    success_count: int = 0
    total_cost: float = 0.0
    total_latency_ms: float = 0.0

    @property
    def success_rate(self) -> float:
        """Fraction of recorded calls that succeeded (0.0 with no calls)."""
        return self.success_count / self.total_calls if self.total_calls else 0.0

    @property
    def avg_cost(self) -> float:
        """Mean cost per recorded call (0.0 with no calls)."""
        return self.total_cost / self.total_calls if self.total_calls else 0.0

    @property
    def avg_latency_ms(self) -> float:
        """Mean latency per recorded call in milliseconds (0.0 with no calls)."""
        return self.total_latency_ms / self.total_calls if self.total_calls else 0.0
55
+
56
+
57
@dataclass
class AdaptiveSuggestion:
    """Result of asking the adaptive router for a model recommendation."""

    # Model to use instead of the current one; None means "no recommendation".
    suggested_model: str | None
    # Human-readable explanation of why the suggestion was (or was not) made.
    reason: str
    # Observed success rate backing the suggestion; 0.0 when nothing is suggested.
    confidence: float
    # Statistics of the suggested model, when a suggestion exists.
    stats: ModelStats | None = None
65
+
66
+
67
class AdaptiveRouter:
    """Learns from outcomes to suggest cheaper models per step type.

    This is opt-in and sits alongside the deterministic router. It does not
    replace it — the runtime checks this for suggestions before falling back
    to the deterministic path.
    """

    def __init__(
        self,
        min_samples: int = 20,
        confidence_threshold: float = 0.85,
        enabled: bool = True,
    ) -> None:
        """Create a router with empty statistics.

        Args:
            min_samples: Calls a candidate must have recorded before it can
                be suggested.
            confidence_threshold: Minimum success rate a candidate needs.
            enabled: Initial state of the rollback switch.
        """
        self._min_samples = min_samples
        self._confidence_threshold = confidence_threshold
        self._enabled = enabled
        # step -> model -> stats; entries are created lazily on first write.
        self._stats: dict[str, dict[str, ModelStats]] = defaultdict(
            lambda: defaultdict(ModelStats)
        )

    @property
    def enabled(self) -> bool:
        """Whether adaptive suggestions are currently active."""
        return self._enabled

    def enable(self) -> None:
        """Turn adaptive suggestions on."""
        self._enabled = True

    def disable(self) -> None:
        """Rollback switch — immediately disables adaptive suggestions."""
        self._enabled = False

    def record_outcome(
        self,
        step: str,
        model: str,
        success: bool,
        cost: float = 0.0,
        latency_ms: float = 0.0,
    ) -> None:
        """Record the outcome of a routed call for learning.

        Args:
            step: Step type the call was made for.
            model: Model that served the call.
            success: Whether the call counted as successful.
            cost: Cost of the call, added to the running total.
            latency_ms: Latency of the call, added to the running total.
        """
        stats = self._stats[step][model]
        stats.total_calls += 1
        if success:
            stats.success_count += 1
        stats.total_cost += cost
        stats.total_latency_ms += latency_ms

    def suggest(
        self,
        step: str,
        current_model: str,
        candidate_models: list[str] | None = None,
    ) -> AdaptiveSuggestion:
        """Suggest a potentially cheaper model based on historical success rates.

        A candidate is suggested only if:
        1. Adaptive routing is enabled
        2. There's enough data (>= min_samples) for the candidate
        3. The candidate's success rate >= confidence_threshold
        4. The candidate is cheaper on average than the current model

        When no calls have been recorded for ``current_model`` there is no
        cost baseline, so condition 4 is skipped. Among qualifying candidates
        the one with the lowest average cost wins.

        Args:
            step: Step type to look up.
            current_model: Model the caller would otherwise use.
            candidate_models: Models to consider; defaults to every other
                model with recorded stats for ``step``.

        Returns:
            A suggestion whose ``suggested_model`` is None when nothing
            qualifies or adaptive routing is disabled.
        """
        if not self._enabled:
            return AdaptiveSuggestion(
                suggested_model=None,
                reason="Adaptive routing disabled",
                confidence=0.0,
            )

        # .get() keeps unknown steps from materialising empty defaultdict entries.
        step_stats = self._stats.get(step, {})
        current_stats = step_stats.get(current_model)
        # A zero-call entry (possible after load()) reports a placeholder
        # avg_cost of 0.0, which would make every candidate look "not cheaper".
        # It carries no evidence, so treat it like a missing baseline.
        if current_stats is not None and current_stats.total_calls == 0:
            current_stats = None

        if candidate_models is None:
            candidate_models = [m for m in step_stats if m != current_model]

        best_model: str | None = None
        best_stats: ModelStats | None = None

        for candidate in candidate_models:
            cand_stats = step_stats.get(candidate)
            if cand_stats is None or cand_stats.total_calls < self._min_samples:
                continue

            if cand_stats.success_rate < self._confidence_threshold:
                continue

            if current_stats is not None and cand_stats.avg_cost >= current_stats.avg_cost:
                continue

            # Strict "<" keeps the first-seen candidate on cost ties.
            if best_stats is None or cand_stats.avg_cost < best_stats.avg_cost:
                best_model, best_stats = candidate, cand_stats

        if best_stats is None:
            return AdaptiveSuggestion(
                suggested_model=None,
                reason="No confident cheaper alternative found",
                confidence=0.0,
            )

        return AdaptiveSuggestion(
            suggested_model=best_model,
            reason=(
                f"{best_model} has {best_stats.success_rate:.0%} success rate "
                f"over {best_stats.total_calls} calls at ${best_stats.avg_cost:.6f}/call avg"
            ),
            confidence=best_stats.success_rate,
            stats=best_stats,
        )

    @staticmethod
    def _summarize(models: dict[str, ModelStats]) -> dict[str, dict[str, float]]:
        """Convert one step's per-model stats into plain derived metrics."""
        return {
            model: {
                "total_calls": s.total_calls,
                "success_rate": s.success_rate,
                "avg_cost": s.avg_cost,
                "avg_latency_ms": s.avg_latency_ms,
            }
            for model, s in models.items()
        }

    def get_stats(self, step: str | None = None) -> dict:
        """Return stats as a plain dict for inspection or serialization.

        Args:
            step: Step type to report. A falsy value (None or "") reports
                every step, keyed by step name.

        Returns:
            ``{model: metrics}`` for a single step, otherwise
            ``{step: {model: metrics}}``.
        """
        if step:
            return self._summarize(self._stats.get(step, {}))
        return {
            step_name: self._summarize(models)
            for step_name, models in self._stats.items()
        }

    def save(self, path: str | Path) -> None:
        """Persist the raw counters to a JSON file.

        Raw totals (not derived rates) are written so that ``load`` can
        restore the exact state.
        """
        raw = {
            step_name: {
                model: {
                    "total_calls": s.total_calls,
                    "success_count": s.success_count,
                    "total_cost": s.total_cost,
                    "total_latency_ms": s.total_latency_ms,
                }
                for model, s in models.items()
            }
            for step_name, models in self._stats.items()
        }
        # Explicit encoding keeps the file portable across platform defaults.
        Path(path).write_text(json.dumps(raw, indent=2), encoding="utf-8")

    def load(self, path: str | Path) -> None:
        """Load stats from a previously saved JSON file.

        A missing file is ignored. Entries in the file overwrite the counters
        of the same step/model; other entries are left untouched.

        Raises:
            KeyError: If an entry lacks ``total_calls``, ``success_count`` or
                ``total_cost``. Nothing is modified in that case.
        """
        try:
            text = Path(path).read_text(encoding="utf-8")
        except FileNotFoundError:
            return
        raw = json.loads(text)

        # Parse everything up front so a malformed entry fails before any
        # in-memory state has been changed.
        parsed = [
            (
                step_name,
                model,
                data["total_calls"],
                data["success_count"],
                data["total_cost"],
                data.get("total_latency_ms", 0.0),
            )
            for step_name, models in raw.items()
            for model, data in models.items()
        ]

        for step_name, model, calls, successes, cost, latency in parsed:
            stats = self._stats[step_name][model]
            stats.total_calls = calls
            stats.success_count = successes
            stats.total_cost = cost
            stats.total_latency_ms = latency
@@ -0,0 +1,102 @@
1
+ """Loop guard — detects agent loops from repeated similar messages.
2
+
3
+ Tracks recent message fingerprints per session. When consecutive turns
4
+ have high textual similarity or identical content, signals a loop so the
5
+ router can escalate or halt.
6
+
7
+ Budget guard logic lives in router.py (it's part of the routing decision).
8
+ This module focuses on turn-level repetition detection.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections import deque
14
+ from difflib import SequenceMatcher
15
+
16
+ from token_aud.agent.policy import LoopGuardAction, LoopGuardConfig
17
+
18
+
19
class LoopGuard:
    """Stateful loop detector for an agent run.

    Each call to ``check`` fingerprints the latest non-empty message and
    compares it with the fingerprints of recent turns. A run of similar turns
    that reaches ``repeated_turn_limit`` is reported as a loop.
    """

    def __init__(self, config: LoopGuardConfig) -> None:
        """Create a guard with empty history.

        Args:
            config: Loop-guard settings; this class reads ``enabled``,
                ``repeated_turn_limit``, ``similarity_threshold`` and
                ``on_trigger`` from it.
        """
        self._config = config
        # Bounded history: old fingerprints fall off automatically.
        self._recent_fingerprints: deque[str] = deque(
            maxlen=config.repeated_turn_limit + 2
        )
        self._consecutive_similar: int = 0
        self._escalation_count: int = 0

    def reset(self) -> None:
        """Clear all state between runs."""
        self._recent_fingerprints.clear()
        self._consecutive_similar = 0
        self._escalation_count = 0

    def check(self, messages: list[dict[str, str]]) -> bool:
        """Check if the current turn looks like a loop.

        Args:
            messages: The message list about to be sent to the LLM.

        Returns:
            True if a loop is detected. Always False when the guard is
            disabled or no message carries any text.
        """
        if not self._config.enabled:
            return False

        fingerprint = self._fingerprint(messages)
        if not fingerprint:
            return False

        # Compare before appending so a turn is never matched against itself.
        is_similar = self._is_similar_to_recent(fingerprint)
        self._recent_fingerprints.append(fingerprint)

        # A dissimilar turn breaks the streak.
        self._consecutive_similar = self._consecutive_similar + 1 if is_similar else 0

        return self._consecutive_similar >= self._config.repeated_turn_limit

    def record_escalation(self) -> None:
        """Record that the router escalated due to a loop detection."""
        self._escalation_count += 1

    @property
    def should_hard_stop(self) -> bool:
        """True once the escalation count reaches the hard_stop_after threshold."""
        limit = self._config.on_trigger.hard_stop_after
        return self._escalation_count >= limit

    @property
    def action(self) -> LoopGuardAction:
        """Configured action to take when a loop is detected."""
        return self._config.on_trigger.action

    @property
    def escalation_count(self) -> int:
        """Number of escalations recorded since the last reset."""
        return self._escalation_count

    # --- Internals ---

    @staticmethod
    def _content_text(content: object) -> str:
        """Flatten a message ``content`` value into plain text.

        Strings pass through. For list/tuple content (multimodal "parts"),
        string items and the ``"text"`` value of dict items are joined with
        spaces; other parts are ignored. None yields an empty string.
        """
        if isinstance(content, str):
            return content
        if content is None:
            return ""
        if isinstance(content, (list, tuple)):
            texts = []
            for part in content:
                if isinstance(part, str):
                    texts.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    texts.append(part["text"])
            return " ".join(texts)
        return str(content)

    def _fingerprint(self, messages: list[dict[str, str]]) -> str:
        """Extract a comparable fingerprint from the last message with text.

        Walks the messages from newest to oldest and returns the first
        non-blank content, stripped and capped at 500 characters. Returns an
        empty string when no message has text.
        """
        if not messages:
            return ""
        for msg in reversed(messages):
            # Normalise first: calling .strip() on list content would raise,
            # and whitespace-only content should not hide earlier messages.
            text = self._content_text(msg.get("content", "")).strip()
            if text:
                return text[:500]
        return ""

    def _is_similar_to_recent(self, fingerprint: str) -> bool:
        """Check if the fingerprint is similar to any recent one."""
        threshold = self._config.similarity_threshold
        for recent in self._recent_fingerprints:
            matcher = SequenceMatcher(None, fingerprint, recent)
            # real_quick_ratio() and quick_ratio() are cheap upper bounds on
            # ratio(), so they can only rule pairs out, never change a result.
            if (
                matcher.real_quick_ratio() >= threshold
                and matcher.quick_ratio() >= threshold
                and matcher.ratio() >= threshold
            ):
                return True
        return False