agentspend-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentspend_sdk-0.1.0.dist-info/METADATA +131 -0
- agentspend_sdk-0.1.0.dist-info/RECORD +44 -0
- agentspend_sdk-0.1.0.dist-info/WHEEL +4 -0
- agentspend_sdk-0.1.0.dist-info/entry_points.txt +3 -0
- token_aud/__init__.py +3 -0
- token_aud/agent/__init__.py +41 -0
- token_aud/agent/adaptive.py +228 -0
- token_aud/agent/loop_guard.py +102 -0
- token_aud/agent/policy.py +163 -0
- token_aud/agent/router.py +253 -0
- token_aud/agent/runtime.py +358 -0
- token_aud/agent/step_classifier.py +61 -0
- token_aud/agent/telemetry.py +155 -0
- token_aud/api/__init__.py +0 -0
- token_aud/api/app.py +149 -0
- token_aud/api/routes/__init__.py +0 -0
- token_aud/api/serve.py +12 -0
- token_aud/cli/__init__.py +0 -0
- token_aud/cli/analyze.py +213 -0
- token_aud/cli/configure.py +113 -0
- token_aud/cli/main.py +24 -0
- token_aud/config.py +42 -0
- token_aud/core/__init__.py +0 -0
- token_aud/core/auditor.py +325 -0
- token_aud/core/judge.py +204 -0
- token_aud/core/pricing.py +139 -0
- token_aud/core/sampler.py +312 -0
- token_aud/core/savings.py +286 -0
- token_aud/data/__init__.py +0 -0
- token_aud/data/default_routing_policy.yaml +185 -0
- token_aud/data/pricing.json +349 -0
- token_aud/db/__init__.py +0 -0
- token_aud/db/session.py +31 -0
- token_aud/models/__init__.py +0 -0
- token_aud/models/db.py +77 -0
- token_aud/models/schemas.py +99 -0
- token_aud/parsers/__init__.py +10 -0
- token_aud/parsers/anthropic.py +50 -0
- token_aud/parsers/base.py +198 -0
- token_aud/parsers/generic_csv.py +43 -0
- token_aud/parsers/openai.py +50 -0
- token_aud/reports/__init__.py +6 -0
- token_aud/reports/html.py +219 -0
- token_aud/reports/terminal.py +118 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentspend-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AgentSpend — runtime cost optimizer for AI agents. Route LLM calls to the cheapest capable model with fallbacks, loop guards, and telemetry.
|
|
5
|
+
Requires-Python: <3.14,>=3.12
|
|
6
|
+
Requires-Dist: aiosqlite>=0.20
|
|
7
|
+
Requires-Dist: alembic>=1.14
|
|
8
|
+
Requires-Dist: fastapi>=0.115
|
|
9
|
+
Requires-Dist: google-auth>=2.48.0
|
|
10
|
+
Requires-Dist: google-cloud-aiplatform>=1.139.0
|
|
11
|
+
Requires-Dist: litellm>=1.50
|
|
12
|
+
Requires-Dist: pandas>=2.2
|
|
13
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
14
|
+
Requires-Dist: pydantic>=2.0
|
|
15
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Requires-Dist: rich>=13.0
|
|
18
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
19
|
+
Requires-Dist: typer>=0.15
|
|
20
|
+
Requires-Dist: uvicorn[standard]>=0.32
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=9.0; extra == 'dev'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# token-aud
|
|
26
|
+
|
|
27
|
+
AI cost optimization toolkit for LLM workloads.
|
|
28
|
+
|
|
29
|
+
`token-aud` now includes two complementary workflows:
|
|
30
|
+
|
|
31
|
+
- **Audit mode (CLI/API):** Analyze historical usage logs and estimate savings opportunities with Student-Teacher-Judge sampling.
|
|
32
|
+
- **AgentSpend SDK:** Route live agent steps (`plan`, `reason`, `tool`, `verify`, `draft`, `summarize`) to cost/quality-appropriate models with fallbacks, loop guards, and telemetry.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
uv sync --no-editable
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
For local development tooling (tests):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
uv sync --no-editable --extra dev
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quick Start (AgentSpend)
|
|
47
|
+
|
|
48
|
+
Run these three commands from repo root:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv sync --no-editable --extra dev
|
|
52
|
+
uv run --no-sync python -m pytest tests/agent -q
|
|
53
|
+
uv run --no-sync python examples/agent_routing_demo.py
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Expected results:
|
|
57
|
+
|
|
58
|
+
- Agent tests pass.
|
|
59
|
+
- Demo prints routed step decisions, per-step telemetry, and total run cost.
|
|
60
|
+
- `agent_telemetry.jsonl` is generated locally.
|
|
61
|
+
|
|
62
|
+
## AgentSpend Usage
|
|
63
|
+
|
|
64
|
+
### 1) Default policy
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from token_aud.agent import AgentSpend
|
|
68
|
+
|
|
69
|
+
agent = AgentSpend.default()
|
|
70
|
+
result = agent.route_call(
|
|
71
|
+
step="plan",
|
|
72
|
+
messages=[{"role": "user", "content": "Break this task into a plan"}],
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
print(result.model_used, result.cost_usd, result.content)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 2) Custom policy YAML
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from token_aud.agent import AgentSpend
|
|
82
|
+
|
|
83
|
+
agent = AgentSpend.from_yaml("routing_policy.yaml")
|
|
84
|
+
result = agent.route_call(
|
|
85
|
+
step="reason",
|
|
86
|
+
messages=[{"role": "user", "content": "Compare two architectures"}],
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
print(result.model_used, result.fallbacks_tried)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Built-in default policy path:
|
|
93
|
+
|
|
94
|
+
- `src/token_aud/data/default_routing_policy.yaml`
|
|
95
|
+
|
|
96
|
+
## AgentSpend Core Components
|
|
97
|
+
|
|
98
|
+
- `src/token_aud/agent/policy.py`: Pydantic policy schema + YAML loading
|
|
99
|
+
- `src/token_aud/agent/router.py`: deterministic model selection
|
|
100
|
+
- `src/token_aud/agent/runtime.py`: `route_call()` execution + fallbacks
|
|
101
|
+
- `src/token_aud/agent/loop_guard.py`: repeated-turn loop detection
|
|
102
|
+
- `src/token_aud/agent/telemetry.py`: JSONL/HTTP telemetry sinks
|
|
103
|
+
- `src/token_aud/agent/adaptive.py`: optional adaptive routing layer
|
|
104
|
+
|
|
105
|
+
## AgentSpend Examples
|
|
106
|
+
|
|
107
|
+
- `examples/agent_routing_demo.py`: end-to-end routed run with telemetry
|
|
108
|
+
- `examples/custom_policy_demo.py`: loop escalation and hard-stop behavior
|
|
109
|
+
- `examples/framework_agnostic_integration.py`: generic agent-loop integration with explicit success feedback
|
|
110
|
+
- `scripts/summarize_telemetry.py`: convert `agent_telemetry.jsonl` into cost/fallback/latency summary
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
uv run --no-sync python scripts/summarize_telemetry.py agent_telemetry.jsonl
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Audit CLI (legacy, still supported)
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
uv run --no-sync token-aud --help
|
|
120
|
+
uv run --no-sync token-aud analyze sample_data.csv --dry-run
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Environment Variables
|
|
124
|
+
|
|
125
|
+
Common provider credentials:
|
|
126
|
+
|
|
127
|
+
- `OPENAI_API_KEY`
|
|
128
|
+
- `ANTHROPIC_API_KEY`
|
|
129
|
+
- `GEMINI_API_KEY` or `GOOGLE_API_KEY` (depending on provider path)
|
|
130
|
+
|
|
131
|
+
For Google Vertex AI flows, ensure Application Default Credentials (ADC) are configured (`gcloud auth application-default login`).
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
token_aud/__init__.py,sha256=OG88Yz6qUP3fEmFr1pMe9azFeKICUErBif0uPcZ32is,89
|
|
2
|
+
token_aud/config.py,sha256=qjtn77ezAgllzGig-eP-YT9HX8XrIyKN8hqngDI9LBU,1425
|
|
3
|
+
token_aud/agent/__init__.py,sha256=upJj-JKnQ0Kds4ISsS8056MGh7i-ZVDaLNw9FQrMjns,1105
|
|
4
|
+
token_aud/agent/adaptive.py,sha256=dwne4Tb_fnj-vr8pw-KAzJiyC-7F3MMRJts337Lnd-I,7575
|
|
5
|
+
token_aud/agent/loop_guard.py,sha256=3fFUcEEq8HcPDm93JXN3oRMpHI7OcDRTTMaT9Uvuww0,3246
|
|
6
|
+
token_aud/agent/policy.py,sha256=Jcf8LsDHPGp9GwgbByGGmb_ZIhmdJ8LZdTkNAliFEEA,5120
|
|
7
|
+
token_aud/agent/router.py,sha256=Nzf1-Buj3YjkbUs5dY882hfh-ElGHOtYEKhsL2nAZb0,8570
|
|
8
|
+
token_aud/agent/runtime.py,sha256=alDlarqPj0UpXULbAib3QeuT0sz199Xm48ajfINf3hA,12883
|
|
9
|
+
token_aud/agent/step_classifier.py,sha256=c7GOZR28-W4xM-jbJ7qcos012phn5b2_WCGQqeq8JCg,2101
|
|
10
|
+
token_aud/agent/telemetry.py,sha256=JrW7rM8ykYbrBprQXLODYx5cWyhtOmEleInXM-wQhcA,4308
|
|
11
|
+
token_aud/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
token_aud/api/app.py,sha256=VgFvLysKxJdPd2Awxo-t3sVTP5ag06o5TYW6IKe06c0,4830
|
|
13
|
+
token_aud/api/serve.py,sha256=t20bOSJ-eYm0s7x6ctuJ7if6GpL0vvJp4E8Oxsw3hTg,194
|
|
14
|
+
token_aud/api/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
token_aud/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
token_aud/cli/analyze.py,sha256=RAtTxtq0t1hQpbK6R2IQuNme34inaSV_DQOfVHDD7Aw,7802
|
|
17
|
+
token_aud/cli/configure.py,sha256=q88mVw7LU2Vv1_asJrDyOTF_caDQYyjnwR306Ls4Ev4,3598
|
|
18
|
+
token_aud/cli/main.py,sha256=pOxnOZypVp882Mf3zUW1nz0dv6EkxWIwHlP0bVuGKrw,692
|
|
19
|
+
token_aud/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
token_aud/core/auditor.py,sha256=3ptT4GadRVgXa2lYYh1YM9x2NfAZ5tFJ8HOpKVZ2POA,11309
|
|
21
|
+
token_aud/core/judge.py,sha256=Ew3bpEnw3q9bYctRleVpO5VkhtOIo1D7Azh9fn29R-8,6763
|
|
22
|
+
token_aud/core/pricing.py,sha256=bIPlpEBymdKG0YEkUfp4QHTjS4yLkI4znnk8N8H2bA4,5108
|
|
23
|
+
token_aud/core/sampler.py,sha256=Gujbb8emZWV0I1DWJ9jfR7m3HFRDntS1U4foBaa3nGY,11126
|
|
24
|
+
token_aud/core/savings.py,sha256=LVdhbzT7glp2xDVKUfFaLx_ObdhJUkB7Gbgssw6OXB0,9546
|
|
25
|
+
token_aud/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
+
token_aud/data/default_routing_policy.yaml,sha256=M6Rpm0bGp4MujPj73mzZMaftFuAvnAOYgfL0EPC62r8,4852
|
|
27
|
+
token_aud/data/pricing.json,sha256=4xsTV_h2zFwXREgD9P4s8lEaQPf_xvJGpKIR_Y80kbM,11218
|
|
28
|
+
token_aud/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
+
token_aud/db/session.py,sha256=0Tpm5I5eu1r5KEtlsGUmmve79uIIGhY6eirSzT8fDQQ,959
|
|
30
|
+
token_aud/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
token_aud/models/db.py,sha256=U7r-oe1tr3KMGRbcjmhf3vkUD4yhMWVHrrs2OwQvE8I,3972
|
|
32
|
+
token_aud/models/schemas.py,sha256=JHrH9nqwQsl0njCzd8GkS3Nta7PBr6XlvCcqs9Td-gs,4582
|
|
33
|
+
token_aud/parsers/__init__.py,sha256=9EoHC62-G5nvkFUMW7vL7OHUlRUy2QL1hxV1Dj6bMo0,264
|
|
34
|
+
token_aud/parsers/anthropic.py,sha256=3jgW3uykihEABLy2a-21kDEshcgh2ITIBunmelqLmxE,1527
|
|
35
|
+
token_aud/parsers/base.py,sha256=KRJy_CVXpsYyFDQse_GUXch4afKFjOMQJcI7FFx2vJk,7481
|
|
36
|
+
token_aud/parsers/generic_csv.py,sha256=6s7BzHPyx49tHi9GzYWGcFmr3jwaTzZuv9hCV5CQQ4Y,1409
|
|
37
|
+
token_aud/parsers/openai.py,sha256=T-O6JbRM4XcNIXv4XYQqfIvvRioMkBqbe8hwwNhDPjI,1505
|
|
38
|
+
token_aud/reports/__init__.py,sha256=wMW8GxhQBrN6fwfQbOEE1LBsQSH_Lgcdk_ra_dvAdV8,203
|
|
39
|
+
token_aud/reports/html.py,sha256=iVGq9ecvE0VAkKriZEbJSaxd7T4LZepj2iDOTTIZgVM,9198
|
|
40
|
+
token_aud/reports/terminal.py,sha256=xnWzZb8Uhv_I35Z1Htcjhg3xhItmeiL5wDOMLRvG56M,3987
|
|
41
|
+
agentspend_sdk-0.1.0.dist-info/METADATA,sha256=rHvsCL2yHS7zUJ5jLcjdp1-kti87c2fSjAI3wMi3ccY,3811
|
|
42
|
+
agentspend_sdk-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
43
|
+
agentspend_sdk-0.1.0.dist-info/entry_points.txt,sha256=Gu8y3XoWEBEcGVYCOfzRVyBKdUjjP5WzLFy1-hCn8Xg,96
|
|
44
|
+
agentspend_sdk-0.1.0.dist-info/RECORD,,
|
token_aud/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""AgentSpend SDK — agent-native dynamic model routing.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
from token_aud.agent import AgentSpend, RoutingPolicy, RouteContext
|
|
5
|
+
|
|
6
|
+
agent = AgentSpend.from_yaml("routing_policy.yaml")
|
|
7
|
+
result = agent.route_call(step="plan", messages=[...])
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from token_aud.agent.adaptive import AdaptiveRouter, AdaptiveSuggestion
|
|
11
|
+
from token_aud.agent.loop_guard import LoopGuard
|
|
12
|
+
from token_aud.agent.policy import RoutingPolicy, StepType
|
|
13
|
+
from token_aud.agent.router import RouteContext, RouteDecision, decide_model
|
|
14
|
+
from token_aud.agent.runtime import AgentSpend, RouteResult
|
|
15
|
+
from token_aud.agent.step_classifier import classify_step
|
|
16
|
+
from token_aud.agent.telemetry import (
|
|
17
|
+
CallbackSink,
|
|
18
|
+
HttpSink,
|
|
19
|
+
JsonlSink,
|
|
20
|
+
TelemetryEmitter,
|
|
21
|
+
TelemetryEvent,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"AdaptiveRouter",
|
|
26
|
+
"AdaptiveSuggestion",
|
|
27
|
+
"AgentSpend",
|
|
28
|
+
"CallbackSink",
|
|
29
|
+
"HttpSink",
|
|
30
|
+
"JsonlSink",
|
|
31
|
+
"LoopGuard",
|
|
32
|
+
"RouteContext",
|
|
33
|
+
"RouteDecision",
|
|
34
|
+
"RouteResult",
|
|
35
|
+
"RoutingPolicy",
|
|
36
|
+
"StepType",
|
|
37
|
+
"TelemetryEmitter",
|
|
38
|
+
"TelemetryEvent",
|
|
39
|
+
"classify_step",
|
|
40
|
+
"decide_model",
|
|
41
|
+
]
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""Adaptive routing — learns from telemetry outcomes to adjust model selection.
|
|
2
|
+
|
|
3
|
+
This module provides an optional layer on top of the deterministic router.
|
|
4
|
+
It tracks per-step success rates and costs, then suggests cheaper models
|
|
5
|
+
when confidence is high enough.
|
|
6
|
+
|
|
7
|
+
Key design constraints:
|
|
8
|
+
- Deterministic mode is always the default and the safe fallback.
|
|
9
|
+
- Adaptive mode requires a minimum sample size before overriding.
|
|
10
|
+
- A rollback switch instantly disables adaptation.
|
|
11
|
+
- All state is local and serializable (no external DB needed).
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
from token_aud.agent.adaptive import AdaptiveRouter
|
|
15
|
+
|
|
16
|
+
adaptive = AdaptiveRouter(min_samples=20, confidence_threshold=0.85)
|
|
17
|
+
adaptive.record_outcome(step="plan", model="gpt-4o-mini", success=True, cost=0.001)
|
|
18
|
+
suggestion = adaptive.suggest(step="plan", current_model="gpt-4o")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
from collections import defaultdict
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ModelStats:
    """Running totals for one model on one step type.

    The fields are plain accumulators. The ``success_rate``, ``avg_cost`` and
    ``avg_latency_ms`` properties derive per-call figures from them and all
    report ``0.0`` while ``total_calls`` is zero.
    """

    total_calls: int = 0  # number of calls recorded
    success_count: int = 0  # number of recorded calls flagged as successful
    total_cost: float = 0.0  # sum of the per-call costs
    total_latency_ms: float = 0.0  # sum of the per-call latencies, in ms

    def _per_call(self, total: float) -> float:
        """Return *total* divided by the call count, or 0.0 with no calls."""
        calls = self.total_calls
        return total / calls if calls != 0 else 0.0

    @property
    def success_rate(self) -> float:
        """Fraction of recorded calls that succeeded."""
        return self._per_call(self.success_count)

    @property
    def avg_cost(self) -> float:
        """Mean cost per recorded call."""
        return self._per_call(self.total_cost)

    @property
    def avg_latency_ms(self) -> float:
        """Mean latency per recorded call, in milliseconds."""
        return self._per_call(self.total_latency_ms)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class AdaptiveSuggestion:
    """Result object handed back by the adaptive router.

    A suggestion with ``suggested_model=None`` means "no recommendation";
    ``reason`` carries the explanation in either case.
    """

    # Model the router recommends, or None when it recommends nothing.
    suggested_model: str | None
    # Human-readable explanation of the outcome.
    reason: str
    # Confidence score attached to the outcome.
    confidence: float
    # Statistics backing the recommendation; None when there are none.
    stats: ModelStats | None = None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class AdaptiveRouter:
    """Opt-in advisor that recommends cheaper models from recorded outcomes.

    Outcomes are accumulated per step type and per model. The router only
    ever *suggests*: when it is disabled, or when no candidate qualifies, the
    returned suggestion has ``suggested_model=None`` so the caller can stay on
    its deterministic choice.
    """

    def __init__(
        self,
        min_samples: int = 20,
        confidence_threshold: float = 0.85,
        enabled: bool = True,
    ) -> None:
        """Create a router with empty statistics.

        Args:
            min_samples: Calls a candidate must have recorded before it can
                be suggested.
            confidence_threshold: Minimum success rate a candidate must reach.
            enabled: Whether suggestions are produced at all.
        """
        self._enabled = enabled
        self._min_samples = min_samples
        self._confidence_threshold = confidence_threshold
        # step name -> model name -> ModelStats; entries appear on first write.
        self._stats: dict[str, dict[str, ModelStats]] = defaultdict(
            lambda: defaultdict(ModelStats)
        )

    @property
    def enabled(self) -> bool:
        """Whether adaptive suggestions are currently switched on."""
        return self._enabled

    def enable(self) -> None:
        """Switch adaptive suggestions on."""
        self._enabled = True

    def disable(self) -> None:
        """Rollback switch: stop producing suggestions immediately."""
        self._enabled = False

    def record_outcome(
        self,
        step: str,
        model: str,
        success: bool,
        cost: float = 0.0,
        latency_ms: float = 0.0,
    ) -> None:
        """Fold one routed call into the statistics for ``(step, model)``.

        Args:
            step: Step type the call belonged to.
            model: Model that served the call.
            success: Whether the call counts as a success.
            cost: Cost of the call.
            latency_ms: Latency of the call in milliseconds.
        """
        entry = self._stats[step][model]
        entry.total_calls += 1
        entry.success_count += 1 if success else 0
        entry.total_cost += cost
        entry.total_latency_ms += latency_ms

    def suggest(
        self,
        step: str,
        current_model: str,
        candidate_models: list[str] | None = None,
    ) -> AdaptiveSuggestion:
        """Pick the cheapest qualifying alternative to ``current_model``.

        A candidate qualifies when it has at least ``min_samples`` recorded
        calls for ``step``, its success rate is at least
        ``confidence_threshold`` and, if statistics exist for the current
        model, its average cost is strictly lower than the current model's.
        Among qualifying candidates the lowest average cost wins; on a tie
        the earlier candidate is kept.

        Args:
            step: Step type being routed.
            current_model: Model the caller would use otherwise.
            candidate_models: Models to consider. Defaults to every other
                model with statistics for ``step``.

        Returns:
            The winning suggestion, or one with ``suggested_model=None`` when
            the router is disabled or nothing qualifies.
        """
        if not self._enabled:
            return AdaptiveSuggestion(
                suggested_model=None,
                reason="Adaptive routing disabled",
                confidence=0.0,
            )

        # ``.get`` keeps the defaultdicts from materialising empty entries.
        per_model = self._stats.get(step, {})
        baseline = per_model.get(current_model)
        names = (
            candidate_models
            if candidate_models is not None
            else [name for name in per_model if name != current_model]
        )

        winner: AdaptiveSuggestion | None = None
        lowest_cost = float("inf")
        for name in names:
            record = per_model.get(name)
            if record is None or record.total_calls < self._min_samples:
                continue

            rate = record.success_rate
            if rate < self._confidence_threshold:
                continue

            cost = record.avg_cost
            # NOTE(review): a baseline entry with zero recorded calls reports
            # an average cost of 0.0, so no candidate with a non-negative cost
            # can pass this check.
            if baseline is not None and cost >= baseline.avg_cost:
                continue

            # Strict comparison: the first of several equally cheap
            # candidates stays the winner.
            if winner is not None and not cost < lowest_cost:
                continue

            winner = AdaptiveSuggestion(
                suggested_model=name,
                reason=(
                    f"{name} has {rate:.0%} success rate "
                    f"over {record.total_calls} calls at ${cost:.6f}/call avg"
                ),
                confidence=rate,
                stats=record,
            )
            lowest_cost = cost

        if winner is None:
            return AdaptiveSuggestion(
                suggested_model=None,
                reason="No confident cheaper alternative found",
                confidence=0.0,
            )
        return winner

    @staticmethod
    def _summarize(models: dict[str, ModelStats]) -> dict[str, dict[str, float]]:
        """Convert a model -> stats mapping into plain derived figures."""
        return {
            name: {
                "total_calls": record.total_calls,
                "success_rate": record.success_rate,
                "avg_cost": record.avg_cost,
                "avg_latency_ms": record.avg_latency_ms,
            }
            for name, record in models.items()
        }

    def get_stats(self, step: str | None = None) -> dict:
        """Return derived statistics as plain dictionaries.

        Args:
            step: Restrict the result to this step type. Any falsy value
                (``None`` or an empty string) selects every step.

        Returns:
            ``{model: figures}`` for a single step, otherwise
            ``{step: {model: figures}}``.
        """
        if step:
            return self._summarize(self._stats.get(step, {}))
        return {
            step_name: self._summarize(models)
            for step_name, models in self._stats.items()
        }

    def save(self, path: str | Path) -> None:
        """Write the raw accumulators to ``path`` as indented JSON."""
        payload = {
            step_name: {
                name: {
                    "total_calls": record.total_calls,
                    "success_count": record.success_count,
                    "total_cost": record.total_cost,
                    "total_latency_ms": record.total_latency_ms,
                }
                for name, record in models.items()
            }
            for step_name, models in self._stats.items()
        }
        Path(path).write_text(json.dumps(payload, indent=2))

    def load(self, path: str | Path) -> None:
        """Read accumulators from a JSON file written by :meth:`save`.

        A missing file is ignored. Entries found in the file overwrite the
        in-memory values for the same ``(step, model)``; other entries are
        left untouched. ``total_latency_ms`` is optional in the file and
        defaults to ``0.0``.
        """
        source = Path(path)
        if not source.exists():
            return
        for step_name, models in json.loads(source.read_text()).items():
            for name, data in models.items():
                entry = self._stats[step_name][name]
                entry.total_calls = data["total_calls"]
                entry.success_count = data["success_count"]
                entry.total_cost = data["total_cost"]
                entry.total_latency_ms = data.get("total_latency_ms", 0.0)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Loop guard — detects agent loops from repeated similar messages.
|
|
2
|
+
|
|
3
|
+
Tracks recent message fingerprints per session. When consecutive turns
|
|
4
|
+
have high textual similarity or identical content, signals a loop so the
|
|
5
|
+
router can escalate or halt.
|
|
6
|
+
|
|
7
|
+
Budget guard logic lives in router.py (it's part of the routing decision).
|
|
8
|
+
This module focuses on turn-level repetition detection.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections import deque
|
|
14
|
+
from difflib import SequenceMatcher
|
|
15
|
+
|
|
16
|
+
from token_aud.agent.policy import LoopGuardAction, LoopGuardConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LoopGuard:
    """Stateful detector for repeated, near-identical turns in an agent run.

    Each call to :meth:`check` fingerprints the latest non-empty message and
    compares it with a short window of earlier fingerprints. A run of similar
    turns that reaches ``config.repeated_turn_limit`` is reported as a loop.
    The guard also counts escalations so the caller can decide when to stop.
    """

    def __init__(self, config: LoopGuardConfig) -> None:
        """Create a guard with empty history.

        Args:
            config: Loop-guard settings. This class reads ``enabled``,
                ``repeated_turn_limit``, ``similarity_threshold`` and
                ``on_trigger`` (``hard_stop_after`` and ``action``).
        """
        self._config = config
        # Bounded history: two slots more than the trigger limit.
        self._recent_fingerprints: deque[str] = deque(
            maxlen=config.repeated_turn_limit + 2
        )
        self._consecutive_similar: int = 0
        self._escalation_count: int = 0

    def reset(self) -> None:
        """Clear all state between runs."""
        self._recent_fingerprints.clear()
        self._consecutive_similar = 0
        self._escalation_count = 0

    def check(self, messages: list[dict[str, str]]) -> bool:
        """Check if the current turn looks like a loop.

        Args:
            messages: The message list about to be sent to the LLM.

        Returns:
            True if a loop is detected. Always False when the guard is
            disabled or no message carries usable text; in both cases the
            guard's state is left unchanged.
        """
        if not self._config.enabled:
            return False

        fingerprint = self._fingerprint(messages)
        if not fingerprint:
            return False

        # Compare before appending so a turn is never matched against itself.
        is_similar = self._is_similar_to_recent(fingerprint)
        self._recent_fingerprints.append(fingerprint)

        if is_similar:
            self._consecutive_similar += 1
        else:
            self._consecutive_similar = 0

        return self._consecutive_similar >= self._config.repeated_turn_limit

    def record_escalation(self) -> None:
        """Record that the router escalated due to a loop detection."""
        self._escalation_count += 1

    @property
    def should_hard_stop(self) -> bool:
        """True once the escalation count has reached ``hard_stop_after``."""
        limit = self._config.on_trigger.hard_stop_after
        return self._escalation_count >= limit

    @property
    def action(self) -> LoopGuardAction:
        """The action configured for when the guard triggers."""
        return self._config.on_trigger.action

    @property
    def escalation_count(self) -> int:
        """Number of escalations recorded since the last reset."""
        return self._escalation_count

    # --- Internals ---

    @staticmethod
    def _content_text(content: object) -> str:
        """Flatten a message ``content`` value into plain text.

        Strings are returned unchanged. A list or tuple is treated as a
        sequence of content parts: string items and the string ``"text"``
        field of dict items are joined with spaces, other parts are ignored.
        Any other truthy value falls back to ``str()``.
        """
        if isinstance(content, str):
            return content
        if not content:
            return ""
        if isinstance(content, (list, tuple)):
            # Presumably OpenAI-style content parts such as
            # {"type": "text", "text": "..."} -- verify against callers.
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return " ".join(pieces)
        return str(content)

    def _fingerprint(self, messages: list[dict[str, str]]) -> str:
        """Extract a comparable fingerprint from the last non-empty message.

        Returns the stripped text of that message, truncated to 500
        characters, or an empty string when no message has content.
        """
        if not messages:
            return ""
        for msg in reversed(messages):
            # Non-string content used to crash on ``.strip()``; flatten first.
            text = self._content_text(msg.get("content", ""))
            if text:
                return text.strip()[:500]
        return ""

    def _is_similar_to_recent(self, fingerprint: str) -> bool:
        """Check if the fingerprint is similar to any recent one."""
        threshold = self._config.similarity_threshold
        # autojunk is disabled on purpose: for sequences of 200+ items the
        # default heuristic drops frequently occurring characters from the
        # match index, which can make near-identical long fingerprints score
        # far below the threshold. Fingerprints are capped at 500 characters,
        # so the comparison stays bounded without the heuristic.
        return any(
            SequenceMatcher(None, fingerprint, recent, autojunk=False).ratio()
            >= threshold
            for recent in self._recent_fingerprints
        )
|