agent-vitals 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_vitals-1.0.0/LICENSE +21 -0
- agent_vitals-1.0.0/PKG-INFO +235 -0
- agent_vitals-1.0.0/README.md +188 -0
- agent_vitals-1.0.0/agent_vitals/__init__.py +54 -0
- agent_vitals-1.0.0/agent_vitals/adapters/__init__.py +128 -0
- agent_vitals-1.0.0/agent_vitals/backtest.py +452 -0
- agent_vitals-1.0.0/agent_vitals/config.py +568 -0
- agent_vitals-1.0.0/agent_vitals/detection/__init__.py +18 -0
- agent_vitals-1.0.0/agent_vitals/detection/loop.py +362 -0
- agent_vitals-1.0.0/agent_vitals/detection/metrics.py +160 -0
- agent_vitals-1.0.0/agent_vitals/detection/stop_rule.py +166 -0
- agent_vitals-1.0.0/agent_vitals/exceptions.py +40 -0
- agent_vitals-1.0.0/agent_vitals/export/__init__.py +42 -0
- agent_vitals-1.0.0/agent_vitals/export/jsonl.py +144 -0
- agent_vitals-1.0.0/agent_vitals/monitor.py +355 -0
- agent_vitals-1.0.0/agent_vitals/schema.py +202 -0
- agent_vitals-1.0.0/agent_vitals.egg-info/PKG-INFO +235 -0
- agent_vitals-1.0.0/agent_vitals.egg-info/SOURCES.txt +30 -0
- agent_vitals-1.0.0/agent_vitals.egg-info/dependency_links.txt +1 -0
- agent_vitals-1.0.0/agent_vitals.egg-info/requires.txt +30 -0
- agent_vitals-1.0.0/agent_vitals.egg-info/top_level.txt +1 -0
- agent_vitals-1.0.0/pyproject.toml +66 -0
- agent_vitals-1.0.0/setup.cfg +4 -0
- agent_vitals-1.0.0/tests/test_backtest.py +280 -0
- agent_vitals-1.0.0/tests/test_config.py +77 -0
- agent_vitals-1.0.0/tests/test_detection_loop.py +255 -0
- agent_vitals-1.0.0/tests/test_detection_metrics.py +152 -0
- agent_vitals-1.0.0/tests/test_detection_stop_rule.py +53 -0
- agent_vitals-1.0.0/tests/test_export.py +293 -0
- agent_vitals-1.0.0/tests/test_integration.py +210 -0
- agent_vitals-1.0.0/tests/test_monitor.py +167 -0
- agent_vitals-1.0.0/tests/test_schema.py +163 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Agent Vitals Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-vitals
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Standalone agent health monitor — detect loops, stuck states, thrash, and runaway costs in any AI agent workflow.
|
|
5
|
+
Author: Agent Vitals Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/kneelinghorse/agent-vitals
|
|
8
|
+
Project-URL: Documentation, https://github.com/kneelinghorse/agent-vitals#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/kneelinghorse/agent-vitals
|
|
10
|
+
Project-URL: Issues, https://github.com/kneelinghorse/agent-vitals/issues
|
|
11
|
+
Keywords: agent,vitals,monitoring,llm,health,loop-detection
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: pydantic>=2.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0
|
|
25
|
+
Provides-Extra: langchain
|
|
26
|
+
Requires-Dist: langchain>=0.3; extra == "langchain"
|
|
27
|
+
Provides-Extra: langgraph
|
|
28
|
+
Requires-Dist: langgraph>=0.2; extra == "langgraph"
|
|
29
|
+
Provides-Extra: otlp
|
|
30
|
+
Requires-Dist: opentelemetry-sdk; extra == "otlp"
|
|
31
|
+
Provides-Extra: langfuse
|
|
32
|
+
Requires-Dist: langfuse>=2.0; extra == "langfuse"
|
|
33
|
+
Provides-Extra: langsmith
|
|
34
|
+
Requires-Dist: langsmith>=0.1; extra == "langsmith"
|
|
35
|
+
Provides-Extra: all
|
|
36
|
+
Requires-Dist: langchain>=0.3; extra == "all"
|
|
37
|
+
Requires-Dist: langgraph>=0.2; extra == "all"
|
|
38
|
+
Requires-Dist: opentelemetry-sdk; extra == "all"
|
|
39
|
+
Requires-Dist: langfuse>=2.0; extra == "all"
|
|
40
|
+
Requires-Dist: langsmith>=0.1; extra == "all"
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
|
44
|
+
Requires-Dist: ruff>=0.7; extra == "dev"
|
|
45
|
+
Requires-Dist: mypy>=1.13; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# Agent Vitals
|
|
49
|
+
|
|
50
|
+
[PyPI version](https://pypi.org/project/agent-vitals/)
|
|
51
|
+
[Supported Python versions](https://pypi.org/project/agent-vitals/)
|
|
52
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
53
|
+
|
|
54
|
+
**Standalone agent health monitor** — detect loops, stuck states, thrash, and runaway costs in any AI agent workflow.
|
|
55
|
+
|
|
56
|
+
Agent Vitals watches your LLM agent's vital signs in real time. Feed it four numbers per step and it tells you when your agent is looping, stuck, thrashing, or burning tokens for nothing.
|
|
57
|
+
|
|
58
|
+
## Install
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install agent-vitals
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from agent_vitals import AgentVitals
|
|
68
|
+
|
|
69
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
70
|
+
|
|
71
|
+
for step in range(max_steps):
|
|
72
|
+
result = call_llm(prompt)
|
|
73
|
+
findings = extract_findings(result)
|
|
74
|
+
|
|
75
|
+
snapshot = monitor.step(
|
|
76
|
+
findings_count=len(findings),
|
|
77
|
+
coverage_score=compute_coverage(findings),
|
|
78
|
+
total_tokens=result.usage.total_tokens,
|
|
79
|
+
error_count=error_tracker.count,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
if snapshot.any_failure:
|
|
83
|
+
print(f"Health issue at step {snapshot.loop_index}: "
|
|
84
|
+
f"{snapshot.stuck_trigger or snapshot.loop_trigger}")
|
|
85
|
+
break
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Features
|
|
89
|
+
|
|
90
|
+
- **4-field minimum**: Only `findings_count`, `coverage_score`, `total_tokens`, `error_count` required
|
|
91
|
+
- **Zero-config defaults**: `AgentVitals()` works out of the box with tuned thresholds
|
|
92
|
+
- **Framework-agnostic**: No dependency on LangChain, LangGraph, or any agent framework
|
|
93
|
+
- **Immutable snapshots**: Every `step()` returns a `VitalsSnapshot` with signals, metrics, and detection results
|
|
94
|
+
- **JSONL export**: Auto-log every snapshot to structured JSONL files
|
|
95
|
+
- **Backtest harness**: Offline evaluation of recorded trajectories with P/R/F1 metrics
|
|
96
|
+
- **Context manager**: `with AgentVitals(...) as monitor:` for clean resource management
|
|
97
|
+
|
|
98
|
+
## Detection Modes
|
|
99
|
+
|
|
100
|
+
| Detector | What it catches | Signal |
|
|
101
|
+
|---|---|---|
|
|
102
|
+
| **Loop** | Agent repeating actions without progress | Findings plateau over N steps |
|
|
103
|
+
| **Stuck** | Coverage stagnation despite continued work | Low DM + low CV on coverage |
|
|
104
|
+
| **Thrash** | Excessive errors indicating instability | Error count above threshold |
|
|
105
|
+
| **Runaway Cost** | Token burn with no output | Token spike with flat findings |
|
|
106
|
+
|
|
107
|
+
## API Overview
|
|
108
|
+
|
|
109
|
+
### Manual Integration (Recommended)
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from agent_vitals import AgentVitals
|
|
113
|
+
|
|
114
|
+
monitor = AgentVitals(mission_id="research-task")
|
|
115
|
+
snapshot = monitor.step(
|
|
116
|
+
findings_count=5,
|
|
117
|
+
coverage_score=0.6,
|
|
118
|
+
total_tokens=12000,
|
|
119
|
+
error_count=0,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
print(snapshot.health_state) # "healthy" | "warning" | "critical"
|
|
123
|
+
print(snapshot.any_failure) # True if loop or stuck detected
|
|
124
|
+
print(snapshot.stuck_trigger) # e.g. "coverage_stagnation", "burn_rate_anomaly"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Adapter Integration
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from agent_vitals import AgentVitals
|
|
131
|
+
from agent_vitals.adapters import TelemetryAdapter
|
|
132
|
+
|
|
133
|
+
monitor = AgentVitals(mission_id="my-task", adapter=TelemetryAdapter())
|
|
134
|
+
snapshot = monitor.step_from_state({
|
|
135
|
+
"cumulative_outputs": 5,
|
|
136
|
+
"coverage_score": 0.6,
|
|
137
|
+
"cumulative_tokens": 12000,
|
|
138
|
+
"cumulative_errors": 0,
|
|
139
|
+
})
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Pre-built Signals
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from agent_vitals import AgentVitals, RawSignals
|
|
146
|
+
|
|
147
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
148
|
+
signals = RawSignals(findings_count=5, coverage_score=0.6, total_tokens=12000, error_count=0)
|
|
149
|
+
snapshot = monitor.step_from_signals(signals)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Export
|
|
153
|
+
|
|
154
|
+
Log every snapshot to JSONL for offline analysis or observability pipelines.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from agent_vitals import AgentVitals, JSONLExporter
|
|
158
|
+
|
|
159
|
+
exporter = JSONLExporter(
|
|
160
|
+
directory="./vitals_logs",
|
|
161
|
+
layout="per_run", # or "append"
|
|
162
|
+
max_bytes=10_000_000, # rotation threshold (append mode)
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
with AgentVitals(mission_id="my-task", exporters=[exporter]) as monitor:
|
|
166
|
+
for step in range(max_steps):
|
|
167
|
+
monitor.step(findings_count=..., coverage_score=..., total_tokens=..., error_count=...)
|
|
168
|
+
# Exporter is automatically flushed and closed on exit
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Layouts:**
|
|
172
|
+
- `per_run`: `{directory}/{mission_id}/{run_id}.jsonl` — one file per run
|
|
173
|
+
- `append`: `{directory}/{mission_id}.jsonl` — all runs in one file, with rotation
|
|
174
|
+
|
|
175
|
+
## Configuration
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
from agent_vitals import AgentVitals, VitalsConfig
|
|
179
|
+
|
|
180
|
+
# From constructor kwargs
|
|
181
|
+
monitor = AgentVitals(config=VitalsConfig(
|
|
182
|
+
loop_consecutive_count=6,
|
|
183
|
+
stuck_dm_threshold=0.15,
|
|
184
|
+
))
|
|
185
|
+
|
|
186
|
+
# From YAML file
|
|
187
|
+
monitor = AgentVitals.from_yaml("thresholds.yaml")
|
|
188
|
+
|
|
189
|
+
# From environment variables (VITALS_* prefix)
|
|
190
|
+
monitor = AgentVitals() # auto-reads VITALS_LOOP_CONSECUTIVE_COUNT, etc.
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Key Thresholds
|
|
194
|
+
|
|
195
|
+
| Parameter | Default | Description |
|
|
196
|
+
|---|---|---|
|
|
197
|
+
| `loop_consecutive_count` | 5 | Steps of flat findings before loop detection |
|
|
198
|
+
| `stuck_dm_threshold` | 0.15 | DM below this → coverage stagnation |
|
|
199
|
+
| `stuck_cv_threshold` | 0.5 | CV below this → low variation |
|
|
200
|
+
| `burn_rate_multiplier` | 2.0 | Token spike ratio for burn rate anomaly |
|
|
201
|
+
|
|
202
|
+
## Backtest
|
|
203
|
+
|
|
204
|
+
Evaluate detection accuracy against labeled trajectory corpora.
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from agent_vitals.backtest import load_dataset, load_labels, run_backtest
|
|
208
|
+
|
|
209
|
+
dataset = load_dataset("path/to/traces/")
|
|
210
|
+
labels = load_labels("path/to/labels.json")
|
|
211
|
+
report = run_backtest(dataset, labels)
|
|
212
|
+
|
|
213
|
+
print(f"vitals.any: P={report.composite_any.precision:.3f} "
|
|
214
|
+
f"R={report.composite_any.recall:.3f} "
|
|
215
|
+
f"F1={report.composite_any.f1:.3f}")
|
|
216
|
+
|
|
217
|
+
for name, detector in report.detectors.items():
|
|
218
|
+
print(f" {name}: P={detector.precision:.3f} R={detector.recall:.3f}")
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Session Summary
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
225
|
+
# ... run steps ...
|
|
226
|
+
summary = monitor.summary()
|
|
227
|
+
# {"mission_id": "my-task", "total_steps": 8, "health_state": "healthy",
|
|
228
|
+
# "any_loop_detected": False, "any_stuck_detected": False, ...}
|
|
229
|
+
|
|
230
|
+
monitor.reset() # Clear history for next run (also flushes exporters)
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Agent Vitals
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/agent-vitals/)
|
|
4
|
+
[Supported Python versions](https://pypi.org/project/agent-vitals/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
**Standalone agent health monitor** — detect loops, stuck states, thrash, and runaway costs in any AI agent workflow.
|
|
8
|
+
|
|
9
|
+
Agent Vitals watches your LLM agent's vital signs in real time. Feed it four numbers per step and it tells you when your agent is looping, stuck, thrashing, or burning tokens for nothing.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install agent-vitals
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from agent_vitals import AgentVitals
|
|
21
|
+
|
|
22
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
23
|
+
|
|
24
|
+
for step in range(max_steps):
|
|
25
|
+
result = call_llm(prompt)
|
|
26
|
+
findings = extract_findings(result)
|
|
27
|
+
|
|
28
|
+
snapshot = monitor.step(
|
|
29
|
+
findings_count=len(findings),
|
|
30
|
+
coverage_score=compute_coverage(findings),
|
|
31
|
+
total_tokens=result.usage.total_tokens,
|
|
32
|
+
error_count=error_tracker.count,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
if snapshot.any_failure:
|
|
36
|
+
print(f"Health issue at step {snapshot.loop_index}: "
|
|
37
|
+
f"{snapshot.stuck_trigger or snapshot.loop_trigger}")
|
|
38
|
+
break
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Features
|
|
42
|
+
|
|
43
|
+
- **4-field minimum**: Only `findings_count`, `coverage_score`, `total_tokens`, `error_count` required
|
|
44
|
+
- **Zero-config defaults**: `AgentVitals()` works out of the box with tuned thresholds
|
|
45
|
+
- **Framework-agnostic**: No dependency on LangChain, LangGraph, or any agent framework
|
|
46
|
+
- **Immutable snapshots**: Every `step()` returns a `VitalsSnapshot` with signals, metrics, and detection results
|
|
47
|
+
- **JSONL export**: Auto-log every snapshot to structured JSONL files
|
|
48
|
+
- **Backtest harness**: Offline evaluation of recorded trajectories with P/R/F1 metrics
|
|
49
|
+
- **Context manager**: `with AgentVitals(...) as monitor:` for clean resource management
|
|
50
|
+
|
|
51
|
+
## Detection Modes
|
|
52
|
+
|
|
53
|
+
| Detector | What it catches | Signal |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| **Loop** | Agent repeating actions without progress | Findings plateau over N steps |
|
|
56
|
+
| **Stuck** | Coverage stagnation despite continued work | Low DM + low CV on coverage |
|
|
57
|
+
| **Thrash** | Excessive errors indicating instability | Error count above threshold |
|
|
58
|
+
| **Runaway Cost** | Token burn with no output | Token spike with flat findings |
|
|
59
|
+
|
|
60
|
+
## API Overview
|
|
61
|
+
|
|
62
|
+
### Manual Integration (Recommended)
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from agent_vitals import AgentVitals
|
|
66
|
+
|
|
67
|
+
monitor = AgentVitals(mission_id="research-task")
|
|
68
|
+
snapshot = monitor.step(
|
|
69
|
+
findings_count=5,
|
|
70
|
+
coverage_score=0.6,
|
|
71
|
+
total_tokens=12000,
|
|
72
|
+
error_count=0,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
print(snapshot.health_state) # "healthy" | "warning" | "critical"
|
|
76
|
+
print(snapshot.any_failure) # True if loop or stuck detected
|
|
77
|
+
print(snapshot.stuck_trigger) # e.g. "coverage_stagnation", "burn_rate_anomaly"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Adapter Integration
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from agent_vitals import AgentVitals
|
|
84
|
+
from agent_vitals.adapters import TelemetryAdapter
|
|
85
|
+
|
|
86
|
+
monitor = AgentVitals(mission_id="my-task", adapter=TelemetryAdapter())
|
|
87
|
+
snapshot = monitor.step_from_state({
|
|
88
|
+
"cumulative_outputs": 5,
|
|
89
|
+
"coverage_score": 0.6,
|
|
90
|
+
"cumulative_tokens": 12000,
|
|
91
|
+
"cumulative_errors": 0,
|
|
92
|
+
})
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Pre-built Signals
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from agent_vitals import AgentVitals, RawSignals
|
|
99
|
+
|
|
100
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
101
|
+
signals = RawSignals(findings_count=5, coverage_score=0.6, total_tokens=12000, error_count=0)
|
|
102
|
+
snapshot = monitor.step_from_signals(signals)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Export
|
|
106
|
+
|
|
107
|
+
Log every snapshot to JSONL for offline analysis or observability pipelines.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from agent_vitals import AgentVitals, JSONLExporter
|
|
111
|
+
|
|
112
|
+
exporter = JSONLExporter(
|
|
113
|
+
directory="./vitals_logs",
|
|
114
|
+
layout="per_run", # or "append"
|
|
115
|
+
max_bytes=10_000_000, # rotation threshold (append mode)
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
with AgentVitals(mission_id="my-task", exporters=[exporter]) as monitor:
|
|
119
|
+
for step in range(max_steps):
|
|
120
|
+
monitor.step(findings_count=..., coverage_score=..., total_tokens=..., error_count=...)
|
|
121
|
+
# Exporter is automatically flushed and closed on exit
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Layouts:**
|
|
125
|
+
- `per_run`: `{directory}/{mission_id}/{run_id}.jsonl` — one file per run
|
|
126
|
+
- `append`: `{directory}/{mission_id}.jsonl` — all runs in one file, with rotation
|
|
127
|
+
|
|
128
|
+
## Configuration
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from agent_vitals import AgentVitals, VitalsConfig
|
|
132
|
+
|
|
133
|
+
# From constructor kwargs
|
|
134
|
+
monitor = AgentVitals(config=VitalsConfig(
|
|
135
|
+
loop_consecutive_count=6,
|
|
136
|
+
stuck_dm_threshold=0.15,
|
|
137
|
+
))
|
|
138
|
+
|
|
139
|
+
# From YAML file
|
|
140
|
+
monitor = AgentVitals.from_yaml("thresholds.yaml")
|
|
141
|
+
|
|
142
|
+
# From environment variables (VITALS_* prefix)
|
|
143
|
+
monitor = AgentVitals() # auto-reads VITALS_LOOP_CONSECUTIVE_COUNT, etc.
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Key Thresholds
|
|
147
|
+
|
|
148
|
+
| Parameter | Default | Description |
|
|
149
|
+
|---|---|---|
|
|
150
|
+
| `loop_consecutive_count` | 5 | Steps of flat findings before loop detection |
|
|
151
|
+
| `stuck_dm_threshold` | 0.15 | DM below this → coverage stagnation |
|
|
152
|
+
| `stuck_cv_threshold` | 0.5 | CV below this → low variation |
|
|
153
|
+
| `burn_rate_multiplier` | 2.0 | Token spike ratio for burn rate anomaly |
|
|
154
|
+
|
|
155
|
+
## Backtest
|
|
156
|
+
|
|
157
|
+
Evaluate detection accuracy against labeled trajectory corpora.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from agent_vitals.backtest import load_dataset, load_labels, run_backtest
|
|
161
|
+
|
|
162
|
+
dataset = load_dataset("path/to/traces/")
|
|
163
|
+
labels = load_labels("path/to/labels.json")
|
|
164
|
+
report = run_backtest(dataset, labels)
|
|
165
|
+
|
|
166
|
+
print(f"vitals.any: P={report.composite_any.precision:.3f} "
|
|
167
|
+
f"R={report.composite_any.recall:.3f} "
|
|
168
|
+
f"F1={report.composite_any.f1:.3f}")
|
|
169
|
+
|
|
170
|
+
for name, detector in report.detectors.items():
|
|
171
|
+
print(f" {name}: P={detector.precision:.3f} R={detector.recall:.3f}")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Session Summary
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
178
|
+
# ... run steps ...
|
|
179
|
+
summary = monitor.summary()
|
|
180
|
+
# {"mission_id": "my-task", "total_steps": 8, "health_state": "healthy",
|
|
181
|
+
# "any_loop_detected": False, "any_stuck_detected": False, ...}
|
|
182
|
+
|
|
183
|
+
monitor.reset() # Clear history for next run (also flushes exporters)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## License
|
|
187
|
+
|
|
188
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Agent Vitals — Standalone agent health monitor.
|
|
2
|
+
|
|
3
|
+
Detect loops, stuck states, thrash, and runaway costs in any AI agent workflow.
|
|
4
|
+
|
|
5
|
+
Usage::
|
|
6
|
+
|
|
7
|
+
from agent_vitals import AgentVitals, VitalsSnapshot, RawSignals
|
|
8
|
+
|
|
9
|
+
monitor = AgentVitals(mission_id="my-task")
|
|
10
|
+
snapshot = monitor.step(
|
|
11
|
+
findings_count=5,
|
|
12
|
+
coverage_score=0.6,
|
|
13
|
+
total_tokens=12000,
|
|
14
|
+
error_count=0,
|
|
15
|
+
)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .config import VitalsConfig, get_vitals_config
|
|
19
|
+
from .detection.loop import LoopDetectionResult, detect_loop
|
|
20
|
+
from .detection.stop_rule import StopRuleSignals, derive_stop_signals
|
|
21
|
+
from .exceptions import AdapterError, BacktestError, ConfigurationError, ExportError, VitalsError
|
|
22
|
+
from .export import JSONLExporter, VitalsExporter
|
|
23
|
+
from .monitor import AgentVitals
|
|
24
|
+
from .schema import (
|
|
25
|
+
HealthState,
|
|
26
|
+
InterventionRecord,
|
|
27
|
+
RawSignals,
|
|
28
|
+
TemporalMetricsResult,
|
|
29
|
+
VitalsSnapshot,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
__version__ = "1.0.0"
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"AdapterError",
|
|
36
|
+
"AgentVitals",
|
|
37
|
+
"BacktestError",
|
|
38
|
+
"ConfigurationError",
|
|
39
|
+
"ExportError",
|
|
40
|
+
"HealthState",
|
|
41
|
+
"InterventionRecord",
|
|
42
|
+
"JSONLExporter",
|
|
43
|
+
"LoopDetectionResult",
|
|
44
|
+
"RawSignals",
|
|
45
|
+
"StopRuleSignals",
|
|
46
|
+
"TemporalMetricsResult",
|
|
47
|
+
"VitalsConfig",
|
|
48
|
+
"VitalsError",
|
|
49
|
+
"VitalsExporter",
|
|
50
|
+
"VitalsSnapshot",
|
|
51
|
+
"derive_stop_signals",
|
|
52
|
+
"detect_loop",
|
|
53
|
+
"get_vitals_config",
|
|
54
|
+
]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Signal Adapter Protocol for Agent Vitals.
|
|
2
|
+
|
|
3
|
+
Defines the interface for mapping arbitrary agent state to RawSignals.
|
|
4
|
+
Concrete adapters implement `extract()` for their specific framework.
|
|
5
|
+
|
|
6
|
+
This is the primary extension point for integrating Agent Vitals with
|
|
7
|
+
any agent framework. Users can either:
|
|
8
|
+
1. Implement SignalAdapter for their framework
|
|
9
|
+
2. Construct RawSignals directly (manual mode)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any, Mapping, Protocol, runtime_checkable
|
|
15
|
+
|
|
16
|
+
from ..schema import RawSignals
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@runtime_checkable
class SignalAdapter(Protocol):
    """Structural interface for turning agent state into RawSignals.

    Any object exposing a compatible ``extract()`` method satisfies this
    protocol; because the class is decorated with ``runtime_checkable``,
    ``isinstance(obj, SignalAdapter)`` can be used to test for the
    method's presence.

    The smallest useful implementation fills in four fields:
        - findings_count (or an equivalent output count)
        - coverage_score (or an equivalent progress metric, 0.0-1.0)
        - total_tokens (cumulative token usage)
        - error_count (cumulative errors)

    Every other field improves detection confidence but is optional.
    """

    def extract(self, state: Mapping[str, Any]) -> RawSignals:
        """Build a RawSignals instance from framework-specific state.

        Args:
            state: Mapping holding the agent's state. Its shape is
                defined by the agent framework in use (e.g., LangGraph
                AgentState, LangChain callback data, raw telemetry dict).

        Returns:
            RawSignals populated from ``state``. Fields with no
            counterpart in ``state`` should keep their defaults (0).
        """
        ...
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class BaseAdapter:
    """Base class for signal adapters with shared coercion helpers.

    Subclass this and override `extract()` for your framework. The
    ``_safe_*`` static helpers coerce loosely-typed values and return a
    caller-supplied default instead of raising when coercion fails.
    """

    @staticmethod
    def _safe_int(value: Any, default: int = 0) -> int:
        """Return ``int(value)``, or ``default`` if the conversion fails.

        Args:
            value: Any object; passed to ``int()``.
            default: Returned when ``int(value)`` raises.

        Returns:
            The converted integer, or ``default``.
        """
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")); ValueError already
            # covers NaN and unparsable strings, TypeError covers None.
            return default

    @staticmethod
    def _safe_float(value: Any, default: float = 0.0) -> float:
        """Return ``float(value)``, or ``default`` if the conversion fails.

        Args:
            value: Any object; passed to ``float()``.
            default: Returned when ``float(value)`` raises.

        Returns:
            The converted float, or ``default``.
        """
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers integers too large to represent as float.
            return default

    @staticmethod
    def _safe_len(value: Any, default: int = 0) -> int:
        """Return ``len(value)`` for truthy values, otherwise ``default``.

        Falsy values (``None``, empty containers, ``0``) and truthy
        objects that do not support ``len()`` both yield ``default``.

        Args:
            value: Any object.
            default: Returned when ``value`` is falsy or has no length.

        Returns:
            The length of ``value``, or ``default``.
        """
        try:
            return len(value) if value else default
        except TypeError:
            return default

    def extract(self, state: Mapping[str, Any]) -> RawSignals:
        """Map ``state`` to RawSignals; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement extract()")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class TelemetryAdapter(BaseAdapter):
    """Adapter that converts a generic telemetry mapping into RawSignals.

    Intended for the StepTelemetry format emitted by cross-agent
    harnesses, i.e. any agent that reports per-step JSON telemetry.

    Keys read by `extract()` (a missing key is read as 0 / 0.0):
        - cumulative_outputs -> findings_count
        - cumulative_sources -> sources_count
        - coverage_score     -> coverage_score
        - prompt_tokens      -> prompt_tokens
        - completion_tokens  -> completion_tokens
        - cumulative_tokens  -> total_tokens
        - cumulative_queries -> api_calls and query_count
        - cumulative_errors  -> error_count

    Per-step keys such as outputs_produced, total_tokens, errors,
    tool_calls and tool_results are not read by `extract()`; callers are
    expected to supply the running ``cumulative_*`` totals instead.
    All remaining RawSignals fields are set to fixed zeros.
    """

    def extract(self, state: Mapping[str, Any]) -> RawSignals:
        """Build RawSignals from the cumulative counters in ``state``.

        Values are coerced with the base-class ``_safe_int`` /
        ``_safe_float`` helpers, so values they cannot convert become 0.
        """

        def counter(key: str) -> int:
            # Integer lookup with a 0 fallback for absent keys.
            return self._safe_int(state.get(key, 0))

        def ratio(key: str) -> float:
            # Float lookup with a 0.0 fallback for absent keys.
            return self._safe_float(state.get(key, 0.0))

        return RawSignals(
            findings_count=counter("cumulative_outputs"),
            sources_count=counter("cumulative_sources"),
            objectives_covered=0,
            coverage_score=ratio("coverage_score"),
            confidence_score=0.0,
            prompt_tokens=counter("prompt_tokens"),
            completion_tokens=counter("completion_tokens"),
            total_tokens=counter("cumulative_tokens"),
            api_calls=counter("cumulative_queries"),
            query_count=counter("cumulative_queries"),
            unique_domains=0,
            refinement_count=0,
            convergence_delta=0.0,
            error_count=counter("cumulative_errors"),
        )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
__all__ = [
|
|
125
|
+
"BaseAdapter",
|
|
126
|
+
"SignalAdapter",
|
|
127
|
+
"TelemetryAdapter",
|
|
128
|
+
]
|