argus-agents 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- argus_agents-0.1.0/.gitignore +57 -0
- argus_agents-0.1.0/PKG-INFO +143 -0
- argus_agents-0.1.0/README.md +126 -0
- argus_agents-0.1.0/log.txt +372 -0
- argus_agents-0.1.0/pyproject.toml +48 -0
- argus_agents-0.1.0/src/argus/__init__.py +20 -0
- argus_agents-0.1.0/src/argus/cli/__init__.py +0 -0
- argus_agents-0.1.0/src/argus/cli/cmd_replay.py +194 -0
- argus_agents-0.1.0/src/argus/cli/cmd_show.py +291 -0
- argus_agents-0.1.0/src/argus/cli/main.py +138 -0
- argus_agents-0.1.0/src/argus/inspector.py +179 -0
- argus_agents-0.1.0/src/argus/models.py +53 -0
- argus_agents-0.1.0/src/argus/patcher.py +131 -0
- argus_agents-0.1.0/src/argus/replay.py +75 -0
- argus_agents-0.1.0/src/argus/storage.py +137 -0
- argus_agents-0.1.0/src/argus/utils/__init__.py +0 -0
- argus_agents-0.1.0/src/argus/utils/ids.py +8 -0
- argus_agents-0.1.0/src/argus/utils/serializer.py +118 -0
- argus_agents-0.1.0/src/argus/utils/type_introspection.py +112 -0
- argus_agents-0.1.0/src/argus/watcher.py +236 -0
- argus_agents-0.1.0/tests/__init__.py +0 -0
- argus_agents-0.1.0/tests/test_inspector.py +211 -0
- argus_agents-0.1.0/tests/test_integration.py +116 -0
- argus_agents-0.1.0/tests/test_models.py +110 -0
- argus_agents-0.1.0/tests/test_patcher.py +159 -0
- argus_agents-0.1.0/tests/test_serializer.py +102 -0
- argus_agents-0.1.0/tests/test_storage.py +155 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Distribution / packaging
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
*.egg-info/
|
|
12
|
+
*.egg
|
|
13
|
+
*.whl
|
|
14
|
+
.eggs/
|
|
15
|
+
|
|
16
|
+
# Virtual environment
|
|
17
|
+
.venv/
|
|
18
|
+
venv/
|
|
19
|
+
env/
|
|
20
|
+
ENV/
|
|
21
|
+
|
|
22
|
+
# Testing & coverage
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.coverage
|
|
25
|
+
.coverage.*
|
|
26
|
+
htmlcov/
|
|
27
|
+
coverage.xml
|
|
28
|
+
|
|
29
|
+
# Type checking & linting
|
|
30
|
+
.mypy_cache/
|
|
31
|
+
.ruff_cache/
|
|
32
|
+
.dmypy.json
|
|
33
|
+
|
|
34
|
+
# ARGUS runtime data (saved pipeline runs)
|
|
35
|
+
.argus/
|
|
36
|
+
|
|
37
|
+
# Local demo / exercise pipelines (not part of src/argus)
|
|
38
|
+
test_workflow/
|
|
39
|
+
real_world_demo/
|
|
40
|
+
|
|
41
|
+
# macOS
|
|
42
|
+
.DS_Store
|
|
43
|
+
.localized
|
|
44
|
+
.AppleDouble
|
|
45
|
+
__MACOSX/
|
|
46
|
+
|
|
47
|
+
# IDE
|
|
48
|
+
.idea/
|
|
49
|
+
.vscode/
|
|
50
|
+
*.swp
|
|
51
|
+
*.swo
|
|
52
|
+
*~
|
|
53
|
+
|
|
54
|
+
# Claude Code local settings
|
|
55
|
+
.claude/
|
|
56
|
+
CLAUDE.md
|
|
57
|
+
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: argus-agents
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Silent watcher for LangGraph multiagent pipelines — detects silent failures, captures full state, enables step-level replay.
|
|
5
|
+
Project-URL: Repository, https://github.com/varaddurge/argus-agents
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: langgraph>=0.2.0
|
|
9
|
+
Requires-Dist: rich>=13.0.0
|
|
10
|
+
Requires-Dist: typer>=0.12.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# ARGUS
|
|
19
|
+
|
|
20
|
+
A monitoring library for LangGraph pipelines. Two lines to integrate — ARGUS captures node inputs/outputs, catches silent failures before they propagate, and lets you replay any run from the step where it broke.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## The problem
|
|
25
|
+
|
|
26
|
+
LangGraph pipelines fail silently. A node runs, returns an incomplete dict, and the next node either crashes on a missing key or produces garbage with no error. By the time you notice, the state has been overwritten and the original failure is gone.
|
|
27
|
+
|
|
28
|
+
ARGUS catches this at the boundary between nodes, before it cascades.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install argus-agents
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
From source:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
git clone https://github.com/VaradDurge/ARGUS.git
|
|
42
|
+
cd ARGUS
|
|
43
|
+
pip install -e ".[dev]"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Requires Python 3.9+ and LangGraph 0.2+.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from argus import ArgusWatcher
|
|
54
|
+
from langgraph.graph import StateGraph
|
|
55
|
+
|
|
56
|
+
graph = StateGraph(MyState)
|
|
57
|
+
graph.add_node("fetch", fetch_node)
|
|
58
|
+
graph.add_node("analyze", analyze_node)
|
|
59
|
+
graph.add_edge("fetch", "analyze")
|
|
60
|
+
|
|
61
|
+
watcher = ArgusWatcher()
|
|
62
|
+
watcher.watch(graph) # before compile()
|
|
63
|
+
|
|
64
|
+
app = graph.compile()
|
|
65
|
+
result = app.invoke(initial_state)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
No decorators, no changes to your node functions.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## How it works
|
|
73
|
+
|
|
74
|
+
ARGUS patches node functions at the graph level before `compile()`. After each node executes, it:
|
|
75
|
+
|
|
76
|
+
- Captures the full input and output state as a JSON snapshot
|
|
77
|
+
- Checks the output against what the next node's type annotation expects
|
|
78
|
+
- Flags missing required fields, empty fields, and primitive type mismatches
|
|
79
|
+
- Writes the run record to `.argus/runs/<run-id>.json`
|
|
80
|
+
|
|
81
|
+
Detection is driven by the successor node's type annotations. TypedDict and Pydantic both work.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Features
|
|
86
|
+
|
|
87
|
+
**Silent failure detection** — if a node forgets to populate a field that the next node requires, ARGUS flags it right after that node runs:
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
overall_status: silent_failure
|
|
91
|
+
first_failure_step: fetch_agent
|
|
92
|
+
root_cause_chain: ['fetch_agent', 'analyze_agent']
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Per-node snapshots** — every run records input state, output dict, duration, timestamp, and full traceback on crash.
|
|
96
|
+
|
|
97
|
+
**Root cause chaining** — when multiple nodes fail in sequence, ARGUS walks the event chain back to where it started.
|
|
98
|
+
|
|
99
|
+
**Step-level replay** — re-run from any saved step with the exact input state that was captured:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
argus replay <run-id> analyze_agent --app my_module:build_graph
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`build_graph` is a zero-argument function that returns an uncompiled `StateGraph`. ARGUS re-instruments it and saves the replay as a new run.
|
|
106
|
+
|
|
107
|
+
**Local storage** — runs are plain JSON under `.argus/runs/`. No database, no cloud.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## CLI
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
argus list # all runs, newest first
|
|
115
|
+
argus show last # most recent run
|
|
116
|
+
argus show run a1b2c3d4 # by full or 8-char prefix ID
|
|
117
|
+
argus inspect a1b2c3d4 --step analyze_agent # full snapshot for a node
|
|
118
|
+
argus replay a1b2c3d4 analyze_agent --app my_module:build_graph
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Example output
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
Run ID: a1b2c3d4e5f6...
|
|
127
|
+
Status: silent_failure
|
|
128
|
+
Started: 2026-04-02T10:23:11Z Duration: 842ms
|
|
129
|
+
|
|
130
|
+
Step Node Status Duration
|
|
131
|
+
──── ─────────────── ─────── ────────
|
|
132
|
+
0 research_agent pass 210ms
|
|
133
|
+
1 analysis_agent fail 312ms ← Missing: kb_articles
|
|
134
|
+
2 validation_agent pass —
|
|
135
|
+
|
|
136
|
+
Root cause chain: research_agent → analysis_agent
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
MIT
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ARGUS
|
|
2
|
+
|
|
3
|
+
A monitoring library for LangGraph pipelines. Two lines to integrate — ARGUS captures node inputs/outputs, catches silent failures before they propagate, and lets you replay any run from the step where it broke.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## The problem
|
|
8
|
+
|
|
9
|
+
LangGraph pipelines fail silently. A node runs, returns an incomplete dict, and the next node either crashes on a missing key or produces garbage with no error. By the time you notice, the state has been overwritten and the original failure is gone.
|
|
10
|
+
|
|
11
|
+
ARGUS catches this at the boundary between nodes, before it cascades.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install argus-agents
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
From source:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
git clone https://github.com/VaradDurge/ARGUS.git
|
|
25
|
+
cd ARGUS
|
|
26
|
+
pip install -e ".[dev]"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Requires Python 3.9+ and LangGraph 0.2+.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from argus import ArgusWatcher
|
|
37
|
+
from langgraph.graph import StateGraph
|
|
38
|
+
|
|
39
|
+
graph = StateGraph(MyState)
|
|
40
|
+
graph.add_node("fetch", fetch_node)
|
|
41
|
+
graph.add_node("analyze", analyze_node)
|
|
42
|
+
graph.add_edge("fetch", "analyze")
|
|
43
|
+
|
|
44
|
+
watcher = ArgusWatcher()
|
|
45
|
+
watcher.watch(graph) # before compile()
|
|
46
|
+
|
|
47
|
+
app = graph.compile()
|
|
48
|
+
result = app.invoke(initial_state)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
No decorators, no changes to your node functions.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## How it works
|
|
56
|
+
|
|
57
|
+
ARGUS patches node functions at the graph level before `compile()`. After each node executes, it:
|
|
58
|
+
|
|
59
|
+
- Captures the full input and output state as a JSON snapshot
|
|
60
|
+
- Checks the output against what the next node's type annotation expects
|
|
61
|
+
- Flags missing required fields, empty fields, and primitive type mismatches
|
|
62
|
+
- Writes the run record to `.argus/runs/<run-id>.json`
|
|
63
|
+
|
|
64
|
+
Detection is driven by the successor node's type annotations. TypedDict and Pydantic both work.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
**Silent failure detection** — if a node forgets to populate a field that the next node requires, ARGUS flags it right after that node runs:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
overall_status: silent_failure
|
|
74
|
+
first_failure_step: fetch_agent
|
|
75
|
+
root_cause_chain: ['fetch_agent', 'analyze_agent']
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Per-node snapshots** — every run records input state, output dict, duration, timestamp, and full traceback on crash.
|
|
79
|
+
|
|
80
|
+
**Root cause chaining** — when multiple nodes fail in sequence, ARGUS walks the event chain back to where it started.
|
|
81
|
+
|
|
82
|
+
**Step-level replay** — re-run from any saved step with the exact input state that was captured:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
argus replay <run-id> analyze_agent --app my_module:build_graph
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
`build_graph` is a zero-argument function that returns an uncompiled `StateGraph`. ARGUS re-instruments it and saves the replay as a new run.
|
|
89
|
+
|
|
90
|
+
**Local storage** — runs are plain JSON under `.argus/runs/`. No database, no cloud.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## CLI
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
argus list # all runs, newest first
|
|
98
|
+
argus show last # most recent run
|
|
99
|
+
argus show run a1b2c3d4 # by full or 8-char prefix ID
|
|
100
|
+
argus inspect a1b2c3d4 --step analyze_agent # full snapshot for a node
|
|
101
|
+
argus replay a1b2c3d4 analyze_agent --app my_module:build_graph
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Example output
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
Run ID: a1b2c3d4e5f6...
|
|
110
|
+
Status: silent_failure
|
|
111
|
+
Started: 2026-04-02T10:23:11Z Duration: 842ms
|
|
112
|
+
|
|
113
|
+
Step Node Status Duration
|
|
114
|
+
──── ─────────────── ─────── ────────
|
|
115
|
+
0 research_agent pass 210ms
|
|
116
|
+
1 analysis_agent fail 312ms ← Missing: kb_articles
|
|
117
|
+
2 validation_agent pass —
|
|
118
|
+
|
|
119
|
+
Root cause chain: research_agent → analysis_agent
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## License
|
|
125
|
+
|
|
126
|
+
MIT
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
================================================================================
|
|
2
|
+
ARGUS — Development Log
|
|
3
|
+
Agentic Realtime Guard and Unified Scope
|
|
4
|
+
================================================================================
|
|
5
|
+
|
|
6
|
+
--------------------------------------------------------------------------------
|
|
7
|
+
WHAT IS ARGUS
|
|
8
|
+
--------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
ARGUS is a silent watcher for LangGraph multi-agent pipelines.
|
|
11
|
+
It requires zero changes inside your agent functions — just 2 lines of setup.
|
|
12
|
+
|
|
13
|
+
It detects:
|
|
14
|
+
- Silent failures : a node runs without crashing but drops a required field,
|
|
15
|
+
causing the next node to receive broken state silently.
|
|
16
|
+
- Crashes : a node raises an unhandled exception mid-pipeline.
|
|
17
|
+
- Type mismatches : a node returns a field with the wrong primitive type.
|
|
18
|
+
- Empty fields : a node returns a field that is None, "", [], or {}.
|
|
19
|
+
|
|
20
|
+
It also supports:
|
|
21
|
+
- Full state capture : every node's input and output is snapshotted to JSON.
|
|
22
|
+
- Step-level replay : re-run from any saved step using a fixed graph factory.
|
|
23
|
+
- Local-first storage: everything saved under .argus/runs/ as JSON, no cloud.
|
|
24
|
+
|
|
25
|
+
Integration:
|
|
26
|
+
|
|
27
|
+
from argus import ArgusWatcher
|
|
28
|
+
|
|
29
|
+
watcher = ArgusWatcher()
|
|
30
|
+
watcher.watch(graph) # call before graph.compile()
|
|
31
|
+
app = graph.compile()
|
|
32
|
+
result = app.invoke(state) # run normally, ARGUS captures everything
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
--------------------------------------------------------------------------------
|
|
36
|
+
PROJECT STRUCTURE
|
|
37
|
+
--------------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
ARGUS/
|
|
40
|
+
βββ src/argus/
|
|
41
|
+
β βββ __init__.py exports ArgusWatcher
|
|
42
|
+
β βββ watcher.py ArgusWatcher + RunSession (orchestration)
|
|
43
|
+
β βββ patcher.py wraps every node with monitoring (sync + async)
|
|
44
|
+
β βββ inspector.py silent failure detection via type introspection
|
|
45
|
+
β βββ replay.py ReplayEngine β loads saved state, re-runs
|
|
46
|
+
β βββ storage.py save / load / list runs (.argus/runs/)
|
|
47
|
+
β βββ models.py RunRecord, NodeEvent, InspectionResult, FieldMismatch
|
|
48
|
+
β βββ cli/
|
|
49
|
+
β β βββ main.py Typer CLI entry point (argus command)
|
|
50
|
+
β β βββ cmd_show.py argus show last / argus list
|
|
51
|
+
β β βββ cmd_replay.py argus replay / argus inspect
|
|
52
|
+
β βββ utils/
|
|
53
|
+
β βββ ids.py run ID generator (timestamp + hex)
|
|
54
|
+
β βββ serializer.py safe_serialize / safe_deserialize
|
|
55
|
+
β βββ type_introspection.py extract_fields, get_node_state_type
|
|
56
|
+
β
|
|
57
|
+
βββ test_workflow/ 4-agent demo pipeline for testing ARGUS
|
|
58
|
+
β βββ state.py PipelineState + per-node input TypedDicts
|
|
59
|
+
β βββ agents.py agent functions (buggy + fixed variants)
|
|
60
|
+
β βββ graph.py build_graph() / build_graph_fixed() factories
|
|
61
|
+
β βββ run_silent_failure.py run script (minimal pass/fail output)
|
|
62
|
+
β
|
|
63
|
+
βββ tests/ pytest test suite
|
|
64
|
+
βββ pyproject.toml package config, deps, tool settings
|
|
65
|
+
βββ .gitignore
|
|
66
|
+
βββ CLAUDE.md
|
|
67
|
+
βββ README.md
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
--------------------------------------------------------------------------------
|
|
71
|
+
HOW SILENT FAILURE DETECTION WORKS
|
|
72
|
+
--------------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
ARGUS reads the first-parameter type annotation of each node function.
|
|
75
|
+
After a node runs it checks whether every required field of the SUCCESSOR's
|
|
76
|
+
annotation is present in the merged state (input + output of the current node).
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
|
|
80
|
+
def analysis_agent(state: AnalysisInput) -> dict:
|
|
81
|
+
return {
|
|
82
|
+
"analysis": "...",
|
|
83
|
+
"confidence_score": 0.87,
|
|
84
|
+
# "key_insights" missing — bug
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
class ValidationInput(TypedDict):
|
|
88
|
+
topic: str
|
|
89
|
+
analysis: str
|
|
90
|
+
key_insights: list[str] # required
|
|
91
|
+
confidence_score: float
|
|
92
|
+
|
|
93
|
+
After analysis_agent runs, ARGUS checks ValidationInput.
|
|
94
|
+
"key_insights" is not in the merged state β silent_failure detected.
|
|
95
|
+
|
|
96
|
+
Supported state types: TypedDict, Pydantic v1/v2, dataclasses.
|
|
97
|
+
|
|
98
|
+
Note: LangGraph filters each node's input to its annotation fields via
|
|
99
|
+
@functools.wraps copying __annotations__. This means per-node TypedDicts
|
|
100
|
+
must include any passthrough fields (e.g. "topic") that downstream nodes
|
|
101
|
+
need, even if the current node does not use them directly.
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
--------------------------------------------------------------------------------
|
|
105
|
+
TEST WORKFLOW — 4-AGENT PIPELINE
|
|
106
|
+
--------------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
Pipeline:
|
|
109
|
+
|
|
110
|
+
research_agent → analysis_agent → validation_agent → report_agent
|
|
111
|
+
|
|
112
|
+
research_agent : collects findings for a topic (research_results, metadata)
|
|
113
|
+
analysis_agent : synthesises findings (analysis, key_insights, confidence_score)
|
|
114
|
+
validation_agent : checks completeness (validated, issues)
|
|
115
|
+
report_agent : writes final report (final_report)
|
|
116
|
+
|
|
117
|
+
Variants in agents.py:
|
|
118
|
+
analysis_agent_buggy : drops "key_insights" — triggers silent failure
|
|
119
|
+
analysis_agent_fixed : returns all required fields
|
|
120
|
+
|
|
121
|
+
Variants in graph.py:
|
|
122
|
+
build_graph() : uses analysis_agent_buggy (the broken pipeline)
|
|
123
|
+
build_graph_fixed() : uses analysis_agent_fixed (the fixed pipeline)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
--------------------------------------------------------------------------------
|
|
127
|
+
SETUP
|
|
128
|
+
--------------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
# From the ARGUS repo root:
|
|
131
|
+
|
|
132
|
+
python3 -m venv .venv
|
|
133
|
+
source .venv/bin/activate # macOS / Linux
|
|
134
|
+
# .venv\Scripts\activate # Windows
|
|
135
|
+
|
|
136
|
+
pip install -e ".[dev]"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
--------------------------------------------------------------------------------
|
|
140
|
+
COMMANDS & SAMPLE OUTPUT
|
|
141
|
+
--------------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
144
|
+
1. Run the buggy pipeline
|
|
145
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
146
|
+
|
|
147
|
+
$ python -m test_workflow.run_silent_failure
|
|
148
|
+
|
|
149
|
+
research_agent β PASSED
|
|
150
|
+
analysis_agent β FAILED
|
|
151
|
+
validation_agent β FAILED
|
|
152
|
+
report_agent β PASSED
|
|
153
|
+
|
|
154
|
+
status silent_failure
|
|
155
|
+
run argus show last for full details
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
159
|
+
2. Inspect the last run in detail
|
|
160
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
161
|
+
|
|
162
|
+
$ argus show last
|
|
163
|
+
|
|
164
|
+
argus 20260402-040801-f36747 · 2026-04-02 04:08 · 2 ms
|
|
165
|
+
status silent_failure
|
|
166
|
+
|
|
167
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
168
|
+
|
|
169
|
+
Node 1 research_agent 0 ms β pass
|
|
170
|
+
|
|
171
|
+
Node 2 analysis_agent 0 ms β silent failure
|
|
172
|
+
ββ Field "key_insights" is missing
|
|
173
|
+
ββ validation_agent received bad state
|
|
174
|
+
|
|
175
|
+
Node 3 validation_agent 0 ms β silent failure
|
|
176
|
+
ββ Field "key_insights" is missing
|
|
177
|
+
ββ report_agent received bad state
|
|
178
|
+
ββ Root cause: analysis_agent
|
|
179
|
+
|
|
180
|
+
Node 4 report_agent 0 ms β pass
|
|
181
|
+
|
|
182
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
183
|
+
root cause analysis_agent → validation_agent
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
187
|
+
3. List all saved runs
|
|
188
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
189
|
+
|
|
190
|
+
$ argus list
|
|
191
|
+
|
|
192
|
+
run id started status duration steps
|
|
193
|
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
194
|
+
20260402-040801-f36747 2026-04-02 04:08 silent_failure 2 ms 4
|
|
195
|
+
20260402-032027-8bd018 2026-04-02 03:20 silent_failure 3 ms 4
|
|
196
|
+
20260402-032023-b75cf4 2026-04-02 03:20 clean 2 ms 4
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
200
|
+
4. Inspect input/output state of a specific node
|
|
201
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
202
|
+
|
|
203
|
+
$ argus inspect 20260402-040801-f36747 --step analysis_agent
|
|
204
|
+
|
|
205
|
+
analysis_agent #1 fail
|
|
206
|
+
|
|
207
|
+
ββ input ββ
|
|
208
|
+
{
|
|
209
|
+
"topic": "quantum computing",
|
|
210
|
+
"research_results": [
|
|
211
|
+
"[Finding 1] quantum computing has shown significant momentum...",
|
|
212
|
+
"[Finding 2] Key technical challenges in quantum computing...",
|
|
213
|
+
"[Finding 3] Recent peer-reviewed breakthroughs...",
|
|
214
|
+
"[Finding 4] Cross-industry investment..."
|
|
215
|
+
],
|
|
216
|
+
"metadata": {
|
|
217
|
+
"source_count": 4,
|
|
218
|
+
"search_depth": "comprehensive",
|
|
219
|
+
"topic": "quantum computing"
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
ββ output ββ
|
|
224
|
+
{
|
|
225
|
+
"analysis": "Across 4 research findings on 'quantum computing'...",
|
|
226
|
+
"confidence_score": 0.87
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
ββ inspection ββ
|
|
230
|
+
Missing required fields: key_insights
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
234
|
+
5. Fix the bug
|
|
235
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
236
|
+
|
|
237
|
+
In test_workflow/agents.py, analysis_agent_buggy returns:
|
|
238
|
+
|
|
239
|
+
return {
|
|
240
|
+
"analysis": "...",
|
|
241
|
+
"confidence_score": 0.87,
|
|
242
|
+
# "key_insights" missing — this is the bug
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
The fix — add the missing field:
|
|
246
|
+
|
|
247
|
+
return {
|
|
248
|
+
"analysis": "...",
|
|
249
|
+
"key_insights": ["insight one", "insight two", ...],
|
|
250
|
+
"confidence_score": 0.87,
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
Or swap the agent in test_workflow/graph.py:
|
|
254
|
+
|
|
255
|
+
# change:
|
|
256
|
+
return _assemble(analysis_agent_buggy)
|
|
257
|
+
# to:
|
|
258
|
+
return _assemble(analysis_agent_fixed)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
262
|
+
6. Replay from the failed node using the fixed graph
|
|
263
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
264
|
+
|
|
265
|
+
$ argus replay 20260402-040801-f36747 analysis_agent --app test_workflow.graph:build_graph_fixed
|
|
266
|
+
|
|
267
|
+
argus replay 20260402-040801-f36747 βΊ from analysis_agent
|
|
268
|
+
|
|
269
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
270
|
+
|
|
271
|
+
Node 1 research_agent 0 ms β pass
|
|
272
|
+
|
|
273
|
+
Node 2 analysis_agent 0 ms β pass
|
|
274
|
+
|
|
275
|
+
Node 3 validation_agent 0 ms β pass
|
|
276
|
+
|
|
277
|
+
Node 4 report_agent 0 ms β pass
|
|
278
|
+
|
|
279
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
280
|
+
β clean 20260402-041256-6bc3ff
|
|
281
|
+
run argus show last for full details
|
|
282
|
+
|
|
283
|
+
The replay:
|
|
284
|
+
1. Loads the saved input state of analysis_agent from the original run.
|
|
285
|
+
2. Calls build_graph_fixed() to get a fresh graph with the fixed agent.
|
|
286
|
+
3. Attaches a new ArgusWatcher and invokes app with the saved state.
|
|
287
|
+
4. Saves the replay run as a new entry in .argus/runs/.
|
|
288
|
+
5. The original run is preserved — both are stored for comparison.
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
292
|
+
7. Show the replay run
|
|
293
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
294
|
+
|
|
295
|
+
$ argus show last
|
|
296
|
+
|
|
297
|
+
argus 20260402-041256-6bc3ff · 2026-04-02 04:12 · 3 ms
|
|
298
|
+
status clean
|
|
299
|
+
replay of 20260402-040801-f36747 from analysis_agent
|
|
300
|
+
|
|
301
|
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
302
|
+
|
|
303
|
+
Node 1 research_agent 0 ms β pass
|
|
304
|
+
|
|
305
|
+
Node 2 analysis_agent 0 ms β pass
|
|
306
|
+
|
|
307
|
+
Node 3 validation_agent 0 ms β pass
|
|
308
|
+
|
|
309
|
+
Node 4 report_agent 0 ms β pass
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
--------------------------------------------------------------------------------
|
|
313
|
+
DEPENDENCIES
|
|
314
|
+
--------------------------------------------------------------------------------
|
|
315
|
+
|
|
316
|
+
Runtime:
|
|
317
|
+
langgraph >= 0.2.0 LangGraph pipeline framework
|
|
318
|
+
typer >= 0.12.0 CLI framework
|
|
319
|
+
rich >= 13.0.0 Terminal formatting
|
|
320
|
+
|
|
321
|
+
Dev:
|
|
322
|
+
pytest >= 8.0.0
|
|
323
|
+
pytest-cov >= 5.0.0
|
|
324
|
+
ruff >= 0.4.0
|
|
325
|
+
mypy >= 1.10.0
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
--------------------------------------------------------------------------------
|
|
329
|
+
STORAGE FORMAT
|
|
330
|
+
--------------------------------------------------------------------------------
|
|
331
|
+
|
|
332
|
+
Every run is saved as .argus/runs/<run-id>.json with the structure:
|
|
333
|
+
|
|
334
|
+
{
|
|
335
|
+
"run_id": "20260402-040801-f36747",
|
|
336
|
+
"argus_version": "0.1.0",
|
|
337
|
+
"started_at": "2026-04-02T04:08:01.123Z",
|
|
338
|
+
"completed_at": "2026-04-02T04:08:01.125Z",
|
|
339
|
+
"duration_ms": 2.1,
|
|
340
|
+
"overall_status": "silent_failure", // clean | silent_failure | crashed
|
|
341
|
+
"first_failure_step": "analysis_agent",
|
|
342
|
+
"root_cause_chain": ["analysis_agent", "validation_agent"],
|
|
343
|
+
"graph_node_names": ["research_agent", "analysis_agent", ...],
|
|
344
|
+
"graph_edge_map": {"research_agent": ["analysis_agent"], ...},
|
|
345
|
+
"initial_state": { ... },
|
|
346
|
+
"parent_run_id": null, // set on replay runs
|
|
347
|
+
"replay_from_step": null, // set on replay runs
|
|
348
|
+
"steps": [
|
|
349
|
+
{
|
|
350
|
+
"step_index": 1,
|
|
351
|
+
"node_name": "analysis_agent",
|
|
352
|
+
"status": "fail", // pass | fail | crashed
|
|
353
|
+
"input_state": { ... },
|
|
354
|
+
"output_dict": { ... },
|
|
355
|
+
"duration_ms": 0.3,
|
|
356
|
+
"timestamp_utc": "2026-04-02T04:08:01.124Z",
|
|
357
|
+
"exception": null,
|
|
358
|
+
"inspection": {
|
|
359
|
+
"is_silent_failure": true,
|
|
360
|
+
"missing_fields": ["key_insights"],
|
|
361
|
+
"empty_fields": [],
|
|
362
|
+
"type_mismatches": [],
|
|
363
|
+
"severity": "critical",
|
|
364
|
+
"message": "Missing required fields: key_insights"
|
|
365
|
+
}
|
|
366
|
+
},
|
|
367
|
+
...
|
|
368
|
+
]
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
================================================================================
|