agent-replay-debugger-mcp 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_replay_debugger_mcp-1.0.1/LICENSE +15 -0
- agent_replay_debugger_mcp-1.0.1/PKG-INFO +98 -0
- agent_replay_debugger_mcp-1.0.1/README.md +79 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/PKG-INFO +98 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/SOURCES.txt +11 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/dependency_links.txt +1 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/entry_points.txt +2 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/requires.txt +1 -0
- agent_replay_debugger_mcp-1.0.1/agent_replay_debugger_mcp.egg-info/top_level.txt +1 -0
- agent_replay_debugger_mcp-1.0.1/pyproject.toml +30 -0
- agent_replay_debugger_mcp-1.0.1/server.py +278 -0
- agent_replay_debugger_mcp-1.0.1/setup.cfg +4 -0
- agent_replay_debugger_mcp-1.0.1/tests/test_replay.py +124 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicholas Templeman / MEOK AI Labs (CSOAI LTD, UK Companies House 16939677)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-replay-debugger-mcp
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Agent Replay Debugger MCP — record every agent step + replay deterministically. Step-debugger for agentic loops. Audit-evidence signed exports. By MEOK AI Labs.
|
|
5
|
+
Author-email: Nicholas Templeman <nicholas@meok.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://meok.ai/a2a
|
|
8
|
+
Project-URL: Repository, https://github.com/CSOAI-ORG/agent-replay-debugger-mcp
|
|
9
|
+
Keywords: mcp,debugger,replay,agent-debugging,audit-evidence,meok
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: mcp[cli]>=1.3.0
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# Agent Replay Debugger MCP
|
|
21
|
+
|
|
22
|
+
> ## 🧱 Part of the MEOK A2A Substrate (£499/mo)
|
|
23
|
+
> See [meok.ai/a2a](https://meok.ai/a2a).
|
|
24
|
+
|
|
25
|
+
# Step-debug an agent run — deterministic replay + signed audit evidence
|
|
26
|
+
|
|
27
|
+
<!-- mcp-name: io.github.CSOAI-ORG/agent-replay-debugger-mcp -->
|
|
28
|
+
|
|
29
|
+
[PyPI](https://pypi.org/project/agent-replay-debugger-mcp/)
|
|
30
|
+
[License: MIT](LICENSE)
|
|
31
|
+
|
|
32
|
+
## What this does
|
|
33
|
+
|
|
34
|
+
Record every action an agent takes (inputs, outputs, timestamps, costs) so you can REPLAY the run deterministically afterward. Step through each action like a debugger. Filter, search, branch from any step. Sign the recording for audit evidence.
|
|
35
|
+
|
|
36
|
+
Pairs with `bft-progress-council-mcp` (real-time loop halting) and `agent-audit-logger-mcp` (audit-chain submission).
|
|
37
|
+
|
|
38
|
+
## Tools
|
|
39
|
+
|
|
40
|
+
| Tool | Purpose |
|
|
41
|
+
|---|---|
|
|
42
|
+
| `start_recording(goal, session_id?, agent_id?)` | Open new recording |
|
|
43
|
+
| `record_step(session_id, action, input?, output?, model?, input_tokens?, output_tokens?, duration_ms?, cost_gbp?, tags?)` | Log one step |
|
|
44
|
+
| `replay_step(session_id, step_idx)` | Re-fetch a step |
|
|
45
|
+
| `timeline(session_id, model_filter?, tag_filter?)` | Full ordered timeline |
|
|
46
|
+
| `branch_from(session_id, step_idx, new_action)` | Alternative branch |
|
|
47
|
+
| `search_steps(session_id, query)` | Free-text step search |
|
|
48
|
+
| `export_recording(session_id, format)` | JSON or markdown export |
|
|
49
|
+
| `sign_recording(session_id)` | HMAC-sign for audit submission |
|
|
50
|
+
|
|
51
|
+
## Use cases
|
|
52
|
+
|
|
53
|
+
- Debug why an agent burned £20 on a £0.50 task
|
|
54
|
+
- Reproduce a customer-reported wrong-answer bug
|
|
55
|
+
- Audit evidence: prove what an agent did, to whom, when
|
|
56
|
+
- Train next-gen agents on senior-agent traces
|
|
57
|
+
- Forensic post-mortems after prompt-injection incidents
|
|
58
|
+
|
|
59
|
+
## Sister MCPs
|
|
60
|
+
|
|
61
|
+
- `bft-progress-council-mcp` — real-time loop halt
|
|
62
|
+
- `agent-token-budget-mcp` — spend cap
|
|
63
|
+
- `agent-audit-logger-mcp` — audit-chain submission
|
|
64
|
+
- `agent-cost-allocator-mcp` — multi-tenant chargeback
|
|
65
|
+
|
|
66
|
+
Full catalogue: [meok.ai/anthropic-registry](https://meok.ai/anthropic-registry)
|
|
67
|
+
|
|
68
|
+
## Pricing
|
|
69
|
+
|
|
70
|
+
| Option | Price |
|
|
71
|
+
|---|---|
|
|
72
|
+
| Self-host MIT | £0 |
|
|
73
|
+
| Universal PAYG | £29/mo + £0.0002/call |
|
|
74
|
+
| A2A Substrate | £499/mo |
|
|
75
|
+
| Universe | £1,499/mo |
|
|
76
|
+
| Defence | £4,990/mo |
|
|
77
|
+
|
|
78
|
+
Buy: https://meok.ai/a2a
|
|
79
|
+
|
|
80
|
+
## Licence
|
|
81
|
+
|
|
82
|
+
MIT. By [MEOK AI Labs](https://meok.ai) (CSOAI LTD, UK Companies House 16939677).
|
|
83
|
+
|
|
84
|
+
<!-- BUY-LADDER:START -->
|
|
85
|
+
|
|
86
|
+
## 💸 Try MEOK in 30 seconds — instant buy ladder
|
|
87
|
+
|
|
88
|
+
| Tier | Price | What you get | Stripe |
|
|
89
|
+
|---|---|---|---|
|
|
90
|
+
| Smoke test | **£1** | Signed sample MCP-Hardening report + Article 50 PDF | <https://buy.stripe.com/dRmcN75ScdQS7oh1Uc8k90U> |
|
|
91
|
+
| Quick Kit | **£9** | EU AI Act Article 50 implementation guide (C2PA + EU-Icon) | <https://buy.stripe.com/cNi00la8s1460ZT0Q88k90V> |
|
|
92
|
+
| Founder Call | **£29** | 30-min 1-on-1 with the founder | <https://buy.stripe.com/8x228ta8s6oqbExaqI8k90W> |
|
|
93
|
+
|
|
94
|
+
> Refundable. UK Stripe — VAT-clean. Builds on the 81-MCP MEOK fleet.
|
|
95
|
+
> Verify any signed report at <https://meok.ai/verify>.
|
|
96
|
+
|
|
97
|
+
<!-- BUY-LADDER:END -->
|
|
98
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Agent Replay Debugger MCP
|
|
2
|
+
|
|
3
|
+
> ## 🧱 Part of the MEOK A2A Substrate (£499/mo)
|
|
4
|
+
> See [meok.ai/a2a](https://meok.ai/a2a).
|
|
5
|
+
|
|
6
|
+
# Step-debug an agent run — deterministic replay + signed audit evidence
|
|
7
|
+
|
|
8
|
+
<!-- mcp-name: io.github.CSOAI-ORG/agent-replay-debugger-mcp -->
|
|
9
|
+
|
|
10
|
+
[PyPI](https://pypi.org/project/agent-replay-debugger-mcp/)
|
|
11
|
+
[License: MIT](LICENSE)
|
|
12
|
+
|
|
13
|
+
## What this does
|
|
14
|
+
|
|
15
|
+
Record every action an agent takes (inputs, outputs, timestamps, costs) so you can REPLAY the run deterministically afterward. Step through each action like a debugger. Filter, search, branch from any step. Sign the recording for audit evidence.
|
|
16
|
+
|
|
17
|
+
Pairs with `bft-progress-council-mcp` (real-time loop halting) and `agent-audit-logger-mcp` (audit-chain submission).
|
|
18
|
+
|
|
19
|
+
## Tools
|
|
20
|
+
|
|
21
|
+
| Tool | Purpose |
|
|
22
|
+
|---|---|
|
|
23
|
+
| `start_recording(goal, session_id?, agent_id?)` | Open new recording |
|
|
24
|
+
| `record_step(session_id, action, input?, output?, model?, input_tokens?, output_tokens?, duration_ms?, cost_gbp?, tags?)` | Log one step |
|
|
25
|
+
| `replay_step(session_id, step_idx)` | Re-fetch a step |
|
|
26
|
+
| `timeline(session_id, model_filter?, tag_filter?)` | Full ordered timeline |
|
|
27
|
+
| `branch_from(session_id, step_idx, new_action)` | Alternative branch |
|
|
28
|
+
| `search_steps(session_id, query)` | Free-text step search |
|
|
29
|
+
| `export_recording(session_id, format)` | JSON or markdown export |
|
|
30
|
+
| `sign_recording(session_id)` | HMAC-sign for audit submission |
|
|
31
|
+
|
|
32
|
+
## Use cases
|
|
33
|
+
|
|
34
|
+
- Debug why an agent burned £20 on a £0.50 task
|
|
35
|
+
- Reproduce a customer-reported wrong-answer bug
|
|
36
|
+
- Audit evidence: prove what an agent did, to whom, when
|
|
37
|
+
- Train next-gen agents on senior-agent traces
|
|
38
|
+
- Forensic post-mortems after prompt-injection incidents
|
|
39
|
+
|
|
40
|
+
## Sister MCPs
|
|
41
|
+
|
|
42
|
+
- `bft-progress-council-mcp` — real-time loop halt
|
|
43
|
+
- `agent-token-budget-mcp` — spend cap
|
|
44
|
+
- `agent-audit-logger-mcp` — audit-chain submission
|
|
45
|
+
- `agent-cost-allocator-mcp` — multi-tenant chargeback
|
|
46
|
+
|
|
47
|
+
Full catalogue: [meok.ai/anthropic-registry](https://meok.ai/anthropic-registry)
|
|
48
|
+
|
|
49
|
+
## Pricing
|
|
50
|
+
|
|
51
|
+
| Option | Price |
|
|
52
|
+
|---|---|
|
|
53
|
+
| Self-host MIT | £0 |
|
|
54
|
+
| Universal PAYG | £29/mo + £0.0002/call |
|
|
55
|
+
| A2A Substrate | £499/mo |
|
|
56
|
+
| Universe | £1,499/mo |
|
|
57
|
+
| Defence | £4,990/mo |
|
|
58
|
+
|
|
59
|
+
Buy: https://meok.ai/a2a
|
|
60
|
+
|
|
61
|
+
## Licence
|
|
62
|
+
|
|
63
|
+
MIT. By [MEOK AI Labs](https://meok.ai) (CSOAI LTD, UK Companies House 16939677).
|
|
64
|
+
|
|
65
|
+
<!-- BUY-LADDER:START -->
|
|
66
|
+
|
|
67
|
+
## 💸 Try MEOK in 30 seconds — instant buy ladder
|
|
68
|
+
|
|
69
|
+
| Tier | Price | What you get | Stripe |
|
|
70
|
+
|---|---|---|---|
|
|
71
|
+
| Smoke test | **£1** | Signed sample MCP-Hardening report + Article 50 PDF | <https://buy.stripe.com/dRmcN75ScdQS7oh1Uc8k90U> |
|
|
72
|
+
| Quick Kit | **£9** | EU AI Act Article 50 implementation guide (C2PA + EU-Icon) | <https://buy.stripe.com/cNi00la8s1460ZT0Q88k90V> |
|
|
73
|
+
| Founder Call | **£29** | 30-min 1-on-1 with the founder | <https://buy.stripe.com/8x228ta8s6oqbExaqI8k90W> |
|
|
74
|
+
|
|
75
|
+
> Refundable. UK Stripe — VAT-clean. Builds on the 81-MCP MEOK fleet.
|
|
76
|
+
> Verify any signed report at <https://meok.ai/verify>.
|
|
77
|
+
|
|
78
|
+
<!-- BUY-LADDER:END -->
|
|
79
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-replay-debugger-mcp
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Agent Replay Debugger MCP — record every agent step + replay deterministically. Step-debugger for agentic loops. Audit-evidence signed exports. By MEOK AI Labs.
|
|
5
|
+
Author-email: Nicholas Templeman <nicholas@meok.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://meok.ai/a2a
|
|
8
|
+
Project-URL: Repository, https://github.com/CSOAI-ORG/agent-replay-debugger-mcp
|
|
9
|
+
Keywords: mcp,debugger,replay,agent-debugging,audit-evidence,meok
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: mcp[cli]>=1.3.0
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# Agent Replay Debugger MCP
|
|
21
|
+
|
|
22
|
+
> ## 🧱 Part of the MEOK A2A Substrate (£499/mo)
|
|
23
|
+
> See [meok.ai/a2a](https://meok.ai/a2a).
|
|
24
|
+
|
|
25
|
+
# Step-debug an agent run — deterministic replay + signed audit evidence
|
|
26
|
+
|
|
27
|
+
<!-- mcp-name: io.github.CSOAI-ORG/agent-replay-debugger-mcp -->
|
|
28
|
+
|
|
29
|
+
[PyPI](https://pypi.org/project/agent-replay-debugger-mcp/)
|
|
30
|
+
[License: MIT](LICENSE)
|
|
31
|
+
|
|
32
|
+
## What this does
|
|
33
|
+
|
|
34
|
+
Record every action an agent takes (inputs, outputs, timestamps, costs) so you can REPLAY the run deterministically afterward. Step through each action like a debugger. Filter, search, branch from any step. Sign the recording for audit evidence.
|
|
35
|
+
|
|
36
|
+
Pairs with `bft-progress-council-mcp` (real-time loop halting) and `agent-audit-logger-mcp` (audit-chain submission).
|
|
37
|
+
|
|
38
|
+
## Tools
|
|
39
|
+
|
|
40
|
+
| Tool | Purpose |
|
|
41
|
+
|---|---|
|
|
42
|
+
| `start_recording(goal, session_id?, agent_id?)` | Open new recording |
|
|
43
|
+
| `record_step(session_id, action, input?, output?, model?, input_tokens?, output_tokens?, duration_ms?, cost_gbp?, tags?)` | Log one step |
|
|
44
|
+
| `replay_step(session_id, step_idx)` | Re-fetch a step |
|
|
45
|
+
| `timeline(session_id, model_filter?, tag_filter?)` | Full ordered timeline |
|
|
46
|
+
| `branch_from(session_id, step_idx, new_action)` | Alternative branch |
|
|
47
|
+
| `search_steps(session_id, query)` | Free-text step search |
|
|
48
|
+
| `export_recording(session_id, format)` | JSON or markdown export |
|
|
49
|
+
| `sign_recording(session_id)` | HMAC-sign for audit submission |
|
|
50
|
+
|
|
51
|
+
## Use cases
|
|
52
|
+
|
|
53
|
+
- Debug why an agent burned £20 on a £0.50 task
|
|
54
|
+
- Reproduce a customer-reported wrong-answer bug
|
|
55
|
+
- Audit evidence: prove what an agent did, to whom, when
|
|
56
|
+
- Train next-gen agents on senior-agent traces
|
|
57
|
+
- Forensic post-mortems after prompt-injection incidents
|
|
58
|
+
|
|
59
|
+
## Sister MCPs
|
|
60
|
+
|
|
61
|
+
- `bft-progress-council-mcp` — real-time loop halt
|
|
62
|
+
- `agent-token-budget-mcp` — spend cap
|
|
63
|
+
- `agent-audit-logger-mcp` — audit-chain submission
|
|
64
|
+
- `agent-cost-allocator-mcp` — multi-tenant chargeback
|
|
65
|
+
|
|
66
|
+
Full catalogue: [meok.ai/anthropic-registry](https://meok.ai/anthropic-registry)
|
|
67
|
+
|
|
68
|
+
## Pricing
|
|
69
|
+
|
|
70
|
+
| Option | Price |
|
|
71
|
+
|---|---|
|
|
72
|
+
| Self-host MIT | £0 |
|
|
73
|
+
| Universal PAYG | £29/mo + £0.0002/call |
|
|
74
|
+
| A2A Substrate | £499/mo |
|
|
75
|
+
| Universe | £1,499/mo |
|
|
76
|
+
| Defence | £4,990/mo |
|
|
77
|
+
|
|
78
|
+
Buy: https://meok.ai/a2a
|
|
79
|
+
|
|
80
|
+
## Licence
|
|
81
|
+
|
|
82
|
+
MIT. By [MEOK AI Labs](https://meok.ai) (CSOAI LTD, UK Companies House 16939677).
|
|
83
|
+
|
|
84
|
+
<!-- BUY-LADDER:START -->
|
|
85
|
+
|
|
86
|
+
## 💸 Try MEOK in 30 seconds — instant buy ladder
|
|
87
|
+
|
|
88
|
+
| Tier | Price | What you get | Stripe |
|
|
89
|
+
|---|---|---|---|
|
|
90
|
+
| Smoke test | **£1** | Signed sample MCP-Hardening report + Article 50 PDF | <https://buy.stripe.com/dRmcN75ScdQS7oh1Uc8k90U> |
|
|
91
|
+
| Quick Kit | **£9** | EU AI Act Article 50 implementation guide (C2PA + EU-Icon) | <https://buy.stripe.com/cNi00la8s1460ZT0Q88k90V> |
|
|
92
|
+
| Founder Call | **£29** | 30-min 1-on-1 with the founder | <https://buy.stripe.com/8x228ta8s6oqbExaqI8k90W> |
|
|
93
|
+
|
|
94
|
+
> Refundable. UK Stripe — VAT-clean. Builds on the 81-MCP MEOK fleet.
|
|
95
|
+
> Verify any signed report at <https://meok.ai/verify>.
|
|
96
|
+
|
|
97
|
+
<!-- BUY-LADDER:END -->
|
|
98
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
server.py
|
|
5
|
+
agent_replay_debugger_mcp.egg-info/PKG-INFO
|
|
6
|
+
agent_replay_debugger_mcp.egg-info/SOURCES.txt
|
|
7
|
+
agent_replay_debugger_mcp.egg-info/dependency_links.txt
|
|
8
|
+
agent_replay_debugger_mcp.egg-info/entry_points.txt
|
|
9
|
+
agent_replay_debugger_mcp.egg-info/requires.txt
|
|
10
|
+
agent_replay_debugger_mcp.egg-info/top_level.txt
|
|
11
|
+
tests/test_replay.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mcp[cli]>=1.3.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
server
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agent-replay-debugger-mcp"
|
|
7
|
+
version = "1.0.1"
|
|
8
|
+
description = "Agent Replay Debugger MCP — record every agent step + replay deterministically. Step-debugger for agentic loops. Audit-evidence signed exports. By MEOK AI Labs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Nicholas Templeman", email = "nicholas@meok.ai" }]
|
|
13
|
+
keywords = ["mcp", "debugger", "replay", "agent-debugging", "audit-evidence", "meok"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Topic :: Software Development :: Debuggers",
|
|
19
|
+
]
|
|
20
|
+
dependencies = ["mcp[cli]>=1.3.0"]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://meok.ai/a2a"
|
|
24
|
+
Repository = "https://github.com/CSOAI-ORG/agent-replay-debugger-mcp"
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
agent-replay-debugger-mcp = "server:main"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools]
|
|
30
|
+
py-modules = ["server"]
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Agent Replay Debugger MCP — step-debug an agent run
|
|
4
|
+
=========================================================
|
|
5
|
+
|
|
6
|
+
By MEOK AI Labs · https://meok.ai · MIT
|
|
7
|
+
<!-- mcp-name: io.github.CSOAI-ORG/agent-replay-debugger-mcp -->
|
|
8
|
+
|
|
9
|
+
WHAT THIS DOES
|
|
10
|
+
--------------
|
|
11
|
+
Record every action an agent takes (with inputs, outputs, timestamps,
|
|
12
|
+
costs) so you can REPLAY the run deterministically afterward. Step through
|
|
13
|
+
each action like a debugger. Filter, search, branch from any step. Sign
|
|
14
|
+
the recording for audit evidence.
|
|
15
|
+
|
|
16
|
+
Pairs with bft-progress-council-mcp (stops loops in real-time) and
|
|
17
|
+
agent-audit-logger-mcp (chains the recording into the evidence ledger).
|
|
18
|
+
|
|
19
|
+
USE CASES
|
|
20
|
+
---------
|
|
21
|
+
- Debug why an agent ran a £20 cost on what should have been a £0.50 task
|
|
22
|
+
- Reproduce a customer-reported "agent gave wrong answer" bug
|
|
23
|
+
- Audit evidence: prove EXACTLY what an agent did, to whom, and when
|
|
24
|
+
- Train next-gen agents on traces of how senior agents solved problems
|
|
25
|
+
- Forensic analysis after a prompt-injection incident
|
|
26
|
+
|
|
27
|
+
TOOLS
|
|
28
|
+
-----
|
|
29
|
+
- start_recording(goal, session_id?, agent_id?): open a new recording
|
|
30
|
+
- record_step(session_id, action, input?, output?, model?, input_tokens?, output_tokens?, duration_ms?, cost_gbp?, tags?): one step
|
|
31
|
+
- replay_step(session_id, step_idx): re-fetch a specific step
|
|
32
|
+
- timeline(session_id, model_filter?, tag_filter?): get full ordered timeline
|
|
33
|
+
- branch_from(session_id, step_idx, new_action): explore alternative branch
|
|
34
|
+
- search_steps(session_id, query): find steps matching a query
|
|
35
|
+
- export_recording(session_id, format): JSON or markdown export
|
|
36
|
+
- sign_recording(session_id): HMAC-sign the whole recording for audit
|
|
37
|
+
|
|
38
|
+
PRICING
|
|
39
|
+
-------
|
|
40
|
+
Free MIT self-host · £29/mo Starter · £79/mo Pro · A2A Substrate £499/mo.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from __future__ import annotations
|
|
44
|
+
import hashlib
|
|
45
|
+
import hmac
|
|
46
|
+
import json
|
|
47
|
+
import os
|
|
48
|
+
import time
|
|
49
|
+
from datetime import datetime, timezone
|
|
50
|
+
from typing import Optional
|
|
51
|
+
from mcp.server.fastmcp import FastMCP
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Server instance; the tool functions in this module register on it
# through the @mcp.tool() decorator.
mcp = FastMCP("agent-replay-debugger")

# HMAC key, read once at import time. An empty string means no key is
# configured, in which case _sign() returns a placeholder marker.
_HMAC_SECRET = os.getenv("MEOK_HMAC_SECRET", "")


# In-memory recordings store. Production: Postgres or DuckDB.
# Maps session_id -> recording dict (goal, agent_id, steps, branches, ...).
_RECORDINGS: dict[str, dict] = {}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _sign(payload: dict) -> str:
    """Return the hex HMAC-SHA256 signature of *payload*.

    The payload is serialised as JSON with sorted keys, so two equal dicts
    always yield the same signature. When ``_HMAC_SECRET`` is empty the
    fixed marker ``"unsigned-no-key-configured"`` is returned instead of a
    real signature.
    """
    if _HMAC_SECRET:
        message = json.dumps(payload, sort_keys=True).encode()
        mac = hmac.new(_HMAC_SECRET.encode(), message, hashlib.sha256)
        return mac.hexdigest()
    return "unsigned-no-key-configured"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _ts() -> str:
|
|
69
|
+
return datetime.now(timezone.utc).isoformat()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ────────────────────────────────────────────────────────────────────────
|
|
73
|
+
# Tools
|
|
74
|
+
# ────────────────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
@mcp.tool()
def start_recording(goal: str, session_id: Optional[str] = None, agent_id: Optional[str] = None) -> dict:
    """Open a new recording for an agent run.

    Args:
        goal: Free-text description of what the agent is trying to achieve.
        session_id: Identifier to store the recording under. When omitted
            (or empty) an id of the form ``rec_<unix-seconds>_<8 hex chars>``
            is generated. An id that already exists replaces that recording.
        agent_id: Name of the recording agent; stored as ``"anonymous"``
            when omitted.

    Returns:
        The session id, goal and start timestamp, plus a usage hint.
    """
    sid = session_id if session_id else f"rec_{int(time.time())}_{os.urandom(4).hex()}"
    recording = {
        "session_id": sid,
        "goal": goal,
        "agent_id": agent_id if agent_id else "anonymous",
        "started_at": _ts(),
        "steps": [],
        "branches": {},
        "signed": False,
    }
    _RECORDINGS[sid] = recording
    return {
        "session_id": sid,
        "goal": goal,
        "started_at": recording["started_at"],
        "hint": "Call record_step() after every agent action. Sign + export at end of run.",
    }
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@mcp.tool()
def record_step(
    session_id: str,
    action: str,
    input: Optional[str] = None,
    output: Optional[str] = None,
    model: Optional[str] = None,
    input_tokens: int = 0,
    output_tokens: int = 0,
    duration_ms: Optional[int] = None,
    cost_gbp: Optional[float] = None,
    tags: Optional[list[str]] = None,
) -> dict:
    """Record one agent step.

    The step is appended to the session's ``steps`` list and numbered by
    its position in that list, starting at 0.

    Returns:
        ``{"session_id", "step_idx", "step_count"}`` on success, or
        ``{"error": "unknown_session"}`` when *session_id* is not recorded.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    steps = rec["steps"]
    step_idx = len(steps)
    steps.append(
        {
            "step_idx": step_idx,
            "ts": time.time(),  # epoch seconds
            "iso_ts": _ts(),  # same moment, human-readable UTC
            "action": action,
            "input": input,
            "output": output,
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "duration_ms": duration_ms,
            "cost_gbp": cost_gbp,
            "tags": tags or [],
        }
    )
    return {"session_id": session_id, "step_idx": step_idx, "step_count": len(steps)}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@mcp.tool()
def replay_step(session_id: str, step_idx: int) -> dict:
    """Re-fetch a recorded step for replay.

    Returns:
        ``{"step", "of_total"}`` for a valid index;
        ``{"error": "step_out_of_range", "range": [0, last_idx]}`` when the
        index is negative or past the end; ``{"error": "unknown_session"}``
        when *session_id* is not recorded.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    steps = rec["steps"]
    total = len(steps)
    if step_idx >= total or step_idx < 0:
        return {"error": "step_out_of_range", "range": [0, total - 1]}
    return {"step": steps[step_idx], "of_total": total}
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@mcp.tool()
def timeline(session_id: str, model_filter: Optional[str] = None, tag_filter: Optional[str] = None) -> dict:
    """Get the full ordered timeline of recorded steps.

    Args:
        session_id: Recording to read.
        model_filter: When given, keep only steps whose ``model`` equals it.
        tag_filter: When given, keep only steps whose ``tags`` contain it.

    Returns:
        Totals (cost, tokens, duration) over the selected steps plus a
        compact per-step list with the action truncated to 100 characters,
        or ``{"error": "unknown_session"}``.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    selected = rec["steps"]
    if model_filter:
        selected = [step for step in selected if step.get("model") == model_filter]
    if tag_filter:
        selected = [step for step in selected if tag_filter in (step.get("tags") or [])]

    total_cost = 0
    total_tokens = 0
    total_ms = 0
    compact = []
    for step in selected:
        # Missing or None metrics count as zero in the totals.
        tokens = (step.get("input_tokens", 0) or 0) + (step.get("output_tokens", 0) or 0)
        total_cost += step.get("cost_gbp", 0) or 0
        total_tokens += tokens
        total_ms += step.get("duration_ms", 0) or 0
        compact.append(
            {
                "i": step["step_idx"],
                "action": (step["action"] or "")[:100],
                "model": step.get("model"),
                "tokens": tokens,
                "cost_gbp": step.get("cost_gbp"),
                "ms": step.get("duration_ms"),
            }
        )

    return {
        "session_id": session_id,
        "goal": rec["goal"],
        "step_count": len(selected),
        "total_cost_gbp": round(total_cost, 6),
        "total_tokens": total_tokens,
        "total_duration_ms": total_ms,
        "steps_compact": compact,
    }
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@mcp.tool()
def branch_from(session_id: str, step_idx: int, new_action: str) -> dict:
    """Open an alternative branch from a specific step.

    Args:
        session_id: Recording to branch.
        step_idx: Index of the existing step the branch forks from.
        new_action: The alternative action to explore at that point.

    Returns:
        ``{"session_id", "branch_id", "from_step_idx", "hint"}`` on success;
        ``{"error": "step_out_of_range", "range": [0, last_idx]}`` when
        *step_idx* does not refer to a recorded step;
        ``{"error": "unknown_session"}`` when *session_id* is not recorded.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    # Reject indices that do not name a recorded step, using the same error
    # shape as replay_step().
    step_total = len(rec["steps"])
    if step_idx < 0 or step_idx >= step_total:
        return {"error": "step_out_of_range", "range": [0, step_total - 1]}

    # The timestamp only has one-second resolution, so a random suffix keeps
    # two branches opened from the same step in the same second from
    # overwriting each other.
    branch_id = f"branch_{step_idx}_{int(time.time())}_{os.urandom(4).hex()}"
    rec["branches"][branch_id] = {
        "from_step_idx": step_idx,
        "new_action": new_action,
        "created_at": _ts(),
        "steps": [],
    }
    return {
        "session_id": session_id,
        "branch_id": branch_id,
        "from_step_idx": step_idx,
        "hint": "Record steps on this branch by passing branch_id in tags or use a sub-session.",
    }
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@mcp.tool()
def search_steps(session_id: str, query: str) -> dict:
    """Find steps matching a free-text query (across action + input + output).

    Matching is a case-insensitive substring test against the step's
    action, input and output joined with spaces.

    Returns:
        ``{"query", "hits", "match_count"}`` where each hit carries the step
        index, the action truncated to 100 characters, and the model; or
        ``{"error": "unknown_session"}`` when *session_id* is not recorded.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    needle = query.lower()
    searched_fields = ("action", "input", "output")
    hits = []
    for s in rec["steps"]:
        haystack = " ".join(str(s.get(k) or "") for k in searched_fields).lower()
        if needle in haystack:
            hits.append(
                {
                    "step_idx": s["step_idx"],
                    # ``or ""`` keeps a None action from raising on the slice,
                    # matching how the haystack above and timeline() treat it.
                    "action": (s.get("action") or "")[:100],
                    "model": s.get("model"),
                }
            )
    return {"query": query, "hits": hits, "match_count": len(hits)}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@mcp.tool()
def export_recording(session_id: str, format: str = "json") -> dict:
    """Export the full recording as JSON or markdown.

    Args:
        session_id: Recording to export.
        format: ``"markdown"`` renders a human-readable timeline; any other
            value returns the stored recording under ``"format": "json"``.

    Returns:
        ``{"format", "content"}``, or ``{"error": "unknown_session"}`` when
        *session_id* is not recorded. The markdown view truncates the action
        to 80 characters and input/output to 200 characters each.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}
    if format != "markdown":
        return {"format": "json", "content": rec}

    lines = [
        f"# Recording: {rec['session_id']}",
        f"**Goal:** {rec['goal']}",
        f"**Agent:** {rec['agent_id']}",
        f"**Started:** {rec['started_at']}",
        f"**Steps:** {len(rec['steps'])}",
        "",
        "## Timeline",
        "",
    ]
    for s in rec["steps"]:
        # ``or ''`` guards against a stored None action, which a plain
        # ``.get('action', '')`` would return unchanged and then fail to slice.
        lines.append(f"### Step {s['step_idx']} — {(s.get('action') or '')[:80]}")
        if s.get("model"):
            lines.append(f"- Model: `{s['model']}`")
        if s.get("input"):
            lines.append(f"- Input: `{(s['input'] or '')[:200]}`")
        if s.get("output"):
            lines.append(f"- Output: `{(s['output'] or '')[:200]}`")
        cost = s.get("cost_gbp")
        # Compare against None so an explicitly recorded zero cost still
        # appears in the export.
        if cost is not None:
            lines.append(f"- Cost: £{cost:.6f}")
        lines.append("")
    return {"format": "markdown", "content": "\n".join(lines)}
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@mcp.tool()
def sign_recording(session_id: str) -> dict:
    """HMAC-sign the recording for audit-chain submission.

    The signed payload holds the recording's metadata, the step count, the
    total cost, a sealing timestamp, and a SHA-256 digest of the recorded
    steps. The digest binds the signature to the step contents, so editing
    a step after sealing is detectable, not only adding or removing one.

    Returns:
        ``{"signed", "payload", "signature", "verify_url", "audit_value"}``,
        or ``{"error": "unknown_session"}`` when *session_id* is not
        recorded. With no HMAC secret configured, ``signature`` is the
        placeholder returned by ``_sign`` rather than a real signature.
    """
    rec = _RECORDINGS.get(session_id)
    if not rec:
        return {"error": "unknown_session"}

    steps = rec["steps"]
    # Canonical (sorted-key) JSON so the digest is reproducible by a verifier.
    steps_digest = hashlib.sha256(json.dumps(steps, sort_keys=True).encode()).hexdigest()
    payload = {
        "session_id": session_id,
        "goal": rec["goal"],
        "agent_id": rec["agent_id"],
        "started_at": rec["started_at"],
        "step_count": len(steps),
        "total_cost_gbp": round(sum(s.get("cost_gbp", 0) or 0 for s in steps), 6),
        "steps_sha256": steps_digest,
        "sealed_at": _ts(),
    }
    sig = _sign(payload)
    rec["signed"] = True
    return {
        "signed": True,
        "payload": payload,
        "signature": sig,
        "verify_url": "https://verify.meok.ai",
        "audit_value": "Submit this signed seal alongside EU AI Act Article 12 audit-log records.",
    }
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def main() -> None:
    """Run the MCP server.

    This is the target of the ``agent-replay-debugger-mcp`` console script,
    which pyproject.toml declares as ``server:main``.
    """
    mcp.run()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Smoke tests for agent-replay-debugger-mcp."""
|
|
2
|
+
import sys, os, inspect, traceback
|
|
3
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
4
|
+
|
|
5
|
+
from server import (
|
|
6
|
+
start_recording,
|
|
7
|
+
record_step,
|
|
8
|
+
replay_step,
|
|
9
|
+
timeline,
|
|
10
|
+
branch_from,
|
|
11
|
+
search_steps,
|
|
12
|
+
export_recording,
|
|
13
|
+
sign_recording,
|
|
14
|
+
_RECORDINGS,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_start_recording():
    """A recording opened without an explicit id gets a generated ``rec_`` id."""
    _RECORDINGS.clear()
    started = start_recording("Fix bug X")
    session_id = started["session_id"]
    assert session_id.startswith("rec_")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_record_steps_increment_idx():
    """Consecutive steps are numbered 0, 1, ... in recording order."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    first = record_step(
        sid,
        "edit file.py",
        input="x",
        output="y",
        model="claude-opus-4.7",
        input_tokens=100,
        output_tokens=50,
        duration_ms=1500,
        cost_gbp=0.006,
    )
    second = record_step(sid, "run tests", output="fail")
    assert first["step_idx"] == 0
    assert second["step_idx"] == 1
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_replay_step():
    """Replaying index 0 returns the step that was recorded first."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    record_step(sid, "edit", input="aaa")
    replayed = replay_step(sid, 0)
    assert replayed["step"]["action"] == "edit"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_replay_step_out_of_range():
    """An index past the end of the recording yields an error dict."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    result = replay_step(sid, 99)
    assert "error" in result
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_timeline_totals_cost_tokens_ms():
    """The timeline sums cost, tokens and duration across all steps."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    record_step(
        sid, "a", model="claude-opus-4.7",
        input_tokens=1000, output_tokens=500, cost_gbp=0.06, duration_ms=2000,
    )
    record_step(
        sid, "b", model="gpt-5",
        input_tokens=2000, output_tokens=1000, cost_gbp=0.105, duration_ms=3000,
    )
    summary = timeline(sid)
    assert summary["step_count"] == 2
    assert abs(summary["total_cost_gbp"] - 0.165) < 0.001
    assert summary["total_tokens"] == 4500
    assert summary["total_duration_ms"] == 5000
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_timeline_filter_by_model():
    """``model_filter`` keeps only the steps recorded with that model."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    for action, model in (("a", "claude-opus-4.7"), ("b", "gpt-5"), ("c", "claude-opus-4.7")):
        record_step(sid, action, model=model)
    filtered = timeline(sid, model_filter="claude-opus-4.7")
    assert filtered["step_count"] == 2
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_branch_from_creates_branch():
    """Branching from step 1 yields a branch id prefixed with ``branch_1_``."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    for action in ("step1", "step2"):
        record_step(sid, action)
    branch = branch_from(sid, 1, "alternative-step2")
    assert branch["branch_id"].startswith("branch_1_")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_search_steps_finds_match():
    """A query that occurs in one step's action produces at least one hit."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    for action in ("fix EU AI Act bug", "run pytest", "deploy to staging"):
        record_step(sid, action)
    found = search_steps(sid, "AI Act")
    assert found["match_count"] >= 1
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_export_json():
    """Exporting with format ``json`` reports that format back."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    record_step(sid, "x")
    exported = export_recording(sid, "json")
    assert exported["format"] == "json"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_export_markdown():
    """The markdown export contains the heading and each step's action."""
    _RECORDINGS.clear()
    sid = start_recording("test goal")["session_id"]
    record_step(sid, "step1", model="opus", cost_gbp=0.05)
    markdown = export_recording(sid, "markdown")["content"]
    assert "# Recording:" in markdown
    assert "step1" in markdown
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_sign_recording():
    """Signing marks the result as signed and includes a signature field."""
    _RECORDINGS.clear()
    sid = start_recording("test")["session_id"]
    record_step(sid, "x", cost_gbp=0.5)
    seal = sign_recording(sid)
    assert seal["signed"] is True
    assert "signature" in seal
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
if __name__ == "__main__":
    # Minimal stand-alone runner: call every module-level test_* function,
    # print one line per test, then a summary.
    tests = [
        obj
        for name, obj in dict(globals()).items()
        if name.startswith("test_") and inspect.isfunction(obj)
    ]
    passed = failed = 0
    for test in tests:
        try:
            test()
        except Exception as exc:  # top-level boundary: report and carry on
            print(f"✗ {test.__name__}: {type(exc).__name__}: {exc}")
            traceback.print_exc()
            failed += 1
        else:
            print(f"✓ {test.__name__}")
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    # Exit non-zero on any failure so shells and CI can detect a red run.
    sys.exit(1 if failed else 0)
|