agentautopsy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ agentautopsy.db
2
+ *.db
3
+ dist/
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentautopsy
3
+ Version: 0.1.0
4
+ Summary: When your agent fails, this tells you exactly why.
5
+ Project-URL: Homepage, https://github.com/Abhisekhpatel/AgentAutopsy
6
+ Author: Abhishek Patel
7
+ Keywords: ai-agents,debugging,llm,observability
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: anthropic
10
+ Requires-Dist: httpx
11
+ Requires-Dist: openai
12
+ Requires-Dist: sqlite-utils
13
+ Requires-Dist: sqlite-vec
14
+ Description-Content-Type: text/markdown
15
+
16
+ # AgentAutopsy
17
+
18
+ > When your agent fails, this tells you exactly why.
19
+
20
+ ![demo](assets/demo.png)
21
+
22
+ ![Python](https://img.shields.io/badge/python-3.11+-blue)
23
+ ![License](https://img.shields.io/badge/license-Apache%202.0-green)
24
+ ![Zero Config](https://img.shields.io/badge/config-zero-brightgreen)
25
+ ![Works with](https://img.shields.io/badge/works%20with-OpenAI%20%2B%20Anthropic-orange)
26
+
27
+ ## CLI
28
+
29
+ agentautopsy runs # see all agent runs
30
+ agentautopsy replay <id> # print the event report for a run
31
+ agentautopsy stats # fix cache stats
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install git+https://github.com/Abhisekhpatel/AgentAutopsy.git
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ```python
42
+ import agentautopsy
43
+ agentautopsy.watch()
44
+ # your existing agent code here — nothing else changes
45
+ ```
46
+
47
+ AgentAutopsy automatically intercepts every LLM call, detects failures, finds root cause, outputs a verified fix, and caches it for next time.
48
+
49
+ ## Setup
50
+
51
+ Windows: `set ANTHROPIC_API_KEY=your-key-here`
52
+ Mac/Linux: `export ANTHROPIC_API_KEY=your-key-here`
53
+ Get your free key at console.anthropic.com
54
+
55
+ ## Quick start
56
+
57
+ Create test_agent.py and paste this:
58
+
59
+ ```python
60
+ import agentautopsy
61
+ agentautopsy.watch()
62
+ ```
63
+
64
+ Run: `python test_agent.py`
65
+
66
+ ## Works with
67
+
68
+ OpenAI, Anthropic, LangChain, any framework using openai or anthropic
69
+
70
+ ## Requirements
71
+
72
+ Python 3.11+, ANTHROPIC_API_KEY
73
+
74
+ ## License
75
+
76
+ Apache 2.0
@@ -0,0 +1,61 @@
1
+ # AgentAutopsy
2
+
3
+ > When your agent fails, this tells you exactly why.
4
+
5
+ ![demo](assets/demo.png)
6
+
7
+ ![Python](https://img.shields.io/badge/python-3.11+-blue)
8
+ ![License](https://img.shields.io/badge/license-Apache%202.0-green)
9
+ ![Zero Config](https://img.shields.io/badge/config-zero-brightgreen)
10
+ ![Works with](https://img.shields.io/badge/works%20with-OpenAI%20%2B%20Anthropic-orange)
11
+
12
+ ## CLI
13
+
14
+ agentautopsy runs # see all agent runs
15
+ agentautopsy replay <id> # print the event report for a run
16
+ agentautopsy stats # fix cache stats
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install git+https://github.com/Abhisekhpatel/AgentAutopsy.git
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ import agentautopsy
28
+ agentautopsy.watch()
29
+ # your existing agent code here — nothing else changes
30
+ ```
31
+
32
+ AgentAutopsy automatically intercepts every LLM call, detects failures, finds root cause, outputs a verified fix, and caches it for next time.
33
+
34
+ ## Setup
35
+
36
+ Windows: `set ANTHROPIC_API_KEY=your-key-here`
37
+ Mac/Linux: `export ANTHROPIC_API_KEY=your-key-here`
38
+ Get your free key at console.anthropic.com
39
+
40
+ ## Quick start
41
+
42
+ Create test_agent.py and paste this:
43
+
44
+ ```python
45
+ import agentautopsy
46
+ agentautopsy.watch()
47
+ ```
48
+
49
+ Run: `python test_agent.py`
50
+
51
+ ## Works with
52
+
53
+ OpenAI, Anthropic, LangChain, any framework using openai or anthropic
54
+
55
+ ## Requirements
56
+
57
+ Python 3.11+, ANTHROPIC_API_KEY
58
+
59
+ ## License
60
+
61
+ Apache 2.0
Binary file
Binary file
Binary file
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agentautopsy"
7
+ version = "0.1.0"
8
+ description = "When your agent fails, this tells you exactly why."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ authors = [{ name = "Abhishek Patel" }]
12
+ keywords = ["ai-agents", "debugging", "llm", "observability"]
13
+ dependencies = [
14
+ "openai",
15
+ "anthropic",
16
+ "sqlite-utils",
17
+ "sqlite-vec",
18
+ "httpx",
19
+ ]
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/Abhisekhpatel/AgentAutopsy"
23
+
24
+ [project.scripts]
25
+ agentautopsy = "agentautopsy.cli:main"
26
+
27
+ [tool.hatch.build.targets.wheel]
28
+ packages = ["src/agentautopsy"]
@@ -0,0 +1,62 @@
1
+ """AgentAutopsy — when your agent fails, this tells you exactly why."""
2
+
3
+ import atexit
4
+
5
+ from agentautopsy.db import create_tables, get_db, insert_run
6
+ from agentautopsy.interceptor import (
7
+ start_interceptor,
8
+ start_anthropic_interceptor,
9
+ start_http_interceptor,
10
+ )
11
+ from agentautopsy.reporter import print_report
12
+
13
+
14
def watch():
    """Start recording the current process as a new AgentAutopsy run.

    Opens the database, makes sure the run/event tables and the fix cache
    exist, inserts a run row, installs the OpenAI, Anthropic and httpx
    interceptors, and registers an ``atexit`` handler that inspects the
    recorded events when the interpreter shuts down.
    """
    from agentautopsy.cache import setup_cache

    db = get_db()
    create_tables(db)
    setup_cache(db)
    run_id = insert_run(db)
    start_interceptor(run_id, db)
    start_anthropic_interceptor(run_id, db)
    start_http_interceptor(run_id, db)
    print(f"[AgentAutopsy] watching — run {run_id}")

    def on_exit():
        """Detect a failure for this run and, if found, analyze and report it."""
        from agentautopsy.detector import detect_failure, take_snapshot
        from agentautopsy.pruner import prune
        from agentautopsy.analyzer import analyze
        from agentautopsy.replay import replay
        from agentautopsy.cache import lookup_fix, store_fix

        result = detect_failure(run_id, db)
        if not result["failed"]:
            # Record the outcome; otherwise every run stays "running" forever.
            db["runs"].update(run_id, {"status": "completed"})
            print(f"[AgentAutopsy] run completed cleanly — {run_id}")
            return

        db["runs"].update(run_id, {"status": "failed"})
        print(f"\n[AgentAutopsy] failure detected: {result['error_type']}: {result['message']}")

        # The analysis pipeline talks to an external API and to the database.
        # This is the outermost boundary of an exit hook, so a failure here
        # (for example a missing ANTHROPIC_API_KEY) is reported instead of
        # aborting the handler before the run report is printed.
        try:
            cached = lookup_fix(db, result["error_type"], result["message"])
            if cached:
                print("[AgentAutopsy] cache hit — fix found instantly:")
                print(cached)
                return

            snapshot = take_snapshot(run_id, db)
            pruned = prune(snapshot, result["failure_event_id"])
            # NOTE(review): the interceptors are still installed here, so the
            # analyzer's own API call is presumably recorded into this run.
            analysis = analyze(pruned, result)
            print(f"\n[AgentAutopsy] analysis:\n{analysis}")

            replay_result = replay(run_id, db, analysis)
            if replay_result["verified"]:
                print("\n[AgentAutopsy] fix verified ✓")
                print("✓ Replay passed")
                print("✓ Failure resolved")
                store_fix(db, result["error_type"], result["message"], analysis, verified=True)
            else:
                print("\n[AgentAutopsy] fix not verified — review manually")
        except Exception as exc:
            print(f"\n[AgentAutopsy] analysis aborted — {type(exc).__name__}: {exc}")

        print_report(run_id, db)

    atexit.register(on_exit)
@@ -0,0 +1,44 @@
1
+ """Fix analyzer for AgentAutopsy."""
2
+
3
+ import anthropic
4
+
5
+
6
def analyze(pruned_snapshot, failure, model="claude-haiku-4-5-20251001", max_tokens=500):
    """Ask an Anthropic model for the failure node, root cause and fix.

    Args:
        pruned_snapshot: Iterable of event dicts; each must have ``type`` and
            ``payload`` keys, which are rendered one per line into the prompt.
        failure: Dict with ``error_type`` and ``message`` keys describing the
            detected failure.
        model: Anthropic model identifier used for the request.
        max_tokens: Upper bound on the length of the generated answer.

    Returns:
        The concatenated text of the response's text blocks, or an empty
        string when the response carries no text.
    """
    trace_lines = [f"- [{e['type']}] {e['payload']}" for e in pruned_snapshot]
    user_message = "\n".join(
        [
            f"Error: {failure['error_type']}: {failure['message']}",
            "Trace:",
            *trace_lines,
        ]
    )

    client = anthropic.Anthropic()
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=(
            "You are AgentAutopsy, an expert AI agent debugger. "
            "Given a trace of an AI agent's decisions leading up to a failure, output:\n"
            "FAILURE NODE: <exact step that caused failure>\n"
            "ROOT CAUSE: <one sentence>\n"
            "FIX: <concrete patch or instruction>"
        ),
        messages=[{"role": "user", "content": user_message}],
    )
    # Indexing content[0] raises on an empty reply and ignores any further
    # text blocks; join whatever text is present instead.
    return "".join(getattr(block, "text", None) or "" for block in response.content)
29
+
30
+
31
if __name__ == "__main__":
    # Hand-built two-event trace; analyze() still calls the Anthropic API.
    timeout_error = {"error_type": "TimeoutError", "message": "request timed out after 30s"}
    demo_trace = [
        {
            "id": "1",
            "type": "llm_call",
            "payload": {"model": "gpt-4", "messages": [{"role": "user", "content": "fetch data from api"}]},
            "cassette_size": 0,
            "timestamp": "2024-01-01T00:00:01",
        },
        {
            "id": "2",
            "type": "error",
            "payload": dict(timeout_error),
            "cassette_size": 0,
            "timestamp": "2024-01-01T00:00:02",
        },
    ]
    demo_failure = {
        "failed": True,
        **timeout_error,
        "run_id": "test-123",
        "failure_event_id": "2",
    }
    print(analyze(demo_trace, demo_failure))
@@ -0,0 +1,116 @@
1
+ """Fix cache for AgentAutopsy."""
2
+
3
+ import re
4
+ import uuid
5
+ from typing import Any
6
+
7
+ from sqlite_utils import Database
8
+
9
+
10
+ def _words(text: str) -> set[str]:
11
+ return {w.lower() for w in re.findall(r"\w+", text) if w}
12
+
13
+
14
def _match_score(query_text: str, stored_text: str) -> float:
    """Return the fraction of the query's words that also occur in the stored text.

    The score is 0.0 when the query contains no words at all.
    """
    wanted = _words(query_text)
    if wanted:
        shared = wanted & _words(stored_text)
        return len(shared) / len(wanted)
    return 0.0
21
+
22
+
23
def setup_cache(db: Database) -> None:
    """Create the ``fix_cache`` table, keyed by ``id``, unless it already exists."""
    columns = {
        "id": str,
        "failure_type": str,
        "failure_text": str,
        "patch": str,
        "verified": bool,
        "hits": int,
    }
    db["fix_cache"].create(columns, pk="id", if_not_exists=True)
36
+
37
+
38
def store_fix(
    db: Database,
    failure_type: str,
    failure_text: str,
    patch: str,
    verified: bool = True,
) -> str:
    """Insert a new fix into ``fix_cache`` with zero hits and return its id.

    The id is a freshly generated UUID4 string.
    """
    record = {
        "id": str(uuid.uuid4()),
        "failure_type": failure_type,
        "failure_text": failure_text,
        "patch": patch,
        "verified": verified,
        "hits": 0,
    }
    db["fix_cache"].insert(record, pk="id")
    return record["id"]
58
+
59
+
60
def lookup_fix(
    db: Database,
    failure_type: str,
    failure_text: str,
    threshold: float = 0.6,
) -> str | None:
    """Return the best cached patch for a failure, or None when nothing matches.

    Only rows whose ``failure_type`` equals *failure_type* are considered.
    Each candidate is scored with ``_match_score`` against *failure_text*; the
    highest score at or above *threshold* wins (the first such row on ties)
    and its ``hits`` counter is incremented.

    Args:
        db: Database holding the ``fix_cache`` table.
        failure_type: Exact failure type to filter on.
        failure_text: Failure message to compare against stored messages.
        threshold: Minimum score a row needs to count as a match.
    """
    if not db["fix_cache"].exists():
        return None

    # A failure recorded without a message arrives as None; score it as empty
    # text instead of letting the tokenizer raise.
    query_text = failure_text or ""

    best_row = None
    best_score = -1.0
    candidates = db["fix_cache"].rows_where(
        where="failure_type = ?",
        where_args=[failure_type],
    )
    for row in candidates:
        score = _match_score(query_text, row["failure_text"] or "")
        if score >= threshold and score > best_score:
            best_score = score
            best_row = row

    if best_row is None:
        return None

    # The UPDATE opens an implicit transaction. Commit it explicitly, or the
    # increment is lost when the caller exits right after a cache hit.
    with db.conn:
        db.execute(
            "UPDATE fix_cache SET hits = hits + 1 WHERE id = ?",
            [best_row["id"]],
        )
    return best_row["patch"]
91
+
92
+
93
def cache_stats(db: Database) -> dict[str, int]:
    """Return the number of cached fixes and the sum of their hit counters.

    Both numbers are 0 when the ``fix_cache`` table does not exist.
    """
    table = db["fix_cache"]
    if table.exists():
        fixes = table.count
        summed = db.execute("SELECT COALESCE(SUM(hits), 0) FROM fix_cache").fetchone()
        hits = int(summed[0]) if summed else 0
    else:
        fixes = 0
        hits = 0
    return {"total_fixes": fixes, "total_hits": hits}
101
+
102
+
103
if __name__ == "__main__":
    from agentautopsy.db import create_tables, get_db

    # Manual demo: seed two fixes, then show one lookup hit and one miss.
    demo_db = get_db()
    create_tables(demo_db)
    setup_cache(demo_db)

    seed_fixes = [
        ("TimeoutError", "request timed out after 30s calling external api", "Add timeout=60 and retry logic"),
        ("AuthenticationError", "invalid api key provided", "Check OPENAI_API_KEY environment variable"),
    ]
    for seed_type, seed_text, seed_patch in seed_fixes:
        store_fix(demo_db, seed_type, seed_text, seed_patch)

    hit = lookup_fix(demo_db, "TimeoutError", "timed out calling api")
    print(f"Cache hit: {hit}")
    no_hit = lookup_fix(demo_db, "TimeoutError", "memory allocation failed")
    print(f"Cache miss: {no_hit}")
    print(f"Stats: {cache_stats(demo_db)}")
@@ -0,0 +1,30 @@
1
+ """Cassette serialization for AgentAutopsy LLM responses."""
2
+
3
+ import json
4
+
5
+
6
def save_cassette(response_object: object) -> bytes:
    """Serialize a response object to bytes for storage.

    The JSON encoding of ``response_object.model_dump()`` is preferred. If
    that call or the JSON encoding fails for any reason, the object's
    ``str()`` form is stored instead (deliberate best-effort).
    """
    try:
        serialized = json.dumps(response_object.model_dump())
    except Exception:
        serialized = str(response_object)
    return serialized.encode()
12
+
13
+
14
def load_cassette(cassette_bytes: bytes) -> dict:
    """Decode stored cassette bytes back into a dict.

    Returns an empty dict when the bytes cannot be decoded or parsed as JSON,
    or when the parsed JSON value is not an object.
    """
    try:
        parsed = json.loads(cassette_bytes.decode())
    except Exception:
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
20
+
21
+
22
if __name__ == "__main__":
    # Round-trip a stand-in response object, then show the bad-input fallback.
    stub_response = type("R", (), {"model_dump": lambda self: {"id": "test", "content": "hello"}})()
    encoded = save_cassette(stub_response)
    print(f"Cassette saved: {len(encoded)} bytes")
    print(f"Cassette loaded: {load_cassette(encoded)}")
    print(f"Bad cassette returns: {load_cassette(b'not json at all')}")
@@ -0,0 +1,65 @@
1
+ """Command-line interface for AgentAutopsy."""
2
+
3
+ import sys
4
+
5
+ from agentautopsy.cache import cache_stats, setup_cache
6
+ from agentautopsy.db import create_tables, get_db
7
+ from agentautopsy.reporter import print_report
8
+
9
+
10
def _usage() -> None:
    """Print the command-line usage summary to standard output."""
    # NOTE(review): the literal below is reproduced exactly as visible in the
    # source view; confirm its internal spacing against the repository copy.
    print(
        """Usage: agentautopsy <command>

Commands:
runs List all runs (id, start_time, status)
replay <run_id> Print the event report for a run
stats Show fix cache statistics

Examples:
agentautopsy runs
agentautopsy replay abc-123-def
agentautopsy stats"""
    )
24
+
25
+
26
def main() -> None:
    """Run the ``agentautopsy`` command-line interface.

    Reads the command from ``sys.argv``:

    * no arguments, ``-h``, ``--help`` or ``help`` print the usage text;
    * ``runs`` lists every run as ``id<TAB>start_time<TAB>status``, newest first;
    * ``replay <run_id>`` prints the event report for that run;
    * ``stats`` prints the fix-cache totals.

    An unknown command, or ``replay`` without a run id, writes a message to
    stderr and exits with status 2.
    """
    argv = sys.argv[1:]
    if not argv or argv[0] in ("-h", "--help", "help"):
        _usage()
        return

    cmd = argv[0]
    # Validate before opening the database so a mistyped command does not
    # leave an agentautopsy.db file behind in the working directory.
    if cmd not in ("runs", "replay", "stats"):
        print(f"Unknown command: {cmd}", file=sys.stderr)
        _usage()
        sys.exit(2)
    if cmd == "replay" and len(argv) < 2:
        print("usage: agentautopsy replay <run_id>", file=sys.stderr)
        sys.exit(2)

    db = get_db()
    create_tables(db)  # guarantees that the runs/events tables exist below

    if cmd == "runs":
        rows = list(db["runs"].rows_where(order_by="start_time desc"))
        if not rows:
            print("No runs found.")
            return
        for row in rows:
            print(f"{row['id']}\t{row['start_time']}\t{row['status']}")
        return

    if cmd == "replay":
        print_report(argv[1], db)
        return

    # Only "stats" is left.
    setup_cache(db)
    stats = cache_stats(db)
    print(f"total_fixes: {stats['total_fixes']}")
    print(f"total_hits: {stats['total_hits']}")
@@ -0,0 +1,79 @@
1
+ """Database layer for AgentAutopsy."""
2
+
3
+ import json
4
+ import uuid
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+
8
+ from sqlite_utils import Database
9
+
10
+
11
def get_db() -> Database:
    """Return a Database for ``agentautopsy.db`` in the current working directory."""
    db_path = Path.cwd() / "agentautopsy.db"
    return Database(db_path)
13
+
14
+
15
def create_tables(db: Database) -> None:
    """Create the ``runs`` and ``events`` tables unless they already exist.

    Both tables use ``id`` as their primary key.
    """
    schemas = {
        "runs": {
            "id": str,
            "start_time": str,
            "status": str,
            "framework": str,
        },
        "events": {
            "id": str,
            "run_id": str,
            "timestamp": str,
            "type": str,
            "payload": str,
            "cassette": bytes,
        },
    }
    for table_name, columns in schemas.items():
        db[table_name].create(columns, pk="id", if_not_exists=True)
38
+
39
+
40
def insert_run(db: Database) -> str:
    """Insert a new run row and return its id.

    The row gets a UUID4 id, the current UTC time in ISO-8601 form, status
    ``"running"`` and framework ``"unknown"``.
    """
    run_row = {
        "id": str(uuid.uuid4()),
        "start_time": datetime.now(timezone.utc).isoformat(),
        "status": "running",
        "framework": "unknown",
    }
    db["runs"].insert(run_row, pk="id")
    return run_row["id"]
53
+
54
+
55
def insert_event(
    db: Database, run_id: str, type: str, payload: dict, cassette: bytes | None = None
) -> None:
    """Append one event row to the ``events`` table.

    Args:
        db: Database that holds the ``events`` table.
        run_id: Id of the run the event belongs to.
        type: Event type label (the name shadows the builtin but is kept so
            existing keyword callers keep working).
        payload: Event details; stored as a JSON string.
        cassette: Optional raw bytes stored alongside the event.
    """
    event_id = str(uuid.uuid4())
    timestamp = datetime.now(timezone.utc).isoformat()
    # Interceptors pass caller-supplied request data straight through, and it
    # may hold values JSON cannot encode. Recording must never raise into the
    # intercepted call, so such values are deliberately stored as str().
    encoded_payload = json.dumps(payload, default=str)
    db["events"].insert(
        {
            "id": event_id,
            "run_id": run_id,
            "timestamp": timestamp,
            "type": type,
            "payload": encoded_payload,
            "cassette": cassette,
        },
        pk="id",
    )
71
+
72
+
73
if __name__ == "__main__":
    # Manual check: create the schema, one run and one event, then report.
    demo_db = get_db()
    create_tables(demo_db)
    demo_run = insert_run(demo_db)
    insert_event(demo_db, demo_run, "test", {"msg": "day 2 works"})
    print(f"Run created: {demo_run}")
    print(f"Events in db: {demo_db['events'].count}")
@@ -0,0 +1,85 @@
1
+ """Failure detection and trace snapshots for AgentAutopsy."""
2
+
3
+ import json
4
+ from typing import Any
5
+
6
+ from sqlite_utils import Database
7
+
8
+
9
def detect_failure(run_id: str, db: Database) -> dict[str, Any]:
    """Report whether a run recorded an error event.

    Returns:
        ``{"failed": False, "run_id": run_id}`` when the run has no events of
        type ``"error"``. Otherwise a dict with ``failed=True``, ``run_id``,
        the ``error_type`` and ``message`` taken from the earliest error
        event's payload (None when absent), and that event's id as
        ``failure_event_id``.
    """
    error_rows = db["events"].rows_where(
        where='run_id = ? AND "type" = ?',
        where_args=[run_id, "error"],
        order_by="timestamp",
    )
    first_error = next(iter(error_rows), None)
    if first_error is None:
        return {"failed": False, "run_id": run_id}

    raw_payload = first_error.get("payload")
    try:
        payload = json.loads(raw_payload) if raw_payload else {}
    except (json.JSONDecodeError, TypeError):
        payload = {}
    # Valid JSON that is not an object (a list, a number, ...) has no .get();
    # treat it like a missing payload, as take_snapshot does.
    if not isinstance(payload, dict):
        payload = {}

    return {
        "failed": True,
        "run_id": run_id,
        "error_type": payload.get("error_type"),
        "message": payload.get("message"),
        "failure_event_id": first_error["id"],
    }
35
+
36
+
37
def take_snapshot(run_id: str, db: Database) -> list[dict[str, Any]]:
    """Return the run's events, ordered by timestamp, as plain dicts.

    Each entry has ``id``, ``type``, ``payload`` (the decoded JSON object, or
    an empty dict when it is missing, invalid or not an object),
    ``cassette_size`` (byte length of the cassette, 0 when absent) and
    ``timestamp``.
    """
    events = db["events"].rows_where(
        where="run_id = ?",
        where_args=[run_id],
        order_by="timestamp",
    )
    result: list[dict[str, Any]] = []
    for record in events:
        decoded: Any = {}
        encoded = record.get("payload")
        if encoded is not None:
            try:
                candidate = json.loads(encoded)
            except (json.JSONDecodeError, TypeError):
                candidate = None
            if isinstance(candidate, dict):
                decoded = candidate

        blob = record.get("cassette")
        result.append(
            {
                "id": record["id"],
                "type": record["type"],
                "payload": decoded,
                "cassette_size": 0 if blob is None else len(blob),
                "timestamp": record["timestamp"],
            }
        )
    return result
70
+
71
+
72
if __name__ == "__main__":
    from agentautopsy.db import create_tables, get_db, insert_event, insert_run

    # Manual check: record one call and one error, then inspect the results.
    demo_db = get_db()
    create_tables(demo_db)
    demo_run = insert_run(demo_db)
    insert_event(demo_db, demo_run, "llm_call", {"model": "gpt-4", "messages": []})
    insert_event(demo_db, demo_run, "error", {"error_type": "TimeoutError", "message": "request timed out"})

    verdict = detect_failure(demo_run, demo_db)
    print(f"Failed: {verdict['failed']}")
    print(f"Error: {verdict['error_type']}: {verdict['message']}")

    events = take_snapshot(demo_run, demo_db)
    print(f"Snapshot has {len(events)} events")
    print(f"Event types: {[e['type'] for e in events]}")
@@ -0,0 +1,137 @@
1
+ """OpenAI and Anthropic LLM interceptors for AgentAutopsy."""
2
+
3
+ from typing import Any, Callable
4
+
5
+ import openai
6
+
7
+ from agentautopsy.cassette import save_cassette
8
+ from agentautopsy.db import insert_event
9
+
10
+
11
def start_interceptor(run_id: str, db: Any) -> None:
    """Replace ``openai.chat.completions.create`` with a recording wrapper.

    The wrapper stores an ``llm_call`` event (model and messages taken from
    the keyword arguments) before delegating to the original function. An
    exception is stored as an ``error`` event and re-raised; a successful
    response is stored as an ``llm_response`` event with the serialized
    response as its cassette.
    """
    target = openai.chat.completions
    wrapped: Callable[..., Any] = target.create

    def create_wrapper(*args: Any, **kwargs: Any) -> Any:
        request_info = {
            "model": kwargs.get("model"),
            "messages": kwargs.get("messages"),
        }
        insert_event(db, run_id, "llm_call", request_info)
        try:
            reply = wrapped(*args, **kwargs)
        except Exception as exc:
            failure_info = {"error_type": type(exc).__name__, "message": str(exc)}
            insert_event(db, run_id, "error", failure_info)
            raise
        insert_event(db, run_id, "llm_response", {}, cassette=save_cassette(reply))
        return reply

    target.create = create_wrapper
44
+
45
+
46
def start_anthropic_interceptor(run_id: str, db: Any) -> None:
    """Patch ``anthropic.Anthropic.__init__`` so new clients record their calls.

    After the original initializer runs, the instance's ``messages.create``
    is replaced by a wrapper. The wrapper stores an ``llm_call`` event before
    delegating, an ``error`` event (then re-raises) on failure, and an
    ``llm_response`` event with the serialized response on success.
    """
    import anthropic

    anthropic_client = anthropic.Anthropic
    real_init = anthropic_client.__init__

    def patched_init(self, *args: Any, **kwargs: Any) -> None:
        real_init(self, *args, **kwargs)
        wrapped = self.messages.create

        def create_wrapper(*args: Any, **kwargs: Any) -> Any:
            request_info = {
                "provider": "anthropic",
                "model": kwargs.get("model"),
                "messages": kwargs.get("messages"),
            }
            insert_event(db, run_id, "llm_call", request_info)
            try:
                reply = wrapped(*args, **kwargs)
            except Exception as exc:
                failure_info = {"error_type": type(exc).__name__, "message": str(exc)}
                insert_event(db, run_id, "error", failure_info)
                raise
            insert_event(db, run_id, "llm_response", {}, cassette=save_cassette(reply))
            return reply

        self.messages.create = create_wrapper

    anthropic_client.__init__ = patched_init
89
+
90
+
91
def start_http_interceptor(run_id: str, db: Any) -> None:
    """Patch ``httpx.Client.send`` so every request is recorded.

    The replacement stores an ``http_request`` event (method and URL) before
    delegating to the original ``send``. An exception is stored as an
    ``error`` event and re-raised. A response is stored as an
    ``http_response`` event with its status code and, when the body has
    already been read, the body bytes as the cassette.
    """
    import httpx

    original_send = httpx.Client.send

    def patched_send(self, request, **kwargs):
        insert_event(
            db,
            run_id,
            "http_request",
            {"method": request.method, "url": str(request.url)},
        )
        try:
            response = original_send(self, request, **kwargs)
        except Exception as e:
            insert_event(
                db,
                run_id,
                "error",
                {"error_type": type(e).__name__, "message": str(e)},
            )
            raise

        # A response sent with stream=True has no body loaded yet, and reading
        # .content raises ResponseNotRead. Record it without a cassette rather
        # than breaking the caller's streaming request.
        try:
            body = response.content
        except httpx.ResponseNotRead:
            body = None

        insert_event(
            db,
            run_id,
            "http_response",
            {"status_code": response.status_code},
            cassette=body,
        )
        return response

    httpx.Client.send = patched_send
123
+
124
+
125
if __name__ == "__main__":
    from agentautopsy.db import create_tables, get_db, insert_run

    # Manual check: install all three interceptors against a fresh run.
    demo_db = get_db()
    create_tables(demo_db)
    demo_run = insert_run(demo_db)
    for install in (start_interceptor, start_anthropic_interceptor, start_http_interceptor):
        install(demo_run, demo_db)
    for status_line in (
        "OpenAI patched",
        "Anthropic patched",
        "HTTP patched",
        "Both interceptors active",
    ):
        print(status_line)
@@ -0,0 +1,58 @@
1
+ """Counterfactual pruner for AgentAutopsy snapshots."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def prune(
    snapshot: list[dict[str, Any]],
    failure_event_id: str,
    max_events: int = 10,
) -> list[dict[str, Any]]:
    """Reduce a run snapshot to the events most relevant to a failure.

    Kept events are the failure event, the event immediately before it (by
    timestamp), and every ``llm_call``, ``tool_call`` and ``error`` event.
    ``llm_response`` and ``http_response`` events are always dropped. The
    result is ordered by timestamp and holds at most *max_events* entries.

    Args:
        snapshot: Event dicts with ``id``, ``type`` and ``timestamp`` keys.
        failure_event_id: Id of the event identified as the failure.
        max_events: Maximum number of events returned; must be at least 1.

    Returns:
        The pruned events, or an empty list when *failure_event_id* is not in
        *snapshot*.

    Raises:
        ValueError: If *max_events* is smaller than 1.
    """
    if max_events < 1:
        raise ValueError("max_events must be at least 1")
    if not any(e.get("id") == failure_event_id for e in snapshot):
        return []

    ordered = sorted(snapshot, key=lambda e: e.get("timestamp", ""))
    failure_index = next(
        i for i, e in enumerate(ordered) if e.get("id") == failure_event_id
    )

    keep_ids: set[str] = {failure_event_id}
    if failure_index > 0:
        prev_id = ordered[failure_index - 1].get("id")
        if isinstance(prev_id, str):
            keep_ids.add(prev_id)
    keep_ids.update(
        e["id"]
        for e in ordered
        if isinstance(e.get("id"), str)
        and e.get("type") in ("llm_call", "tool_call", "error")
    )

    # `ordered` is already sorted, so filtering preserves timestamp order.
    pruned = [
        e
        for e in ordered
        if e.get("id") in keep_ids
        and e.get("type") not in ("llm_response", "http_response")
    ]

    if len(pruned) > max_events:
        tail = pruned[-max_events:]
        failure_pos = next(
            (i for i, e in enumerate(pruned) if e.get("id") == failure_event_id),
            None,
        )
        if failure_pos is None or failure_pos >= len(pruned) - max_events:
            pruned = tail
        else:
            # Many events followed the failure. A plain tail would cut the
            # failure itself, so end the window on the failure event.
            end = failure_pos + 1
            pruned = pruned[max(0, end - max_events):end]

    return pruned
44
+
45
+
46
if __name__ == "__main__":
    # Manual check: six hand-written events ending in an error.
    demo_events = [
        {"id": "1", "type": "llm_call", "payload": {"model": "gpt-4"}, "cassette_size": 0, "timestamp": "2024-01-01T00:00:01"},
        {"id": "2", "type": "llm_response", "payload": {}, "cassette_size": 142, "timestamp": "2024-01-01T00:00:02"},
        {"id": "3", "type": "http_request", "payload": {"method": "GET", "url": "https://api.example.com"}, "cassette_size": 0, "timestamp": "2024-01-01T00:00:03"},
        {"id": "4", "type": "http_response", "payload": {"status_code": 200}, "cassette_size": 0, "timestamp": "2024-01-01T00:00:04"},
        {"id": "5", "type": "llm_call", "payload": {"model": "gpt-4"}, "cassette_size": 0, "timestamp": "2024-01-01T00:00:05"},
        {"id": "6", "type": "error", "payload": {"error_type": "TimeoutError", "message": "timed out"}, "cassette_size": 0, "timestamp": "2024-01-01T00:00:06"},
    ]
    kept = prune(demo_events, "6")
    print(f"Original events: {len(demo_events)}")
    print(f"Pruned events: {len(kept)}")
    print(f"Kept types: {[e['type'] for e in kept]}")
@@ -0,0 +1,75 @@
1
+ """Replay sandbox for AgentAutopsy."""
2
+
3
+ from typing import Any
4
+
5
+ import openai
6
+
7
+ from agentautopsy.cassette import load_cassette
8
+ from agentautopsy.detector import take_snapshot
9
+
10
+
11
def replay(run_id: str, db: Any, patch_instructions: str) -> dict[str, Any]:
    """Replay a run's recorded LLM responses through a stubbed OpenAI call.

    Collects the decodable cassettes of the run's ``llm_response`` events in
    timestamp order, temporarily replaces ``openai.chat.completions.create``
    with a stub that returns them one by one, calls the stub once, and then
    restores the original function.

    Returns:
        A dict with ``verified``, the unchanged ``patch_instructions`` and
        ``events_replayed`` (the number of cassettes collected).
    """
    # NOTE(review): `verified` compares the stub's first return value with the
    # same recorded cassette, so it is True whenever at least one cassette was
    # collected. `patch_instructions` is only echoed back, never applied.
    recorded: list[dict[str, Any]] = []
    for event in take_snapshot(run_id, db):
        if event["type"] != "llm_response" or event["cassette_size"] <= 0:
            continue
        row = db["events"].get(event["id"])
        if row is None:
            continue
        raw = row.get("cassette")
        if raw is None:
            continue
        decoded = load_cassette(raw)
        if decoded:
            recorded.append(decoded)

    real_create = openai.chat.completions.create
    cursor = 0

    def replay_create(*args: Any, **kwargs: Any) -> dict[str, Any]:
        nonlocal cursor
        if cursor >= len(recorded):
            raise RuntimeError("No more cassette responses to replay")
        item = recorded[cursor]
        cursor += 1
        return item

    openai.chat.completions.create = replay_create
    verified = False
    try:
        if recorded:
            replayed = openai.chat.completions.create(model="gpt-4", messages=[])
            verified = replayed == recorded[0]
    finally:
        # Always restore the original function, even if the stub raised.
        openai.chat.completions.create = real_create

    return {
        "verified": verified,
        "patch_instructions": patch_instructions,
        "events_replayed": len(recorded),
    }
58
+
59
+
60
if __name__ == "__main__":
    import json

    from agentautopsy.db import create_tables, get_db, insert_event, insert_run

    # Manual check: one recorded response followed by an error, then replay.
    demo_db = get_db()
    create_tables(demo_db)
    demo_run = insert_run(demo_db)
    canned = {"id": "chatcmpl-123", "choices": [{"message": {"content": "hello"}}]}
    insert_event(demo_db, demo_run, "llm_call", {"model": "gpt-4", "messages": []})
    insert_event(demo_db, demo_run, "llm_response", {}, cassette=json.dumps(canned).encode())
    insert_event(demo_db, demo_run, "error", {"error_type": "TimeoutError", "message": "timed out"})

    outcome = replay(demo_run, demo_db, "Add timeout=60 to the API call")
    print(f"Verified: {outcome['verified']}")
    print(f"Events replayed: {outcome['events_replayed']}")
    print(f"Patch: {outcome['patch_instructions']}")
@@ -0,0 +1,105 @@
1
+ """Terminal reporter for AgentAutopsy."""
2
+
3
+ import json
4
+ from typing import Any
5
+
6
# ANSI escape sequences used to colour terminal output.
RESET = "\x1b[0m"
CYAN = "\x1b[96m"
RED = "\x1b[91m"
YELLOW = "\x1b[93m"
GREEN = "\x1b[92m"
BLUE = "\x1b[94m"
RED_BOLD = "\x1b[91;1m"

# Colour applied to the bracketed tag of each known event type.
EVENT_COLORS: dict[str, str] = dict(
    llm_call=CYAN,
    error=RED,
    http_request=YELLOW,
    http_response=GREEN,
    llm_response=BLUE,
)

# Column width the plain "[type]" tag is padded to before the detail text.
TAG_WIDTH = 17
23
+
24
+
25
def _colored_event_tag(ev_type: str) -> str:
    """Return ``[ev_type]``, wrapped in its colour codes when the type has one."""
    tag = f"[{ev_type}]"
    color = EVENT_COLORS.get(ev_type)
    return f"{color}{tag}{RESET}" if color else tag
30
+
31
+
32
def _print_event_line(ev_type: str, detail: str) -> None:
    """Print one report line: the coloured tag, padding, then the detail text.

    Padding is computed from the uncoloured tag so that escape sequences do
    not affect alignment. With an empty *detail* only the tag is printed.
    """
    tag = _colored_event_tag(ev_type)
    if not detail:
        print(tag)
        return
    gap = " " * max(0, TAG_WIDTH - len(f"[{ev_type}]"))
    print(f"{tag}{gap}{detail}")
40
+
41
+
42
def print_report(run_id: str, db: Any) -> None:
    """Print a terminal report of every event recorded for a run.

    Events are listed in timestamp order with a type-specific detail. If the
    run contains an error event, the position of the first one (1-based) and
    its error type and message are printed as the root cause. The report
    ends with the total number of events.
    """

    def _payload_of(row: dict[str, Any]) -> dict[str, Any]:
        # Decode a row's JSON payload. Anything missing, invalid or not a
        # JSON object becomes {} so the .get() lookups below cannot fail.
        raw = row.get("payload")
        try:
            data = json.loads(raw) if raw else {}
        except (json.JSONDecodeError, TypeError):
            return {}
        return data if isinstance(data, dict) else {}

    sep = "═══════════════════════════════════"
    root_sep = "══════════════════════════════════════"
    rows = list(
        db["events"].rows_where(
            where="run_id = ?",
            where_args=[run_id],
            order_by="timestamp",
        )
    )

    print(sep)
    print(" AgentAutopsy — Run Report")
    print(f" Run ID: {run_id}")
    print(sep)

    error_index = None
    error_payload: dict[str, Any] = {}
    for position, row in enumerate(rows, start=1):
        ev_type = row["type"]
        payload = _payload_of(row)
        cassette = row.get("cassette")
        blob = cassette if cassette is not None else b""

        if ev_type == "llm_call":
            detail = f"model: {payload.get('model')}"
        elif ev_type == "llm_response":
            detail = f"cassette: {len(blob)} bytes"
        elif ev_type == "http_request":
            detail = f"{payload.get('method')} {payload.get('url')}"
        elif ev_type == "http_response":
            detail = f"status: {payload.get('status_code')}"
        elif ev_type == "error":
            detail = f"{payload.get('error_type')}: {payload.get('message')}"
        else:
            detail = ""

        _print_event_line(ev_type, detail)

        # Remember only the first error; it is reported as the divergence.
        if ev_type == "error" and error_index is None:
            error_index = position
            error_payload = payload

    if error_index is not None:
        print(f"{RED_BOLD}→ Divergence detected at event {error_index}{RESET}")
        error_type = error_payload.get("error_type")
        message = error_payload.get("message")
        print(root_sep)
        print(f"{RED_BOLD}Root Cause: {error_type} — {message}{RESET}")
        print(root_sep)

    print(sep)
    print(f"Total events: {len(rows)}")
@@ -0,0 +1,7 @@
1
+ import agentautopsy
2
+ from agentautopsy.db import get_db
3
+
4
# Start watching, then list the tables that watch() created.
agentautopsy.watch()
db = get_db()
table_names = db.table_names()
print(f"Tables: {table_names}")
print("Day 7 complete — watch() is fully wired")
@@ -0,0 +1,13 @@
1
+ import agentautopsy
2
+ from agentautopsy.db import get_db, insert_event
3
+
4
# Start a watched run, then inject a failing trace so the exit hook fires.
agentautopsy.watch()
db = get_db()

from agentautopsy.db import get_db, create_tables, insert_run

# watch() does not return its run id, so take the last row of the runs table.
latest_run = list(db["runs"].rows)[-1]
run_id = latest_run["id"]

llm_call_payload = {"model": "gpt-4", "messages": [{"role": "user", "content": "fetch data"}]}
error_payload = {"error_type": "TimeoutError", "message": "request timed out after 30s"}
insert_event(db, run_id, "llm_call", llm_call_payload)
insert_event(db, run_id, "error", error_payload)
print("Pipeline test complete — check output above on exit")
@@ -0,0 +1,12 @@
1
+ import unittest
2
+
3
+ import agentautopsy
4
+
5
+
6
class TestSmoke(unittest.TestCase):
    """Smoke test for the public entry point."""

    def test_watch_does_not_raise(self):
        """Calling watch() should complete without raising."""
        agentautopsy.watch()


if __name__ == "__main__":
    unittest.main()