agentstep 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ import sqlite3
2
+ import json
3
+ from typing import Sequence
4
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
5
+ from opentelemetry.sdk.trace import ReadableSpan
6
+
7
class ReplayOtelExporter(SpanExporter):
    """Export OpenTelemetry spans to the ``otel_spans`` table of a SQLite database.

    The connection is supplied by the caller; this class never closes it.
    """

    _INSERT_SQL = '''
        INSERT OR REPLACE INTO otel_spans
        (span_id, trace_id, parent_span_id, name, start_time, end_time,
         attributes, events, status_code, thread_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''

    def __init__(self, conn: sqlite3.Connection):
        """Keep ``conn`` and make sure the ``otel_spans`` schema exists."""
        self.conn = conn
        self._init_db()

    def _init_db(self):
        """Create the ``otel_spans`` table and its ``thread_id`` index if missing."""
        cursor = self.conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS otel_spans (
                span_id TEXT PRIMARY KEY,
                trace_id TEXT,
                parent_span_id TEXT,
                name TEXT,
                start_time INTEGER,
                end_time INTEGER,
                attributes TEXT,
                events TEXT,
                status_code TEXT,
                thread_id TEXT
            )
        ''')
        # Index on thread_id for fast lookup during branch replay
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_otel_spans_thread_id ON otel_spans (thread_id)')
        self.conn.commit()

    @staticmethod
    def _span_to_row(span: ReadableSpan) -> tuple:
        """Flatten one span into the column tuple expected by ``_INSERT_SQL``."""
        attrs = dict(span.attributes) if span.attributes else {}
        thread_id = attrs.get("lg.thread_id")
        events = [
            {
                "name": event.name,
                "timestamp": event.timestamp,
                "attributes": dict(event.attributes) if event.attributes else {},
            }
            for event in span.events
        ]
        return (
            str(span.context.span_id),
            str(span.context.trace_id),
            str(span.parent.span_id) if span.parent else None,
            span.name,
            span.start_time,
            span.end_time,
            json.dumps(attrs),
            json.dumps(events),
            span.status.status_code.name if span.status else "UNSET",
            # Compare against None so a falsy-but-valid id such as 0 is kept.
            str(thread_id) if thread_id is not None else None,
        )

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Write ``spans`` to SQLite as a single transaction.

        Returns:
            ``SpanExportResult.SUCCESS`` once the batch is committed, or
            ``SpanExportResult.FAILURE`` if SQLite rejected it. On failure the
            transaction is rolled back so a later export cannot commit a
            half-written batch.
        """
        # Serialize first: a JSON error then surfaces before anything is written.
        rows = [self._span_to_row(span) for span in spans]
        try:
            self.conn.executemany(self._INSERT_SQL, rows)
            self.conn.commit()
        except sqlite3.Error:
            import logging

            self.conn.rollback()
            logging.getLogger(__name__).exception(
                "Failed to export %d span(s) to SQLite", len(rows)
            )
            return SpanExportResult.FAILURE
        return SpanExportResult.SUCCESS

    def shutdown(self) -> None:
        """No-op: the connection belongs to whoever created it."""
        pass
@@ -0,0 +1,118 @@
1
+ import functools
2
+ import sqlite3
3
+ import os
4
+ import json
5
+ from contextlib import contextmanager
6
+
7
+ from opentelemetry import trace
8
+ from opentelemetry.sdk.trace import TracerProvider
9
+ from opentelemetry.sdk.trace.export import SimpleSpanProcessor
10
+
11
+ from langchain_core.callbacks import BaseCallbackHandler
12
+ from langchain_core.messages import BaseMessage
13
+ from langchain_core.outputs import LLMResult
14
+
15
+ from agentstep.sdk.exporter import ReplayOtelExporter
16
+
17
tracer_provider = None


def setup_otel(sqlite_path: str = "trace.sqlite"):
    """Install a tracer provider that exports spans to a SQLite file.

    Only the first call does anything. Once the module-level
    ``tracer_provider`` is set, later calls return immediately and their
    ``sqlite_path`` argument is ignored.
    """
    global tracer_provider
    if tracer_provider is None:
        # check_same_thread=False lets the connection be used from threads
        # other than the one that opened it.
        exporter = ReplayOtelExporter(
            sqlite3.connect(sqlite_path, check_same_thread=False)
        )
        tracer_provider = TracerProvider()
        tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
        trace.set_tracer_provider(tracer_provider)
30
+
31
class ReplayCallbackHandler(BaseCallbackHandler):
    """
    A LangChain callback handler that emits OpenTelemetry spans for LLMs and Tools,
    enriching them with the LangGraph thread_id and optional branch_id.

    One span is opened per ``run_id`` on ``*_start`` and closed on the matching
    ``*_end`` or ``*_error`` callback.
    """

    def __init__(self, thread_id: str, branch_id: str | None = None):
        self.thread_id = thread_id
        self.branch_id = branch_id
        self.tracer = trace.get_tracer("agentstep")
        self.spans = {}  # str(run_id) -> Span that is still open

    def _set_branch_attrs(self, span):
        """Tag ``span`` with the thread id and, when set, the branch id."""
        span.set_attribute("lg.thread_id", self.thread_id)
        if self.branch_id:
            span.set_attribute("lg.branch_id", self.branch_id)

    def _fail_span(self, error: BaseException, run_id) -> None:
        """Record ``error`` on the span for ``run_id``, mark it ERROR and close it."""
        span = self.spans.pop(str(run_id), None)
        if span is None:
            return
        span.record_exception(error)
        span.set_status(trace.Status(trace.StatusCode.ERROR, str(error)))
        span.end()

    def on_llm_start(self, serialized: dict, prompts: list[str], *, run_id, parent_run_id=None, tags=None, metadata=None, **kwargs):
        """Open an ``llm_call`` span and record the first prompt, if any."""
        span = self.tracer.start_span("llm_call")
        self._set_branch_attrs(span)
        span.set_attribute("gen_ai.system", "langgraph")
        if prompts:
            span.set_attribute("gen_ai.prompt", prompts[0])
        self.spans[str(run_id)] = span

    def on_llm_end(self, response: LLMResult, *, run_id, parent_run_id=None, **kwargs):
        """Attach the completion and token usage, then close the span."""
        # pop() so the span is forgotten even if attribute handling fails below.
        span = self.spans.pop(str(run_id), None)
        if span is None:
            return
        try:
            # Capture output if available
            if response.generations and response.generations[0]:
                span.set_attribute("gen_ai.completion", response.generations[0][0].text)

            # Capture token usage
            if response.llm_output and "token_usage" in response.llm_output:
                usage = response.llm_output["token_usage"]
                if "prompt_tokens" in usage:
                    span.set_attribute("gen_ai.usage.input_tokens", usage["prompt_tokens"])
                if "completion_tokens" in usage:
                    span.set_attribute("gen_ai.usage.output_tokens", usage["completion_tokens"])
        finally:
            span.end()

    def on_llm_error(self, error: BaseException, *, run_id, parent_run_id=None, **kwargs):
        """Close the LLM span as failed so it is exported rather than leaked."""
        self._fail_span(error, run_id)

    def on_tool_start(self, serialized: dict, input_str: str, *, run_id, parent_run_id=None, tags=None, metadata=None, **kwargs):
        """Open a ``tool_call`` span carrying the tool name and its input."""
        span = self.tracer.start_span("tool_call")
        self._set_branch_attrs(span)
        # Tolerate a missing serialized payload instead of raising in a callback.
        span.set_attribute("gen_ai.tool.name", (serialized or {}).get("name", "unknown_tool"))
        span.set_attribute("gen_ai.tool.input", input_str)
        self.spans[str(run_id)] = span

    def on_tool_end(self, output: str, *, run_id, parent_run_id=None, **kwargs):
        """Attach the stringified tool output, then close the span."""
        span = self.spans.pop(str(run_id), None)
        if span is None:
            return
        span.set_attribute("gen_ai.tool.output", str(output))
        span.end()

    def on_tool_error(self, error: BaseException, *, run_id, parent_run_id=None, **kwargs):
        """Close the tool span as failed."""
        self._fail_span(error, run_id)
93
+
94
@contextmanager
def replay_trace(config: dict, sqlite_path: str = "trace.sqlite", branch_id: str | None = None):
    """
    Context manager to wrap LangGraph executions and inject the OTel callback handler.

    While the block is active, ``config["callbacks"]`` holds the caller's
    callbacks plus a ``ReplayCallbackHandler``. On exit the previous value is
    restored, so reusing one config for several ``with`` blocks does not stack
    handlers and record every span more than once.

    Example:
        config = {"configurable": {"thread_id": "123"}}
        with replay_trace(config):
            graph.invoke(input_data, config=config)

    Args:
        config: Run config; ``configurable.thread_id`` tags the spans
            (``"default_thread"`` when absent). Mutated for the duration of
            the block.
        sqlite_path: SQLite file passed to ``setup_otel``.
        branch_id: Optional branch label forwarded to the handler.

    Yields:
        The same ``config`` object, with the handler installed.
    """
    setup_otel(sqlite_path)

    thread_id = config.get("configurable", {}).get("thread_id", "default_thread")
    handler = ReplayCallbackHandler(thread_id, branch_id=branch_id)

    had_callbacks = "callbacks" in config
    previous = config.get("callbacks")

    # Build a new list rather than appending to the caller's own list object.
    if previous is None:
        callbacks = [handler]
    elif isinstance(previous, list):
        callbacks = [*previous, handler]
    else:
        callbacks = [previous, handler]

    config["callbacks"] = callbacks
    try:
        yield config
    finally:
        if had_callbacks:
            config["callbacks"] = previous
        else:
            config.pop("callbacks", None)
@@ -0,0 +1,222 @@
1
+ import sqlite3
2
+ import json
3
+ import uuid
4
+ from typing import Any, Dict, List, Optional
5
+ from fastapi import FastAPI, HTTPException, Request
6
+ from pydantic import BaseModel
7
+
8
+ from agentstep.server.replayer import replay_branch
9
+ from agentstep.sdk.tracer import replay_trace
10
+
11
+ from langchain_core.messages import ToolMessage, AIMessage
12
+
13
+ app = FastAPI(title="Agent Replay Debugger")
14
+
15
+
16
class BranchRequest(BaseModel):
    """Request body accepted by ``POST /api/branch``."""

    # Copied into config["configurable"]["thread_id"] by the branch endpoint.
    thread_id: str
    # Copied into config["configurable"]["checkpoint_id"]; the checkpoint to fork from.
    checkpoint_id: str
    # Node the override is applied as; an empty value falls back to
    # "tools" (tool_call) or "agent" (llm_call).
    node_name: str
    # "tool_call" or "llm_call"; any other value is rejected with HTTP 400.
    span_type: str
    tool_call_id: Optional[str] = None  # tool *name* sent by the UI
    # Becomes the content of the replacement ToolMessage / AIMessage.
    new_output: str
23
+
24
+
25
+ # ── Threads ────────────────────────────────────────────────────
26
+
27
@app.get("/api/threads")
def list_threads(request: Request):
    """Return every distinct, non-NULL ``thread_id`` found in ``otel_spans``."""
    db: sqlite3.Connection = request.app.state.db_conn
    cur = db.cursor()
    cur.execute("SELECT DISTINCT thread_id FROM otel_spans WHERE thread_id IS NOT NULL")

    thread_ids = []
    for (thread_id,) in cur.fetchall():
        thread_ids.append(thread_id)
    return {"threads": thread_ids}
33
+
34
+
35
+ # ── Traces ─────────────────────────────────────────────────────
36
+
37
@app.get("/api/traces/{thread_id}")
def get_traces(thread_id: str, request: Request):
    """Return all spans of a thread, grouped into the original run and its branches.

    Spans are read in ``start_time`` order and grouped by their
    ``lg.branch_id`` attribute; spans without one form the original trace.

    Returns:
        ``{"branches": [...]}`` with the original trace first, followed by the
        branches ordered by the start time of their first span. Each entry has
        ``branch_id``, ``is_original``, ``spans`` and ``meta`` (``span_count``
        and ``fork_point``).
    """
    conn: sqlite3.Connection = request.app.state.db_conn
    cursor = conn.cursor()

    cursor.execute("""
        SELECT span_id, parent_span_id, name, start_time, end_time,
               attributes, events, status_code
        FROM otel_spans
        WHERE thread_id = ?
        ORDER BY start_time ASC
    """, (thread_id,))

    spans = [
        {
            "span_id": r[0],
            "parent_span_id": r[1],
            "name": r[2],
            "start_time": r[3],
            "end_time": r[4],
            "attributes": json.loads(r[5]) if r[5] else {},
            "events": json.loads(r[6]) if r[6] else [],
            "status_code": r[7],
        }
        for r in cursor.fetchall()
    ]

    # Position of every span in the thread-wide, time-ordered list. This
    # replaces a full rescan of `spans` for each branch.
    position = {s["span_id"]: i for i, s in enumerate(spans)}

    # Group spans by lg.branch_id (original trace has no branch_id)
    groups: dict[str | None, list] = {}
    for s in spans:
        groups.setdefault(s["attributes"].get("lg.branch_id"), []).append(s)

    # Order: original first (branch_id=None), then branches by first span's start_time
    def sort_key(item):
        bid, branch_spans = item
        first_ts = branch_spans[0]["start_time"] if branch_spans else 0
        # 0 sorts ahead of 1, so the original trace leads the list.
        return (0 if bid is None else 1, first_ts or 0)

    branches = []
    for bid, branch_spans in sorted(groups.items(), key=sort_key):
        is_original = bid is None

        # Compute fork_point: the span immediately before this branch's first span
        fork_point = None
        if not is_original and branch_spans:
            first_index = position[branch_spans[0]["span_id"]]
            if first_index > 0:
                fork_point = spans[first_index - 1]["span_id"]

        branches.append({
            "branch_id": bid if not is_original else "__original__",
            "is_original": is_original,
            "spans": branch_spans,
            "meta": {
                "span_count": len(branch_spans),
                "fork_point": fork_point,
            },
        })

    return {"branches": branches}
103
+
104
+
105
+ # ── Checkpoints ────────────────────────────────────────────────
106
+
107
@app.get("/api/traces/{thread_id}/checkpoints")
def get_checkpoints(thread_id: str, request: Request):
    """List a thread's checkpoints together with the node(s) each one runs next.

    Returns ``{"checkpoints": []}`` when no graph is loaded.
    """
    graph = request.app.state.graph
    if not graph:
        return {"checkpoints": []}

    snapshots = list(
        graph.get_state_history({"configurable": {"thread_id": thread_id}})
    )

    def describe(snapshot):
        # One JSON-friendly summary per state snapshot.
        upcoming = list(snapshot.next) if snapshot.next else []
        return {
            "checkpoint_id": snapshot.config["configurable"]["checkpoint_id"],
            "next": upcoming,
            "has_messages": bool(snapshot.values.get("messages")),
        }

    return {"checkpoints": [describe(snapshot) for snapshot in snapshots]}
127
+
128
+
129
+ # ── Branch replay ──────────────────────────────────────────────
130
+
131
def _tool_call_field(tool_call, field):
    """Read ``field`` from a tool call given either as a dict or as an object."""
    if isinstance(tool_call, dict):
        return tool_call.get(field)
    return getattr(tool_call, field, None)


def _find_tool_call_id(messages, tool_name):
    """Return the id of the most recent tool call named ``tool_name``, or None."""
    for msg in reversed(messages):
        for tc in getattr(msg, "tool_calls", None) or []:
            if _tool_call_field(tc, "name") == tool_name:
                tool_call_id = _tool_call_field(tc, "id")
                if tool_call_id:
                    return tool_call_id
    return None


@app.post("/api/branch")
def branch_replay(req: BranchRequest, request: Request):
    """Fork a thread at a checkpoint, override one output, and replay the graph.

    Raises:
        HTTPException: 400 when no graph is loaded, the checkpoint cannot be
            read, the span type is unsupported, or no matching tool call is
            found; 500 when the replay itself raises.

    Returns:
        ``status``, ``thread_id``, the generated ``branch_id`` and the
        ``checkpoint_id`` forked from. ``status`` is ``"partial"`` with an
        extra ``error`` field when the replay started but did not finish.
    """
    graph = request.app.state.graph
    if not graph:
        raise HTTPException(400, "Graph not loaded — pass --app <module:graph>")

    config = {
        "configurable": {
            "thread_id": req.thread_id,
            "checkpoint_id": req.checkpoint_id,
        }
    }

    # Sanity-check the checkpoint exists and extract the state
    try:
        snapshot = graph.get_state(config)
    except Exception as e:
        raise HTTPException(400, f"Checkpoint {req.checkpoint_id} not found: {e}") from e

    full_config = snapshot.config

    # Ensure checkpoint_ns is present — LangGraph requires it when
    # resuming from a checkpoint.
    full_config.setdefault("configurable", {})
    full_config["configurable"].setdefault("checkpoint_ns", "")

    # ── Resolve real tool_call_id ──────────────────────────────
    # The UI sends the tool *name* (e.g. "get_weather") as
    # tool_call_id. We need to find the actual ID from the
    # AIMessage tool_calls in the checkpoint state.
    if req.span_type == "tool_call":
        tool_name = req.tool_call_id or ""
        messages = snapshot.values.get("messages", [])
        real_tool_call_id = _find_tool_call_id(messages, tool_name)

        if not real_tool_call_id:
            # Dict-shaped tool calls have no `.name` attribute, so names are
            # read through the helper instead.
            available = ", ".join(
                str(_tool_call_field(tc, "name"))
                for msg in messages
                for tc in (getattr(msg, "tool_calls", None) or [])
            )
            raise HTTPException(
                400,
                f"No tool call named '{tool_name}' found in checkpoint state "
                f"(messages: {len(messages)}). Available tool calls: "
                + available,
            )

        overridden_msg = ToolMessage(
            content=req.new_output,
            tool_call_id=real_tool_call_id,
        )
        node = req.node_name or "tools"

    elif req.span_type == "llm_call":
        overridden_msg = AIMessage(content=req.new_output)
        node = req.node_name or "agent"

    else:
        raise HTTPException(400, f"Unsupported span_type: {req.span_type}")

    # ── Replay ─────────────────────────────────────────────────
    branch_id = f"branch_{uuid.uuid4().hex[:12]}"
    try:
        with replay_trace(full_config, sqlite_path=request.app.state.db_path, branch_id=branch_id):
            result = replay_branch(
                graph=graph,
                config=full_config,
                node_name=node,
                new_values={"messages": [overridden_msg]},
            )
    except Exception as e:
        raise HTTPException(500, f"Branch replay failed: {e}") from e

    response = {
        "status": "ok",
        "thread_id": req.thread_id,
        "branch_id": branch_id,
        "checkpoint_id": full_config["configurable"].get("checkpoint_id"),
    }

    # replay_branch swallows errors raised while the graph runs and returns a
    # {"status": "partial", "error": ...} dict; pass that on instead of "ok".
    if isinstance(result, dict) and result.get("status") == "partial" and "error" in result:
        response["status"] = "partial"
        response["error"] = result["error"]

    return response
@@ -0,0 +1,150 @@
1
+ import argparse
2
+ import importlib
3
+ import os
4
+ import sqlite3
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import uvicorn
9
+ from fastapi.staticfiles import StaticFiles
10
+ from fastapi.responses import HTMLResponse, FileResponse
11
+
12
+ from agentstep.server.api import app
13
+ from agentstep.sdk.tracer import setup_otel
14
+ from langgraph.checkpoint.sqlite import SqliteSaver
15
+
16
+
17
+ def _resolve_ui_build() -> Path | None:
18
+ """Walk up from the package dir looking for ui/dist, then try CWD."""
19
+ # The package lives at <repo>/src/agentstep/server/cli.py
20
+ # We want <repo>/ui/dist — three levels up from this file.
21
+ here = Path(__file__).resolve().parent
22
+ candidates = [
23
+ here.parents[2] / "ui" / "dist", # repo-rooted: src/agentstep/server -> src/agentstep -> src -> repo
24
+ Path.cwd() / "ui" / "dist",
25
+ Path.cwd() / "dist",
26
+ ]
27
+ for p in candidates:
28
+ if (p / "index.html").is_file():
29
+ return p
30
+ return None
31
+
32
+
33
def parse_app_string(app_str: str):
    """Resolve ``module:attr`` or ``module.attr`` to an object.

    A colon takes precedence as the separator; otherwise the last dot is
    used. If the resolved object is callable and has no ``invoke`` attribute
    it is treated as a factory and called with no arguments.

    Raises:
        ValueError: If ``app_str`` contains neither ``:`` nor ``.``.
    """
    module_path, colon, attr = app_str.partition(":")
    if not colon:
        module_path, dot, attr = app_str.rpartition(".")
        if not dot:
            raise ValueError("Use module:graph or module.graph syntax")

    # Make modules in the current directory importable.
    cwd_on_path = "" in sys.path or "." in sys.path
    if not cwd_on_path:
        sys.path.insert(0, "")

    target = getattr(importlib.import_module(module_path), attr)

    is_factory = callable(target) and not hasattr(target, "invoke")
    return target() if is_factory else target
52
+
53
+
54
def _mount_spa(ui_path: Path) -> None:
    """Mount the built React app at /, with SPA fallback to index.html.

    Starlette tries routes in registration order. The /api/* routes are
    registered when the API module is imported, i.e. before this function
    runs, so they are matched ahead of the catch-all added here. The
    catch-all serves real files from the build directory (CSS, JS, icons)
    and falls back to index.html for client-side routes.
    """
    # Serve static assets (hashed files in /assets/) directly. StaticFiles
    # raises for a missing directory, so only mount it when it exists.
    assets_dir = ui_path / "assets"
    if assets_dir.is_dir():
        app.mount(
            "/assets",
            StaticFiles(directory=str(assets_dir)),
            name="assets",
        )
    # NOTE(review): a mount only matches paths *below* its prefix, so
    # /favicon.svg itself appears to be served by the catch-all rather than
    # by this mount — confirm, and drop the mount if so.
    app.mount(
        "/favicon.svg",
        StaticFiles(directory=str(ui_path), html=False),
        name="favicon",
    )

    root = ui_path.resolve()
    index_file = ui_path / "index.html"

    @app.get("/", include_in_schema=False)
    @app.get("/{full_path:path}", include_in_schema=False)
    async def spa(full_path: str = ""):
        # If a real file exists in the dist dir (e.g. icons.svg), serve it.
        # The path comes straight from the URL, so resolve it and require it
        # to stay inside the build directory; "../" segments must not reach
        # other files on disk.
        if full_path:
            target = (root / full_path).resolve()
            if target.is_relative_to(root) and target.is_file():
                return FileResponse(str(target))
        # Otherwise, SPA fallback to index.html.
        return FileResponse(str(index_file))
83
+
84
+
85
def main():
    """CLI entry point: load the graph, open the trace database, serve the debugger.

    Exits with status 1 when the graph given by ``--app`` cannot be imported.
    """
    parser = argparse.ArgumentParser(description="Agent Replay Debugger")
    parser.add_argument("db", help="Path to trace SQLite database (e.g. trace.sqlite)")
    parser.add_argument(
        "--app", required=True,
        help="Import path to the compiled LangGraph (e.g. sample:graph)",
    )
    parser.add_argument("--port", type=int, default=7337, help="Port to serve on")
    parser.add_argument(
        "--dev-ui", action="store_true",
        help="Backend-only mode for development. Disables the bundled UI. "
             "Start the Vite dev server separately (cd ui && npm run dev).",
    )
    parser.add_argument(
        "--host", default="127.0.0.1",
        help="Host to bind to (default 127.0.0.1). Use 0.0.0.0 for LAN access.",
    )

    args = parser.parse_args()

    print(f"Loading graph from {args.app}…")
    try:
        graph = parse_app_string(args.app)
    except Exception as e:
        print(f"Failed to load graph: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Connecting to {args.db}…")
    conn = sqlite3.connect(args.db, check_same_thread=False)
    setup_otel(args.db)

    checkpointer = SqliteSaver(conn)
    checkpointer.setup()

    # Shared state read by the API handlers via request.app.state.
    app.state.db_conn = conn
    app.state.db_path = args.db
    app.state.graph = graph
    app.state.checkpointer = checkpointer

    if args.dev_ui:
        print(
            "Dev UI mode: bundled UI disabled.\n"
            "  Start the Vite dev server in another terminal:\n"
            "    cd ui && npm install && npm run dev\n"
            "  Then open http://localhost:5173"
        )
    else:
        ui_path = _resolve_ui_build()
        if ui_path is not None:
            print(f"Serving UI from {ui_path}")
            _mount_spa(ui_path)
        else:
            # The flag that disables the bundled UI is --dev-ui.
            print(
                "WARNING: UI build not found.\n"
                "  Run from ui/:  npm run build\n"
                "  Or start the Vite dev server on :5173 and pass --dev-ui.",
                file=sys.stderr,
            )

    print(f"Replay Debugger -> http://{args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")
147
+
148
+
149
+ if __name__ == "__main__":
150
+ main()
@@ -0,0 +1,72 @@
1
+ from typing import Any
2
+
3
+
4
+ def replay_branch(
5
+ graph: Any,
6
+ config: dict,
7
+ node_name: str,
8
+ new_values: dict | Any,
9
+ ) -> dict:
10
+ """Fork execution from a specific checkpoint with modified values.
11
+
12
+ Args:
13
+ graph: The compiled LangGraph with a checkpointer.
14
+ config: Config containing ``thread_id`` and optionally ``checkpoint_id``.
15
+ node_name: The node to associate the new values with (e.g. ``"tools"``).
16
+ new_values: State update dict (e.g. ``{"messages": [ToolMessage(...)]}``).
17
+
18
+ Returns:
19
+ The final state after the branched execution completes.
20
+ """
21
+ # 1. Validate the graph has a checkpointer
22
+ if not getattr(graph, "checkpointer", None):
23
+ raise ValueError(
24
+ "Graph must have a checkpointer to replay branches. "
25
+ "Compile with e.g. checkpointer=SqliteSaver(conn)."
26
+ )
27
+
28
+ # 2. Validate the checkpoint exists
29
+ try:
30
+ snapshot = graph.get_state(config)
31
+ except Exception as e:
32
+ raise ValueError(
33
+ f"Could not load checkpoint for config {config}: {e}"
34
+ ) from e
35
+
36
+ # 3. Validate the node exists in the graph
37
+ if node_name not in graph.nodes:
38
+ raise ValueError(
39
+ f"Node '{node_name}' not found in graph. "
40
+ f"Available: {list(graph.nodes.keys())}"
41
+ )
42
+
43
+ # 4. Create a branched checkpoint with the overridden state
44
+ new_config = graph.update_state(
45
+ config=snapshot.config,
46
+ values=new_values,
47
+ as_node=node_name,
48
+ )
49
+
50
+ # Ensure checkpoint_ns is present — required by LangGraph for
51
+ # resuming from a checkpoint via invoke() on some versions.
52
+ new_config.setdefault("configurable", {})
53
+ if "checkpoint_ns" not in new_config["configurable"]:
54
+ new_config["configurable"]["checkpoint_ns"] = (
55
+ snapshot.config.get("configurable", {}).get("checkpoint_ns", "")
56
+ )
57
+
58
+ # Propagate callbacks from the original config so the tracer
59
+ # works during the branched execution.
60
+ if "callbacks" in config:
61
+ new_config["callbacks"] = config["callbacks"]
62
+
63
+ # 5. Resume from the new branch
64
+ try:
65
+ return graph.invoke(None, config=new_config)
66
+ except Exception as e:
67
+ partial = graph.get_state(new_config)
68
+ return {
69
+ "status": "partial",
70
+ "error": str(e),
71
+ "checkpoint_id": partial.config["configurable"].get("checkpoint_id"),
72
+ }
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentstep
3
+ Version: 0.1.1
4
+ Summary: Time-travel debugger and branch explorer for LangGraph AI agents. Capture execution traces, inspect LLM and tool calls, and branch from any point with overridden outputs.
5
+ Project-URL: Homepage, https://github.com/vanshvisariya/agentstep
6
+ Project-URL: Repository, https://github.com/vanshvisariya/agentstep
7
+ Project-URL: Issues, https://github.com/vanshvisariya/agentstep/issues
8
+ Author-email: vansh visariya <vanshvisariya.workdev@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agent,branch,debugger,langgraph,llm,opentelemetry,otel,replay,tracing
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Framework :: FastAPI
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Debuggers
19
+ Requires-Python: >=3.13
20
+ Requires-Dist: fastapi>=0.138.0
21
+ Requires-Dist: langchain-groq>=1.1.3
22
+ Requires-Dist: langchain-openai>=1.3.3
23
+ Requires-Dist: langgraph-checkpoint-sqlite>=3.1.0
24
+ Requires-Dist: langgraph>=1.2.6
25
+ Requires-Dist: opentelemetry-api>=1.43.0
26
+ Requires-Dist: opentelemetry-sdk>=1.43.0
27
+ Requires-Dist: opentelemetry-semantic-conventions>=0.64b0
28
+ Requires-Dist: pydantic>=2.13.4
29
+ Requires-Dist: python-dotenv>=1.2.2
30
+ Requires-Dist: uvicorn>=0.49.0
31
+ Description-Content-Type: text/markdown
32
+
33
+ # Agent Step
34
+
35
+ A time-travel debugger and branch explorer for **[LangGraph](https://langchain-ai.github.io/langgraph/)** agents. Capture every LLM call and tool invocation as a span, browse them in a web timeline, then **branch from any point** — override the output and replay to see how the rest of the graph would behave differently.
36
+
37
+ Think `pdb` + a REPL for agent workflows, with a SQLite file you can hand to a teammate.
38
+
39
+ ---
40
+
41
+ ## Why you'd use it
42
+
43
+ When an agent goes off the rails, you usually want to answer one of:
44
+
45
+ - *"What did the LLM actually say at step 4?"* — captured.
46
+ - *"What would have happened if the weather tool returned snow instead of fog?"* — **branch and replay.**
47
+ - *"Why did the agent loop?"* — the timeline shows every call with timing and full prompts/outputs.
48
+
49
+ Without this, you're adding `print()` statements and re-running with a different seed. With this, you replay against the original trace.
50
+
51
+ ---
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install agentstep
57
+ ```
58
+
59
+ Or from the repo (development):
60
+
61
+ ```bash
62
+ git clone https://github.com/vanshvisariya/agentstep
62
+ cd agentstep
64
+ pip install -e .
65
+ ```
66
+
67
+ Requires **Python 3.13+**.
68
+
69
+ ---
70
+
71
+ ## Quick start
72
+
73
+ This walks through using Agent Step on a LangGraph agent in your own project.
74
+
75
+ ### 1. Wrap your graph execution
76
+
77
+ The SDK exposes one thing: `replay_trace`, a context manager that instruments your graph with OpenTelemetry callbacks and writes spans to a SQLite file.
78
+
79
+ ```python
80
+ from langgraph.graph import StateGraph, START, END
81
+ from agentstep.sdk.tracer import replay_trace
82
+
83
+ # build your compiled graph the way you already do
84
+ graph = ...
85
+
86
+ # a thread_id identifies one conversation/run in the trace
87
+ config = {"configurable": {"thread_id": "user-42"}}
88
+
89
+ with replay_trace(config, sqlite_path="trace.sqlite") as cfg:
90
+ for chunk in graph.stream(inputs, cfg, stream_mode="values"):
91
+ print(chunk)
92
+ ```
93
+
94
+ That's the entire API surface for instrumentation. The context manager:
95
+
96
+ 1. Sets up an OpenTelemetry tracer pointed at your SQLite file.
97
+ 2. Injects a callback handler into `config["callbacks"]`.
98
+ 3. Records every `llm_call` and `tool_call` span with timing, prompts, completions, and outputs.
99
+
100
+ The original `config` is mutated in place; you don't need to swap it back.
101
+
102
+ ### 2. Launch the debugger
103
+
104
+ In a terminal:
105
+
106
+ ```bash
107
+ replay-debugger trace.sqlite --app my_module:graph
108
+ ```
109
+
110
+ - `trace.sqlite` is the file you wrote spans to.
111
+ - `--app my_module:graph` is a Python import path to your compiled graph. Three forms work:
112
+ - `my_module:graph` — `graph` is a compiled LangGraph instance.
113
+ - `my_module.graph` — same thing, dotted form.
114
+ - `my_module:make_graph` — `make_graph` is a callable that returns a compiled graph (it gets called at startup).
115
+
116
+ Open <http://localhost:7337>.
117
+
118
+ You should see your thread in the left sidebar and a timeline of spans on the right.
119
+
120
+ ### 3. Branch from any span
121
+
122
+ 1. Click any span — the right panel shows the checkpoint, attributes, and full completion.
123
+ 2. Click **branch from here**.
124
+ 3. Edit the override output (new tool result or new LLM completion).
125
+ 4. Click **run_branch**.
126
+
127
+ The original trace stays intact. The fork becomes a new branch in the timeline, labeled with a small `b0` chip, color-coded so you can tell at a glance which branch you're looking at.
128
+
129
+ ---
130
+
131
+ ## What gets captured
132
+
133
+ | Span type | What's recorded |
134
+ |---|---|
135
+ | `llm_call` | prompt, completion, system, input/output token counts, wall time |
136
+ | `tool_call` | tool name, input string, output string, wall time |
137
+
138
+ Every span carries:
139
+
140
+ - `lg.thread_id` — the LangGraph `thread_id` so spans from one conversation group together.
141
+ - `lg.branch_id` — set automatically on spans created during a branch replay, so the debugger can group them separately.
142
+
143
+ Other graph node executions, sub-graphs, and conditional edges are not yet instrumented as spans — but the checkpoint data is still preserved by LangGraph itself, so branch replay works regardless.
144
+
145
+ ---
146
+
147
+ ## Working example
148
+
149
+ The repo ships a runnable demo (`sample.py`) with a fake LLM so you don't need any API keys:
150
+
151
+ ```bash
152
+ git clone https://github.com/vanshvisariya/agentstep
153
+ cd agentstep
154
+ pip install -e .
155
+ python sample.py # writes trace.sqlite
156
+ replay-debugger trace.sqlite --app sample:graph
157
+ ```
158
+
159
+ Then open <http://localhost:7337>. Click the LLM call → click **branch from here** → change the response → watch the timeline fork.
160
+
161
+ ---
162
+
163
+ ## Development workflow
164
+
165
+ When hacking on the debugger itself, run the backend and frontend with hot reload:
166
+
167
+ ```bash
168
+ # Terminal 1 — backend on :7337, API only
169
+ replay-debugger trace.sqlite --app sample:graph --dev-ui
170
+
171
+ # Terminal 2 — Vite dev server on :5173 (proxies /api/* to :7337)
172
+ cd ui
173
+ npm install
174
+ npm run dev
175
+ ```
176
+
177
+ Open <http://localhost:5173> instead. Edits to React files hot-reload; backend edits need a restart.
178
+
179
+ ---
180
+
181
+ ## Programmatic branch replay
182
+
183
+ The web UI is the main way to branch, but the same operation is available as a function for scripted use:
184
+
185
+ ```python
186
+ from langchain_core.messages import ToolMessage
187
+ from agentstep.server.replayer import replay_branch
188
+
189
+ result = replay_branch(
190
+ graph=graph, # your compiled LangGraph (must have a checkpointer)
191
+ config={"configurable": {"thread_id": "user-42", "checkpoint_id": "1efb..."}}, # checkpoint_id from GET /api/traces/{tid}/checkpoints
192
+ node_name="tools", # or "agent"
193
+ new_values={"messages": [
194
+ ToolMessage(content="It's snowing in SF.", tool_call_id="call_123"), # id of the tool call being overridden
195
+ ]},
196
+ )
197
+ print(result) # final graph state of the branched run
198
+ ```
199
+
200
+ Useful for regression tests, CI, or batch-exploration of failure modes.
201
+
202
+ ---
203
+
204
+ ## API reference
205
+
206
+ The FastAPI server (started by the `replay-debugger` CLI) exposes:
207
+
208
+ | Method | Path | Purpose |
209
+ |---|---|---|
210
+ | `GET` | `/api/threads` | List all thread IDs in the database. |
211
+ | `GET` | `/api/traces/{thread_id}` | All spans for a thread, grouped by branch. |
212
+ | `GET` | `/api/traces/{thread_id}/checkpoints` | All checkpoints for a thread. |
213
+ | `POST` | `/api/branch` | Fork the graph from a checkpoint with an overridden output. |
214
+
215
+ `POST /api/branch` body:
216
+
217
+ ```json
218
+ {
219
+ "thread_id": "user-42",
220
+ "checkpoint_id": "1efb...",
221
+ "node_name": "agent",
222
+ "span_type": "llm_call",
223
+ "tool_call_id": null,
224
+ "new_output": "The weather is sunny and 72°F."
225
+ }
226
+ ```
227
+
228
+ Response: `{"status": "ok", "thread_id": "user-42", "branch_id": "branch_a1b2c3...", "checkpoint_id": "1efb..."}`.
229
+
230
+ ---
231
+
232
+ ## Where things live in your file
233
+
234
+ After running the demo once:
235
+
236
+ ```
237
+ trace.sqlite
238
+ ├── otel_spans table ← every llm_call / tool_call, with start/end nanoseconds + JSON attributes
239
+ ├── checkpoints table ← LangGraph state snapshots (one per node execution)
240
+ └── thread metadata ← implicit, keyed off lg.thread_id in span attributes
241
+ ```
242
+
243
+ Everything is one file. Copy it, share it, commit it for reproduction.
244
+
245
+ ---
246
+
247
+ ## Limitations
248
+
249
+ - **Python 3.13+ only** — pinned in `pyproject.toml`.
250
+ - **LangGraph checkpointers must use SQLite** — `SqliteSaver` is the only supported backend currently; the branch endpoint reads from the same file the tracer wrote to.
251
+ - **No remote export** — spans stay local. (The exporter is OpenTelemetry-native, so wiring Jaeger/Zipkin out the side is doable but not built in.)
252
+ - **Two span types** — only LLM and tool calls. If you want full graph-node tracing, file an issue.
253
+
254
+ ---
255
+
256
+ ## Contributing
257
+
258
+ ```bash
259
+ git clone https://github.com/vanshvisariya/agentstep
260
+ cd agentstep
261
+ pip install -e .
262
+ cd ui && npm install
263
+ ```
264
+
265
+ Layout:
266
+
267
+ ```
268
+ src/agentstep/
269
+ ├── sdk/
270
+ │ ├── tracer.py ← replay_trace() + ReplayCallbackHandler
271
+ │ └── exporter.py ← OTel span exporter → SQLite
272
+ └── server/
273
+ ├── api.py ← FastAPI endpoints
274
+ ├── replayer.py ← branch replay logic (used by API + programmatic)
275
+ └── cli.py ← `replay-debugger` entry point
276
+ ui/
277
+ └── src/App.tsx ← single-file React app
278
+ sample.py ← runnable weather-agent demo
279
+ ```
280
+
281
+ ---
282
+
283
+ ## License
284
+
285
+ MIT — see [LICENSE](./LICENSE).
@@ -0,0 +1,10 @@
1
+ agentstep/sdk/exporter.py,sha256=FJf7R69nISXjBjhjjF54iA_h7AiiO2_GYxA83-walVQ,2641
2
+ agentstep/sdk/tracer.py,sha256=gmSg4nyqsCYHuURJ75GrR2NUrahLSXZ_Hic4RCqOpsg,4701
3
+ agentstep/server/api.py,sha256=ePvyKC7UN2YO4t02h-VcghFNT5zecvfQ96Z4KMAMO7Y,8272
4
+ agentstep/server/cli.py,sha256=Tmz2IaYIAF38yCxQdQGy4UAXPugJ_dMTwQ9r9q1mGvI,5061
5
+ agentstep/server/replayer.py,sha256=v58H_tkD36RsRfCIOeHxCQkexHieMXZipD2FakUR7BA,2512
6
+ agentstep-0.1.1.dist-info/METADATA,sha256=X-X8iN4ybCZnXhQmJ98bSRPS1qovuq4uG-_2g4d97m8,9403
7
+ agentstep-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ agentstep-0.1.1.dist-info/entry_points.txt,sha256=591qUoQFp-P9_3_ewiGmj04DwvN83cAl5qB8IVHhKqI,62
9
+ agentstep-0.1.1.dist-info/licenses/LICENSE,sha256=J4oigRWzmkf4ySJTfap_VMVYs5-nv4YFrKZwRQ7rjKY,1090
10
+ agentstep-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ replay-debugger = agentstep.server.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 vansh visariya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.