replai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replai-0.1.0/.github/workflows/ci.yml +18 -0
- replai-0.1.0/.gitignore +23 -0
- replai-0.1.0/CONTRIBUTING.md +25 -0
- replai-0.1.0/LICENSE +21 -0
- replai-0.1.0/PKG-INFO +106 -0
- replai-0.1.0/README.md +80 -0
- replai-0.1.0/example.py +45 -0
- replai-0.1.0/pyproject.toml +38 -0
- replai-0.1.0/src/replai/__init__.py +109 -0
- replai-0.1.0/src/replai/cli.py +64 -0
- replai-0.1.0/src/replai/context.py +69 -0
- replai-0.1.0/src/replai/instrument.py +96 -0
- replai-0.1.0/src/replai/models.py +47 -0
- replai-0.1.0/src/replai/store.py +106 -0
- replai-0.1.0/src/replai/viewer/__init__.py +0 -0
- replai-0.1.0/src/replai/viewer/app.py +29 -0
- replai-0.1.0/src/replai/viewer/static/index.html +127 -0
- replai-0.1.0/tests/__init__.py +0 -0
- replai-0.1.0/tests/test_store.py +66 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
test:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
python-version: ["3.10", "3.12"]
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: ${{ matrix.python-version }}
|
|
16
|
+
- run: pip install -e ".[dev]"
|
|
17
|
+
- run: ruff check .
|
|
18
|
+
- run: pytest -q
|
replai-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
|
|
11
|
+
# Tooling
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.mypy_cache/
|
|
15
|
+
|
|
16
|
+
# replai local data
|
|
17
|
+
.replai/
|
|
18
|
+
*.db
|
|
19
|
+
|
|
20
|
+
# OS / editors
|
|
21
|
+
.DS_Store
|
|
22
|
+
.idea/
|
|
23
|
+
.vscode/
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
replai is a personal project maintained solely by its author. The source is open
|
|
4
|
+
(MIT licensed) so you are free to use, fork, and adapt it for your own needs.
|
|
5
|
+
|
|
6
|
+
## Pull requests
|
|
7
|
+
|
|
8
|
+
External pull requests are **not accepted**. All code is written by the author.
|
|
9
|
+
If you open a PR it will be closed — please don't take it personally, this is just
|
|
10
|
+
how the project is run.
|
|
11
|
+
|
|
12
|
+
## Bug reports
|
|
13
|
+
|
|
14
|
+
Bug reports are welcome. If something doesn't work:
|
|
15
|
+
|
|
16
|
+
1. Open an [issue](../../issues).
|
|
17
|
+
2. Describe what you did, what you expected, and what actually happened.
|
|
18
|
+
3. Include your Python version, replai version, and a minimal example if you can.
|
|
19
|
+
|
|
20
|
+
Feature ideas are fine to raise in an issue too, but there's no guarantee they'll
|
|
21
|
+
be picked up.
|
|
22
|
+
|
|
23
|
+
## Fixes
|
|
24
|
+
|
|
25
|
+
If you've found a bug, report it in an issue — the author will handle the fix.
|
replai-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Robert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
replai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: replai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first, framework-agnostic debugger for LLM agents — see, replay, and diff what your agent did.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Rostanic20/replai
|
|
6
|
+
Project-URL: Issues, https://github.com/Rostanic20/replai/issues
|
|
7
|
+
Author: Robert
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,ai,debugging,llm,local-first,observability,tracing
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: fastapi>=0.110; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
21
|
+
Requires-Dist: uvicorn>=0.27; extra == 'dev'
|
|
22
|
+
Provides-Extra: viewer
|
|
23
|
+
Requires-Dist: fastapi>=0.110; extra == 'viewer'
|
|
24
|
+
Requires-Dist: uvicorn>=0.27; extra == 'viewer'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# replai
|
|
28
|
+
|
|
29
|
+
**A local-first debugger for LLM agents.** See exactly what your agent did — every model call, tool call, and decision — step by step. No account, no cloud, no Docker. Just `pip install` and look.
|
|
30
|
+
|
|
31
|
+
> ⚠️ **Early alpha (v0.1).** The capture engine and local viewer work today. Replay and run-diffing are next on the roadmap.
|
|
32
|
+
|
|
33
|
+
<!-- TODO: drop a GIF of the timeline view here — this is the most important part of the README. -->
|
|
34
|
+
|
|
35
|
+
## Why
|
|
36
|
+
|
|
37
|
+
When an AI agent does the wrong thing, you're usually staring at a wall of logs trying to reconstruct what happened. Production observability platforms exist, but they're heavy — dashboards, servers, accounts — built for *monitoring at scale*, not for the moment you're on your laptop going *"wait, why did it call that tool?"*
|
|
38
|
+
|
|
39
|
+
`replai` is the other thing: a **debugger** for the dev inner loop. Drop it in, run your agent, and get a clickable, step-by-step timeline of everything it did — locally.
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
pip install "replai[viewer]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import replai
|
|
51
|
+
replai.init() # auto-captures Anthropic & OpenAI calls
|
|
52
|
+
|
|
53
|
+
# ... run your agent exactly as you normally would ...
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Then open the viewer:
|
|
57
|
+
|
|
58
|
+
```sh
|
|
59
|
+
replai ui
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Want to annotate your own steps?
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
with replai.run("my-agent"):
|
|
66
|
+
with replai.span("retrieve", type="tool_call") as s:
|
|
67
|
+
s.output = my_retriever(query)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Or decorate functions and tools:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
@replai.tool
|
|
74
|
+
def web_search(query): ...
|
|
75
|
+
|
|
76
|
+
@replai.trace
|
|
77
|
+
def plan(goal): ...
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Try it with no API keys
|
|
81
|
+
|
|
82
|
+
```sh
|
|
83
|
+
python example.py
|
|
84
|
+
replai ui
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## How it works
|
|
88
|
+
|
|
89
|
+
- **Auto-instrumentation** wraps the Anthropic / OpenAI clients, so calls are captured with zero code changes.
|
|
90
|
+
- **`@replai.trace` / `@replai.tool` / `replai.span()`** annotate your own functions and tool calls. Spans nest automatically.
|
|
91
|
+
- Everything is stored in a local **SQLite** file (`~/.replai/replai.db`). Nothing leaves your machine.
|
|
92
|
+
- A small **FastAPI** viewer renders each run as a step-by-step timeline.
|
|
93
|
+
|
|
94
|
+
## Roadmap
|
|
95
|
+
|
|
96
|
+
- [x] Capture engine (LLM + tool + function spans, sync & async)
|
|
97
|
+
- [x] Local timeline viewer
|
|
98
|
+
- [ ] **Replay** — step through a run; re-run from any step
|
|
99
|
+
- [ ] **Diff** — compare two runs, highlight where they diverged
|
|
100
|
+
- [ ] Framework adapters (LangChain, LlamaIndex, …)
|
|
101
|
+
- [ ] MCP tool-call capture
|
|
102
|
+
- [ ] OpenTelemetry GenAI export
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT
|
replai-0.1.0/README.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# replai
|
|
2
|
+
|
|
3
|
+
**A local-first debugger for LLM agents.** See exactly what your agent did — every model call, tool call, and decision — step by step. No account, no cloud, no Docker. Just `pip install` and look.
|
|
4
|
+
|
|
5
|
+
> ⚠️ **Early alpha (v0.1).** The capture engine and local viewer work today. Replay and run-diffing are next on the roadmap.
|
|
6
|
+
|
|
7
|
+
<!-- TODO: drop a GIF of the timeline view here — this is the most important part of the README. -->
|
|
8
|
+
|
|
9
|
+
## Why
|
|
10
|
+
|
|
11
|
+
When an AI agent does the wrong thing, you're usually staring at a wall of logs trying to reconstruct what happened. Production observability platforms exist, but they're heavy — dashboards, servers, accounts — built for *monitoring at scale*, not for the moment you're on your laptop going *"wait, why did it call that tool?"*
|
|
12
|
+
|
|
13
|
+
`replai` is the other thing: a **debugger** for the dev inner loop. Drop it in, run your agent, and get a clickable, step-by-step timeline of everything it did — locally.
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
pip install "replai[viewer]"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quickstart
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
import replai
|
|
25
|
+
replai.init() # auto-captures Anthropic & OpenAI calls
|
|
26
|
+
|
|
27
|
+
# ... run your agent exactly as you normally would ...
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Then open the viewer:
|
|
31
|
+
|
|
32
|
+
```sh
|
|
33
|
+
replai ui
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Want to annotate your own steps?
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
with replai.run("my-agent"):
|
|
40
|
+
with replai.span("retrieve", type="tool_call") as s:
|
|
41
|
+
s.output = my_retriever(query)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Or decorate functions and tools:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
@replai.tool
|
|
48
|
+
def web_search(query): ...
|
|
49
|
+
|
|
50
|
+
@replai.trace
|
|
51
|
+
def plan(goal): ...
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Try it with no API keys
|
|
55
|
+
|
|
56
|
+
```sh
|
|
57
|
+
python example.py
|
|
58
|
+
replai ui
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## How it works
|
|
62
|
+
|
|
63
|
+
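- **Auto-instrumentation** — after `replai.init()`, the Anthropic (`messages.create`) and OpenAI (`chat.completions.create`) client calls are wrapped, so they are captured without any other changes to your agent code.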
|
|
64
|
+
- **`@replai.trace` / `@replai.tool` / `replai.span()`** annotate your own functions and tool calls. Spans nest automatically.
|
|
65
|
+
- Everything is stored in a local **SQLite** file (`~/.replai/replai.db` by default; override it with the `REPLAI_DB` environment variable or `replai.init(db=...)`). Nothing leaves your machine.
|
|
66
|
+
- A small **FastAPI** viewer renders each run as a step-by-step timeline.
|
|
67
|
+
|
|
68
|
+
## Roadmap
|
|
69
|
+
|
|
70
|
+
- [x] Capture engine (LLM + tool + function spans, sync & async)
|
|
71
|
+
- [x] Local timeline viewer
|
|
72
|
+
- [ ] **Replay** — step through a run; re-run from any step
|
|
73
|
+
- [ ] **Diff** — compare two runs, highlight where they diverged
|
|
74
|
+
- [ ] Framework adapters (LangChain, LlamaIndex, …)
|
|
75
|
+
- [ ] MCP tool-call capture
|
|
76
|
+
- [ ] OpenTelemetry GenAI export
|
|
77
|
+
|
|
78
|
+
## License
|
|
79
|
+
|
|
80
|
+
MIT
|
replai-0.1.0/example.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""A runnable demo — no API keys needed.
|
|
2
|
+
|
|
3
|
+
python example.py
|
|
4
|
+
replai ui
|
|
5
|
+
"""
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
import replai
|
|
9
|
+
|
|
10
|
+
replai.init(instrument=False)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@replai.tool
def web_search(query: str):
    """Stand-in search tool for the demo; returns two canned results.

    Decorated with ``@replai.tool`` so each call is recorded as a tool call.
    """
    time.sleep(0.15)  # pause briefly, standing in for a real lookup
    first_hit = f"result about {query}"
    return [first_hit, "another result"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@replai.trace(name="plan")
def plan(goal: str):
    """Demo planning step: record one canned LLM call and return a fixed plan."""
    user_turn = {"role": "user", "content": goal}
    replai.record_llm_call(
        model="claude-opus-4",
        input=[user_turn],
        output="I'll search the web, then summarize.",
        tokens_in=14,
        tokens_out=9,
    )
    return "search then summarize"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def agent(goal: str):
    """Run the demo agent inside one recorded run named "demo-agent".

    The run contains a plan step, a tool call and a final canned LLM call.
    """
    with replai.run("demo-agent", goal=goal):
        plan(goal)
        hits = web_search("python sqlite tutorial")
        summary_call = dict(
            model="claude-opus-4",
            input=str(hits),
            output="Here is the summary you asked for.",
            tokens_in=22,
            tokens_out=18,
        )
        replai.record_llm_call(**summary_call)
        return "done"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
if __name__ == "__main__":
    # Record a single demo run, then tell the user how to inspect it.
    demo_goal = "learn how to use sqlite in python"
    agent(demo_goal)
    print("Recorded a run. Now launch the viewer:\n\n replai ui\n")
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "replai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Local-first, framework-agnostic debugger for LLM agents — see, replay, and diff what your agent did."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Robert" }]
|
|
13
|
+
keywords = ["llm", "agents", "observability", "debugging", "ai", "tracing", "local-first"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Software Development :: Debuggers",
|
|
20
|
+
]
|
|
21
|
+
dependencies = []
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
viewer = ["fastapi>=0.110", "uvicorn>=0.27"]
|
|
25
|
+
dev = ["pytest>=8", "ruff>=0.4", "fastapi>=0.110", "uvicorn>=0.27"]
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
replai = "replai.cli:main"
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/Rostanic20/replai"
|
|
32
|
+
Issues = "https://github.com/Rostanic20/replai/issues"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/replai"]
|
|
36
|
+
|
|
37
|
+
[tool.ruff]
|
|
38
|
+
line-length = 100
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""replai — a local-first, framework-agnostic debugger for LLM agents.
|
|
2
|
+
|
|
3
|
+
See exactly what your agent did, step by step — then replay and diff runs.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import contextlib
|
|
9
|
+
import functools
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any, Optional
|
|
12
|
+
|
|
13
|
+
from . import context as _ctx
|
|
14
|
+
from .models import Run, Span
|
|
15
|
+
from .store import Store
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
__all__ = ["init", "run", "span", "trace", "tool", "record_llm_call", "Store", "Run", "Span"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def init(db: Optional[str] = None, instrument: bool = True) -> Store:
    """Configure replai for this process. Call once at startup.

    db: path to the local SQLite file (defaults to ~/.replai/replai.db).
    instrument: when true, patch the Anthropic & OpenAI clients so their
        calls are captured automatically.

    Returns the newly created Store, which is also registered as the active one.
    """
    active_store = Store(db)
    _ctx.set_store(active_store)
    if not instrument:
        return active_store

    # Imported only when patching was requested.
    from .instrument import instrument as apply_patches

    apply_patches()
    return active_store
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@contextlib.contextmanager
def run(name: str = "run", **metadata):
    """Open a run — the top-level container for everything an agent does.

    name: label stored with the run.
    **metadata: arbitrary keyword values saved alongside the run.

    Yields the ``Run``. It is saved on entry and saved again on exit with its
    end timestamp set, whether or not the body raised.
    """
    store = _ctx.get_store()
    r = Run(name=name, metadata=metadata)
    store.save_run(r)
    token = _ctx.enter_run(r)
    try:
        yield r
    finally:
        try:
            r.end = time.time()
            store.save_run(r)
        finally:
            # Restore the previous run even if the final save fails; otherwise
            # later spans in this context would attach to a finished run.
            _ctx.exit_run(token)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@contextlib.contextmanager
def span(name: str, type: str = "function", input: Any = None, **metadata):
    """Record a step. Nests automatically inside the current run/span.

    name: label for the step.
    type: span category ("function" by default).
    input: value stored as the span's input.
    **metadata: extra keyword values stored with the span.

    Yields the ``Span`` so the caller can set fields such as ``output``.
    The span is saved on entry and again on exit with its end timestamp.
    If the body raises, ``repr`` of the exception is stored in ``error``
    and the exception is re-raised.
    """
    store = _ctx.get_store()
    r = _ctx.ensure_run()
    sp = Span(
        run_id=r.id, name=name, type=type,
        parent_id=_ctx.current_span_id(), input=input, metadata=metadata,
    )
    store.save_span(sp)
    _ctx.push_span(sp.id)
    try:
        yield sp
    except BaseException as exc:
        # BaseException, not Exception: asyncio.CancelledError and
        # KeyboardInterrupt should also show up on the span.
        sp.error = repr(exc)
        raise
    finally:
        try:
            sp.end = time.time()
            store.save_span(sp)
        finally:
            # Always unwind the span stack, even if the final save fails,
            # so later spans do not get this one as their parent.
            _ctx.pop_span()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def trace(fn=None, *, name: Optional[str] = None, type: str = "function"):
    """Decorator that records a function call as a span (sync or async).

    Works bare (``@trace``) or with arguments (``@trace(name="plan")``).

    fn: the function to wrap when used bare; otherwise ``None``.
    name: span name; defaults to the function's ``__name__``.
    type: span category passed to ``span``.

    The wrapper stores the call's positional and keyword arguments as the
    span input and the return value as the span output.
    """
    # inspect.iscoroutinefunction replaces asyncio.iscoroutinefunction,
    # which is deprecated since Python 3.14. Imported locally so this
    # function does not depend on a module-level import being present.
    import inspect

    def deco(f):
        span_name = name or getattr(f, "__name__", "fn")

        if inspect.iscoroutinefunction(f):
            @functools.wraps(f)
            async def awrapper(*args, **kwargs):
                with span(span_name, type=type, input={"args": args, "kwargs": kwargs}) as s:
                    out = await f(*args, **kwargs)
                    s.output = out
                    return out
            return awrapper

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            with span(span_name, type=type, input={"args": args, "kwargs": kwargs}) as s:
                out = f(*args, **kwargs)
                s.output = out
                return out
        return wrapper

    # Bare use passes the function directly; parameterised use passes nothing.
    return deco(fn) if callable(fn) else deco
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def tool(fn=None, *, name: Optional[str] = None):
    """Decorator for tool calls: ``@trace`` with the span type "tool_call".

    Usable bare (``@tool``) or with a custom span name (``@tool(name=...)``).
    """
    trace_options = {"name": name, "type": "tool_call"}
    return trace(fn, **trace_options)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def record_llm_call(model: str, input: Any, output: Any, *,
                    tokens_in: Optional[int] = None, tokens_out: Optional[int] = None,
                    name: str = "llm_call") -> Span:
    """Record one LLM call by hand, for code that is not auto-instrumented.

    model / input / output: what was called, what was sent, what came back.
    tokens_in / tokens_out: optional token counts.
    name: span name (defaults to "llm_call").

    Returns the saved span, whose type is "llm_call".
    """
    llm_fields = {
        "input": input,
        "output": output,
        "model": model,
        "tokens_in": tokens_in,
        "tokens_out": tokens_out,
    }
    return _ctx.record_span(name, "llm_call", **llm_fields)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main(argv=None) -> None:
    """Entry point for the ``replai`` command-line tool.

    argv: argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Sub-commands are ``ui``, ``runs`` and ``version``. With no sub-command
    the help text is printed.
    """
    parser = argparse.ArgumentParser(
        prog="replai", description="Local-first debugger for LLM agents."
    )
    commands = parser.add_subparsers(dest="command")

    ui_parser = commands.add_parser("ui", help="Launch the local viewer")
    ui_parser.add_argument("--host", default="127.0.0.1")
    ui_parser.add_argument("--port", type=int, default=8473)
    ui_parser.add_argument("--db", default=None)

    commands.add_parser("runs", help="List recorded runs")
    commands.add_parser("version", help="Show the installed version")

    args = parser.parse_args(argv)
    chosen = args.command

    if chosen == "ui":
        _launch_ui(args.host, args.port, args.db)
        return
    if chosen == "runs":
        _list_runs()
        return
    if chosen == "version":
        from . import __version__
        print(__version__)
        return

    # No sub-command was given.
    parser.print_help()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _launch_ui(host: str, port: int, db) -> None:
    """Start the local viewer and block until the server stops.

    host / port: address the server binds to.
    db: path of the SQLite file to show; ``None`` uses the store's default.

    Raises SystemExit with an install hint when the viewer's optional
    dependencies cannot be imported.
    """
    from . import context as _ctx
    from .store import Store

    try:
        import uvicorn
        # The viewer app imports fastapi, which is also part of the viewer
        # extra, so it sits in the same guard as uvicorn.
        from .viewer.app import app
    except ImportError as exc:
        raise SystemExit(
            "The viewer needs extra deps. Install with: pip install 'replai[viewer]'"
        ) from exc

    _ctx.set_store(Store(db))
    print(f"replai viewer → http://{host}:{port} (Ctrl-C to stop)")
    uvicorn.run(app, host=host, port=port, log_level="warning")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _list_runs() -> None:
    """Print one line per recorded run (id, name, age) from the default store.

    Prints a hint instead when no runs have been recorded.
    """
    from .store import Store

    store = Store()
    try:
        runs = store.runs()
    finally:
        # Release the SQLite connection as soon as the rows are read.
        store.close()

    if not runs:
        print("No runs recorded yet. Instrument your agent, run it, then try again.")
        return
    for r in runs:
        print(f"{r['id']} {r['name']:<24} {_ago(r['start'])}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _ago(ts) -> str:
    """Format a Unix timestamp as a short relative age such as "3h ago".

    ts: seconds since the epoch; a falsy value (``None`` or 0) yields "".

    The largest unit that fits is used (days, hours, minutes, else seconds).
    Timestamps in the future are reported as "0s ago".
    """
    if not ts:
        return ""
    # Clamp at zero so clock skew never prints a negative age.
    secs = max(0, int(time.time() - ts))
    for unit, n in (("d", 86400), ("h", 3600), ("m", 60)):
        if secs >= n:
            return f"{secs // n}{unit} ago"
    return f"{secs}s ago"
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextvars
|
|
4
|
+
import time
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from .models import Run, Span
|
|
8
|
+
from .store import Store
|
|
9
|
+
|
|
10
|
+
_store: Optional[Store] = None
|
|
11
|
+
_current_run: contextvars.ContextVar = contextvars.ContextVar("replai_run", default=None)
|
|
12
|
+
_span_stack: contextvars.ContextVar = contextvars.ContextVar("replai_span_stack", default=())
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def set_store(store: Store) -> None:
    """Install ``store`` as the module-wide store that ``get_store`` returns."""
    global _store
    _store = store
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_store() -> Store:
    """Return the active store, creating a default ``Store()`` on first use."""
    global _store
    if _store is not None:
        return _store
    _store = Store()
    return _store
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def current_run() -> Optional[Run]:
    """Return the run bound to the current context, or ``None`` if none is."""
    active = _current_run.get()
    return active
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def enter_run(run: Run):
    """Bind ``run`` as the current run.

    Returns the context-variable token that ``exit_run`` needs to undo this.
    """
    reset_token = _current_run.set(run)
    return reset_token
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def exit_run(token) -> None:
    """Restore whichever run was current before the matching ``enter_run``.

    token: the value that ``enter_run`` returned.
    """
    _current_run.reset(token)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def ensure_run() -> Run:
    """Return the current run, starting an implicit one named "auto" if needed.

    The implicit run is saved and bound to the current context. Nothing in
    this function sets its end time.
    """
    existing = current_run()
    if existing is not None:
        return existing

    auto_run = Run(name="auto")
    get_store().save_run(auto_run)
    enter_run(auto_run)
    return auto_run
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def current_span_id() -> Optional[str]:
    """Return the id of the innermost open span, or ``None`` outside any span."""
    open_spans = _span_stack.get()
    if not open_spans:
        return None
    return open_spans[-1]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def push_span(span_id: str) -> None:
    """Make ``span_id`` the innermost open span in the current context.

    A new tuple is stored each time; the previous one is never mutated.
    """
    open_spans = _span_stack.get()
    _span_stack.set((*open_spans, span_id))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def pop_span() -> None:
    """Drop the innermost open span; does nothing when no span is open."""
    open_spans = _span_stack.get()
    if not open_spans:
        return
    _span_stack.set(open_spans[:-1])
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def record_span(name: str, type: str, **fields) -> Span:
    """Save a span that is already complete (used for one-shot LLM/tool calls).

    name / type: label and category of the span.
    **fields: further ``Span`` constructor arguments (e.g. input, output, model).

    The span is attached to the current run (one is created if needed) and to
    the innermost open span. Its end time is set before saving. Returns the span.
    """
    owner = ensure_run()
    finished = Span(
        run_id=owner.id,
        name=name,
        type=type,
        parent_id=current_span_id(),
        **fields,
    )
    finished.end = time.time()
    get_store().save_span(finished)
    return finished
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Best-effort auto-instrumentation of LLM client libraries.
|
|
2
|
+
|
|
3
|
+
Patches the Anthropic and OpenAI SDK call sites so every model call is captured
|
|
4
|
+
without changing your code. Safe no-op if a library isn't installed, and the
|
|
5
|
+
capture wrappers never raise into your actual call.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import functools
|
|
10
|
+
|
|
11
|
+
from . import context as _ctx
|
|
12
|
+
|
|
13
|
+
_patched = False
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def instrument() -> None:
    """Patch the supported LLM client libraries; later calls do nothing."""
    global _patched
    if not _patched:
        _patch_anthropic()
        _patch_openai()
        _patched = True
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _patch_anthropic() -> None:
    """Wrap anthropic's ``Messages.create`` so each call is saved as an llm_call span.

    Does nothing when the anthropic package cannot be imported or the method
    is already wrapped. The wrapper returns the SDK response unchanged. A
    failure while recording is logged at DEBUG level and never reaches the
    caller. Calls where the SDK itself raises are not recorded.
    """
    try:
        from anthropic.resources.messages import Messages
    except Exception:
        # Library missing or failing to import: nothing to patch.
        return
    if getattr(Messages.create, "_replai", False):
        return
    original = Messages.create

    @functools.wraps(original)
    def create(self, *args, **kwargs):
        resp = original(self, *args, **kwargs)
        try:
            usage = getattr(resp, "usage", None)
            _ctx.record_span(
                "anthropic.messages.create", "llm_call",
                input=kwargs.get("messages"),
                output=_anthropic_text(resp),
                model=kwargs.get("model"),
                tokens_in=getattr(usage, "input_tokens", None),
                tokens_out=getattr(usage, "output_tokens", None),
            )
        except Exception:
            # Capture stays best-effort, but leave a trace so a broken
            # recorder can be diagnosed instead of failing silently.
            import logging
            logging.getLogger(__name__).debug(
                "replai: could not record anthropic call", exc_info=True
            )
        return resp

    # Marker checked above so the method is never wrapped twice.
    create._replai = True
    Messages.create = create
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _patch_openai() -> None:
    """Wrap openai's chat ``Completions.create`` so each call is saved as an llm_call span.

    Does nothing when the openai package cannot be imported or the method is
    already wrapped. The wrapper returns the SDK response unchanged. A
    failure while recording is logged at DEBUG level and never reaches the
    caller. Calls where the SDK itself raises are not recorded.
    """
    try:
        from openai.resources.chat.completions import Completions
    except Exception:
        # Library missing or failing to import: nothing to patch.
        return
    if getattr(Completions.create, "_replai", False):
        return
    original = Completions.create

    @functools.wraps(original)
    def create(self, *args, **kwargs):
        resp = original(self, *args, **kwargs)
        try:
            usage = getattr(resp, "usage", None)
            _ctx.record_span(
                "openai.chat.completions.create", "llm_call",
                input=kwargs.get("messages"),
                output=_openai_text(resp),
                model=kwargs.get("model"),
                tokens_in=getattr(usage, "prompt_tokens", None),
                tokens_out=getattr(usage, "completion_tokens", None),
            )
        except Exception:
            # Capture stays best-effort, but leave a trace so a broken
            # recorder can be diagnosed instead of failing silently.
            import logging
            logging.getLogger(__name__).debug(
                "replai: could not record openai call", exc_info=True
            )
        return resp

    # Marker checked above so the method is never wrapped twice.
    create._replai = True
    Completions.create = create
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _anthropic_text(resp) -> str:
    """Concatenate the ``text`` of every block in ``resp.content``.

    Blocks without a ``text`` attribute contribute nothing. If the response
    cannot be read that way, ``str(resp)`` is returned instead.
    """
    try:
        pieces = [getattr(block, "text", "") for block in resp.content]
        return "".join(pieces)
    except Exception:
        return str(resp)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _openai_text(resp):
    """Return the message content of the first choice, or ``str(resp)`` on failure."""
    try:
        first_choice = resp.choices[0]
        return first_choice.message.content
    except Exception:
        return str(resp)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import uuid
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _id() -> str:
    """Return a new identifier: the first 12 hex characters of a random UUID4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:12]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _now() -> float:
    """Return the current time as seconds since the epoch."""
    timestamp = time.time()
    return timestamp
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class Span:
    """One recorded step of a run.

    ``id`` and ``start`` are filled in automatically on creation; ``end``
    stays ``None`` until something sets it.
    """

    run_id: str  # id of the Run this span belongs to
    name: str
    type: str = "function"  # llm_call | tool_call | function
    id: str = field(default_factory=_id)
    parent_id: Optional[str] = None  # id of the enclosing span, if any
    start: float = field(default_factory=_now)
    end: Optional[float] = None
    input: Any = None
    output: Any = None
    error: Optional[str] = None
    model: Optional[str] = None
    tokens_in: Optional[int] = None
    tokens_out: Optional[int] = None
    metadata: dict = field(default_factory=dict)

    @property
    def duration_ms(self) -> Optional[float]:
        """Elapsed milliseconds rounded to one decimal, or ``None`` while ``end`` is unset."""
        if self.end is not None:
            elapsed = self.end - self.start
            return round(elapsed * 1000, 1)
        return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class Run:
    """Top-level record that groups the spans of one agent execution.

    ``id`` and ``start`` are filled in automatically on creation; ``end``
    stays ``None`` until something sets it.
    """

    name: str
    id: str = field(default_factory=_id)
    start: float = field(default_factory=_now)
    end: Optional[float] = None
    metadata: dict = field(default_factory=dict)  # free-form values supplied by the caller
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sqlite3
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from .models import Run, Span
|
|
10
|
+
|
|
11
|
+
DEFAULT_DB = Path(os.environ.get("REPLAI_DB", str(Path.home() / ".replai" / "replai.db")))
|
|
12
|
+
|
|
13
|
+
SCHEMA = """
|
|
14
|
+
CREATE TABLE IF NOT EXISTS runs (
|
|
15
|
+
id TEXT PRIMARY KEY,
|
|
16
|
+
name TEXT,
|
|
17
|
+
start REAL,
|
|
18
|
+
end REAL,
|
|
19
|
+
metadata TEXT
|
|
20
|
+
);
|
|
21
|
+
CREATE TABLE IF NOT EXISTS spans (
|
|
22
|
+
id TEXT PRIMARY KEY,
|
|
23
|
+
run_id TEXT,
|
|
24
|
+
parent_id TEXT,
|
|
25
|
+
name TEXT,
|
|
26
|
+
type TEXT,
|
|
27
|
+
start REAL,
|
|
28
|
+
end REAL,
|
|
29
|
+
input TEXT,
|
|
30
|
+
output TEXT,
|
|
31
|
+
error TEXT,
|
|
32
|
+
model TEXT,
|
|
33
|
+
tokens_in INTEGER,
|
|
34
|
+
tokens_out INTEGER,
|
|
35
|
+
metadata TEXT
|
|
36
|
+
);
|
|
37
|
+
CREATE INDEX IF NOT EXISTS idx_spans_run ON spans(run_id);
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _enc(value) -> Optional[str]:
    """Serialise ``value`` to JSON text for storage; ``None`` stays ``None``.

    Objects json cannot encode are converted with ``str``. Values json
    rejects outright (for example a dict with tuple keys, or a structure
    that contains itself) are stored as the JSON string of their ``repr``.
    This way recording a span never fails because of what it captured.
    """
    if value is None:
        return None
    try:
        return json.dumps(value, default=str)
    except (TypeError, ValueError):
        # default=str does not cover bad dict keys (TypeError) or circular
        # references (ValueError); fall back to a readable repr.
        return json.dumps(repr(value))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _dec(value):
    """Parse stored JSON text; ``None`` or empty text yields ``None``."""
    if not value:
        return None
    return json.loads(value)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Store:
    """SQLite-backed persistence for runs and spans.

    Each Store owns one connection. It can be used as a context manager,
    which closes the connection on exit.
    """

    def __init__(self, path: Optional[str | Path] = None):
        """Open the database at ``path``, creating the file, its parent
        directory and the tables if they do not exist.

        path: database file; a falsy value selects ``DEFAULT_DB``.
        """
        self.path = Path(path) if path else DEFAULT_DB
        self.path.parent.mkdir(parents=True, exist_ok=True)
        # check_same_thread=False lets threads other than the creator use
        # this connection.
        self._conn = sqlite3.connect(str(self.path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        self._conn.executescript(SCHEMA)
        self._conn.commit()

    def __enter__(self) -> "Store":
        return self

    def __exit__(self, *exc_info) -> None:
        self.close()

    def save_run(self, run: Run) -> None:
        """Insert ``run``, replacing any stored run with the same id."""
        # The connection context manager commits on success and rolls back
        # if the statement fails.
        with self._conn:
            self._conn.execute(
                "INSERT OR REPLACE INTO runs (id, name, start, end, metadata) VALUES (?, ?, ?, ?, ?)",
                (run.id, run.name, run.start, run.end, _enc(run.metadata)),
            )

    def save_span(self, span: Span) -> None:
        """Insert ``span``, replacing any stored span with the same id.

        ``input``, ``output`` and ``metadata`` are stored as JSON text.
        """
        with self._conn:
            self._conn.execute(
                """INSERT OR REPLACE INTO spans
                (id, run_id, parent_id, name, type, start, end,
                input, output, error, model, tokens_in, tokens_out, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    span.id, span.run_id, span.parent_id, span.name, span.type,
                    span.start, span.end,
                    _enc(span.input), _enc(span.output), span.error, span.model,
                    span.tokens_in, span.tokens_out, _enc(span.metadata),
                ),
            )

    def runs(self) -> list[dict]:
        """Return every run as a dict, newest ``start`` first.

        ``metadata`` is returned as stored (JSON text); it is not decoded here.
        """
        rows = self._conn.execute("SELECT * FROM runs ORDER BY start DESC").fetchall()
        return [dict(r) for r in rows]

    def run(self, run_id: str) -> Optional[dict]:
        """Return the run with ``run_id`` as a dict, or ``None`` if there is none."""
        row = self._conn.execute("SELECT * FROM runs WHERE id = ?", (run_id,)).fetchone()
        return dict(row) if row else None

    def spans(self, run_id: str) -> list[dict]:
        """Return the spans of ``run_id`` ordered by ``start``.

        ``input``, ``output`` and ``metadata`` are decoded from JSON, and a
        ``duration_ms`` key is added (``None`` while the span has no end).
        """
        rows = self._conn.execute(
            "SELECT * FROM spans WHERE run_id = ? ORDER BY start", (run_id,)
        ).fetchall()
        result = []
        for row in rows:
            span = dict(row)
            for key in ("input", "output", "metadata"):
                span[key] = _dec(span[key])
            # "is not None" matches Span.duration_ms, so an end value of 0
            # is still treated as set.
            span["duration_ms"] = (
                round((span["end"] - span["start"]) * 1000, 1)
                if span["end"] is not None
                else None
            )
            result.append(span)
        return result

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()
|
|
File without changes
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from fastapi import FastAPI
|
|
6
|
+
from fastapi.staticfiles import StaticFiles
|
|
7
|
+
|
|
8
|
+
from ..context import get_store
|
|
9
|
+
|
|
10
|
+
# Directory named "static" next to this module; served as the viewer's front-end.
STATIC = Path(__file__).parent / "static"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_app() -> FastAPI:
    """Build the viewer application: two JSON endpoints plus the static UI.

    Interactive API docs are disabled (``docs_url`` and ``redoc_url`` are
    ``None``).
    """
    api = FastAPI(title="replai", docs_url=None, redoc_url=None)

    @api.get("/api/runs")
    def list_runs():
        """Return every run known to the current store."""
        return get_store().runs()

    @api.get("/api/runs/{run_id}")
    def run_detail(run_id: str):
        """Return the run record and its spans for *run_id*."""
        backend = get_store()
        run_record = backend.run(run_id)
        run_spans = backend.spans(run_id)
        return {"run": run_record, "spans": run_spans}

    # Mounted last, after the API routes are registered, because it is
    # mounted at the root path "/".
    static_files = StaticFiles(directory=str(STATIC), html=True)
    api.mount("/", static_files, name="static")
    return api
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Module-level application instance, built once when this module is imported.
app = create_app()
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
<!doctype html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
6
|
+
<title>replai</title>
|
|
7
|
+
<style>
|
|
8
|
+
:root {
|
|
9
|
+
--bg: #0e0f13; --panel: #16181f; --line: #262932; --text: #e6e8ee;
|
|
10
|
+
--muted: #8b909e; --accent: #6ea8fe; --llm: #b07cff; --tool: #59c19a; --fn: #d6975a;
|
|
11
|
+
--err: #ff6b6b;
|
|
12
|
+
}
|
|
13
|
+
* { box-sizing: border-box; }
|
|
14
|
+
body { margin: 0; font: 14px/1.5 ui-monospace, SFMono-Regular, Menlo, monospace;
|
|
15
|
+
background: var(--bg); color: var(--text); height: 100vh; display: flex; }
|
|
16
|
+
#sidebar { width: 280px; border-right: 1px solid var(--line); overflow-y: auto; flex: none; }
|
|
17
|
+
#main { flex: 1; display: flex; min-width: 0; }
|
|
18
|
+
#timeline { flex: 1; overflow-y: auto; padding: 14px; min-width: 0; }
|
|
19
|
+
#detail { width: 42%; border-left: 1px solid var(--line); overflow-y: auto; padding: 14px;
|
|
20
|
+
background: var(--panel); }
|
|
21
|
+
h1 { font-size: 15px; margin: 0; padding: 14px; border-bottom: 1px solid var(--line);
|
|
22
|
+
letter-spacing: .5px; }
|
|
23
|
+
h1 span { color: var(--accent); }
|
|
24
|
+
.run { padding: 10px 14px; border-bottom: 1px solid var(--line); cursor: pointer; }
|
|
25
|
+
.run:hover { background: var(--panel); }
|
|
26
|
+
.run.active { background: var(--panel); border-left: 2px solid var(--accent); }
|
|
27
|
+
.run .name { font-weight: 600; }
|
|
28
|
+
.run .meta { color: var(--muted); font-size: 12px; }
|
|
29
|
+
.span { padding: 7px 10px; margin: 3px 0; border-radius: 6px; cursor: pointer;
|
|
30
|
+
background: var(--panel); border: 1px solid transparent; display: flex; gap: 8px;
|
|
31
|
+
align-items: center; }
|
|
32
|
+
.span:hover { border-color: var(--line); }
|
|
33
|
+
.span.active { border-color: var(--accent); }
|
|
34
|
+
.badge { font-size: 10px; text-transform: uppercase; padding: 1px 6px; border-radius: 4px;
|
|
35
|
+
font-weight: 700; letter-spacing: .5px; flex: none; }
|
|
36
|
+
.badge.llm_call { background: rgba(176,124,255,.18); color: var(--llm); }
|
|
37
|
+
.badge.tool_call { background: rgba(89,193,154,.18); color: var(--tool); }
|
|
38
|
+
.badge.function { background: rgba(214,151,90,.18); color: var(--fn); }
|
|
39
|
+
.span .label { flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
40
|
+
.span .dur { color: var(--muted); font-size: 12px; flex: none; }
|
|
41
|
+
.span.err .label { color: var(--err); }
|
|
42
|
+
.empty { color: var(--muted); padding: 40px 14px; text-align: center; }
|
|
43
|
+
pre { background: var(--bg); border: 1px solid var(--line); border-radius: 6px; padding: 10px;
|
|
44
|
+
overflow-x: auto; white-space: pre-wrap; word-break: break-word; }
|
|
45
|
+
.k { color: var(--muted); font-size: 12px; margin: 14px 0 4px; text-transform: uppercase;
|
|
46
|
+
letter-spacing: .5px; }
|
|
47
|
+
.pill { display: inline-block; background: var(--bg); border: 1px solid var(--line);
|
|
48
|
+
border-radius: 4px; padding: 1px 7px; margin-right: 6px; font-size: 12px; }
|
|
49
|
+
</style>
|
|
50
|
+
</head>
|
|
51
|
+
<body>
|
|
52
|
+
<div id="sidebar"><h1>repl<span>ai</span></h1><div id="runs"></div></div>
|
|
53
|
+
<div id="main">
|
|
54
|
+
<div id="timeline"><div class="empty">Select a run.</div></div>
|
|
55
|
+
<div id="detail"><div class="empty">Select a step.</div></div>
|
|
56
|
+
</div>
|
|
57
|
+
<script>
|
|
58
|
+
// Shorthand for document.querySelector: first element matching selector `s`, or null.
const $ = s => document.querySelector(s);
|
|
59
|
+
// Ids of the currently selected run and span (null when nothing is selected);
// read while rendering to re-apply the "active" highlight.
let activeRun = null, activeSpan = null;
|
|
60
|
+
|
|
61
|
+
/**
 * Fetch the run list from /api/runs and rebuild the sidebar.
 *
 * Network errors and non-2xx responses are caught here so that callers
 * (including the polling timer) never see a rejected promise. Run fields are
 * written with textContent, so they are never parsed as HTML.
 */
async function loadRuns() {
  const el = $('#runs');
  let runs;
  try {
    const res = await fetch('/api/runs');
    if (!res.ok) throw new Error('HTTP ' + res.status);
    runs = await res.json();
  } catch (err) {
    console.error('replai: failed to load runs', err);
    // Keep an already-rendered list on a transient failure; only show the
    // error placeholder when there is nothing else to display.
    if (!el.children.length) el.innerHTML = '<div class="empty">Could not load runs.</div>';
    return;
  }
  if (!Array.isArray(runs) || !runs.length) {
    el.innerHTML = '<div class="empty">No runs yet.<br>Run your agent, then refresh.</div>';
    return;
  }
  const frag = document.createDocumentFragment();
  for (const r of runs) {
    const d = document.createElement('div');
    d.className = 'run' + (r.id === activeRun ? ' active' : '');
    // `start` is multiplied by 1000 before being handed to Date.
    const when = r.start ? new Date(r.start * 1000).toLocaleString() : '';

    const name = document.createElement('div');
    name.className = 'name';
    name.textContent = String(r.name);

    const meta = document.createElement('div');
    meta.className = 'meta';
    meta.textContent = `${r.id} · ${when}`;

    d.append(name, meta);
    d.onclick = () => loadRun(r.id);
    frag.appendChild(d);
  }
  el.innerHTML = '';
  el.appendChild(frag);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Select run `id`, refresh the sidebar, and render the run's span tree into
 * the timeline pane.
 *
 * @param {string} id  Run id as returned by /api/runs.
 */
async function loadRun(id) {
  activeRun = id; activeSpan = null;
  await loadRuns();
  // Another run may have been clicked while awaiting; let that call win.
  if (activeRun !== id) return;

  const tl = $('#timeline');
  let spans;
  try {
    // Encode the id so characters such as "/" or "?" cannot alter the URL.
    const res = await fetch('/api/runs/' + encodeURIComponent(id));
    if (!res.ok) throw new Error('HTTP ' + res.status);
    ({ spans } = await res.json());
  } catch (err) {
    console.error('replai: failed to load run', id, err);
    if (activeRun === id) tl.innerHTML = '<div class="empty">Could not load run.</div>';
    return;
  }
  // Drop a stale response instead of overwriting a newer selection.
  if (activeRun !== id) return;

  $('#detail').innerHTML = '<div class="empty">Select a step.</div>';
  if (!Array.isArray(spans) || !spans.length) { tl.innerHTML = '<div class="empty">No steps recorded.</div>'; return; }

  // Prototype-less map so span ids like "constructor" cannot collide with
  // inherited Object properties. Root spans are grouped under the key null.
  const known = new Set(spans.map(s => s.id));
  const byParent = Object.create(null);
  for (const s of spans) {
    // A span whose parent is not in this result set is shown at the root
    // rather than being silently hidden.
    const key = known.has(s.parent_id) ? s.parent_id : null;
    (byParent[key] = byParent[key] || []).push(s);
  }
  tl.innerHTML = '';
  render(byParent, null, 0, tl, spans);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Depth-first render of the span tree into `container`.
 *
 * @param {Object} byParent   Map of parent id -> array of child spans.
 * @param {?string} parent    Id whose children are rendered (null for roots).
 * @param {number} depth      Nesting level; each level indents by 18px.
 * @param {Element} container Element that receives the rows.
 * @param {Array} all         Full span list; unused here, passed through to
 *                            keep the call signature unchanged.
 */
function render(byParent, parent, depth, container, all) {
  for (const s of (byParent[parent] || [])) {
    // A missing type must not crash the whole timeline.
    const type = String(s.type ?? '');

    const row = document.createElement('div');
    row.className = 'span ' + (s.error ? 'err' : '') + (s.id === activeSpan ? ' active' : '');
    row.style.marginLeft = (depth * 18) + 'px';

    // Span fields are written via textContent/className rather than
    // innerHTML, so they are never parsed as markup.
    const badge = document.createElement('span');
    badge.className = 'badge ' + type;
    badge.textContent = type.replace('_call', '');

    const label = document.createElement('span');
    label.className = 'label';
    label.textContent = String(s.name);

    const dur = document.createElement('span');
    dur.className = 'dur';
    dur.textContent = s.duration_ms != null ? s.duration_ms + 'ms' : '';

    row.append(badge, label, dur);
    row.onclick = () => showDetail(s, row);
    container.appendChild(row);
    render(byParent, s.id, depth + 1, container, all);
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Show span `s` in the detail pane and mark `row` as the active timeline row.
 *
 * The pane is built from DOM nodes with textContent, so no span field is
 * parsed as HTML.
 *
 * @param {Object} s      Span object as returned by the API.
 * @param {?Element} row  Timeline row to highlight (may be null).
 */
function showDetail(s, row) {
  activeSpan = s.id;
  document.querySelectorAll('.span.active').forEach(e => e.classList.remove('active'));
  if (row) row.classList.add('active');

  // Create <tag class="cls"> with optional plain-text content.
  const make = (tag, cls, text) => {
    const n = document.createElement(tag);
    if (cls) n.className = cls;
    if (text !== undefined) n.textContent = String(text);
    return n;
  };

  const d = $('#detail');
  d.innerHTML = '';
  // Append a small uppercase heading followed by its value node(s).
  const section = (title, ...nodes) => d.append(make('div', 'k', title), ...nodes);

  // A missing type must not crash the pane.
  const type = String(s.type ?? '');
  const head = make('div');
  head.append(make('span', 'badge ' + type, type.replace('_call', '')), ' ', make('b', '', s.name));
  d.append(head);

  if (s.model) section('model', make('span', 'pill', s.model));
  if (s.tokens_in != null || s.tokens_out != null) {
    section(
      'tokens',
      make('span', 'pill', 'in ' + (s.tokens_in ?? '?')),
      make('span', 'pill', 'out ' + (s.tokens_out ?? '?')),
    );
  }
  if (s.duration_ms != null) section('duration', make('span', 'pill', s.duration_ms + ' ms'));
  if (s.error) {
    const pre = make('pre', '', s.error);
    pre.style.color = 'var(--err)';
    section('error', pre);
  }
  section('input', make('pre', '', fmt(s.input)));
  section('output', make('pre', '', fmt(s.output)));
}
|
|
119
|
+
|
|
120
|
+
// Format a span payload for display: an em dash for null/undefined, strings
// unchanged, anything else as JSON indented by two spaces.
const fmt = v => {
  if (v == null) return '—';
  if (typeof v === 'string') return v;
  return JSON.stringify(v, null, 2);
};
|
|
121
|
+
// HTML-escape a value for interpolation into markup. Each special character
// maps to its entity; quotes are included so the result is also safe inside
// attribute values. Non-strings are stringified first (null -> "null").
const esc = s => String(s).replace(
  /[&<>"']/g,
  c => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[c]),
);
|
|
122
|
+
|
|
123
|
+
// Populate the sidebar once on page load.
loadRuns();
// Re-fetch the run list every 3 seconds, but only while no run is selected.
setInterval(function pollRuns() {
  if (activeRun) return;
  loadRuns();
}, 3000);
|
|
125
|
+
</script>
|
|
126
|
+
</body>
|
|
127
|
+
</html>
|
|
File without changes
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import replai
|
|
2
|
+
from replai.store import Store
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_run_and_span(tmp_path):
    """A run containing one span can be read back from the database file."""
    db = str(tmp_path / "t.db")
    replai.init(db=db, instrument=False)

    with replai.run("t") as r:
        with replai.span("step", type="tool_call", input={"x": 1}) as s:
            s.output = {"y": 2}

    store = Store(db)
    try:
        runs = store.runs()
        spans = store.spans(r.id)
    finally:
        # Close the verification connection instead of leaking it.
        store.close()

    assert len(runs) == 1
    assert runs[0]["name"] == "t"

    assert len(spans) == 1
    assert spans[0]["type"] == "tool_call"
    assert spans[0]["input"] == {"x": 1}
    assert spans[0]["output"] == {"y": 2}
    assert spans[0]["end"] is not None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_nested_spans_and_parents(tmp_path):
    """A span opened inside another records the outer span as its parent."""
    db = str(tmp_path / "n.db")
    replai.init(db=db, instrument=False)

    with replai.run("nested") as r:
        with replai.span("outer") as outer:
            with replai.span("inner"):
                pass

    store = Store(db)
    try:
        spans = {s["name"]: s for s in store.spans(r.id)}
    finally:
        # Close the verification connection instead of leaking it.
        store.close()

    assert spans["inner"]["parent_id"] == outer.id
    assert spans["outer"]["parent_id"] is None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_error_is_recorded(tmp_path):
    """An exception raised inside a span is stored in that span's ``error``."""
    db = str(tmp_path / "e.db")
    replai.init(db=db, instrument=False)

    with replai.run("boom") as r:
        try:
            with replai.span("explodes"):
                raise ValueError("nope")
        except ValueError:
            # Expected: the error must still propagate out of the span.
            pass

    store = Store(db)
    try:
        spans = store.spans(r.id)
    finally:
        # Close the verification connection instead of leaking it.
        store.close()

    # Check the count first so a missing span fails clearly, not with IndexError.
    assert len(spans) == 1
    assert "ValueError" in spans[0]["error"]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_record_llm_call(tmp_path):
    """``record_llm_call`` stores an ``llm_call`` span with model and token counts."""
    db = str(tmp_path / "l.db")
    replai.init(db=db, instrument=False)

    with replai.run("chat") as r:
        replai.record_llm_call(model="m", input="hi", output="hello",
                               tokens_in=2, tokens_out=3)

    store = Store(db)
    try:
        spans = store.spans(r.id)
    finally:
        # Close the verification connection instead of leaking it.
        store.close()

    # Check the count first so a missing span fails clearly, not with IndexError.
    assert len(spans) == 1
    span = spans[0]
    assert span["type"] == "llm_call"
    assert span["model"] == "m"
    assert span["tokens_out"] == 3
|