agentic-lab 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentic_lab-0.1.0/PKG-INFO +387 -0
- agentic_lab-0.1.0/README.md +332 -0
- agentic_lab-0.1.0/pyproject.toml +222 -0
- agentic_lab-0.1.0/src/agentlab/__init__.py +269 -0
- agentic_lab-0.1.0/src/agentlab/_defaults.toml +40 -0
- agentic_lab-0.1.0/src/agentlab/_proto/__init__.py +23 -0
- agentic_lab-0.1.0/src/agentlab/_proto/trace_pb2.py +132 -0
- agentic_lab-0.1.0/src/agentlab/_proto/trace_pb2.pyi +835 -0
- agentic_lab-0.1.0/src/agentlab/bridges/__init__.py +10 -0
- agentic_lab-0.1.0/src/agentlab/bridges/otel_genai.py +351 -0
- agentic_lab-0.1.0/src/agentlab/cli.py +636 -0
- agentic_lab-0.1.0/src/agentlab/config.py +280 -0
- agentic_lab-0.1.0/src/agentlab/core/__init__.py +247 -0
- agentic_lab-0.1.0/src/agentlab/core/_clock.py +41 -0
- agentic_lab-0.1.0/src/agentlab/core/_fmt.py +64 -0
- agentic_lab-0.1.0/src/agentlab/core/_replay_seam.py +125 -0
- agentic_lab-0.1.0/src/agentlab/core/agent.py +179 -0
- agentic_lab-0.1.0/src/agentlab/core/attributes.py +114 -0
- agentic_lab-0.1.0/src/agentlab/core/budget.py +31 -0
- agentic_lab-0.1.0/src/agentlab/core/context.py +672 -0
- agentic_lab-0.1.0/src/agentlab/core/continuation.py +160 -0
- agentic_lab-0.1.0/src/agentlab/core/cost.py +95 -0
- agentic_lab-0.1.0/src/agentlab/core/determinism.py +1144 -0
- agentic_lab-0.1.0/src/agentlab/core/env.py +392 -0
- agentic_lab-0.1.0/src/agentlab/core/eval.py +180 -0
- agentic_lab-0.1.0/src/agentlab/core/guardrail.py +203 -0
- agentic_lab-0.1.0/src/agentlab/core/handles.py +495 -0
- agentic_lab-0.1.0/src/agentlab/core/handoff.py +347 -0
- agentic_lab-0.1.0/src/agentlab/core/ids.py +141 -0
- agentic_lab-0.1.0/src/agentlab/core/link.py +152 -0
- agentic_lab-0.1.0/src/agentlab/core/llm.py +530 -0
- agentic_lab-0.1.0/src/agentlab/core/memory.py +138 -0
- agentic_lab-0.1.0/src/agentlab/core/message.py +205 -0
- agentic_lab-0.1.0/src/agentlab/core/op.py +207 -0
- agentic_lab-0.1.0/src/agentlab/core/op_names.py +33 -0
- agentic_lab-0.1.0/src/agentlab/core/retrieval.py +139 -0
- agentic_lab-0.1.0/src/agentlab/core/run.py +277 -0
- agentic_lab-0.1.0/src/agentlab/core/span.py +894 -0
- agentic_lab-0.1.0/src/agentlab/core/span_event.py +305 -0
- agentic_lab-0.1.0/src/agentlab/core/status.py +61 -0
- agentic_lab-0.1.0/src/agentlab/core/step.py +689 -0
- agentic_lab-0.1.0/src/agentlab/core/tool.py +285 -0
- agentic_lab-0.1.0/src/agentlab/core/types.py +22 -0
- agentic_lab-0.1.0/src/agentlab/errors.py +760 -0
- agentic_lab-0.1.0/src/agentlab/integrations/__init__.py +6 -0
- agentic_lab-0.1.0/src/agentlab/integrations/langchain/__init__.py +15 -0
- agentic_lab-0.1.0/src/agentlab/integrations/langchain/_callbacks.py +345 -0
- agentic_lab-0.1.0/src/agentlab/integrations/langchain/_helpers.py +148 -0
- agentic_lab-0.1.0/src/agentlab/io/__init__.py +31 -0
- agentic_lab-0.1.0/src/agentlab/io/format.py +293 -0
- agentic_lab-0.1.0/src/agentlab/io/http_capture.py +1064 -0
- agentic_lab-0.1.0/src/agentlab/io/http_format.py +449 -0
- agentic_lab-0.1.0/src/agentlab/io/otel_attributes.py +463 -0
- agentic_lab-0.1.0/src/agentlab/io/proto/__init__.py +26 -0
- agentic_lab-0.1.0/src/agentlab/io/proto/codec.py +1321 -0
- agentic_lab-0.1.0/src/agentlab/io/proto/framing.py +156 -0
- agentic_lab-0.1.0/src/agentlab/io/proto/reader.py +188 -0
- agentic_lab-0.1.0/src/agentlab/io/proto/writer.py +299 -0
- agentic_lab-0.1.0/src/agentlab/io/reader.py +237 -0
- agentic_lab-0.1.0/src/agentlab/io/redaction.py +376 -0
- agentic_lab-0.1.0/src/agentlab/io/writer.py +408 -0
- agentic_lab-0.1.0/src/agentlab/llm/__init__.py +106 -0
- agentic_lab-0.1.0/src/agentlab/llm/_autoemit.py +540 -0
- agentic_lab-0.1.0/src/agentlab/llm/base.py +355 -0
- agentic_lab-0.1.0/src/agentlab/llm/errors.py +60 -0
- agentic_lab-0.1.0/src/agentlab/llm/factory.py +79 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/__init__.py +120 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/_common.py +435 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/anthropic.py +517 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/bedrock.py +511 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/google.py +543 -0
- agentic_lab-0.1.0/src/agentlab/llm/matchers/openai.py +1200 -0
- agentic_lab-0.1.0/src/agentlab/llm/pricing.py +267 -0
- agentic_lab-0.1.0/src/agentlab/llm/providers/__init__.py +5 -0
- agentic_lab-0.1.0/src/agentlab/llm/providers/openrouter.py +442 -0
- agentic_lab-0.1.0/src/agentlab/llm/types.py +484 -0
- agentic_lab-0.1.0/src/agentlab/promote.py +291 -0
- agentic_lab-0.1.0/src/agentlab/py.typed +0 -0
- agentic_lab-0.1.0/src/agentlab/pytest.py +280 -0
- agentic_lab-0.1.0/src/agentlab/recorder.py +1027 -0
- agentic_lab-0.1.0/src/agentlab/replay/__init__.py +93 -0
- agentic_lab-0.1.0/src/agentlab/replay/api.py +365 -0
- agentic_lab-0.1.0/src/agentlab/replay/cache.py +350 -0
- agentic_lab-0.1.0/src/agentlab/replay/context.py +26 -0
- agentic_lab-0.1.0/src/agentlab/replay/diff.py +313 -0
- agentic_lab-0.1.0/src/agentlab/replay/drift.py +92 -0
- agentic_lab-0.1.0/src/agentlab/replay/session.py +294 -0
- agentic_lab-0.1.0/src/agentlab/storage/__init__.py +39 -0
- agentic_lab-0.1.0/src/agentlab/storage/base.py +381 -0
- agentic_lab-0.1.0/src/agentlab/storage/factory.py +104 -0
- agentic_lab-0.1.0/src/agentlab/storage/jsonl_store.py +576 -0
- agentic_lab-0.1.0/src/agentlab/storage/names.py +602 -0
- agentic_lab-0.1.0/src/agentlab/storage/proto_store.py +444 -0
- agentic_lab-0.1.0/src/agentlab/ui/README.md +86 -0
- agentic_lab-0.1.0/src/agentlab/ui/__init__.py +16 -0
- agentic_lab-0.1.0/src/agentlab/ui/_agents.py +412 -0
- agentic_lab-0.1.0/src/agentlab/ui/_assets/__init__.py +10 -0
- agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/assets/index-DGnShqkT.js +211 -0
- agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/assets/index-DqoRSI-a.css +1 -0
- agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/favicon.svg +11 -0
- agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/index.html +29 -0
- agentic_lab-0.1.0/src/agentlab/ui/_dto.py +325 -0
- agentic_lab-0.1.0/src/agentlab/ui/_errors.py +51 -0
- agentic_lab-0.1.0/src/agentlab/ui/_mapping.py +350 -0
- agentic_lab-0.1.0/src/agentlab/ui/_runtime.py +75 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/__init__.py +9 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/agents.py +446 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/agents_overview.py +242 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/app.py +672 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/dashboard.py +316 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/replay_runner.py +466 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/validation.py +80 -0
- agentic_lab-0.1.0/src/agentlab/ui/_server/watcher.py +101 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: agentic-lab
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Universal record-and-replay for LLM agents.
|
|
5
|
+
Keywords: llm,agents,replay,tracing,evals
|
|
6
|
+
Author: Ambuj Agrawal, Garima Luthra
|
|
7
|
+
Author-email: Ambuj Agrawal <ambujagrawal741@gmail.com>, Garima Luthra <garimaluthra2198@gmail.com>
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Dist: httpx>=0.27,<1.0
|
|
15
|
+
Requires-Dist: pydantic>=2.7,<3.0
|
|
16
|
+
Requires-Dist: pydantic-settings>=2.4,<3.0
|
|
17
|
+
Requires-Dist: structlog>=24.1,<26.0
|
|
18
|
+
Requires-Dist: tenacity>=8.5,<10.0
|
|
19
|
+
Requires-Dist: python-dotenv>=1.0,<2.0
|
|
20
|
+
Requires-Dist: protobuf>=5.27,<7.0
|
|
21
|
+
Requires-Dist: pytest>=8.2,<9.0 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest-asyncio>=0.23,<1.0 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest-cov>=5.0,<7.0 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest-xdist[psutil]>=3.6,<4.0 ; extra == 'dev'
|
|
25
|
+
Requires-Dist: respx>=0.21,<1.0 ; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff>=0.6,<1.0 ; extra == 'dev'
|
|
27
|
+
Requires-Dist: mypy>=1.11,<2.0 ; extra == 'dev'
|
|
28
|
+
Requires-Dist: pre-commit>=3.7,<5.0 ; extra == 'dev'
|
|
29
|
+
Requires-Dist: openai>=1.40,<2.0 ; extra == 'dev'
|
|
30
|
+
Requires-Dist: starlette>=0.40,<1.0 ; extra == 'dev'
|
|
31
|
+
Requires-Dist: uvicorn>=0.30,<1.0 ; extra == 'dev'
|
|
32
|
+
Requires-Dist: anyio>=4.4,<5.0 ; extra == 'dev'
|
|
33
|
+
Requires-Dist: langchain-core>=0.3,<1.0 ; extra == 'dev'
|
|
34
|
+
Requires-Dist: playwright>=1.59,<2.0 ; extra == 'dev'
|
|
35
|
+
Requires-Dist: langchain-core>=0.3,<1.0 ; extra == 'langchain'
|
|
36
|
+
Requires-Dist: langchain-openai>=0.3.35 ; extra == 'langchain'
|
|
37
|
+
Requires-Dist: langgraph>=1.0.1 ; extra == 'langchain'
|
|
38
|
+
Requires-Dist: starlette>=0.40,<1.0 ; extra == 'ui'
|
|
39
|
+
Requires-Dist: uvicorn>=0.30,<1.0 ; extra == 'ui'
|
|
40
|
+
Requires-Dist: anyio>=4.4,<5.0 ; extra == 'ui'
|
|
41
|
+
Maintainer: Ambuj Agrawal, Garima Luthra
|
|
42
|
+
Maintainer-email: Ambuj Agrawal <ambujagrawal741@gmail.com>, Garima Luthra <garimaluthra2198@gmail.com>
|
|
43
|
+
Requires-Python: >=3.12
|
|
44
|
+
Project-URL: Homepage, https://github.com/ambuj-krishna-agrawal/agent-lab
|
|
45
|
+
Project-URL: Repository, https://github.com/ambuj-krishna-agrawal/agent-lab
|
|
46
|
+
Project-URL: Issues, https://github.com/ambuj-krishna-agrawal/agent-lab/issues
|
|
47
|
+
Project-URL: Documentation, https://github.com/ambuj-krishna-agrawal/agent-lab#readme
|
|
48
|
+
Project-URL: Error reference, https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/docs/errors.md
|
|
49
|
+
Project-URL: Examples, https://github.com/ambuj-krishna-agrawal/agent-lab/tree/main/example
|
|
50
|
+
Project-URL: Changelog, https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/CHANGELOG.md
|
|
51
|
+
Provides-Extra: dev
|
|
52
|
+
Provides-Extra: langchain
|
|
53
|
+
Provides-Extra: ui
|
|
54
|
+
Description-Content-Type: text/markdown
|
|
55
|
+
|
|
56
|
+
# AgentLab
|
|
57
|
+
|
|
58
|
+
> Universal record-and-replay for LLM agents.
|
|
59
|
+
|
|
60
|
+
**Status:** pre-alpha, APIs will change.
|
|
61
|
+
|
|
62
|
+
AgentLab captures model calls, tools, state transitions, and timing into a
|
|
63
|
+
trace you can replay without hitting the network. It is built around a
|
|
64
|
+
framework-agnostic core and an HTTP capture layer that works with any SDK
|
|
65
|
+
that routes requests through `httpx`.
|
|
66
|
+
|
|
67
|
+
## Overhead
|
|
68
|
+
|
|
69
|
+
Per-LLM-call cost of running inside `agentlab.record()`:
|
|
70
|
+
|
|
71
|
+
| metric | baseline | recorded | overhead |
|
|
72
|
+
|----------------|----------|-----------|-----------|
|
|
73
|
+
| latency p50 | 13.5 ms | 14.7 ms | +1.16 ms |
|
|
74
|
+
| latency p99 | 14.4 ms | 15.9 ms | +1.52 ms |
|
|
75
|
+
|
|
76
|
+
Measured against an in-process loopback HTTP server with a 10 ms upstream
|
|
77
|
+
delay (eliminates network jitter so the delta isolates SDK overhead:
|
|
78
|
+
HTTP capture, span emit, JSONL write+fsync, matcher, LLMSpan build).
|
|
79
|
+
Real LLM calls land in the 100 ms – 2000 ms range, so this works out to
|
|
80
|
+
roughly 1% or less wall-clock overhead in practice.
|
|
81
|
+
|
|
82
|
+
Reproduce with:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
uv run python scripts/bench_record_overhead.py --calls 200 --runs 5
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Installation
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install agentic-lab # minimal SDK
|
|
92
|
+
pip install 'agentic-lab[ui]' # + Starlette UI server
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
The PyPI distribution is **`agentic-lab`**; the importable Python
|
|
96
|
+
module is **`agentlab`**:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import agentlab as al
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
For local development, this repo is `uv`-managed:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
git clone https://github.com/ambuj-krishna-agrawal/agent-lab.git
|
|
106
|
+
cd agent-lab
|
|
107
|
+
uv sync --all-extras --frozen
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Use `--frozen` by default so your environment matches `uv.lock` and CI.
|
|
111
|
+
|
|
112
|
+
## Documentation
|
|
113
|
+
|
|
114
|
+
* [Quickstart](#quickstart) — five minutes from install to a replayable trace.
|
|
115
|
+
* [Provider coverage](#provider-coverage) — every supported LLM provider + how to add custom ones.
|
|
116
|
+
* [Error reference](https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/docs/errors.md) — every `AGL-…` code with a remediation sentence (auto-generated from `src/agentlab/errors.py`).
|
|
117
|
+
* [Changelog](https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/CHANGELOG.md) — version history.
|
|
118
|
+
* [`AGENTS.md`](AGENTS.md) — invariants and quality gates contributors must respect.
|
|
119
|
+
* [`CONTRIBUTING.md`](CONTRIBUTING.md) — human-contributor process.
|
|
120
|
+
|
|
121
|
+
## Configuration
|
|
122
|
+
|
|
123
|
+
- Secrets live in `.env` (git-ignored). Copy `.env.example` and set the
|
|
124
|
+
provider keys you use.
|
|
125
|
+
- Non-secret defaults live in `src/agentlab/_defaults.toml` and can be
|
|
126
|
+
overridden by `AGENTLAB_*` environment variables.
|
|
127
|
+
- Full typed config lives in `src/agentlab/config.py`.
|
|
128
|
+
|
|
129
|
+
## Quickstart
|
|
130
|
+
|
|
131
|
+
Five minutes from `pip install` to a trace you can replay without an
|
|
132
|
+
API key. The full runnable script lives at
|
|
133
|
+
[`example/quickstart.py`](example/quickstart.py); the inline version:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
import os
|
|
137
|
+
import openai
|
|
138
|
+
import agentlab as al
|
|
139
|
+
|
|
140
|
+
client = openai.OpenAI(
|
|
141
|
+
api_key=os.environ["OPENROUTER_API_KEY"],
|
|
142
|
+
base_url="https://openrouter.ai/api/v1",
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# 1. Record.
|
|
146
|
+
with (
|
|
147
|
+
al.record(agent_name="quickstart") as recording,
|
|
148
|
+
al.agent(name="quickstart", version="0"),
|
|
149
|
+
al.step(role=al.StepRole.EXECUTE),
|
|
150
|
+
):
|
|
151
|
+
response = client.chat.completions.create(
|
|
152
|
+
model="openai/gpt-4o-mini",
|
|
153
|
+
messages=[{"role": "user", "content": "Reply with the single word 'ok'."}],
|
|
154
|
+
max_tokens=16,
|
|
155
|
+
)
|
|
156
|
+
print("model said:", response.choices[0].message.content)
|
|
157
|
+
print("trace at: ", recording.directory)
|
|
158
|
+
|
|
159
|
+
# 2. Replay — no network, no key.
|
|
160
|
+
with al.replay(str(recording.directory)) as session:
|
|
161
|
+
replay = client.chat.completions.create(
|
|
162
|
+
model="openai/gpt-4o-mini",
|
|
163
|
+
messages=[{"role": "user", "content": "Reply with the single word 'ok'."}],
|
|
164
|
+
max_tokens=16,
|
|
165
|
+
)
|
|
166
|
+
print("replay said:", replay.choices[0].message.content)
|
|
167
|
+
print("cache hits: ", session.cache_hits)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
pip install 'agentic-lab[ui]' openai
|
|
172
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
173
|
+
python example/quickstart.py
|
|
174
|
+
agentlab serve --root ~/.agentlab/traces
|
|
175
|
+
# → http://127.0.0.1:7861/
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
The `with al.agent(...)` and `al.step(...)` envelopes give the
|
|
179
|
+
auto-emitted `LLMSpan` a typed parent (the V4 schema forbids an LLM span
|
|
180
|
+
under bare RUN). Production agents normally establish these once
|
|
181
|
+
near their entrypoints and don't repeat them per-call — see
|
|
182
|
+
[`example/workflows/`](example/workflows/) for that shape.
|
|
183
|
+
|
|
184
|
+
## Larger example agents
|
|
185
|
+
|
|
186
|
+
Three reference agents under [`example/`](example/) cover the
|
|
187
|
+
Anthropic [building-effective-agents](https://www.anthropic.com/research/building-effective-agents)
|
|
188
|
+
shapes:
|
|
189
|
+
|
|
190
|
+
| Folder | Shape | What it does |
|
|
191
|
+
|---|---|---|
|
|
192
|
+
| `workflows/` | Workflow (fixed code path) | Decompose → Wikipedia search → cite → LLM-as-judge → revise. |
|
|
193
|
+
| `autonomous/` | Autonomous (model picks each step) | LangGraph observe-plan-act loop that triages support tickets. |
|
|
194
|
+
| `hybrid/` | Workflow + autonomous sub-agent | Incident-response pipeline with autonomous investigation step. |
|
|
195
|
+
|
|
196
|
+
All three use OpenRouter via `langchain-openai`, real (or
|
|
197
|
+
realistic) tools, and produce traces directly into `example_traces/`
|
|
198
|
+
that `agentlab serve` can browse.
|
|
199
|
+
|
|
200
|
+
## Provider coverage
|
|
201
|
+
|
|
202
|
+
Inside an `agentlab.record()` block, AgentLab patches `httpx` transport
|
|
203
|
+
methods, so **every** SDK that routes through `httpx` (which is most
|
|
204
|
+
modern Python LLM SDKs) lands its raw exchange in `http.jsonl`. That
|
|
205
|
+
file is the source of truth for replay; the typed `LLMSpan` is a
|
|
206
|
+
best-effort view layered on top.
|
|
207
|
+
|
|
208
|
+
The built-in matchers turn recognised exchanges into typed `LLMSpan`s
|
|
209
|
+
out of the box:
|
|
210
|
+
|
|
211
|
+
| Provider | Endpoint(s) | Stream? |
|
|
212
|
+
|-----------------------------------|------------------------------------------------------|---------|
|
|
213
|
+
| OpenAI chat completions | `api.openai.com/v1/chat/completions` | yes |
|
|
214
|
+
| OpenAI Responses | `api.openai.com/v1/responses` | yes |
|
|
215
|
+
| OpenAI Embeddings | `api.openai.com/v1/embeddings` | n/a |
|
|
216
|
+
| Azure OpenAI chat completions | `*.openai.azure.com/openai/deployments/<dep>/chat/completions` | yes |
|
|
217
|
+
| Anthropic Messages | `api.anthropic.com/v1/messages` | yes |
|
|
218
|
+
| AWS Bedrock — Invoke | `bedrock-runtime.<region>.amazonaws.com/model/<id>/invoke[-with-response-stream]` | partial[^1] |
|
|
219
|
+
| AWS Bedrock — Converse | `bedrock-runtime.<region>.amazonaws.com/model/<id>/converse[-stream]` | partial[^1] |
|
|
220
|
+
| Google Gemini | `generativelanguage.googleapis.com/.../models/<m>:{generateContent,streamGenerateContent}` | yes |
|
|
221
|
+
| Vertex AI — Gemini | `<region>-aiplatform.googleapis.com/.../models/<m>:{generateContent,streamGenerateContent}` | yes |
|
|
222
|
+
| Vertex AI — Anthropic (Claude) | `<region>-aiplatform.googleapis.com/.../models/<m>:{rawPredict,streamRawPredict}` | yes |
|
|
223
|
+
| OpenRouter | `openrouter.ai/api/v1/chat/completions` | yes |
|
|
224
|
+
| Together AI | `api.together.{xyz,ai}/v1/chat/completions` | yes |
|
|
225
|
+
| Groq | `api.groq.com/openai/v1/chat/completions` | yes |
|
|
226
|
+
| Mistral | `api.mistral.ai/v1/chat/completions` | yes |
|
|
227
|
+
| Fireworks | `api.fireworks.ai/inference/v1/chat/completions` | yes |
|
|
228
|
+
| DeepInfra | `api.deepinfra.com/v1/openai/chat/completions` | yes |
|
|
229
|
+
| Perplexity | `api.perplexity.ai/chat/completions` | yes |
|
|
230
|
+
|
|
231
|
+
[^1]: Bedrock streaming uses AWS event-stream binary framing.
|
|
232
|
+
Buffered responses populate every LLMSpan field; streamed responses
|
|
233
|
+
record the request side and a `validation_errors` entry explaining
|
|
234
|
+
why the response side is empty. The raw bytes are still preserved
|
|
235
|
+
in `http.jsonl`.
|
|
236
|
+
|
|
237
|
+
### Adding a custom or self-hosted provider
|
|
238
|
+
|
|
239
|
+
OpenAI-compatible hosts (vLLM, Ollama, your private gateway) need one
|
|
240
|
+
call:
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
import agentlab as al
|
|
244
|
+
from agentlab.llm.matchers.openai import HostPathMatcher
|
|
245
|
+
|
|
246
|
+
al.register_llm_provider(HostPathMatcher(
|
|
247
|
+
name="my-vllm",
|
|
248
|
+
host_suffix="llm.internal.example.com",
|
|
249
|
+
path_prefix="/v1/chat/completions",
|
|
250
|
+
))
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
For wholly different body shapes, subclass `agentlab.llm.LLMProviderMatcher`.
|
|
254
|
+
|
|
255
|
+
### Pricing
|
|
256
|
+
|
|
257
|
+
The SDK is **token-only by default** — `LLMSpan.cost.usd` stays at
|
|
258
|
+
`0.0` and the span is annotated with `agentlab.llm.pricing.unknown=True`.
|
|
259
|
+
Provider list-prices change too often to bake into the SDK. Operators
|
|
260
|
+
who want USD computed on every span install their own table:
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
from agentlab.llm.pricing import PriceRow, StaticPriceTable, set_price_table
|
|
264
|
+
|
|
265
|
+
set_price_table(StaticPriceTable(rows=(
|
|
266
|
+
PriceRow("openai", "gpt-4o", 2.50, 10.00),
|
|
267
|
+
PriceRow("anthropic", "claude-3-5-sonnet*", 3.00, 15.00),
|
|
268
|
+
)))
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Strict mode for unrecognised exchanges
|
|
272
|
+
|
|
273
|
+
By default, exchanges that don't match any provider matcher log a
|
|
274
|
+
warning (one per `(trace, host)`) and the raw exchange remains in
|
|
275
|
+
`http.jsonl`. Power users can opt into stricter behaviour:
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
with al.record(strict_unknown_provider="raise"): # or "emit_op"
|
|
279
|
+
...
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
`"raise"` surfaces the gap as `UnknownLLMProviderError`; `"emit_op"`
|
|
283
|
+
records the call as a typed `OpSpan` so the trace tree is complete
|
|
284
|
+
even without a matcher.
|
|
285
|
+
|
|
286
|
+
## UI and examples
|
|
287
|
+
|
|
288
|
+
Run the backend UI server against bundled traces:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
uv run agentlab --root example_traces serve --port 7861
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
Optional frontend dev server with HMR:
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
cd frontend
|
|
298
|
+
npm install
|
|
299
|
+
npm run dev
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
The bundled runnable agents are seeded from `example/` and are available from
|
|
303
|
+
the Agents page when the server starts successfully.
|
|
304
|
+
|
|
305
|
+
## Production deployment
|
|
306
|
+
|
|
307
|
+
The OSS UI server can be hosted on a single EC2 box behind Caddy, with a
|
|
308
|
+
separate Next.js + Clerk marketing/auth site on Vercel that redirects
|
|
309
|
+
authenticated users to it. See [`deploy/README.md`](deploy/README.md)
|
|
310
|
+
for the end-to-end runbook.
|
|
311
|
+
|
|
312
|
+
## UI walkthrough
|
|
313
|
+
|
|
314
|
+
### Dashboard
|
|
315
|
+

|
|
316
|
+
|
|
317
|
+
### Traces list
|
|
318
|
+

|
|
319
|
+
|
|
320
|
+
### Trace detail
|
|
321
|
+

|
|
322
|
+
|
|
323
|
+
### Agents
|
|
324
|
+

|
|
325
|
+
|
|
326
|
+
### Settings
|
|
327
|
+

|
|
328
|
+
|
|
329
|
+
## Development
|
|
330
|
+
|
|
331
|
+
Run the local quality gate:
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
bash scripts/check.sh
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
Equivalent commands:
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
uv run ruff check .
|
|
341
|
+
uv run ruff format --check .
|
|
342
|
+
uv run mypy
|
|
343
|
+
uv run pytest tests/unit tests/integration -n auto --dist=worksteal
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
## Testing
|
|
347
|
+
|
|
348
|
+
Current test tiers:
|
|
349
|
+
|
|
350
|
+
- `tests/unit/`: hermetic unit tests (no real network).
|
|
351
|
+
- `tests/integration/`: in-process integration tests with mocked HTTP where needed.
|
|
352
|
+
|
|
353
|
+
For live-provider smoke runs, use the runnable examples in `example/` through
|
|
354
|
+
their CLIs or the UI Agents page.
|
|
355
|
+
|
|
356
|
+
## Project layout
|
|
357
|
+
|
|
358
|
+
```text
|
|
359
|
+
agent-lab/
|
|
360
|
+
├── src/agentlab/
|
|
361
|
+
│ ├── __init__.py # public API surface
|
|
362
|
+
│ ├── cli.py # `agentlab` console entry point
|
|
363
|
+
│ ├── config.py # typed settings
|
|
364
|
+
│ ├── recorder.py # public `record()` context manager
|
|
365
|
+
│ ├── _defaults.toml # bundled non-secret defaults
|
|
366
|
+
│ ├── _proto/ # generated protobuf bindings (private)
|
|
367
|
+
│ ├── bridges/ # export bridges (e.g. OTel GenAI)
|
|
368
|
+
│ ├── core/ # recording primitives
|
|
369
|
+
│ ├── io/ # trace IO + HTTP capture
|
|
370
|
+
│ ├── integrations/ # framework adapters
|
|
371
|
+
│ ├── llm/ # provider-agnostic LLM client
|
|
372
|
+
│ ├── replay/ # deterministic replay engine
|
|
373
|
+
│ ├── storage/ # JSONL + protobuf stores
|
|
374
|
+
│ ├── ui/ # Starlette UI server + DTO mapping
|
|
375
|
+
│ ├── pytest.py # pytest plugin
|
|
376
|
+
│ └── promote.py # replay-test scaffold generator
|
|
377
|
+
├── frontend/ # React SPA for the UI server
|
|
378
|
+
├── example/ # bundled runnable agent seeds
|
|
379
|
+
├── proto/agentlab/v1/trace.proto
|
|
380
|
+
├── scripts/ # check, proto regen, UI screenshot helpers
|
|
381
|
+
├── tests/{unit,integration}/
|
|
382
|
+
└── uv.lock
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
## License
|
|
386
|
+
|
|
387
|
+
Apache 2.0 — see [`LICENSE`](LICENSE).
|