agentic-lab 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. agentic_lab-0.1.0/PKG-INFO +387 -0
  2. agentic_lab-0.1.0/README.md +332 -0
  3. agentic_lab-0.1.0/pyproject.toml +222 -0
  4. agentic_lab-0.1.0/src/agentlab/__init__.py +269 -0
  5. agentic_lab-0.1.0/src/agentlab/_defaults.toml +40 -0
  6. agentic_lab-0.1.0/src/agentlab/_proto/__init__.py +23 -0
  7. agentic_lab-0.1.0/src/agentlab/_proto/trace_pb2.py +132 -0
  8. agentic_lab-0.1.0/src/agentlab/_proto/trace_pb2.pyi +835 -0
  9. agentic_lab-0.1.0/src/agentlab/bridges/__init__.py +10 -0
  10. agentic_lab-0.1.0/src/agentlab/bridges/otel_genai.py +351 -0
  11. agentic_lab-0.1.0/src/agentlab/cli.py +636 -0
  12. agentic_lab-0.1.0/src/agentlab/config.py +280 -0
  13. agentic_lab-0.1.0/src/agentlab/core/__init__.py +247 -0
  14. agentic_lab-0.1.0/src/agentlab/core/_clock.py +41 -0
  15. agentic_lab-0.1.0/src/agentlab/core/_fmt.py +64 -0
  16. agentic_lab-0.1.0/src/agentlab/core/_replay_seam.py +125 -0
  17. agentic_lab-0.1.0/src/agentlab/core/agent.py +179 -0
  18. agentic_lab-0.1.0/src/agentlab/core/attributes.py +114 -0
  19. agentic_lab-0.1.0/src/agentlab/core/budget.py +31 -0
  20. agentic_lab-0.1.0/src/agentlab/core/context.py +672 -0
  21. agentic_lab-0.1.0/src/agentlab/core/continuation.py +160 -0
  22. agentic_lab-0.1.0/src/agentlab/core/cost.py +95 -0
  23. agentic_lab-0.1.0/src/agentlab/core/determinism.py +1144 -0
  24. agentic_lab-0.1.0/src/agentlab/core/env.py +392 -0
  25. agentic_lab-0.1.0/src/agentlab/core/eval.py +180 -0
  26. agentic_lab-0.1.0/src/agentlab/core/guardrail.py +203 -0
  27. agentic_lab-0.1.0/src/agentlab/core/handles.py +495 -0
  28. agentic_lab-0.1.0/src/agentlab/core/handoff.py +347 -0
  29. agentic_lab-0.1.0/src/agentlab/core/ids.py +141 -0
  30. agentic_lab-0.1.0/src/agentlab/core/link.py +152 -0
  31. agentic_lab-0.1.0/src/agentlab/core/llm.py +530 -0
  32. agentic_lab-0.1.0/src/agentlab/core/memory.py +138 -0
  33. agentic_lab-0.1.0/src/agentlab/core/message.py +205 -0
  34. agentic_lab-0.1.0/src/agentlab/core/op.py +207 -0
  35. agentic_lab-0.1.0/src/agentlab/core/op_names.py +33 -0
  36. agentic_lab-0.1.0/src/agentlab/core/retrieval.py +139 -0
  37. agentic_lab-0.1.0/src/agentlab/core/run.py +277 -0
  38. agentic_lab-0.1.0/src/agentlab/core/span.py +894 -0
  39. agentic_lab-0.1.0/src/agentlab/core/span_event.py +305 -0
  40. agentic_lab-0.1.0/src/agentlab/core/status.py +61 -0
  41. agentic_lab-0.1.0/src/agentlab/core/step.py +689 -0
  42. agentic_lab-0.1.0/src/agentlab/core/tool.py +285 -0
  43. agentic_lab-0.1.0/src/agentlab/core/types.py +22 -0
  44. agentic_lab-0.1.0/src/agentlab/errors.py +760 -0
  45. agentic_lab-0.1.0/src/agentlab/integrations/__init__.py +6 -0
  46. agentic_lab-0.1.0/src/agentlab/integrations/langchain/__init__.py +15 -0
  47. agentic_lab-0.1.0/src/agentlab/integrations/langchain/_callbacks.py +345 -0
  48. agentic_lab-0.1.0/src/agentlab/integrations/langchain/_helpers.py +148 -0
  49. agentic_lab-0.1.0/src/agentlab/io/__init__.py +31 -0
  50. agentic_lab-0.1.0/src/agentlab/io/format.py +293 -0
  51. agentic_lab-0.1.0/src/agentlab/io/http_capture.py +1064 -0
  52. agentic_lab-0.1.0/src/agentlab/io/http_format.py +449 -0
  53. agentic_lab-0.1.0/src/agentlab/io/otel_attributes.py +463 -0
  54. agentic_lab-0.1.0/src/agentlab/io/proto/__init__.py +26 -0
  55. agentic_lab-0.1.0/src/agentlab/io/proto/codec.py +1321 -0
  56. agentic_lab-0.1.0/src/agentlab/io/proto/framing.py +156 -0
  57. agentic_lab-0.1.0/src/agentlab/io/proto/reader.py +188 -0
  58. agentic_lab-0.1.0/src/agentlab/io/proto/writer.py +299 -0
  59. agentic_lab-0.1.0/src/agentlab/io/reader.py +237 -0
  60. agentic_lab-0.1.0/src/agentlab/io/redaction.py +376 -0
  61. agentic_lab-0.1.0/src/agentlab/io/writer.py +408 -0
  62. agentic_lab-0.1.0/src/agentlab/llm/__init__.py +106 -0
  63. agentic_lab-0.1.0/src/agentlab/llm/_autoemit.py +540 -0
  64. agentic_lab-0.1.0/src/agentlab/llm/base.py +355 -0
  65. agentic_lab-0.1.0/src/agentlab/llm/errors.py +60 -0
  66. agentic_lab-0.1.0/src/agentlab/llm/factory.py +79 -0
  67. agentic_lab-0.1.0/src/agentlab/llm/matchers/__init__.py +120 -0
  68. agentic_lab-0.1.0/src/agentlab/llm/matchers/_common.py +435 -0
  69. agentic_lab-0.1.0/src/agentlab/llm/matchers/anthropic.py +517 -0
  70. agentic_lab-0.1.0/src/agentlab/llm/matchers/bedrock.py +511 -0
  71. agentic_lab-0.1.0/src/agentlab/llm/matchers/google.py +543 -0
  72. agentic_lab-0.1.0/src/agentlab/llm/matchers/openai.py +1200 -0
  73. agentic_lab-0.1.0/src/agentlab/llm/pricing.py +267 -0
  74. agentic_lab-0.1.0/src/agentlab/llm/providers/__init__.py +5 -0
  75. agentic_lab-0.1.0/src/agentlab/llm/providers/openrouter.py +442 -0
  76. agentic_lab-0.1.0/src/agentlab/llm/types.py +484 -0
  77. agentic_lab-0.1.0/src/agentlab/promote.py +291 -0
  78. agentic_lab-0.1.0/src/agentlab/py.typed +0 -0
  79. agentic_lab-0.1.0/src/agentlab/pytest.py +280 -0
  80. agentic_lab-0.1.0/src/agentlab/recorder.py +1027 -0
  81. agentic_lab-0.1.0/src/agentlab/replay/__init__.py +93 -0
  82. agentic_lab-0.1.0/src/agentlab/replay/api.py +365 -0
  83. agentic_lab-0.1.0/src/agentlab/replay/cache.py +350 -0
  84. agentic_lab-0.1.0/src/agentlab/replay/context.py +26 -0
  85. agentic_lab-0.1.0/src/agentlab/replay/diff.py +313 -0
  86. agentic_lab-0.1.0/src/agentlab/replay/drift.py +92 -0
  87. agentic_lab-0.1.0/src/agentlab/replay/session.py +294 -0
  88. agentic_lab-0.1.0/src/agentlab/storage/__init__.py +39 -0
  89. agentic_lab-0.1.0/src/agentlab/storage/base.py +381 -0
  90. agentic_lab-0.1.0/src/agentlab/storage/factory.py +104 -0
  91. agentic_lab-0.1.0/src/agentlab/storage/jsonl_store.py +576 -0
  92. agentic_lab-0.1.0/src/agentlab/storage/names.py +602 -0
  93. agentic_lab-0.1.0/src/agentlab/storage/proto_store.py +444 -0
  94. agentic_lab-0.1.0/src/agentlab/ui/README.md +86 -0
  95. agentic_lab-0.1.0/src/agentlab/ui/__init__.py +16 -0
  96. agentic_lab-0.1.0/src/agentlab/ui/_agents.py +412 -0
  97. agentic_lab-0.1.0/src/agentlab/ui/_assets/__init__.py +10 -0
  98. agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/assets/index-DGnShqkT.js +211 -0
  99. agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/assets/index-DqoRSI-a.css +1 -0
  100. agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/favicon.svg +11 -0
  101. agentic_lab-0.1.0/src/agentlab/ui/_assets/dist/index.html +29 -0
  102. agentic_lab-0.1.0/src/agentlab/ui/_dto.py +325 -0
  103. agentic_lab-0.1.0/src/agentlab/ui/_errors.py +51 -0
  104. agentic_lab-0.1.0/src/agentlab/ui/_mapping.py +350 -0
  105. agentic_lab-0.1.0/src/agentlab/ui/_runtime.py +75 -0
  106. agentic_lab-0.1.0/src/agentlab/ui/_server/__init__.py +9 -0
  107. agentic_lab-0.1.0/src/agentlab/ui/_server/agents.py +446 -0
  108. agentic_lab-0.1.0/src/agentlab/ui/_server/agents_overview.py +242 -0
  109. agentic_lab-0.1.0/src/agentlab/ui/_server/app.py +672 -0
  110. agentic_lab-0.1.0/src/agentlab/ui/_server/dashboard.py +316 -0
  111. agentic_lab-0.1.0/src/agentlab/ui/_server/replay_runner.py +466 -0
  112. agentic_lab-0.1.0/src/agentlab/ui/_server/validation.py +80 -0
  113. agentic_lab-0.1.0/src/agentlab/ui/_server/watcher.py +101 -0
@@ -0,0 +1,387 @@
1
+ Metadata-Version: 2.3
2
+ Name: agentic-lab
3
+ Version: 0.1.0
4
+ Summary: Universal record-and-replay for LLM agents.
5
+ Keywords: llm,agents,replay,tracing,evals
6
+ Author: Ambuj Agrawal, Garima Luthra
7
+ Author-email: Ambuj Agrawal <ambujagrawal741@gmail.com>, Garima Luthra <garimaluthra2198@gmail.com>
8
+ License: Apache-2.0
9
+ Classifier: Development Status :: 2 - Pre-Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Classifier: Typing :: Typed
14
+ Requires-Dist: httpx>=0.27,<1.0
15
+ Requires-Dist: pydantic>=2.7,<3.0
16
+ Requires-Dist: pydantic-settings>=2.4,<3.0
17
+ Requires-Dist: structlog>=24.1,<26.0
18
+ Requires-Dist: tenacity>=8.5,<10.0
19
+ Requires-Dist: python-dotenv>=1.0,<2.0
20
+ Requires-Dist: protobuf>=5.27,<7.0
21
+ Requires-Dist: pytest>=8.2,<9.0 ; extra == 'dev'
22
+ Requires-Dist: pytest-asyncio>=0.23,<1.0 ; extra == 'dev'
23
+ Requires-Dist: pytest-cov>=5.0,<7.0 ; extra == 'dev'
24
+ Requires-Dist: pytest-xdist[psutil]>=3.6,<4.0 ; extra == 'dev'
25
+ Requires-Dist: respx>=0.21,<1.0 ; extra == 'dev'
26
+ Requires-Dist: ruff>=0.6,<1.0 ; extra == 'dev'
27
+ Requires-Dist: mypy>=1.11,<2.0 ; extra == 'dev'
28
+ Requires-Dist: pre-commit>=3.7,<5.0 ; extra == 'dev'
29
+ Requires-Dist: openai>=1.40,<2.0 ; extra == 'dev'
30
+ Requires-Dist: starlette>=0.40,<1.0 ; extra == 'dev'
31
+ Requires-Dist: uvicorn>=0.30,<1.0 ; extra == 'dev'
32
+ Requires-Dist: anyio>=4.4,<5.0 ; extra == 'dev'
33
+ Requires-Dist: langchain-core>=0.3,<1.0 ; extra == 'dev'
34
+ Requires-Dist: playwright>=1.59,<2.0 ; extra == 'dev'
35
+ Requires-Dist: langchain-core>=0.3,<1.0 ; extra == 'langchain'
36
+ Requires-Dist: langchain-openai>=0.3.35 ; extra == 'langchain'
37
+ Requires-Dist: langgraph>=1.0.1 ; extra == 'langchain'
38
+ Requires-Dist: starlette>=0.40,<1.0 ; extra == 'ui'
39
+ Requires-Dist: uvicorn>=0.30,<1.0 ; extra == 'ui'
40
+ Requires-Dist: anyio>=4.4,<5.0 ; extra == 'ui'
41
+ Maintainer: Ambuj Agrawal, Garima Luthra
42
+ Maintainer-email: Ambuj Agrawal <ambujagrawal741@gmail.com>, Garima Luthra <garimaluthra2198@gmail.com>
43
+ Requires-Python: >=3.12
44
+ Project-URL: Homepage, https://github.com/ambuj-krishna-agrawal/agent-lab
45
+ Project-URL: Repository, https://github.com/ambuj-krishna-agrawal/agent-lab
46
+ Project-URL: Issues, https://github.com/ambuj-krishna-agrawal/agent-lab/issues
47
+ Project-URL: Documentation, https://github.com/ambuj-krishna-agrawal/agent-lab#readme
48
+ Project-URL: Error reference, https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/docs/errors.md
49
+ Project-URL: Examples, https://github.com/ambuj-krishna-agrawal/agent-lab/tree/main/example
50
+ Project-URL: Changelog, https://github.com/ambuj-krishna-agrawal/agent-lab/blob/main/CHANGELOG.md
51
+ Provides-Extra: dev
52
+ Provides-Extra: langchain
53
+ Provides-Extra: ui
54
+ Description-Content-Type: text/markdown
55
+
56
+ # AgentLab
57
+
58
+ > Universal record-and-replay for LLM agents.
59
+
60
+ **Status:** pre-alpha, APIs will change.
61
+
62
+ AgentLab captures model calls, tools, state transitions, and timing into a
63
+ trace you can replay without hitting the network. It is built around a
64
+ framework-agnostic core and an HTTP capture layer that works with any SDK
65
+ that routes requests through `httpx`.
66
+
67
+ ## Overhead
68
+
69
+ Per-LLM-call cost of running inside `agentlab.record()`:
70
+
71
+ | metric | baseline | recorded | overhead |
72
+ |----------------|----------|-----------|-----------|
73
+ | latency p50 | 13.5 ms | 14.7 ms | +1.16 ms |
74
+ | latency p99 | 14.4 ms | 15.9 ms | +1.52 ms |
75
+
76
+ Measured against an in-process loopback HTTP server with a 10 ms upstream
77
+ delay. This eliminates network jitter, so the delta isolates SDK overhead:
78
+ HTTP capture, span emit, JSONL write+fsync, matcher, and LLMSpan build.
79
+ Real LLM calls land in the 100 ms – 2000 ms range, so this works out to
80
+ about 1.5% wall-clock overhead for the fastest calls and under 0.1% for the slowest.
81
+
82
+ Reproduce with:
83
+
84
+ ```bash
85
+ uv run python scripts/bench_record_overhead.py --calls 200 --runs 5
86
+ ```
87
+
88
+ ## Installation
89
+
90
+ ```bash
91
+ pip install agentic-lab # minimal SDK
92
+ pip install 'agentic-lab[ui]' # + Starlette UI server
93
+ ```
94
+
95
+ The PyPI distribution is **`agentic-lab`**; the importable Python
96
+ module is **`agentlab`**:
97
+
98
+ ```python
99
+ import agentlab as al
100
+ ```
101
+
102
+ For local development, this repo is `uv`-managed:
103
+
104
+ ```bash
105
+ git clone https://github.com/ambuj-krishna-agrawal/agent-lab.git
106
+ cd agent-lab
107
+ uv sync --all-extras --frozen
108
+ ```
109
+
110
+ Use `--frozen` by default so your environment matches `uv.lock` and CI.
111
+
112
+ ## Documentation
113
+
114
+ * [Quickstart](#quickstart) — five minutes from install to a replayable trace.
115
+ * [Provider coverage](#provider-coverage) — every supported LLM provider + how to add custom ones.
116
+ * [Error reference](docs/errors.md) — every `AGL-…` code with a remediation sentence (auto-generated from `src/agentlab/errors.py`).
117
+ * [Changelog](CHANGELOG.md) — version history.
118
+ * [`AGENTS.md`](AGENTS.md) — invariants and quality gates contributors must respect.
119
+ * [`CONTRIBUTING.md`](CONTRIBUTING.md) — human-contributor process.
120
+
121
+ ## Configuration
122
+
123
+ - Secrets live in `.env` (git-ignored). Copy `.env.example` to `.env` and set the
124
+ provider keys you use.
125
+ - Non-secret defaults live in `src/agentlab/_defaults.toml` and can be
126
+ overridden by `AGENTLAB_*` environment variables.
127
+ - Full typed config lives in `src/agentlab/config.py`.
128
+
129
+ ## Quickstart
130
+
131
+ Five minutes from `pip install` to a trace you can replay without an
132
+ API key. The full runnable script lives at
133
+ [`example/quickstart.py`](example/quickstart.py); the inline version:
134
+
135
+ ```python
136
+ import os
137
+ import openai
138
+ import agentlab as al
139
+
140
+ client = openai.OpenAI(
141
+ api_key=os.environ["OPENROUTER_API_KEY"],
142
+ base_url="https://openrouter.ai/api/v1",
143
+ )
144
+
145
+ # 1. Record.
146
+ with (
147
+ al.record(agent_name="quickstart") as recording,
148
+ al.agent(name="quickstart", version="0"),
149
+ al.step(role=al.StepRole.EXECUTE),
150
+ ):
151
+ response = client.chat.completions.create(
152
+ model="openai/gpt-4o-mini",
153
+ messages=[{"role": "user", "content": "Reply with the single word 'ok'."}],
154
+ max_tokens=16,
155
+ )
156
+ print("model said:", response.choices[0].message.content)
157
+ print("trace at: ", recording.directory)
158
+
159
+ # 2. Replay — no network, no key.
160
+ with al.replay(str(recording.directory)) as session:
161
+ replay = client.chat.completions.create(
162
+ model="openai/gpt-4o-mini",
163
+ messages=[{"role": "user", "content": "Reply with the single word 'ok'."}],
164
+ max_tokens=16,
165
+ )
166
+ print("replay said:", replay.choices[0].message.content)
167
+ print("cache hits: ", session.cache_hits)
168
+ ```
169
+
170
+ ```bash
171
+ pip install 'agentic-lab[ui]' openai
172
+ export OPENROUTER_API_KEY=sk-or-...
173
+ python example/quickstart.py
174
+ agentlab serve --root ~/.agentlab/traces
175
+ # → http://127.0.0.1:7861/
176
+ ```
177
+
178
+ The `with al.agent(...)` and `al.step(...)` envelopes give the
179
+ auto-emitted `LLMSpan` a typed parent (the V4 schema forbids an `LLMSpan`
180
+ directly under a bare RUN). Production agents normally establish these once
181
+ near their entrypoints and don't repeat them per-call — see
182
+ [`example/workflows/`](example/workflows/) for that shape.
183
+
184
+ ## Larger example agents
185
+
186
+ Three reference agents under [`example/`](example/) cover the
187
+ Anthropic [building-effective-agents](https://www.anthropic.com/research/building-effective-agents)
188
+ shapes:
189
+
190
+ | Folder | Shape | What it does |
191
+ |---|---|---|
192
+ | `workflows/` | Workflow (fixed code path) | Decompose → Wikipedia search → cite → LLM-as-judge → revise. |
193
+ | `autonomous/` | Autonomous (model picks each step) | LangGraph observe-plan-act loop that triages support tickets. |
194
+ | `hybrid/` | Workflow + autonomous sub-agent | Incident-response pipeline with autonomous investigation step. |
195
+
196
+ All three call OpenRouter via `langchain-openai`, use real (or
197
+ realistic) tools, and write their traces directly into `example_traces/`,
198
+ which `agentlab serve` can browse.
199
+
200
+ ## Provider coverage
201
+
202
+ Inside an `agentlab.record()` block AgentLab patches `httpx` transport
203
+ methods, so **every** SDK that routes through `httpx` (which is most
204
+ modern Python LLM SDKs) lands its raw exchange in `http.jsonl`. That
205
+ file is the source of truth for replay; the typed `LLMSpan` is a
206
+ best-effort view layered on top.
207
+
208
+ The built-in matchers turn recognised exchanges into typed `LLMSpan`s
209
+ out of the box:
210
+
211
+ | Provider | Endpoint(s) | Stream? |
212
+ |-----------------------------------|------------------------------------------------------|---------|
213
+ | OpenAI chat completions | `api.openai.com/v1/chat/completions` | yes |
214
+ | OpenAI Responses | `api.openai.com/v1/responses` | yes |
215
+ | OpenAI Embeddings | `api.openai.com/v1/embeddings` | n/a |
216
+ | Azure OpenAI chat completions | `*.openai.azure.com/openai/deployments/<dep>/chat/completions` | yes |
217
+ | Anthropic Messages | `api.anthropic.com/v1/messages` | yes |
218
+ | AWS Bedrock — Invoke | `bedrock-runtime.<region>.amazonaws.com/model/<id>/invoke[-with-response-stream]` | partial[^1] |
219
+ | AWS Bedrock — Converse | `bedrock-runtime.<region>.amazonaws.com/model/<id>/converse[-stream]` | partial[^1] |
220
+ | Google Gemini | `generativelanguage.googleapis.com/.../models/<m>:[stream]generateContent` | yes |
221
+ | Vertex AI — Gemini | `<region>-aiplatform.googleapis.com/.../models/<m>:[stream]generateContent` | yes |
222
+ | Vertex AI — Anthropic (Claude) | `<region>-aiplatform.googleapis.com/.../models/<m>:[stream]rawPredict` | yes |
223
+ | OpenRouter | `openrouter.ai/api/v1/chat/completions` | yes |
224
+ | Together AI | `api.together.{xyz,ai}/v1/chat/completions` | yes |
225
+ | Groq | `api.groq.com/openai/v1/chat/completions` | yes |
226
+ | Mistral | `api.mistral.ai/v1/chat/completions` | yes |
227
+ | Fireworks | `api.fireworks.ai/inference/v1/chat/completions` | yes |
228
+ | DeepInfra | `api.deepinfra.com/v1/openai/chat/completions` | yes |
229
+ | Perplexity | `api.perplexity.ai/chat/completions` | yes |
230
+
231
+ [^1]: Bedrock streaming uses AWS event-stream binary framing.
232
+ Buffered responses populate every LLMSpan field; streamed responses
233
+ record the request side and a `validation_errors` entry explaining
234
+ why the response side is empty. The raw bytes are still preserved
235
+ in `http.jsonl`.
236
+
237
+ ### Adding a custom or self-hosted provider
238
+
239
+ OpenAI-compatible hosts (vLLM, Ollama, your private gateway) need a
240
+ single registration call:
241
+
242
+ ```python
243
+ import agentlab as al
244
+ from agentlab.llm.matchers.openai import HostPathMatcher
245
+
246
+ al.register_llm_provider(HostPathMatcher(
247
+ name="my-vllm",
248
+ host_suffix="llm.internal.example.com",
249
+ path_prefix="/v1/chat/completions",
250
+ ))
251
+ ```
252
+
253
+ For wholly different body shapes, subclass `agentlab.llm.LLMProviderMatcher`.
254
+
255
+ ### Pricing
256
+
257
+ The SDK is **token-only by default** — `LLMSpan.cost.usd` stays at
258
+ `0.0` and the span is annotated with `agentlab.llm.pricing.unknown=True`.
259
+ Provider list-prices change too often to bake into the SDK. Operators
260
+ who want USD computed on every span install their own table:
261
+
262
+ ```python
263
+ from agentlab.llm.pricing import PriceRow, StaticPriceTable, set_price_table
264
+
265
+ set_price_table(StaticPriceTable(rows=(
266
+ PriceRow("openai", "gpt-4o", 2.50, 10.00),
267
+ PriceRow("anthropic", "claude-3-5-sonnet*", 3.00, 15.00),
268
+ )))
269
+ ```
270
+
271
+ ### Strict mode for unrecognised exchanges
272
+
273
+ By default, an exchange that matches no provider matcher triggers a
274
+ warning (one per `(trace, host)` pair) while the raw exchange stays in
275
+ `http.jsonl`. Power users can opt into stricter behaviour:
276
+
277
+ ```python
278
+ with al.record(strict_unknown_provider="raise"): # or "emit_op"
279
+ ...
280
+ ```
281
+
282
+ `"raise"` surfaces the gap as `UnknownLLMProviderError`; `"emit_op"`
283
+ records the call as a typed `OpSpan` so the trace tree is complete
284
+ even without a matcher.
285
+
286
+ ## UI and examples
287
+
288
+ Run the backend UI server against bundled traces:
289
+
290
+ ```bash
291
+ uv run agentlab --root example_traces serve --port 7861
292
+ ```
293
+
294
+ Optional frontend dev server with HMR:
295
+
296
+ ```bash
297
+ cd frontend
298
+ npm install
299
+ npm run dev
300
+ ```
301
+
302
+ The bundled runnable agents are seeded from `example/` and are available from
303
+ the Agents page when the server starts successfully.
304
+
305
+ ## Production deployment
306
+
307
+ The OSS UI server can be hosted on a single EC2 box behind Caddy, with a
308
+ separate Next.js + Clerk marketing/auth site on Vercel that redirects
309
+ authenticated users to it. See [`deploy/README.md`](deploy/README.md)
310
+ for the end-to-end runbook.
311
+
312
+ ## UI walkthrough
313
+
314
+ ### Dashboard
315
+ ![Dashboard](docs/assets/dashboard.png)
316
+
317
+ ### Traces list
318
+ ![Traces list](docs/assets/traces-list.png)
319
+
320
+ ### Trace detail
321
+ ![Trace detail](docs/assets/trace-detail.png)
322
+
323
+ ### Agents
324
+ ![Agents](docs/assets/agents.png)
325
+
326
+ ### Settings
327
+ ![Settings](docs/assets/settings.png)
328
+
329
+ ## Development
330
+
331
+ Run the local quality gate:
332
+
333
+ ```bash
334
+ bash scripts/check.sh
335
+ ```
336
+
337
+ Equivalent commands:
338
+
339
+ ```bash
340
+ uv run ruff check .
341
+ uv run ruff format --check .
342
+ uv run mypy
343
+ uv run pytest tests/unit tests/integration -n auto --dist=worksteal
344
+ ```
345
+
346
+ ## Testing
347
+
348
+ Current test tiers:
349
+
350
+ - `tests/unit/`: hermetic unit tests (no real network).
351
+ - `tests/integration/`: in-process integration tests with mocked HTTP where needed.
352
+
353
+ For live-provider smoke runs, use the runnable examples in `example/` through
354
+ their CLIs or the UI Agents page.
355
+
356
+ ## Project layout
357
+
358
+ ```text
359
+ agentlab/
360
+ ├── src/agentlab/
361
+ │ ├── __init__.py # public API surface
362
+ │ ├── cli.py # `agentlab` console entry point
363
+ │ ├── config.py # typed settings
364
+ │ ├── recorder.py # public `record()` context manager
365
+ │ ├── _defaults.toml # bundled non-secret defaults
366
+ │ ├── _proto/ # generated protobuf bindings (private)
367
+ │ ├── bridges/ # export bridges (e.g. OTel GenAI)
368
+ │ ├── core/ # recording primitives
369
+ │ ├── io/ # trace IO + HTTP capture
370
+ │ ├── integrations/ # framework adapters
371
+ │ ├── llm/ # provider-agnostic LLM client
372
+ │ ├── replay/ # deterministic replay engine
373
+ │ ├── storage/ # JSONL + protobuf stores
374
+ │ ├── ui/ # Starlette UI server + DTO mapping
375
+ │ ├── pytest.py # pytest plugin
376
+ │ └── promote.py # replay-test scaffold generator
377
+ ├── frontend/ # React SPA for the UI server
378
+ ├── example/ # bundled runnable agent seeds
379
+ ├── proto/agentlab/v1/trace.proto
380
+ ├── scripts/ # check, proto regen, UI screenshot helpers
381
+ ├── tests/{unit,integration}/
382
+ └── uv.lock
383
+ ```
384
+
385
+ ## License
386
+
387
+ Apache 2.0 — see [`LICENSE`](LICENSE).