agenttape 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenttape-0.1.0/.gitignore +36 -0
- agenttape-0.1.0/CHANGELOG.md +52 -0
- agenttape-0.1.0/LICENSE +21 -0
- agenttape-0.1.0/PKG-INFO +208 -0
- agenttape-0.1.0/README.md +148 -0
- agenttape-0.1.0/pyproject.toml +163 -0
- agenttape-0.1.0/src/agenttape/__init__.py +74 -0
- agenttape-0.1.0/src/agenttape/__main__.py +8 -0
- agenttape-0.1.0/src/agenttape/_box.py +59 -0
- agenttape-0.1.0/src/agenttape/adapters/__init__.py +68 -0
- agenttape-0.1.0/src/agenttape/adapters/base.py +76 -0
- agenttape-0.1.0/src/agenttape/adapters/http.py +269 -0
- agenttape-0.1.0/src/agenttape/adapters/langgraph.py +108 -0
- agenttape-0.1.0/src/agenttape/adapters/openai.py +274 -0
- agenttape-0.1.0/src/agenttape/assets.py +118 -0
- agenttape-0.1.0/src/agenttape/boundaries.py +131 -0
- agenttape-0.1.0/src/agenttape/callbacks.py +147 -0
- agenttape-0.1.0/src/agenttape/canonical.py +116 -0
- agenttape-0.1.0/src/agenttape/cassette.py +121 -0
- agenttape-0.1.0/src/agenttape/cli.py +322 -0
- agenttape-0.1.0/src/agenttape/config.py +232 -0
- agenttape-0.1.0/src/agenttape/diff.py +323 -0
- agenttape-0.1.0/src/agenttape/engine.py +595 -0
- agenttape-0.1.0/src/agenttape/errors.py +153 -0
- agenttape-0.1.0/src/agenttape/events.py +54 -0
- agenttape-0.1.0/src/agenttape/export.py +110 -0
- agenttape-0.1.0/src/agenttape/freeze.py +410 -0
- agenttape-0.1.0/src/agenttape/matchers.py +137 -0
- agenttape-0.1.0/src/agenttape/metrics.py +98 -0
- agenttape-0.1.0/src/agenttape/py.typed +0 -0
- agenttape-0.1.0/src/agenttape/pytest_plugin.py +192 -0
- agenttape-0.1.0/src/agenttape/recorder.py +283 -0
- agenttape-0.1.0/src/agenttape/redaction.py +148 -0
- agenttape-0.1.0/src/agenttape/schema.py +141 -0
- agenttape-0.1.0/src/agenttape/timeline.py +116 -0
- agenttape-0.1.0/src/agenttape/validate.py +123 -0
- agenttape-0.1.0/src/agenttape/viewer.py +138 -0
- agenttape-0.1.0/src/agenttape/yaml_io.py +656 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.eggs/
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# Test / coverage
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
.coverage.*
|
|
20
|
+
htmlcov/
|
|
21
|
+
coverage.xml
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
.ruff_cache/
|
|
24
|
+
|
|
25
|
+
# Docs
|
|
26
|
+
site/
|
|
27
|
+
|
|
28
|
+
# OS / editor
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
33
|
+
|
|
34
|
+
# AgentTape scratch
|
|
35
|
+
*.derived.yaml
|
|
36
|
+
.agenttape_tmp/
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres
|
|
5
|
+
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Bumped optional/dev/docs dependency floors to current stable major lines while
|
|
12
|
+
preserving Python 3.10–3.13 support: `openai>=1.66` (first release that provides `responses.create`),
|
|
13
|
+
`langgraph>=1.0`, `langchain-core>=0.3`, `llama-index-core>=0.12`, `crewai>=1.0`,
|
|
14
|
+
`pyautogen>=0.9`, `mcp>=1.9`, `httpx>=0.27`, `requests>=2.32.4` (security),
|
|
15
|
+
`numpy>=1.26`, `opentelemetry-sdk>=1.30`, `pytest>=8.0`, `pytest-cov>=5.0`,
|
|
16
|
+
`ruff>=0.6`, `mypy>=1.11`, `types-PyYAML>=6.0.12`, `mkdocs-material>=9.5`,
|
|
17
|
+
`mkdocstrings[python]>=0.26`, `PyYAML>=6.0.1`.
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- Python 3.14 to the CI test matrix and the PyPI classifiers.
|
|
22
|
+
|
|
23
|
+
## [0.1.0] - 2026-06-16
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- Core interception engine: record / replay of LLM, tool, retrieval, memory and
|
|
28
|
+
raw HTTP boundaries into ordered cassettes.
|
|
29
|
+
- Cassette format (YAML default, JSON supported) with assets sidecar for large
|
|
30
|
+
payloads referenced by content hash. Pure-stdlib YAML reader/writer with
|
|
31
|
+
optional PyYAML acceleration.
|
|
32
|
+
- Cassette modes: `none`, `once`, `new_episodes`, `all`, `record`.
|
|
33
|
+
- Request canonicalization + sha256 match keys; pluggable matchers (`exact`,
|
|
34
|
+
`ignore_volatile`, `ordered`, `custom`).
|
|
35
|
+
- Mixed / partial replay (`live` / `frozen` sets) with side-effect guardrail and
|
|
36
|
+
record-back into a derived cassette.
|
|
37
|
+
- Determinism freeze layer: clock, RNG, UUID freeze; env snapshot + drift warning.
|
|
38
|
+
- Record-time redaction of secrets/PII (denylist + regex rules + header redaction).
|
|
39
|
+
- Public API: `record` / `replay` decorators, `use_cassette` context manager, and
|
|
40
|
+
an `AgentTape` callback/hook object.
|
|
41
|
+
- OpenAI adapter (chat + responses + tool calling) and an always-on
|
|
42
|
+
`httpx` / `requests` fallback. LangGraph adapter and a documented extension
|
|
43
|
+
interface for the rest.
|
|
44
|
+
- CLI: `init`, `record`, `replay`, `inspect`, `timeline`, `diff`, `redact`,
|
|
45
|
+
`validate`, `export` (json/otel), `view`, `rm`.
|
|
46
|
+
- Diff engine: run / prompt / state-memory / output diffs (CLI + importable).
|
|
47
|
+
- `pytest-agenttape` plugin: marker, fixture, offline-by-default mode, record
|
|
48
|
+
flag, mode flag, mismatch diffs, snapshot assertions.
|
|
49
|
+
- Self-contained static HTML viewer (single + two-cassette diff), no server.
|
|
50
|
+
|
|
51
|
+
[Unreleased]: https://github.com/MITHRAN-BALACHANDER/AgentTape/compare/v0.1.0...HEAD
|
|
52
|
+
[0.1.0]: https://github.com/MITHRAN-BALACHANDER/AgentTape/releases/tag/v0.1.0
|
agenttape-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AgentTape Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
agenttape-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agenttape
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Deterministic record/replay of LLM and tool calls for AI agents, captured into human-readable cassettes.
|
|
5
|
+
Project-URL: Homepage, https://github.com/MITHRAN-BALACHANDER/AgentTape
|
|
6
|
+
Project-URL: Documentation, https://MITHRAN-BALACHANDER.github.io/AgentTape/
|
|
7
|
+
Project-URL: Repository, https://github.com/MITHRAN-BALACHANDER/AgentTape
|
|
8
|
+
Project-URL: Changelog, https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/CHANGELOG.md
|
|
9
|
+
Project-URL: Issues, https://github.com/MITHRAN-BALACHANDER/AgentTape/issues
|
|
10
|
+
Author: AgentTape Contributors
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: agents,ai,cassette,determinism,langgraph,llm,openai,record,replay,testing
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Topic :: Software Development :: Testing
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Provides-Extra: autogen
|
|
27
|
+
Requires-Dist: pyautogen>=0.9; extra == 'autogen'
|
|
28
|
+
Provides-Extra: crewai
|
|
29
|
+
Requires-Dist: crewai>=1.0; extra == 'crewai'
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: mypy>=1.11; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: pyyaml>=6.0.1; extra == 'dev'
|
|
35
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
36
|
+
Requires-Dist: types-pyyaml>=6.0.12; extra == 'dev'
|
|
37
|
+
Provides-Extra: docs
|
|
38
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
39
|
+
Requires-Dist: mkdocstrings[python]>=0.26; extra == 'docs'
|
|
40
|
+
Provides-Extra: http
|
|
41
|
+
Requires-Dist: httpx>=0.27; extra == 'http'
|
|
42
|
+
Requires-Dist: requests>=2.32.4; extra == 'http'
|
|
43
|
+
Provides-Extra: langchain
|
|
44
|
+
Requires-Dist: langchain-core>=0.3; extra == 'langchain'
|
|
45
|
+
Provides-Extra: langgraph
|
|
46
|
+
Requires-Dist: langgraph>=1.0; extra == 'langgraph'
|
|
47
|
+
Provides-Extra: llamaindex
|
|
48
|
+
Requires-Dist: llama-index-core>=0.12; extra == 'llamaindex'
|
|
49
|
+
Provides-Extra: mcp
|
|
50
|
+
Requires-Dist: mcp>=1.9; extra == 'mcp'
|
|
51
|
+
Provides-Extra: numpy
|
|
52
|
+
Requires-Dist: numpy>=1.26; extra == 'numpy'
|
|
53
|
+
Provides-Extra: openai
|
|
54
|
+
Requires-Dist: openai>=1.66; extra == 'openai'
|
|
55
|
+
Provides-Extra: otel
|
|
56
|
+
Requires-Dist: opentelemetry-sdk>=1.30; extra == 'otel'
|
|
57
|
+
Provides-Extra: yaml
|
|
58
|
+
Requires-Dist: pyyaml>=6.0.1; extra == 'yaml'
|
|
59
|
+
Description-Content-Type: text/markdown
|
|
60
|
+
|
|
61
|
+
# AgentTape
|
|
62
|
+
|
|
63
|
+
**Deterministic record and replay for AI agents.** Capture every external
|
|
64
|
+
interaction your agent makes — LLM calls *and* tool calls — into human-readable
|
|
65
|
+
"cassettes", then replay them deterministically so your tests run **offline, for
|
|
66
|
+
free, with zero side effects.**
|
|
67
|
+
|
|
68
|
+
[![CI](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml/badge.svg)](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml)
|
|
69
|
+
[![PyPI version](https://img.shields.io/pypi/v/agenttape.svg)](https://pypi.org/project/agenttape/)
|
|
70
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/agenttape.svg)](https://pypi.org/project/agenttape/)
|
|
71
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Why
|
|
76
|
+
|
|
77
|
+
Agent tests today are slow, flaky, expensive, and dangerous: they hit real LLM
|
|
78
|
+
APIs (cost + nondeterminism) and real tools (a tool that charges a card, writes a
|
|
79
|
+
row, or posts to Slack *actually does it*). AgentTape records those interactions
|
|
80
|
+
once and replays them deterministically afterwards. Your CI runs with **no network
|
|
81
|
+
access, no API keys, and no risk of a real side effect.**
|
|
82
|
+
|
|
83
|
+
* **Local-first** — no server, no telemetry, nothing leaves your machine.
|
|
84
|
+
* **Deterministic** — same inputs produce the same recorded outputs, byte-for-byte.
|
|
85
|
+
* **Zero side effects in replay** — a replayed tool *never* executes for real.
|
|
86
|
+
* **Almost-no-code** — add a decorator or a `with` block.
|
|
87
|
+
* **Git-friendly** — cassettes are YAML: diffable, reviewable, hand-editable.
|
|
88
|
+
* **Zero core dependencies** — the engine is pure standard library.
|
|
89
|
+
|
|
90
|
+
## Install
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install agenttape # core (stdlib only)
|
|
94
|
+
pip install "agenttape[openai]" # + OpenAI adapter
|
|
95
|
+
pip install "agenttape[yaml]" # + PyYAML for extra-robust YAML loading
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 30-second quickstart
|
|
99
|
+
|
|
100
|
+
Record once (real API call), then replay forever (no network):
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import agenttape
|
|
104
|
+
from openai import OpenAI
|
|
105
|
+
|
|
106
|
+
def run_agent():
|
|
107
|
+
    client = OpenAI()
|
|
108
|
+
    resp = client.chat.completions.create(
|
|
109
|
+
        model="gpt-4o-mini",
|
|
110
|
+
        messages=[{"role": "user", "content": "Say hi in 3 words"}],
|
|
111
|
+
    )
|
|
112
|
+
    return resp.choices[0].message.content
|
|
113
|
+
|
|
114
|
+
# --- record (hits the real API once, writes cassettes/hello.yaml) ---
|
|
115
|
+
with agenttape.use_cassette("hello", mode="record"):
|
|
116
|
+
    print(run_agent())
|
|
117
|
+
|
|
118
|
+
# --- replay (zero network calls, milliseconds, free, deterministic) ---
|
|
119
|
+
with agenttape.use_cassette("hello", mode="none"):
|
|
120
|
+
    print(run_agent()) # identical output, served from the cassette
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Or as a decorator:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
@agenttape.replay("hello") # mode="none": offline + deterministic
|
|
127
|
+
def test_agent():
|
|
128
|
+
    assert "hi" in run_agent().lower()
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Mixed / partial replay — "freeze all but one"
|
|
132
|
+
|
|
133
|
+
Change *one* thing — a prompt, a model, a single tool — and re-run while every
|
|
134
|
+
**other** expensive or dangerous boundary stays frozen from the recording:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
# Only the LLM runs for real; every tool is served from the cassette.
|
|
138
|
+
# A *derived* cassette is written — the original is never mutated.
|
|
139
|
+
with agenttape.use_cassette("checkout", live={"llm"}):
|
|
140
|
+
    result = run_agent() # new prompt → new LLM output, tools stay frozen
|
|
141
|
+
|
|
142
|
+
# See exactly what changed:
|
|
143
|
+
# agenttape diff cassettes/checkout.yaml cassettes/checkout.derived.yaml
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Any boundary that is **not** in `live` and has no recording raises
|
|
147
|
+
`UnmatchedInteractionError` — AgentTape will never silently run a real side effect.
|
|
148
|
+
|
|
149
|
+
## Hand-edit a response
|
|
150
|
+
|
|
151
|
+
Cassettes are just YAML. Open `cassettes/hello.yaml`, edit a recorded LLM
|
|
152
|
+
response, save, and re-run in `mode="none"` — your agent behaves differently with
|
|
153
|
+
**no API call at all.** Perfect for testing edge cases and failure paths.
|
|
154
|
+
|
|
155
|
+
## pytest plugin
|
|
156
|
+
|
|
157
|
+
Ships in the box. Tests run offline and deterministically by default:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
import pytest
|
|
161
|
+
|
|
162
|
+
@pytest.mark.agenttape("weather_agent")
|
|
163
|
+
def test_weather(agenttape_cassette):
|
|
164
|
+
    assert run_agent() == "It's sunny."
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
pytest # replay mode, offline, free (CI default)
|
|
169
|
+
pytest --agenttape-record # (re)record cassettes against the real API
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## CLI
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
agenttape init # scaffold agenttape.toml + cassettes/
|
|
176
|
+
agenttape inspect cassettes/hello # tokens, latency, cost, per-step I/O
|
|
177
|
+
agenttape timeline cassettes/hello # ASCII waterfall of the run
|
|
178
|
+
agenttape diff a.yaml b.yaml # prompt / model / tool / cost / output diff
|
|
179
|
+
agenttape validate cassettes/hello # schema + determinism + leaked-secret lint
|
|
180
|
+
agenttape view cassettes/hello # self-contained static HTML, no server
|
|
181
|
+
agenttape redact cassettes/hello # re-run secret/PII redaction
|
|
182
|
+
agenttape export cassettes/hello --format otel
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## What this is — and what it isn't
|
|
186
|
+
|
|
187
|
+
**It is:**
|
|
188
|
+
|
|
189
|
+
* A deterministic record/replay layer for agent I/O (LLM + tools + raw HTTP).
|
|
190
|
+
* A way to make agent tests fast, free, offline and side-effect-free.
|
|
191
|
+
* A diff/inspection toolkit for understanding and reviewing agent runs.
|
|
192
|
+
|
|
193
|
+
**It isn't:**
|
|
194
|
+
|
|
195
|
+
* It is **not** a way to "replay with a different prompt/model and get a
|
|
196
|
+
deterministic answer for free." Replay reconstructs *recorded* bytes. The moment
|
|
197
|
+
you change an input to a boundary marked `live`, that boundary **really executes**
|
|
198
|
+
(real API call, real cost) and produces a **new** recording. AgentTape is
|
|
199
|
+
explicit about this everywhere — pure replay and re-execution are different verbs
|
|
200
|
+
and we never blur them.
|
|
201
|
+
* It is **not** an evaluation framework, a prompt optimizer, or a tracing SaaS.
|
|
202
|
+
|
|
203
|
+
See the [determinism guide](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/determinism.md) and the
|
|
204
|
+
[cassette format spec](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/format.md) for details.
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# AgentTape
|
|
2
|
+
|
|
3
|
+
**Deterministic record and replay for AI agents.** Capture every external
|
|
4
|
+
interaction your agent makes — LLM calls *and* tool calls — into human-readable
|
|
5
|
+
"cassettes", then replay them deterministically so your tests run **offline, for
|
|
6
|
+
free, with zero side effects.**
|
|
7
|
+
|
|
8
|
+
[![CI](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml/badge.svg)](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml)
|
|
9
|
+
[![PyPI version](https://img.shields.io/pypi/v/agenttape.svg)](https://pypi.org/project/agenttape/)
|
|
10
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/agenttape.svg)](https://pypi.org/project/agenttape/)
|
|
11
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Why
|
|
16
|
+
|
|
17
|
+
Agent tests today are slow, flaky, expensive, and dangerous: they hit real LLM
|
|
18
|
+
APIs (cost + nondeterminism) and real tools (a tool that charges a card, writes a
|
|
19
|
+
row, or posts to Slack *actually does it*). AgentTape records those interactions
|
|
20
|
+
once and replays them deterministically afterwards. Your CI runs with **no network
|
|
21
|
+
access, no API keys, and no risk of a real side effect.**
|
|
22
|
+
|
|
23
|
+
* **Local-first** — no server, no telemetry, nothing leaves your machine.
|
|
24
|
+
* **Deterministic** — same inputs produce the same recorded outputs, byte-for-byte.
|
|
25
|
+
* **Zero side effects in replay** — a replayed tool *never* executes for real.
|
|
26
|
+
* **Almost-no-code** — add a decorator or a `with` block.
|
|
27
|
+
* **Git-friendly** — cassettes are YAML: diffable, reviewable, hand-editable.
|
|
28
|
+
* **Zero core dependencies** — the engine is pure standard library.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install agenttape # core (stdlib only)
|
|
34
|
+
pip install "agenttape[openai]" # + OpenAI adapter
|
|
35
|
+
pip install "agenttape[yaml]" # + PyYAML for extra-robust YAML loading
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## 30-second quickstart
|
|
39
|
+
|
|
40
|
+
Record once (real API call), then replay forever (no network):
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import agenttape
|
|
44
|
+
from openai import OpenAI
|
|
45
|
+
|
|
46
|
+
def run_agent():
|
|
47
|
+
    client = OpenAI()
|
|
48
|
+
    resp = client.chat.completions.create(
|
|
49
|
+
        model="gpt-4o-mini",
|
|
50
|
+
        messages=[{"role": "user", "content": "Say hi in 3 words"}],
|
|
51
|
+
    )
|
|
52
|
+
    return resp.choices[0].message.content
|
|
53
|
+
|
|
54
|
+
# --- record (hits the real API once, writes cassettes/hello.yaml) ---
|
|
55
|
+
with agenttape.use_cassette("hello", mode="record"):
|
|
56
|
+
    print(run_agent())
|
|
57
|
+
|
|
58
|
+
# --- replay (zero network calls, milliseconds, free, deterministic) ---
|
|
59
|
+
with agenttape.use_cassette("hello", mode="none"):
|
|
60
|
+
    print(run_agent()) # identical output, served from the cassette
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Or as a decorator:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
@agenttape.replay("hello") # mode="none": offline + deterministic
|
|
67
|
+
def test_agent():
|
|
68
|
+
    assert "hi" in run_agent().lower()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Mixed / partial replay — "freeze all but one"
|
|
72
|
+
|
|
73
|
+
Change *one* thing — a prompt, a model, a single tool — and re-run while every
|
|
74
|
+
**other** expensive or dangerous boundary stays frozen from the recording:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
# Only the LLM runs for real; every tool is served from the cassette.
|
|
78
|
+
# A *derived* cassette is written — the original is never mutated.
|
|
79
|
+
with agenttape.use_cassette("checkout", live={"llm"}):
|
|
80
|
+
    result = run_agent() # new prompt → new LLM output, tools stay frozen
|
|
81
|
+
|
|
82
|
+
# See exactly what changed:
|
|
83
|
+
# agenttape diff cassettes/checkout.yaml cassettes/checkout.derived.yaml
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Any boundary that is **not** in `live` and has no recording raises
|
|
87
|
+
`UnmatchedInteractionError` — AgentTape will never silently run a real side effect.
|
|
88
|
+
|
|
89
|
+
## Hand-edit a response
|
|
90
|
+
|
|
91
|
+
Cassettes are just YAML. Open `cassettes/hello.yaml`, edit a recorded LLM
|
|
92
|
+
response, save, and re-run in `mode="none"` — your agent behaves differently with
|
|
93
|
+
**no API call at all.** Perfect for testing edge cases and failure paths.
|
|
94
|
+
|
|
95
|
+
## pytest plugin
|
|
96
|
+
|
|
97
|
+
Ships in the box. Tests run offline and deterministically by default:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import pytest
|
|
101
|
+
|
|
102
|
+
@pytest.mark.agenttape("weather_agent")
|
|
103
|
+
def test_weather(agenttape_cassette):
|
|
104
|
+
    assert run_agent() == "It's sunny."
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
pytest # replay mode, offline, free (CI default)
|
|
109
|
+
pytest --agenttape-record # (re)record cassettes against the real API
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## CLI
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
agenttape init # scaffold agenttape.toml + cassettes/
|
|
116
|
+
agenttape inspect cassettes/hello # tokens, latency, cost, per-step I/O
|
|
117
|
+
agenttape timeline cassettes/hello # ASCII waterfall of the run
|
|
118
|
+
agenttape diff a.yaml b.yaml # prompt / model / tool / cost / output diff
|
|
119
|
+
agenttape validate cassettes/hello # schema + determinism + leaked-secret lint
|
|
120
|
+
agenttape view cassettes/hello # self-contained static HTML, no server
|
|
121
|
+
agenttape redact cassettes/hello # re-run secret/PII redaction
|
|
122
|
+
agenttape export cassettes/hello --format otel
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## What this is — and what it isn't
|
|
126
|
+
|
|
127
|
+
**It is:**
|
|
128
|
+
|
|
129
|
+
* A deterministic record/replay layer for agent I/O (LLM + tools + raw HTTP).
|
|
130
|
+
* A way to make agent tests fast, free, offline and side-effect-free.
|
|
131
|
+
* A diff/inspection toolkit for understanding and reviewing agent runs.
|
|
132
|
+
|
|
133
|
+
**It isn't:**
|
|
134
|
+
|
|
135
|
+
* It is **not** a way to "replay with a different prompt/model and get a
|
|
136
|
+
deterministic answer for free." Replay reconstructs *recorded* bytes. The moment
|
|
137
|
+
you change an input to a boundary marked `live`, that boundary **really executes**
|
|
138
|
+
(real API call, real cost) and produces a **new** recording. AgentTape is
|
|
139
|
+
explicit about this everywhere — pure replay and re-execution are different verbs
|
|
140
|
+
and we never blur them.
|
|
141
|
+
* It is **not** an evaluation framework, a prompt optimizer, or a tracing SaaS.
|
|
142
|
+
|
|
143
|
+
See the [determinism guide](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/determinism.md) and the
|
|
144
|
+
[cassette format spec](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/format.md) for details.
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agenttape"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Deterministic record/replay of LLM and tool calls for AI agents, captured into human-readable cassettes."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "AgentTape Contributors" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"ai",
|
|
15
|
+
"agents",
|
|
16
|
+
"llm",
|
|
17
|
+
"testing",
|
|
18
|
+
"record",
|
|
19
|
+
"replay",
|
|
20
|
+
"cassette",
|
|
21
|
+
"determinism",
|
|
22
|
+
"openai",
|
|
23
|
+
"langgraph",
|
|
24
|
+
]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 4 - Beta",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"License :: OSI Approved :: MIT License",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Programming Language :: Python :: 3.14",
|
|
35
|
+
"Topic :: Software Development :: Testing",
|
|
36
|
+
"Typing :: Typed",
|
|
37
|
+
]
|
|
38
|
+
# Zero required runtime dependencies for the core — stdlib only.
|
|
39
|
+
dependencies = []
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/MITHRAN-BALACHANDER/AgentTape"
|
|
43
|
+
Documentation = "https://MITHRAN-BALACHANDER.github.io/AgentTape/"
|
|
44
|
+
Repository = "https://github.com/MITHRAN-BALACHANDER/AgentTape"
|
|
45
|
+
Changelog = "https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/CHANGELOG.md"
|
|
46
|
+
Issues = "https://github.com/MITHRAN-BALACHANDER/AgentTape/issues"
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
# Robust YAML round-tripping when present; core falls back to a stdlib subset emitter/parser.
|
|
50
|
+
yaml = ["PyYAML>=6.0.1"]
|
|
51
|
+
# responses.create (patched by OpenAIAdapter) only exists from openai 1.66+.
|
|
52
|
+
openai = ["openai>=1.66"]
|
|
53
|
+
langgraph = ["langgraph>=1.0"]
|
|
54
|
+
langchain = ["langchain-core>=0.3"]
|
|
55
|
+
llamaindex = ["llama-index-core>=0.12"]
|
|
56
|
+
crewai = ["crewai>=1.0"]
|
|
57
|
+
autogen = ["pyautogen>=0.9"]
|
|
58
|
+
mcp = ["mcp>=1.9"]
|
|
59
|
+
http = ["httpx>=0.27", "requests>=2.32.4"]
|
|
60
|
+
# numpy 1.26 is the last 1.x line; numpy 2.3+ drops Python 3.10.
|
|
61
|
+
numpy = ["numpy>=1.26"]
|
|
62
|
+
otel = ["opentelemetry-sdk>=1.30"]
|
|
63
|
+
dev = [
|
|
64
|
+
"pytest>=8.0",
|
|
65
|
+
"pytest-cov>=5.0",
|
|
66
|
+
"ruff>=0.6",
|
|
67
|
+
"mypy>=1.11",
|
|
68
|
+
"PyYAML>=6.0.1",
|
|
69
|
+
"types-PyYAML>=6.0.12",
|
|
70
|
+
]
|
|
71
|
+
docs = [
|
|
72
|
+
"mkdocs-material>=9.5",
|
|
73
|
+
"mkdocstrings[python]>=0.26",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
[project.scripts]
|
|
77
|
+
agenttape = "agenttape.cli:main"
|
|
78
|
+
|
|
79
|
+
[project.entry-points.pytest11]
|
|
80
|
+
agenttape = "agenttape.pytest_plugin"
|
|
81
|
+
|
|
82
|
+
[tool.hatch.build.targets.wheel]
|
|
83
|
+
packages = ["src/agenttape"]
|
|
84
|
+
|
|
85
|
+
[tool.hatch.build.targets.sdist]
|
|
86
|
+
include = ["src/agenttape", "README.md", "LICENSE", "CHANGELOG.md"]
|
|
87
|
+
|
|
88
|
+
[tool.ruff]
|
|
89
|
+
line-length = 100
|
|
90
|
+
target-version = "py310"
|
|
91
|
+
src = ["src", "tests"]
|
|
92
|
+
|
|
93
|
+
[tool.ruff.lint]
|
|
94
|
+
select = ["E", "F", "I", "UP", "B", "C4", "SIM", "RUF"]
|
|
95
|
+
ignore = [
|
|
96
|
+
"E501", # line length handled by formatter
|
|
97
|
+
"B008", # function calls in argument defaults (used intentionally)
|
|
98
|
+
"SIM105", # contextlib.suppress not always clearer
|
|
99
|
+
]
|
|
100
|
+
|
|
101
|
+
[tool.ruff.lint.per-file-ignores]
|
|
102
|
+
"tests/*" = ["B011", "B017", "SIM117"]
|
|
103
|
+
|
|
104
|
+
[tool.mypy]
|
|
105
|
+
python_version = "3.10"
|
|
106
|
+
strict = true
|
|
107
|
+
files = ["src/agenttape"]
|
|
108
|
+
warn_unused_configs = true
|
|
109
|
+
disallow_untyped_defs = true
|
|
110
|
+
no_implicit_optional = true
|
|
111
|
+
warn_redundant_casts = true
|
|
112
|
+
warn_unused_ignores = true
|
|
113
|
+
# Optional adapter deps are typed when installed but absent in the strict core
|
|
114
|
+
# type-check environment. Skipping their imports makes them Any consistently, so the
|
|
115
|
+
# dynamic monkeypatching in adapters type-checks the same with or without them.
|
|
116
|
+
[[tool.mypy.overrides]]
|
|
117
|
+
module = [
|
|
118
|
+
"yaml",
|
|
119
|
+
"numpy",
|
|
120
|
+
"openai",
|
|
121
|
+
"openai.*",
|
|
122
|
+
"langgraph",
|
|
123
|
+
"langgraph.*",
|
|
124
|
+
"httpx",
|
|
125
|
+
"requests",
|
|
126
|
+
"requests.*",
|
|
127
|
+
"tomli",
|
|
128
|
+
"tomllib",
|
|
129
|
+
]
|
|
130
|
+
ignore_missing_imports = true
|
|
131
|
+
follow_imports = "skip"
|
|
132
|
+
|
|
133
|
+
# pytest is typed when installed (dev extra); only stub it out if entirely absent.
|
|
134
|
+
[[tool.mypy.overrides]]
|
|
135
|
+
module = ["pytest", "_pytest.*"]
|
|
136
|
+
ignore_missing_imports = true
|
|
137
|
+
|
|
138
|
+
# Adapters patch third-party (Any) classes; tolerate ignores that may be redundant
|
|
139
|
+
# depending on whether the optional dependency is installed.
|
|
140
|
+
[[tool.mypy.overrides]]
|
|
141
|
+
module = ["agenttape.adapters.*"]
|
|
142
|
+
warn_unused_ignores = false
|
|
143
|
+
|
|
144
|
+
[tool.pytest.ini_options]
|
|
145
|
+
testpaths = ["tests"]
|
|
146
|
+
addopts = "-q"
|
|
147
|
+
markers = [
|
|
148
|
+
"agenttape: bind a test to an AgentTape cassette (see pytest-agenttape plugin).",
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
[tool.coverage.run]
|
|
152
|
+
branch = true
|
|
153
|
+
source = ["agenttape"]
|
|
154
|
+
omit = ["*/adapters/_scaffold_*.py"]
|
|
155
|
+
|
|
156
|
+
[tool.coverage.report]
|
|
157
|
+
exclude_lines = [
|
|
158
|
+
"pragma: no cover",
|
|
159
|
+
"if TYPE_CHECKING:",
|
|
160
|
+
"raise NotImplementedError",
|
|
161
|
+
"@(abc\\.)?abstractmethod",
|
|
162
|
+
"\\.\\.\\.",
|
|
163
|
+
]
|