agenttape 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. agenttape-0.1.0/.gitignore +36 -0
  2. agenttape-0.1.0/CHANGELOG.md +52 -0
  3. agenttape-0.1.0/LICENSE +21 -0
  4. agenttape-0.1.0/PKG-INFO +208 -0
  5. agenttape-0.1.0/README.md +148 -0
  6. agenttape-0.1.0/pyproject.toml +163 -0
  7. agenttape-0.1.0/src/agenttape/__init__.py +74 -0
  8. agenttape-0.1.0/src/agenttape/__main__.py +8 -0
  9. agenttape-0.1.0/src/agenttape/_box.py +59 -0
  10. agenttape-0.1.0/src/agenttape/adapters/__init__.py +68 -0
  11. agenttape-0.1.0/src/agenttape/adapters/base.py +76 -0
  12. agenttape-0.1.0/src/agenttape/adapters/http.py +269 -0
  13. agenttape-0.1.0/src/agenttape/adapters/langgraph.py +108 -0
  14. agenttape-0.1.0/src/agenttape/adapters/openai.py +274 -0
  15. agenttape-0.1.0/src/agenttape/assets.py +118 -0
  16. agenttape-0.1.0/src/agenttape/boundaries.py +131 -0
  17. agenttape-0.1.0/src/agenttape/callbacks.py +147 -0
  18. agenttape-0.1.0/src/agenttape/canonical.py +116 -0
  19. agenttape-0.1.0/src/agenttape/cassette.py +121 -0
  20. agenttape-0.1.0/src/agenttape/cli.py +322 -0
  21. agenttape-0.1.0/src/agenttape/config.py +232 -0
  22. agenttape-0.1.0/src/agenttape/diff.py +323 -0
  23. agenttape-0.1.0/src/agenttape/engine.py +595 -0
  24. agenttape-0.1.0/src/agenttape/errors.py +153 -0
  25. agenttape-0.1.0/src/agenttape/events.py +54 -0
  26. agenttape-0.1.0/src/agenttape/export.py +110 -0
  27. agenttape-0.1.0/src/agenttape/freeze.py +410 -0
  28. agenttape-0.1.0/src/agenttape/matchers.py +137 -0
  29. agenttape-0.1.0/src/agenttape/metrics.py +98 -0
  30. agenttape-0.1.0/src/agenttape/py.typed +0 -0
  31. agenttape-0.1.0/src/agenttape/pytest_plugin.py +192 -0
  32. agenttape-0.1.0/src/agenttape/recorder.py +283 -0
  33. agenttape-0.1.0/src/agenttape/redaction.py +148 -0
  34. agenttape-0.1.0/src/agenttape/schema.py +141 -0
  35. agenttape-0.1.0/src/agenttape/timeline.py +116 -0
  36. agenttape-0.1.0/src/agenttape/validate.py +123 -0
  37. agenttape-0.1.0/src/agenttape/viewer.py +138 -0
  38. agenttape-0.1.0/src/agenttape/yaml_io.py +656 -0
@@ -0,0 +1,36 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ *.egg
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # Test / coverage
17
+ .pytest_cache/
18
+ .coverage
19
+ .coverage.*
20
+ htmlcov/
21
+ coverage.xml
22
+ .mypy_cache/
23
+ .ruff_cache/
24
+
25
+ # Docs
26
+ site/
27
+
28
+ # OS / editor
29
+ .DS_Store
30
+ Thumbs.db
31
+ .idea/
32
+ .vscode/
33
+
34
+ # AgentTape scratch
35
+ *.derived.yaml
36
+ .agenttape_tmp/
@@ -0,0 +1,52 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres
5
+ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ### Changed
10
+
11
+ - Bumped optional/dev/docs dependency floors to current stable major lines while
12
+ preserving Python 3.10–3.13 support: `openai>=1.66` (matches `responses.create`),
13
+ `langgraph>=1.0`, `langchain-core>=0.3`, `llama-index-core>=0.12`, `crewai>=1.0`,
14
+ `pyautogen>=0.9`, `mcp>=1.9`, `httpx>=0.27`, `requests>=2.32.4` (security),
15
+ `numpy>=1.26`, `opentelemetry-sdk>=1.30`, `pytest>=8.0`, `pytest-cov>=5.0`,
16
+ `ruff>=0.6`, `mypy>=1.11`, `types-PyYAML>=6.0.12`, `mkdocs-material>=9.5`,
17
+ `mkdocstrings[python]>=0.26`, `PyYAML>=6.0.1`.
18
+
19
+ ### Added
20
+
21
+ - Python 3.14 to the CI test matrix and the PyPI classifiers.
22
+
23
+ ## [0.1.0] - 2026-06-16
24
+
25
+ ### Added
26
+
27
+ - Core interception engine: record / replay of LLM, tool, retrieval, memory and
28
+ raw HTTP boundaries into ordered cassettes.
29
+ - Cassette format (YAML default, JSON supported) with assets sidecar for large
30
+ payloads referenced by content hash. Pure-stdlib YAML reader/writer with
31
+ optional PyYAML acceleration.
32
+ - Cassette modes: `none`, `once`, `new_episodes`, `all`, `record`.
33
+ - Request canonicalization + sha256 match keys; pluggable matchers (`exact`,
34
+ `ignore_volatile`, `ordered`, `custom`).
35
+ - Mixed / partial replay (`live` / `frozen` sets) with side-effect guardrail and
36
+ record-back into a derived cassette.
37
+ - Determinism freeze layer: clock, RNG, UUID freeze; env snapshot + drift warning.
38
+ - Record-time redaction of secrets/PII (denylist + regex rules + header redaction).
39
+ - Public API: `record` / `replay` decorators, `use_cassette` context manager, and
40
+ an `AgentTape` callback/hook object.
41
+ - OpenAI adapter (chat + responses + tool calling) and an always-on
42
+ `httpx` / `requests` fallback. LangGraph adapter and a documented extension
43
+ interface for the rest.
44
+ - CLI: `init`, `record`, `replay`, `inspect`, `timeline`, `diff`, `redact`,
45
+ `validate`, `export` (json/otel), `view`, `rm`.
46
+ - Diff engine: run / prompt / state-memory / output diffs (CLI + importable).
47
+ - Bundled pytest plugin (`pytest-agenttape`, shipped inside this package): marker,
48
+ fixture, offline-by-default mode, record flag, mode flag, mismatch diffs, snapshot assertions.
49
+ - Self-contained static HTML viewer (single + two-cassette diff), no server.
50
+
51
+ [Unreleased]: https://github.com/MITHRAN-BALACHANDER/AgentTape/compare/v0.1.0...HEAD
52
+ [0.1.0]: https://github.com/MITHRAN-BALACHANDER/AgentTape/releases/tag/v0.1.0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AgentTape Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: agenttape
3
+ Version: 0.1.0
4
+ Summary: Deterministic record/replay of LLM and tool calls for AI agents, captured into human-readable cassettes.
5
+ Project-URL: Homepage, https://github.com/MITHRAN-BALACHANDER/AgentTape
6
+ Project-URL: Documentation, https://MITHRAN-BALACHANDER.github.io/AgentTape/
7
+ Project-URL: Repository, https://github.com/MITHRAN-BALACHANDER/AgentTape
8
+ Project-URL: Changelog, https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/CHANGELOG.md
9
+ Project-URL: Issues, https://github.com/MITHRAN-BALACHANDER/AgentTape/issues
10
+ Author: AgentTape Contributors
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: agents,ai,cassette,determinism,langgraph,llm,openai,record,replay,testing
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Topic :: Software Development :: Testing
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.10
26
+ Provides-Extra: autogen
27
+ Requires-Dist: pyautogen>=0.9; extra == 'autogen'
28
+ Provides-Extra: crewai
29
+ Requires-Dist: crewai>=1.0; extra == 'crewai'
30
+ Provides-Extra: dev
31
+ Requires-Dist: mypy>=1.11; extra == 'dev'
32
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.0; extra == 'dev'
34
+ Requires-Dist: pyyaml>=6.0.1; extra == 'dev'
35
+ Requires-Dist: ruff>=0.6; extra == 'dev'
36
+ Requires-Dist: types-pyyaml>=6.0.12; extra == 'dev'
37
+ Provides-Extra: docs
38
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
39
+ Requires-Dist: mkdocstrings[python]>=0.26; extra == 'docs'
40
+ Provides-Extra: http
41
+ Requires-Dist: httpx>=0.27; extra == 'http'
42
+ Requires-Dist: requests>=2.32.4; extra == 'http'
43
+ Provides-Extra: langchain
44
+ Requires-Dist: langchain-core>=0.3; extra == 'langchain'
45
+ Provides-Extra: langgraph
46
+ Requires-Dist: langgraph>=1.0; extra == 'langgraph'
47
+ Provides-Extra: llamaindex
48
+ Requires-Dist: llama-index-core>=0.12; extra == 'llamaindex'
49
+ Provides-Extra: mcp
50
+ Requires-Dist: mcp>=1.9; extra == 'mcp'
51
+ Provides-Extra: numpy
52
+ Requires-Dist: numpy>=1.26; extra == 'numpy'
53
+ Provides-Extra: openai
54
+ Requires-Dist: openai>=1.66; extra == 'openai'
55
+ Provides-Extra: otel
56
+ Requires-Dist: opentelemetry-sdk>=1.30; extra == 'otel'
57
+ Provides-Extra: yaml
58
+ Requires-Dist: pyyaml>=6.0.1; extra == 'yaml'
59
+ Description-Content-Type: text/markdown
60
+
61
+ # AgentTape
62
+
63
+ **Deterministic record and replay for AI agents.** Capture every external
64
+ interaction your agent makes — LLM calls *and* tool calls — into human-readable
65
+ "cassettes", then replay them deterministically so your tests run **offline, for
66
+ free, with zero side effects.**
67
+
68
+ [![CI](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml/badge.svg)](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml)
69
+ [![PyPI](https://img.shields.io/pypi/v/agenttape.svg)](https://pypi.org/project/agenttape/)
70
+ [![Python](https://img.shields.io/pypi/pyversions/agenttape.svg)](https://pypi.org/project/agenttape/)
71
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
72
+
73
+ ---
74
+
75
+ ## Why
76
+
77
+ Agent tests today are slow, flaky, expensive, and dangerous: they hit real LLM
78
+ APIs (cost + nondeterminism) and real tools (a tool that charges a card, writes a
79
+ row, or posts to Slack *actually does it*). AgentTape records those interactions
80
+ once and replays them deterministically afterwards. Your CI runs with **no network
81
+ access, no API keys, and no risk of a real side effect.**
82
+
83
+ * **Local-first** — no server, no telemetry, nothing leaves your machine.
84
+ * **Deterministic** — same inputs produce the same recorded outputs, byte-for-byte.
85
+ * **Zero side effects in replay** — a replayed tool *never* executes for real.
86
+ * **Almost-no-code** — add a decorator or a `with` block.
87
+ * **Git-friendly** — cassettes are YAML: diffable, reviewable, hand-editable.
88
+ * **Zero core dependencies** — the engine is pure standard library.
89
+
90
+ ## Install
91
+
92
+ ```bash
93
+ pip install agenttape # core (stdlib only)
94
+ pip install "agenttape[openai]" # + OpenAI adapter
95
+ pip install "agenttape[yaml]" # + PyYAML for extra-robust YAML loading
96
+ ```
97
+
98
+ ## 30-second quickstart
99
+
100
+ Record once (real API call), then replay forever (no network):
101
+
102
+ ```python
103
+ import agenttape
104
+ from openai import OpenAI
105
+
106
+ def run_agent():
107
+ client = OpenAI()
108
+ resp = client.chat.completions.create(
109
+ model="gpt-4o-mini",
110
+ messages=[{"role": "user", "content": "Say hi in 3 words"}],
111
+ )
112
+ return resp.choices[0].message.content
113
+
114
+ # --- record (hits the real API once, writes cassettes/hello.yaml) ---
115
+ with agenttape.use_cassette("hello", mode="record"):
116
+ print(run_agent())
117
+
118
+ # --- replay (zero network calls, milliseconds, free, deterministic) ---
119
+ with agenttape.use_cassette("hello", mode="none"):
120
+ print(run_agent()) # identical output, served from the cassette
121
+ ```
122
+
123
+ Or as a decorator:
124
+
125
+ ```python
126
+ @agenttape.replay("hello") # mode="none": offline + deterministic
127
+ def test_agent():
128
+ assert "hi" in run_agent().lower()
129
+ ```
130
+
131
+ ## Mixed / partial replay — "freeze all but one"
132
+
133
+ Change *one* thing — a prompt, a model, a single tool — and re-run while every
134
+ **other** expensive or dangerous boundary stays frozen from the recording:
135
+
136
+ ```python
137
+ # Only the LLM runs for real; every tool is served from the cassette.
138
+ # A *derived* cassette is written — the original is never mutated.
139
+ with agenttape.use_cassette("checkout", live={"llm"}):
140
+ result = run_agent() # new prompt → new LLM output, tools stay frozen
141
+
142
+ # See exactly what changed:
143
+ # agenttape diff cassettes/checkout.yaml cassettes/checkout.derived.yaml
144
+ ```
145
+
146
+ Any boundary that is **not** in `live` and has no recording raises
147
+ `UnmatchedInteractionError` — AgentTape will never silently run a real side effect.
148
+
149
+ ## Hand-edit a response
150
+
151
+ Cassettes are just YAML. Open `cassettes/hello.yaml`, edit a recorded LLM
152
+ response, save, and re-run in `mode="none"` — your agent behaves differently with
153
+ **no API call at all.** Perfect for testing edge cases and failure paths.
154
+
155
+ ## pytest plugin
156
+
157
+ Ships in the box. Tests run offline and deterministically by default:
158
+
159
+ ```python
160
+ import pytest
161
+
162
+ @pytest.mark.agenttape("weather_agent")
163
+ def test_weather(agenttape_cassette):
164
+ assert run_agent() == "It's sunny."
165
+ ```
166
+
167
+ ```bash
168
+ pytest # replay mode, offline, free (CI default)
169
+ pytest --agenttape-record # (re)record cassettes against the real API
170
+ ```
171
+
172
+ ## CLI
173
+
174
+ ```bash
175
+ agenttape init # scaffold agenttape.toml + cassettes/
176
+ agenttape inspect cassettes/hello # tokens, latency, cost, per-step I/O
177
+ agenttape timeline cassettes/hello # ASCII waterfall of the run
178
+ agenttape diff a.yaml b.yaml # prompt / model / tool / cost / output diff
179
+ agenttape validate cassettes/hello # schema + determinism + leaked-secret lint
180
+ agenttape view cassettes/hello # self-contained static HTML, no server
181
+ agenttape redact cassettes/hello # re-run secret/PII redaction
182
+ agenttape export cassettes/hello --format otel
183
+ ```
184
+
185
+ ## What this is — and what it isn't
186
+
187
+ **It is:**
188
+
189
+ * A deterministic record/replay layer for agent I/O (LLM + tools + raw HTTP).
190
+ * A way to make agent tests fast, free, offline and side-effect-free.
191
+ * A diff/inspection toolkit for understanding and reviewing agent runs.
192
+
193
+ **It isn't:**
194
+
195
+ * It is **not** a way to "replay with a different prompt/model and get a
196
+ deterministic answer for free." Replay reconstructs *recorded* bytes. The moment
197
+ you change an input to a boundary marked `live`, that boundary **really executes**
198
+ (real API call, real cost) and produces a **new** recording. AgentTape is
199
+ explicit about this everywhere — pure replay vs. re-execution are different verbs
200
+ and we never blur them.
201
+ * It is **not** an evaluation framework, a prompt optimizer, or a tracing SaaS.
202
+
203
+ See the [determinism guide](docs/determinism.md) and the
204
+ [cassette format spec](docs/format.md) for details.
205
+
206
+ ## License
207
+
208
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,148 @@
1
+ # AgentTape
2
+
3
+ **Deterministic record and replay for AI agents.** Capture every external
4
+ interaction your agent makes — LLM calls *and* tool calls — into human-readable
5
+ "cassettes", then replay them deterministically so your tests run **offline, for
6
+ free, with zero side effects.**
7
+
8
+ [![CI](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml/badge.svg)](https://github.com/MITHRAN-BALACHANDER/AgentTape/actions/workflows/ci.yml)
9
+ [![PyPI](https://img.shields.io/pypi/v/agenttape.svg)](https://pypi.org/project/agenttape/)
10
+ [![Python](https://img.shields.io/pypi/pyversions/agenttape.svg)](https://pypi.org/project/agenttape/)
11
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/LICENSE)
12
+
13
+ ---
14
+
15
+ ## Why
16
+
17
+ Agent tests today are slow, flaky, expensive, and dangerous: they hit real LLM
18
+ APIs (cost + nondeterminism) and real tools (a tool that charges a card, writes a
19
+ row, or posts to Slack *actually does it*). AgentTape records those interactions
20
+ once and replays them deterministically afterwards. Your CI runs with **no network
21
+ access, no API keys, and no risk of a real side effect.**
22
+
23
+ * **Local-first** — no server, no telemetry, nothing leaves your machine.
24
+ * **Deterministic** — same inputs produce the same recorded outputs, byte-for-byte.
25
+ * **Zero side effects in replay** — a replayed tool *never* executes for real.
26
+ * **Almost-no-code** — add a decorator or a `with` block.
27
+ * **Git-friendly** — cassettes are YAML: diffable, reviewable, hand-editable.
28
+ * **Zero core dependencies** — the engine is pure standard library.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ pip install agenttape # core (stdlib only)
34
+ pip install "agenttape[openai]" # + OpenAI adapter
35
+ pip install "agenttape[yaml]" # + PyYAML for extra-robust YAML loading
36
+ ```
37
+
38
+ ## 30-second quickstart
39
+
40
+ Record once (real API call), then replay forever (no network):
41
+
42
+ ```python
43
+ import agenttape
44
+ from openai import OpenAI
45
+
46
+ def run_agent():
47
+ client = OpenAI()
48
+ resp = client.chat.completions.create(
49
+ model="gpt-4o-mini",
50
+ messages=[{"role": "user", "content": "Say hi in 3 words"}],
51
+ )
52
+ return resp.choices[0].message.content
53
+
54
+ # --- record (hits the real API once, writes cassettes/hello.yaml) ---
55
+ with agenttape.use_cassette("hello", mode="record"):
56
+ print(run_agent())
57
+
58
+ # --- replay (zero network calls, milliseconds, free, deterministic) ---
59
+ with agenttape.use_cassette("hello", mode="none"):
60
+ print(run_agent()) # identical output, served from the cassette
61
+ ```
62
+
63
+ Or as a decorator:
64
+
65
+ ```python
66
+ @agenttape.replay("hello") # mode="none": offline + deterministic
67
+ def test_agent():
68
+ assert "hi" in run_agent().lower()
69
+ ```
70
+
71
+ ## Mixed / partial replay — "freeze all but one"
72
+
73
+ Change *one* thing — a prompt, a model, a single tool — and re-run while every
74
+ **other** expensive or dangerous boundary stays frozen from the recording:
75
+
76
+ ```python
77
+ # Only the LLM runs for real; every tool is served from the cassette.
78
+ # A *derived* cassette is written — the original is never mutated.
79
+ with agenttape.use_cassette("checkout", live={"llm"}):
80
+ result = run_agent() # new prompt → new LLM output, tools stay frozen
81
+
82
+ # See exactly what changed:
83
+ # agenttape diff cassettes/checkout.yaml cassettes/checkout.derived.yaml
84
+ ```
85
+
86
+ Any boundary that is **not** in `live` and has no recording raises
87
+ `UnmatchedInteractionError` — AgentTape will never silently run a real side effect.
88
+
89
+ ## Hand-edit a response
90
+
91
+ Cassettes are just YAML. Open `cassettes/hello.yaml`, edit a recorded LLM
92
+ response, save, and re-run in `mode="none"` — your agent behaves differently with
93
+ **no API call at all.** Perfect for testing edge cases and failure paths.
94
+
95
+ ## pytest plugin
96
+
97
+ Ships in the box. Tests run offline and deterministically by default:
98
+
99
+ ```python
100
+ import pytest
101
+
102
+ @pytest.mark.agenttape("weather_agent")
103
+ def test_weather(agenttape_cassette):
104
+ assert run_agent() == "It's sunny."
105
+ ```
106
+
107
+ ```bash
108
+ pytest # replay mode, offline, free (CI default)
109
+ pytest --agenttape-record # (re)record cassettes against the real API
110
+ ```
111
+
112
+ ## CLI
113
+
114
+ ```bash
115
+ agenttape init # scaffold agenttape.toml + cassettes/
116
+ agenttape inspect cassettes/hello # tokens, latency, cost, per-step I/O
117
+ agenttape timeline cassettes/hello # ASCII waterfall of the run
118
+ agenttape diff a.yaml b.yaml # prompt / model / tool / cost / output diff
119
+ agenttape validate cassettes/hello # schema + determinism + leaked-secret lint
120
+ agenttape view cassettes/hello # self-contained static HTML, no server
121
+ agenttape redact cassettes/hello # re-run secret/PII redaction
122
+ agenttape export cassettes/hello --format otel
123
+ ```
124
+
125
+ ## What this is — and what it isn't
126
+
127
+ **It is:**
128
+
129
+ * A deterministic record/replay layer for agent I/O (LLM + tools + raw HTTP).
130
+ * A way to make agent tests fast, free, offline and side-effect-free.
131
+ * A diff/inspection toolkit for understanding and reviewing agent runs.
132
+
133
+ **It isn't:**
134
+
135
+ * It is **not** a way to "replay with a different prompt/model and get a
136
+ deterministic answer for free." Replay reconstructs *recorded* bytes. The moment
137
+ you change an input to a boundary marked `live`, that boundary **really executes**
138
+ (real API call, real cost) and produces a **new** recording. AgentTape is
139
+ explicit about this everywhere — pure replay vs. re-execution are different verbs
140
+ and we never blur them.
141
+ * It is **not** an evaluation framework, a prompt optimizer, or a tracing SaaS.
142
+
143
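+ See the [determinism guide](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/determinism.md) and the
144
+ [cassette format spec](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/docs/format.md) for details.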
145
+
146
+ ## License
147
+
148
+ MIT — see [LICENSE](https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/LICENSE).
@@ -0,0 +1,163 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agenttape"
7
+ version = "0.1.0"
8
+ description = "Deterministic record/replay of LLM and tool calls for AI agents, captured into human-readable cassettes."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "AgentTape Contributors" }]
13
+ keywords = [
14
+ "ai",
15
+ "agents",
16
+ "llm",
17
+ "testing",
18
+ "record",
19
+ "replay",
20
+ "cassette",
21
+ "determinism",
22
+ "openai",
23
+ "langgraph",
24
+ ]
25
+ classifiers = [
26
+ "Development Status :: 4 - Beta",
27
+ "Intended Audience :: Developers",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Programming Language :: Python :: 3.13",
34
+ "Programming Language :: Python :: 3.14",
35
+ "Topic :: Software Development :: Testing",
36
+ "Typing :: Typed",
37
+ ]
38
+ # Zero required runtime dependencies for the core — stdlib only.
39
+ dependencies = []
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/MITHRAN-BALACHANDER/AgentTape"
43
+ Documentation = "https://MITHRAN-BALACHANDER.github.io/AgentTape/"
44
+ Repository = "https://github.com/MITHRAN-BALACHANDER/AgentTape"
45
+ Changelog = "https://github.com/MITHRAN-BALACHANDER/AgentTape/blob/main/CHANGELOG.md"
46
+ Issues = "https://github.com/MITHRAN-BALACHANDER/AgentTape/issues"
47
+
48
+ [project.optional-dependencies]
49
+ # Robust YAML round-tripping when present; core falls back to a stdlib subset emitter/parser.
50
+ yaml = ["PyYAML>=6.0.1"]
51
+ # responses.create (patched by OpenAIAdapter) only exists from openai 1.66+.
52
+ openai = ["openai>=1.66"]
53
+ langgraph = ["langgraph>=1.0"]
54
+ langchain = ["langchain-core>=0.3"]
55
+ llamaindex = ["llama-index-core>=0.12"]
56
+ crewai = ["crewai>=1.0"]
57
+ autogen = ["pyautogen>=0.9"]
58
+ mcp = ["mcp>=1.9"]
59
+ http = ["httpx>=0.27", "requests>=2.32.4"]
60
+ # numpy 1.26 is the last 1.x line; numpy 2.3+ drops Python 3.10.
61
+ numpy = ["numpy>=1.26"]
62
+ otel = ["opentelemetry-sdk>=1.30"]
63
+ dev = [
64
+ "pytest>=8.0",
65
+ "pytest-cov>=5.0",
66
+ "ruff>=0.6",
67
+ "mypy>=1.11",
68
+ "PyYAML>=6.0.1",
69
+ "types-PyYAML>=6.0.12",
70
+ ]
71
+ docs = [
72
+ "mkdocs-material>=9.5",
73
+ "mkdocstrings[python]>=0.26",
74
+ ]
75
+
76
+ [project.scripts]
77
+ agenttape = "agenttape.cli:main"
78
+
79
+ [project.entry-points.pytest11]
80
+ agenttape = "agenttape.pytest_plugin"
81
+
82
+ [tool.hatch.build.targets.wheel]
83
+ packages = ["src/agenttape"]
84
+
85
+ [tool.hatch.build.targets.sdist]
86
+ include = ["src/agenttape", "README.md", "LICENSE", "CHANGELOG.md"]
87
+
88
+ [tool.ruff]
89
+ line-length = 100
90
+ target-version = "py310"
91
+ src = ["src", "tests"]
92
+
93
+ [tool.ruff.lint]
94
+ select = ["E", "F", "I", "UP", "B", "C4", "SIM", "RUF"]
95
+ ignore = [
96
+ "E501", # line length handled by formatter
97
+ "B008", # function calls in argument defaults (used intentionally)
98
+ "SIM105", # contextlib.suppress not always clearer
99
+ ]
100
+
101
+ [tool.ruff.lint.per-file-ignores]
102
+ "tests/*" = ["B011", "B017", "SIM117"]
103
+
104
+ [tool.mypy]
105
+ python_version = "3.10"
106
+ strict = true
107
+ files = ["src/agenttape"]
108
+ warn_unused_configs = true
109
+ disallow_untyped_defs = true
110
+ no_implicit_optional = true
111
+ warn_redundant_casts = true
112
+ warn_unused_ignores = true
113
+ # Optional adapter deps are typed when installed but absent in the strict core
114
+ # type-check environment. Skipping their imports makes them Any consistently, so the
115
+ # dynamic monkeypatching in adapters type-checks the same with or without them.
116
+ [[tool.mypy.overrides]]
117
+ module = [
118
+ "yaml",
119
+ "numpy",
120
+ "openai",
121
+ "openai.*",
122
+ "langgraph",
123
+ "langgraph.*",
124
+ "httpx",
125
+ "requests",
126
+ "requests.*",
127
+ "tomli",
128
+ "tomllib",
129
+ ]
130
+ ignore_missing_imports = true
131
+ follow_imports = "skip"
132
+
133
+ # pytest is typed when installed (dev extra); only silence the missing-import error if it is entirely absent.
134
+ [[tool.mypy.overrides]]
135
+ module = ["pytest", "_pytest.*"]
136
+ ignore_missing_imports = true
137
+
138
+ # Adapters patch third-party (Any) classes; tolerate ignores that may be redundant
139
+ # depending on whether the optional dependency is installed.
140
+ [[tool.mypy.overrides]]
141
+ module = ["agenttape.adapters.*"]
142
+ warn_unused_ignores = false
143
+
144
+ [tool.pytest.ini_options]
145
+ testpaths = ["tests"]
146
+ addopts = "-q"
147
+ markers = [
148
+ "agenttape: bind a test to an AgentTape cassette (see pytest-agenttape plugin).",
149
+ ]
150
+
151
+ [tool.coverage.run]
152
+ branch = true
153
+ source = ["agenttape"]
154
+ omit = ["*/adapters/_scaffold_*.py"]
155
+
156
+ [tool.coverage.report]
157
+ exclude_lines = [
158
+ "pragma: no cover",
159
+ "if TYPE_CHECKING:",
160
+ "raise NotImplementedError",
161
+ "@(abc\\.)?abstractmethod",
162
+ "\\.\\.\\.",
163
+ ]