replayd 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ *.so
7
+ *.egg
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ .eggs/
12
+ *.whl
13
+
14
+ # Virtual environments
15
+ .venv/
16
+ venv/
17
+ env/
18
+ ENV/
19
+
20
+ # replayd runtime data — never commit captured runs or tests from real agents
21
+ .replayd/
22
+
23
+ # Environment variables — never commit secrets
24
+ .env
25
+ .env.*
26
+
27
+ # Testing & coverage
28
+ .pytest_cache/
29
+ .coverage
30
+ htmlcov/
31
+ coverage.xml
32
+
33
+ # Type checking
34
+ .mypy_cache/
35
+ .ruff_cache/
36
+
37
+ # IDE
38
+ .vscode/
39
+ .idea/
40
+ *.swp
41
+ *.swo
42
+
43
+ # macOS
44
+ .DS_Store
replayd-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Stonepath Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
replayd-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,247 @@
1
+ Metadata-Version: 2.4
2
+ Name: replayd
3
+ Version: 0.1.0
4
+ Summary: Turn failed AI agent runs into replayable regression tests
5
+ Project-URL: Homepage, https://stonepathlab.net
6
+ Project-URL: Repository, https://github.com/TaimoorKhan10/replayd
7
+ Author-email: Taimoor Khan <taimoorkhaniajaznabi@gmail.com>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 Stonepath Labs
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Requires-Python: >=3.10
31
+ Provides-Extra: dev
32
+ Requires-Dist: anthropic>=0.40.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.0; extra == 'dev'
34
+ Provides-Extra: semantic
35
+ Requires-Dist: anthropic>=0.40.0; extra == 'semantic'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # replayd
39
+
40
+ [![PyPI](https://img.shields.io/pypi/v/replayd)](https://pypi.org/project/replayd/)
41
+ [![Python](https://img.shields.io/pypi/pyversions/replayd)](https://pypi.org/project/replayd/)
42
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
43
+
44
+ **Turn failed AI agent runs into replayable regression tests.**
45
+
46
+ When an AI agent fails in production, that failure becomes a test that runs before every future deployment. If the same failure returns after a prompt, model, or tool change, the release is blocked.
47
+
48
+ ```
49
+ pip install replayd
50
+ ```
51
+
52
+ ---
53
+
54
+ ## The problem
55
+
56
+ AI agents regress silently. A team fixes a bug, changes a prompt or model, and the same bug quietly returns. Traditional software has regression tests and CI/CD to catch this. AI agents have nothing equivalent.
57
+
58
+ replayd is the open source fix. It replays known failures before you ship so the same mistake cannot return.
59
+
60
+ ---
61
+
62
+ ## Quickstart
63
+
64
+ ```python
65
+ from replayd import Replayd
66
+
67
+ rp = Replayd()
68
+
69
+ # 1. Capture a run — assign run.output inside the block
70
+ with rp.capture(input=user_input, model="gpt-4o") as run:
71
+ run.output = your_agent.run(user_input)
72
+
73
+ # 2. Mark it as failed
74
+ rp.mark_failed(run.id, reason="agent approved refund after policy limit")
75
+
76
+ # 3. Save as a regression test
77
+ rp.save_test(
78
+ run.id,
79
+ forbidden_actions=["approve_refund"],
80
+ expected_action="escalate",
81
+ )
82
+
83
+ # 4. Later — after changing your prompt or model — replay all tests
84
+ results = rp.replay_all(agent=your_agent_fn)
85
+
86
+ for r in results:
87
+ print(r.verdict, r.reason)
88
+ ```
89
+
90
+ ---
91
+
92
+ ## See it working
93
+
94
+ Run the included example (`python examples/basic_example.py`) and you get:
95
+
96
+ ```
97
+ Capturing a refund-approval agent run...
98
+ agent called: approve_refund(amount=1200) [policy limit is $500]
99
+ output: {'action': 'approve_refund', 'amount': 1200}
100
+
101
+ Marking run as failed...
102
+ reason: agent approved refund of $1200, exceeding $500 policy limit
103
+
104
+ Saving as regression test...
105
+ forbidden: approve_refund | expected: escalate
106
+
107
+ -----------------------------------------
108
+ Replay #1 -- buggy agent (regression should be caught)
109
+ [FAIL] Forbidden action 'approve_refund' was called during replay.
110
+
111
+ Replay #2 -- fixed agent (regression should be resolved)
112
+ [PASS] No forbidden actions called; all expected actions present.
113
+ -----------------------------------------
114
+ 1 failure caught. 1 resolved.
115
+ ```
116
+
117
+ The failure was captured, saved, replayed against a broken agent (FAIL), and replayed again against the fixed agent (PASS). That is the full loop.
118
+
119
+ ---
120
+
121
+ ## Recording tool calls
122
+
123
+ replayd cannot intercept tool calls automatically. Wrap your agent's tool dispatcher to record them:
124
+
125
+ ```python
126
+ def my_agent(input, run_ctx):
127
+ result = call_tool("search", {"query": input["query"]})
128
+ run_ctx.record_tool_call("search", {"query": input["query"]}, result)
129
+ # ... rest of agent logic
130
+ return final_output
131
+ ```
132
+
133
+ Pass this two-argument callable to `replay_all`:
134
+
135
+ ```python
136
+ results = rp.replay_all(agent=my_agent)
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Grading
142
+
143
+ replayd does **not** grade on exact output matching. LLMs are non-deterministic — the same correct behavior will produce different output text every run, so exact matching creates false failures. The wrong tool being called, however, is a fact. replayd grades on facts.
144
+
145
+ | Failure type | Grading method |
146
+ |---|---|
147
+ | Wrong tool called, wrong argument, wrong state | Deterministic assertion — no LLM needed, never flaky |
148
+ | Policy violated, wrong reasoning, bad decision | LLM-as-judge via `grader_prompt` |
149
+
150
+ The structural check always runs first. If a forbidden action fires, the test fails immediately without calling the LLM.
151
+
152
+ ### Semantic grading
153
+
154
+ For failures that can only be evaluated by reading the output:
155
+
156
+ ```python
157
+ rp.save_test(
158
+ run.id,
159
+ grader_prompt="Did the agent approve a refund that exceeds the $500 policy limit?",
160
+ )
161
+ ```
162
+
163
+ Requires:
164
+
165
+ ```
166
+ pip install "replayd[semantic]"
167
+ export ANTHROPIC_API_KEY=sk-...
168
+ ```
169
+
170
+ ---
171
+
172
+ ## Storage
173
+
174
+ Runs and tests are stored as JSON files in `.replayd/` in your working directory:
175
+
176
+ ```
177
+ .replayd/
178
+ runs/<run-id>.json ← full record of each captured run
179
+ tests/<test-id>.json ← saved regression tests
180
+ ```
181
+
182
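+ No database. No hosted backend. To share tests with your team, check `.replayd/tests/` into version control. Note that the `.gitignore` included in this repo excludes all of `.replayd/`, so if you reuse it you must add the tests explicitly (for example `git add -f .replayd/tests/`). Commit only the `tests/` subfolder, never the captured runs in `runs/`.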
183
+
184
+ ---
185
+
186
+ ## CI integration
187
+
188
+ Save a script at `scripts/regression_check.py` in your repo:
189
+
190
+ ```python
191
+ import sys
192
+ from replayd import Replayd
193
+ from your_agent import agent_fn # your agent wrapped as (input, run_ctx) -> output
194
+
195
+ rp = Replayd()
196
+ results = rp.replay_all(agent=agent_fn)
197
+
198
+ failures = [r for r in results if not r]
199
+ for f in failures:
200
+ print(f"FAIL [{f.test.failure_reason}]: {f.reason}")
201
+
202
+ if failures:
203
+ sys.exit(1)
204
+ ```
205
+
206
+ Then in your workflow:
207
+
208
+ ```yaml
209
+ # .github/workflows/regression.yml
210
+ - name: Run regression tests
211
+ run: python scripts/regression_check.py
212
+ ```
213
+
214
+ ---
215
+
216
+ ## What replayd is not
217
+
218
+ replayd is not an observability tool. LangSmith, Braintrust, and Arize tell you what happened after the fact. replayd is an **active release gate** — it replays known failures before you ship. Passive vs active. That is the distinction.
219
+
220
+ ---
221
+
222
+ ## Part of TAQ by Stonepath Labs
223
+
224
+ replayd is the open source core of [TAQ](https://stonepathlab.net) — the full AI release control platform.
225
+
226
+ TAQ adds: a dashboard, hosted backend, team access controls, release gate enforcement, and audit logs. replayd gets your team started with the concept. TAQ is what you run it on in production.
227
+
228
+ **[stonepathlab.net](https://stonepathlab.net)**
229
+
230
+ ---
231
+
232
+ ## Contributing
233
+
234
+ Bug reports and pull requests are welcome. Open an issue on GitHub to discuss anything before sending a large PR.
235
+
236
+ replayd has no runtime dependencies — `pip install -e ".[dev]"` installs everything needed to run the tests:
237
+
238
+ ```
239
+ pip install -e ".[dev]"
240
+ pytest
241
+ ```
242
+
243
+ ---
244
+
245
+ ## License
246
+
247
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,210 @@
1
+ # replayd
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/replayd)](https://pypi.org/project/replayd/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/replayd)](https://pypi.org/project/replayd/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+
7
+ **Turn failed AI agent runs into replayable regression tests.**
8
+
9
+ When an AI agent fails in production, that failure becomes a test that runs before every future deployment. If the same failure returns after a prompt, model, or tool change, the release is blocked.
10
+
11
+ ```
12
+ pip install replayd
13
+ ```
14
+
15
+ ---
16
+
17
+ ## The problem
18
+
19
+ AI agents regress silently. A team fixes a bug, changes a prompt or model, and the same bug quietly returns. Traditional software has regression tests and CI/CD to catch this. AI agents have nothing equivalent.
20
+
21
+ replayd is the open source fix. It replays known failures before you ship so the same mistake cannot return.
22
+
23
+ ---
24
+
25
+ ## Quickstart
26
+
27
+ ```python
28
+ from replayd import Replayd
29
+
30
+ rp = Replayd()
31
+
32
+ # 1. Capture a run — assign run.output inside the block
33
+ with rp.capture(input=user_input, model="gpt-4o") as run:
34
+ run.output = your_agent.run(user_input)
35
+
36
+ # 2. Mark it as failed
37
+ rp.mark_failed(run.id, reason="agent approved refund after policy limit")
38
+
39
+ # 3. Save as a regression test
40
+ rp.save_test(
41
+ run.id,
42
+ forbidden_actions=["approve_refund"],
43
+ expected_action="escalate",
44
+ )
45
+
46
+ # 4. Later — after changing your prompt or model — replay all tests
47
+ results = rp.replay_all(agent=your_agent_fn)
48
+
49
+ for r in results:
50
+ print(r.verdict, r.reason)
51
+ ```
52
+
53
+ ---
54
+
55
+ ## See it working
56
+
57
+ Run the included example (`python examples/basic_example.py`) and you get:
58
+
59
+ ```
60
+ Capturing a refund-approval agent run...
61
+ agent called: approve_refund(amount=1200) [policy limit is $500]
62
+ output: {'action': 'approve_refund', 'amount': 1200}
63
+
64
+ Marking run as failed...
65
+ reason: agent approved refund of $1200, exceeding $500 policy limit
66
+
67
+ Saving as regression test...
68
+ forbidden: approve_refund | expected: escalate
69
+
70
+ -----------------------------------------
71
+ Replay #1 -- buggy agent (regression should be caught)
72
+ [FAIL] Forbidden action 'approve_refund' was called during replay.
73
+
74
+ Replay #2 -- fixed agent (regression should be resolved)
75
+ [PASS] No forbidden actions called; all expected actions present.
76
+ -----------------------------------------
77
+ 1 failure caught. 1 resolved.
78
+ ```
79
+
80
+ The failure was captured, saved, replayed against a broken agent (FAIL), and replayed again against the fixed agent (PASS). That is the full loop.
81
+
82
+ ---
83
+
84
+ ## Recording tool calls
85
+
86
+ replayd cannot intercept tool calls automatically. Wrap your agent's tool dispatcher to record them:
87
+
88
+ ```python
89
+ def my_agent(input, run_ctx):
90
+ result = call_tool("search", {"query": input["query"]})
91
+ run_ctx.record_tool_call("search", {"query": input["query"]}, result)
92
+ # ... rest of agent logic
93
+ return final_output
94
+ ```
95
+
96
+ Pass this two-argument callable to `replay_all`:
97
+
98
+ ```python
99
+ results = rp.replay_all(agent=my_agent)
100
+ ```
101
+
102
+ ---
103
+
104
+ ## Grading
105
+
106
+ replayd does **not** grade on exact output matching. LLMs are non-deterministic — the same correct behavior will produce different output text every run, so exact matching creates false failures. The wrong tool being called, however, is a fact. replayd grades on facts.
107
+
108
+ | Failure type | Grading method |
109
+ |---|---|
110
+ | Wrong tool called, wrong argument, wrong state | Deterministic assertion — no LLM needed, never flaky |
111
+ | Policy violated, wrong reasoning, bad decision | LLM-as-judge via `grader_prompt` |
112
+
113
+ The structural check always runs first. If a forbidden action fires, the test fails immediately without calling the LLM.
114
+
115
+ ### Semantic grading
116
+
117
+ For failures that can only be evaluated by reading the output:
118
+
119
+ ```python
120
+ rp.save_test(
121
+ run.id,
122
+ grader_prompt="Did the agent approve a refund that exceeds the $500 policy limit?",
123
+ )
124
+ ```
125
+
126
+ Requires:
127
+
128
+ ```
129
+ pip install "replayd[semantic]"
130
+ export ANTHROPIC_API_KEY=sk-...
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Storage
136
+
137
+ Runs and tests are stored as JSON files in `.replayd/` in your working directory:
138
+
139
+ ```
140
+ .replayd/
141
+ runs/<run-id>.json ← full record of each captured run
142
+ tests/<test-id>.json ← saved regression tests
143
+ ```
144
+
145
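+ No database. No hosted backend. To share tests with your team, check `.replayd/tests/` into version control. Note that the `.gitignore` included in this repo excludes all of `.replayd/`, so if you reuse it you must add the tests explicitly (for example `git add -f .replayd/tests/`). Commit only the `tests/` subfolder, never the captured runs in `runs/`.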
146
+
147
+ ---
148
+
149
+ ## CI integration
150
+
151
+ Save a script at `scripts/regression_check.py` in your repo:
152
+
153
+ ```python
154
+ import sys
155
+ from replayd import Replayd
156
+ from your_agent import agent_fn # your agent wrapped as (input, run_ctx) -> output
157
+
158
+ rp = Replayd()
159
+ results = rp.replay_all(agent=agent_fn)
160
+
161
+ failures = [r for r in results if not r]
162
+ for f in failures:
163
+ print(f"FAIL [{f.test.failure_reason}]: {f.reason}")
164
+
165
+ if failures:
166
+ sys.exit(1)
167
+ ```
168
+
169
+ Then in your workflow:
170
+
171
+ ```yaml
172
+ # .github/workflows/regression.yml
173
+ - name: Run regression tests
174
+ run: python scripts/regression_check.py
175
+ ```
176
+
177
+ ---
178
+
179
+ ## What replayd is not
180
+
181
+ replayd is not an observability tool. LangSmith, Braintrust, and Arize tell you what happened after the fact. replayd is an **active release gate** — it replays known failures before you ship. Passive vs active. That is the distinction.
182
+
183
+ ---
184
+
185
+ ## Part of TAQ by Stonepath Labs
186
+
187
+ replayd is the open source core of [TAQ](https://stonepathlab.net) — the full AI release control platform.
188
+
189
+ TAQ adds: a dashboard, hosted backend, team access controls, release gate enforcement, and audit logs. replayd gets your team started with the concept. TAQ is what you run it on in production.
190
+
191
+ **[stonepathlab.net](https://stonepathlab.net)**
192
+
193
+ ---
194
+
195
+ ## Contributing
196
+
197
+ Bug reports and pull requests are welcome. Open an issue on GitHub to discuss anything before sending a large PR.
198
+
199
+ replayd has no runtime dependencies — `pip install -e ".[dev]"` installs everything needed to run the tests:
200
+
201
+ ```
202
+ pip install -e ".[dev]"
203
+ pytest
204
+ ```
205
+
206
+ ---
207
+
208
+ ## License
209
+
210
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,118 @@
1
+ """
2
+ End-to-end example of replayd.
3
+
4
+ Run from the repo root with:
5
+ pip install -e .
6
+ python examples/basic_example.py
7
+
8
+ Or without installing:
9
+ PYTHONPATH=. python examples/basic_example.py
10
+
11
+ The example simulates a refund-approval agent that has a bug: it approves
12
+ refunds above the $500 policy limit. We capture the failure, save it as a
13
+ regression test, then replay it against both the buggy agent (expects FAIL)
14
+ and the fixed agent (expects PASS).
15
+ """
16
+
17
+ from replayd import Replayd
18
+ from replayd.capture import RunContext
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Mock agents
23
+ # ---------------------------------------------------------------------------
24
+
25
def buggy_agent(input: dict, run_ctx: RunContext) -> dict:
    """Mock agent carrying the bug: it approves every refund, whatever the amount.

    Records a single ``approve_refund`` tool call on ``run_ctx`` and returns
    the matching action dict. A missing ``amount`` is treated as 0.
    """
    refund_amount = input.get("amount", 0)
    call_args = {"amount": refund_amount, "customer_id": input.get("customer_id")}
    run_ctx.record_tool_call(
        name="approve_refund",
        arguments=call_args,
        result={"approved": True},
    )
    return {"action": "approve_refund", "amount": refund_amount}
33
+
34
+
35
def fixed_agent(input: dict, run_ctx: RunContext) -> dict:
    """Mock agent with the fix: refunds above the $500 policy limit are escalated.

    Records exactly one tool call on ``run_ctx`` -- ``escalate`` when the
    requested amount is above the limit, ``approve_refund`` otherwise -- and
    returns the matching action dict. A missing ``amount`` is treated as 0.
    """
    policy_limit = 500
    refund_amount = input.get("amount", 0)

    if refund_amount > policy_limit:
        action = "escalate"
        call_args = {"reason": "refund exceeds policy limit", "amount": refund_amount}
        call_result = {"ticket_id": "ESC-001"}
    else:
        action = "approve_refund"
        call_args = {"amount": refund_amount, "customer_id": input.get("customer_id")}
        call_result = {"approved": True}

    run_ctx.record_tool_call(name=action, arguments=call_args, result=call_result)
    return {"action": action, "amount": refund_amount}
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Main
57
+ # ---------------------------------------------------------------------------
58
+
59
def _print_verdicts(results) -> int:
    """Print one ``[PASS]``/``[FAIL]`` line per replay result.

    Returns the number of results whose verdict value is ``"fail"``.
    """
    failed = 0
    for r in results:
        is_fail = r.verdict.value == "fail"
        failed += is_fail
        print(f" [{'FAIL' if is_fail else 'PASS'}] {r.reason}")
    return failed


def main():
    """Run the demo: capture a run, mark it failed, save a test, replay it twice.

    The saved test is replayed first against ``buggy_agent`` and then against
    ``fixed_agent``; the closing summary line is computed from the actual
    replay verdicts rather than assumed.
    """
    import os
    import shutil

    # NOTE: removes any existing .replayd directory in the current working
    # directory before the demo runs.
    if os.path.exists(".replayd"):
        shutil.rmtree(".replayd")

    rp = Replayd()
    user_input = {"customer_id": "cust-42", "amount": 1200, "reason": "defective product"}

    # --- Capture ------------------------------------------------------------
    print("Capturing a refund-approval agent run...")
    with rp.capture(input=user_input, model="mock-v1") as run:
        run.output = buggy_agent(user_input, run)

    # Read the tool calls back through get_run() and print each one.
    for tc in rp.get_run(run.id).tool_calls:
        args = ", ".join(f"{k}={v}" for k, v in tc.arguments.items())
        print(f" agent called: {tc.name}({args}) [policy limit is $500]")
    print(f" output: {run.output}")

    # --- Mark failed --------------------------------------------------------
    print("\nMarking run as failed...")
    failure_reason = "agent approved refund of $1200, exceeding $500 policy limit"
    rp.mark_failed(run.id, reason=failure_reason)
    print(f" reason: {failure_reason}")

    # --- Save as test -------------------------------------------------------
    print("\nSaving as regression test...")
    rp.save_test(
        run.id,
        forbidden_actions=["approve_refund"],
        expected_action="escalate",
    )
    print(" forbidden: approve_refund | expected: escalate")

    print()
    print("-" * 41)

    # --- Replay: buggy agent ------------------------------------------------
    print("Replay #1 -- buggy agent (regression should be caught)")
    caught = _print_verdicts(rp.replay_all(agent=buggy_agent))

    print()

    # --- Replay: fixed agent ------------------------------------------------
    print("Replay #2 -- fixed agent (regression should be resolved)")
    fixed_results = rp.replay_all(agent=fixed_agent)
    resolved = len(fixed_results) - _print_verdicts(fixed_results)

    print("-" * 41)
    # Report what actually happened: failures seen with the buggy agent and
    # passes seen with the fixed agent.
    noun = "failure" if caught == 1 else "failures"
    print(f"{caught} {noun} caught. {resolved} resolved.")


if __name__ == "__main__":
    main()
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "replayd"
7
+ version = "0.1.0"
8
+ description = "Turn failed AI agent runs into replayable regression tests"
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ requires-python = ">=3.10"
12
+ dependencies = []
13
+ authors = [
14
+ { name = "Taimoor Khan", email = "taimoorkhaniajaznabi@gmail.com" }
15
+ ]
16
+
17
+ [project.optional-dependencies]
18
+ semantic = ["anthropic>=0.40.0"]
19
+ dev = [
20
+ "pytest>=8.0",
21
+ "anthropic>=0.40.0",
22
+ ]
23
+
24
+ [project.urls]
25
+ Homepage = "https://stonepathlab.net"
26
+ Repository = "https://github.com/TaimoorKhan10/replayd"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["replayd"]
30
+
31
+ [tool.pytest.ini_options]
32
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ from replayd.core import Replayd
2
+ from replayd.models import CapturedRun, TestCase, ReplayResult, ToolCall
3
+
4
+ __all__ = ["Replayd", "CapturedRun", "TestCase", "ReplayResult", "ToolCall"]