arcwright-ai 0.1.1.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. arcwright_ai-0.1.1.dev0/.github/workflows/ci.yml +19 -0
  2. arcwright_ai-0.1.1.dev0/.gitignore +30 -0
  3. arcwright_ai-0.1.1.dev0/.pre-commit-config.yaml +13 -0
  4. arcwright_ai-0.1.1.dev0/LICENSE +21 -0
  5. arcwright_ai-0.1.1.dev0/PKG-INFO +325 -0
  6. arcwright_ai-0.1.1.dev0/README.md +292 -0
  7. arcwright_ai-0.1.1.dev0/langgraph.json +6 -0
  8. arcwright_ai-0.1.1.dev0/pyproject.toml +104 -0
  9. arcwright_ai-0.1.1.dev0/src/arcwright_ai/__init__.py +12 -0
  10. arcwright_ai-0.1.1.dev0/src/arcwright_ai/agent/__init__.py +16 -0
  11. arcwright_ai-0.1.1.dev0/src/arcwright_ai/agent/invoker.py +567 -0
  12. arcwright_ai-0.1.1.dev0/src/arcwright_ai/agent/prompt.py +59 -0
  13. arcwright_ai-0.1.1.dev0/src/arcwright_ai/agent/sandbox.py +181 -0
  14. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/__init__.py +7 -0
  15. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/app.py +29 -0
  16. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/clean.py +226 -0
  17. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/dispatch.py +876 -0
  18. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/halt.py +700 -0
  19. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/resume.py +171 -0
  20. arcwright_ai-0.1.1.dev0/src/arcwright_ai/cli/status.py +904 -0
  21. arcwright_ai-0.1.1.dev0/src/arcwright_ai/context/__init__.py +21 -0
  22. arcwright_ai-0.1.1.dev0/src/arcwright_ai/context/answerer.py +400 -0
  23. arcwright_ai-0.1.1.dev0/src/arcwright_ai/context/injector.py +517 -0
  24. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/__init__.py +129 -0
  25. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/config.py +777 -0
  26. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/constants.py +183 -0
  27. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/events.py +43 -0
  28. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/exceptions.py +112 -0
  29. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/io.py +94 -0
  30. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/lifecycle.py +60 -0
  31. arcwright_ai-0.1.1.dev0/src/arcwright_ai/core/types.py +212 -0
  32. arcwright_ai-0.1.1.dev0/src/arcwright_ai/engine/__init__.py +30 -0
  33. arcwright_ai-0.1.1.dev0/src/arcwright_ai/engine/graph.py +72 -0
  34. arcwright_ai-0.1.1.dev0/src/arcwright_ai/engine/nodes.py +1791 -0
  35. arcwright_ai-0.1.1.dev0/src/arcwright_ai/engine/state.py +91 -0
  36. arcwright_ai-0.1.1.dev0/src/arcwright_ai/output/__init__.py +41 -0
  37. arcwright_ai-0.1.1.dev0/src/arcwright_ai/output/provenance.py +250 -0
  38. arcwright_ai-0.1.1.dev0/src/arcwright_ai/output/run_manager.py +564 -0
  39. arcwright_ai-0.1.1.dev0/src/arcwright_ai/output/summary.py +939 -0
  40. arcwright_ai-0.1.1.dev0/src/arcwright_ai/py.typed +0 -0
  41. arcwright_ai-0.1.1.dev0/src/arcwright_ai/scm/__init__.py +32 -0
  42. arcwright_ai-0.1.1.dev0/src/arcwright_ai/scm/branch.py +793 -0
  43. arcwright_ai-0.1.1.dev0/src/arcwright_ai/scm/git.py +281 -0
  44. arcwright_ai-0.1.1.dev0/src/arcwright_ai/scm/pr.py +941 -0
  45. arcwright_ai-0.1.1.dev0/src/arcwright_ai/scm/worktree.py +367 -0
  46. arcwright_ai-0.1.1.dev0/src/arcwright_ai/validation/__init__.py +37 -0
  47. arcwright_ai-0.1.1.dev0/src/arcwright_ai/validation/pipeline.py +231 -0
  48. arcwright_ai-0.1.1.dev0/src/arcwright_ai/validation/v3_reflexion.py +426 -0
  49. arcwright_ai-0.1.1.dev0/src/arcwright_ai/validation/v6_invariant.py +507 -0
  50. arcwright_ai-0.1.1.dev0/tests/__init__.py +1 -0
  51. arcwright_ai-0.1.1.dev0/tests/conftest.py +72 -0
  52. arcwright_ai-0.1.1.dev0/tests/fixtures/mock_sdk.py +154 -0
  53. arcwright_ai-0.1.1.dev0/tests/fixtures/projects/invalid_project/README.md +3 -0
  54. arcwright_ai-0.1.1.dev0/tests/fixtures/projects/partial_project/README.md +3 -0
  55. arcwright_ai-0.1.1.dev0/tests/fixtures/projects/valid_project/README.md +3 -0
  56. arcwright_ai-0.1.1.dev0/tests/integration/.gitkeep +0 -0
  57. arcwright_ai-0.1.1.dev0/tests/integration/__init__.py +3 -0
  58. arcwright_ai-0.1.1.dev0/tests/test_agent/.gitkeep +0 -0
  59. arcwright_ai-0.1.1.dev0/tests/test_agent/__init__.py +3 -0
  60. arcwright_ai-0.1.1.dev0/tests/test_agent/test_invoker.py +512 -0
  61. arcwright_ai-0.1.1.dev0/tests/test_agent/test_prompt.py +131 -0
  62. arcwright_ai-0.1.1.dev0/tests/test_agent/test_sandbox.py +228 -0
  63. arcwright_ai-0.1.1.dev0/tests/test_cli/.gitkeep +0 -0
  64. arcwright_ai-0.1.1.dev0/tests/test_cli/__init__.py +3 -0
  65. arcwright_ai-0.1.1.dev0/tests/test_cli/test_clean.py +424 -0
  66. arcwright_ai-0.1.1.dev0/tests/test_cli/test_dispatch.py +1108 -0
  67. arcwright_ai-0.1.1.dev0/tests/test_cli/test_halt.py +1133 -0
  68. arcwright_ai-0.1.1.dev0/tests/test_cli/test_init.py +253 -0
  69. arcwright_ai-0.1.1.dev0/tests/test_cli/test_resume.py +734 -0
  70. arcwright_ai-0.1.1.dev0/tests/test_cli/test_status.py +697 -0
  71. arcwright_ai-0.1.1.dev0/tests/test_cli/test_validate_setup.py +420 -0
  72. arcwright_ai-0.1.1.dev0/tests/test_context/.gitkeep +0 -0
  73. arcwright_ai-0.1.1.dev0/tests/test_context/__init__.py +3 -0
  74. arcwright_ai-0.1.1.dev0/tests/test_context/test_answerer.py +336 -0
  75. arcwright_ai-0.1.1.dev0/tests/test_context/test_injector.py +386 -0
  76. arcwright_ai-0.1.1.dev0/tests/test_core/.gitkeep +0 -0
  77. arcwright_ai-0.1.1.dev0/tests/test_core/__init__.py +3 -0
  78. arcwright_ai-0.1.1.dev0/tests/test_core/test_config.py +711 -0
  79. arcwright_ai-0.1.1.dev0/tests/test_core/test_constants.py +230 -0
  80. arcwright_ai-0.1.1.dev0/tests/test_core/test_events.py +74 -0
  81. arcwright_ai-0.1.1.dev0/tests/test_core/test_exceptions.py +182 -0
  82. arcwright_ai-0.1.1.dev0/tests/test_core/test_io.py +121 -0
  83. arcwright_ai-0.1.1.dev0/tests/test_core/test_lifecycle.py +160 -0
  84. arcwright_ai-0.1.1.dev0/tests/test_core/test_types.py +368 -0
  85. arcwright_ai-0.1.1.dev0/tests/test_engine/.gitkeep +0 -0
  86. arcwright_ai-0.1.1.dev0/tests/test_engine/__init__.py +3 -0
  87. arcwright_ai-0.1.1.dev0/tests/test_engine/test_graph.py +448 -0
  88. arcwright_ai-0.1.1.dev0/tests/test_engine/test_nodes.py +3165 -0
  89. arcwright_ai-0.1.1.dev0/tests/test_engine/test_scm_integration.py +279 -0
  90. arcwright_ai-0.1.1.dev0/tests/test_engine/test_state.py +115 -0
  91. arcwright_ai-0.1.1.dev0/tests/test_output/.gitkeep +0 -0
  92. arcwright_ai-0.1.1.dev0/tests/test_output/__init__.py +3 -0
  93. arcwright_ai-0.1.1.dev0/tests/test_output/test_cost_formatting.py +199 -0
  94. arcwright_ai-0.1.1.dev0/tests/test_output/test_provenance.py +483 -0
  95. arcwright_ai-0.1.1.dev0/tests/test_output/test_run_manager.py +615 -0
  96. arcwright_ai-0.1.1.dev0/tests/test_output/test_summary.py +1337 -0
  97. arcwright_ai-0.1.1.dev0/tests/test_scm/.gitkeep +0 -0
  98. arcwright_ai-0.1.1.dev0/tests/test_scm/__init__.py +3 -0
  99. arcwright_ai-0.1.1.dev0/tests/test_scm/test_branch.py +1112 -0
  100. arcwright_ai-0.1.1.dev0/tests/test_scm/test_branch_integration.py +372 -0
  101. arcwright_ai-0.1.1.dev0/tests/test_scm/test_clean_integration.py +182 -0
  102. arcwright_ai-0.1.1.dev0/tests/test_scm/test_git.py +224 -0
  103. arcwright_ai-0.1.1.dev0/tests/test_scm/test_pr.py +1196 -0
  104. arcwright_ai-0.1.1.dev0/tests/test_scm/test_scm_integration.py +461 -0
  105. arcwright_ai-0.1.1.dev0/tests/test_scm/test_worktree.py +567 -0
  106. arcwright_ai-0.1.1.dev0/tests/test_scm/test_worktree_integration.py +146 -0
  107. arcwright_ai-0.1.1.dev0/tests/test_validation/.gitkeep +0 -0
  108. arcwright_ai-0.1.1.dev0/tests/test_validation/__init__.py +3 -0
  109. arcwright_ai-0.1.1.dev0/tests/test_validation/test_pipeline.py +542 -0
  110. arcwright_ai-0.1.1.dev0/tests/test_validation/test_v3_reflexion.py +682 -0
  111. arcwright_ai-0.1.1.dev0/tests/test_validation/test_v6_invariant.py +465 -0
  112. arcwright_ai-0.1.1.dev0/uv.lock +1724 -0
@@ -0,0 +1,19 @@
1
+ name: CI
2
+ on: [push, pull_request]
3
+ jobs:
4
+ lint-test:
5
+ runs-on: ubuntu-latest
6
+ strategy:
7
+ matrix:
8
+ python-version: ["3.11", "3.12", "3.13"]
9
+ steps:
10
+ - uses: actions/checkout@v4
11
+ - uses: actions/setup-python@v5
12
+ with:
13
+ python-version: ${{ matrix.python-version }}
14
+ - run: pip install -e ".[dev]"
15
+ - run: ruff check .
16
+ - run: ruff format --check .
17
+ - run: mypy --strict src/
18
+ # Exit code 5 = no tests collected; acceptable until Epic 1 stories add tests
19
+ - run: pytest || [ $? -eq 5 ]
@@ -0,0 +1,30 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ .venv-studio/
12
+ venv/
13
+
14
+ # IDE
15
+ .idea/
16
+ .vscode/
17
+ *.swp
18
+
19
+ # Arcwright AI runtime (never committed)
20
+ .arcwright-ai/tmp/
21
+ .arcwright-ai/runs/
22
+
23
+ # mypy
24
+ .mypy_cache/
25
+
26
+ # pytest
27
+ .pytest_cache/
28
+
29
+ # ruff
30
+ .ruff_cache/
@@ -0,0 +1,13 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.8.6
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: https://github.com/pre-commit/mirrors-mypy
9
+ rev: v1.15.0
10
+ hooks:
11
+ - id: mypy
12
+ additional_dependencies: [pydantic>=2.7, typer>=0.12]
13
+ args: [--strict]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ed
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,325 @@
1
+ Metadata-Version: 2.4
2
+ Name: arcwright-ai
3
+ Version: 0.1.1.dev0
4
+ Summary: Deterministic orchestration shell for autonomous AI agent execution
5
+ Project-URL: Homepage, https://github.com/ProductEngineerIO/arcwright-ai
6
+ Project-URL: Repository, https://github.com/ProductEngineerIO/arcwright-ai
7
+ Project-URL: Issues, https://github.com/ProductEngineerIO/arcwright-ai/issues
8
+ Author: Ed Hertzog
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Build Tools
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: claude-code-sdk>=0.0.10
21
+ Requires-Dist: langgraph<1.0,>=0.2
22
+ Requires-Dist: pydantic<3.0,>=2.7
23
+ Requires-Dist: pyyaml<7.0,>=6.0
24
+ Requires-Dist: typer[all]<1.0,>=0.12
25
+ Provides-Extra: dev
26
+ Requires-Dist: mypy>=1.13; extra == 'dev'
27
+ Requires-Dist: pre-commit>=4.0; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.8; extra == 'dev'
31
+ Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # Arcwright AI
35
+
36
+ Deterministic orchestration shell for autonomous AI agent execution.
37
+
38
+ Arcwright AI takes BMAD planning artifacts (PRD, Architecture, Epics, Stories) and autonomously executes them through Claude, enforcing validation gates, tracking decision provenance, and writing structured run artifacts after every execution.
39
+
40
+ ---
41
+
42
+ ## Table of Contents
43
+
44
+ - [Prerequisites](#prerequisites)
45
+ - [Installation](#installation)
46
+ - [Project Setup](#project-setup)
47
+ - [Running Stories](#running-stories)
48
+ - [Run Artifacts](#run-artifacts)
49
+ - [Understanding the Output](#understanding-the-output)
50
+ - [LangGraph Studio](#langgraph-studio)
51
+ - [Development](#development)
52
+ - [Troubleshooting](#troubleshooting)
53
+
54
+ ---
55
+
56
+ ## Prerequisites
57
+
58
+ - **Python 3.11+** (3.14 recommended; see [LangGraph Studio](#langgraph-studio) for the exception)
59
+ - **Claude API key** (Anthropic): `ARCWRIGHT_API_CLAUDE_API_KEY`
60
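+ - A project initialised with BMAD, with a planning-artifacts directory (e.g. `_bmad-output/` or `_spec/planning-artifacts/`; configured via `methodology.artifacts_path` in `config.yaml`) containing PRD, architecture, epics, and story files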
61
+
62
+ ---
63
+
64
+ ## Installation
65
+
66
+ From the `arcwright-ai/` directory:
67
+
68
+ ```bash
69
+ python -m venv .venv
70
+ .venv/bin/pip install -e ".[dev]"
71
+ ```
72
+
73
+ Set your API key (add to your shell profile or `.env`):
74
+
75
+ ```bash
76
+ export ARCWRIGHT_API_CLAUDE_API_KEY="sk-ant-..."
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Project Setup
82
+
83
+ Before dispatching stories, initialise Arcwright AI in your **target project** (the project whose stories you want to implement — not this repo):
84
+
85
+ ```bash
86
+ # From inside the target project root:
87
+ arcwright-ai init
88
+
89
+ # Or point explicitly:
90
+ arcwright-ai init --path /path/to/your/project
91
+ ```
92
+
93
+ This creates `.arcwright-ai/` with the following layout:
94
+
95
+ ```
96
+ .arcwright-ai/
97
+ ├── config.yaml ← project-level configuration (committed)
98
+ ├── runs/ ← execution artifacts (git-ignored)
99
+ ├── worktrees/ ← git worktrees (git-ignored)
100
+ └── tmp/ ← transient scratch space (git-ignored)
101
+ ```
102
+
103
+ **`config.yaml` defaults** (edit to suit your project):
104
+
105
+ ```yaml
106
+ model:
107
+ version: "claude-opus-4-6"
108
+
109
+ limits:
110
+ tokens_per_story: 200000
111
+ cost_per_run: 10.0
112
+ retry_budget: 3
113
+ timeout_per_story: 300
114
+
115
+ methodology:
116
+ artifacts_path: "_bmad-output" # where your BMAD planning docs live
117
+ type: "bmad"
118
+
119
+ scm:
120
+ branch_template: "arcwright-ai/{story_slug}"
121
+ ```
122
+
123
+ > **API key security**: Never put your API key in the project-level `.arcwright-ai/config.yaml` (that file is committed). Set it via the
124
+ > `ARCWRIGHT_API_CLAUDE_API_KEY` environment variable, or in the global
125
+ > `~/.arcwright-ai/config.yaml` (user-level, outside any repo).
126
+
127
+ Verify your setup:
128
+
129
+ ```bash
130
+ arcwright-ai validate-setup
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Running Stories
136
+
137
+ Dispatch a single story by its `epic.story` identifier (e.g., story 4 of epic 2 is `2.4`):
138
+
139
+ ```bash
140
+ # From inside the target project root:
141
+ arcwright-ai dispatch --story 2.4
142
+
143
+ # Dashes also work:
144
+ arcwright-ai dispatch --story 2-4
145
+ ```
146
+
147
+ The pipeline runs:
148
+
149
+ ```
150
+ preflight → budget_check → agent_dispatch → validate → commit → finalize
151
+ ```
152
+
153
+ Each node writes artifacts to `.arcwright-ai/runs/<run-id>/stories/<story-slug>/`.
154
+
155
+ **Exit codes:**
156
+
157
+ | Code | Meaning |
158
+ |------|---------|
159
+ | `0` | Story completed successfully |
160
+ | `1` | Unexpected error (configuration, I/O, etc.) |
161
+ | `2` | Story escalated (validation failed, could not auto-fix) |
162
+
163
+ ---
164
+
165
+ ## Run Artifacts
166
+
167
+ Every execution produces a run directory:
168
+
169
+ ```
170
+ .arcwright-ai/runs/<run-id>/
171
+ ├── run.yaml ← metadata: status, cost, story list
172
+ └── stories/<story-slug>/
173
+ ├── context-bundle.md ← assembled context injected into the agent
174
+ ├── agent-output.md ← raw output from Claude
175
+ ├── validation.md ← V6 invariant + V3 reflexion results and decision log
176
+ ├── halt-report.md ← populated only on escalation
177
+ └── summary.md ← produced by finalize node (success or halt)
178
+ ```
179
+
180
+ **Run ID format**: `YYYYMMDD-HHMMSS-<4-char-id>` (e.g. `20260305-022632-4b90`)
181
+
182
+ ### Reading a halt report
183
+
184
+ When a run escalates, check these files in order:
185
+
186
+ 1. **`halt-report.md`** — escalation reason, retry history, suggested fix
187
+ 2. **`validation.md`** — exact V6 invariant failures and V3 reflexion AC results
188
+ 3. **`agent-output.md`** — what Claude produced (verify files actually exist on disk before trusting V6 failures)
189
+
190
+ ---
191
+
192
+ ## Understanding the Output
193
+
194
+ ### `status: escalated` vs. failure
195
+
196
+ `escalated` means the pipeline itself ran without error, but the story could not pass validation — because of a hard invariant failure, an exhausted retry budget, or an exceeded token/cost budget (see the table below). It does **not** mean the agent crashed. The agent's work (files, code) is still on disk in the target project.
197
+
198
+ Escalation reasons:
199
+
200
+ | Reason | Meaning |
201
+ |--------|---------|
202
+ | `v6_invariant_failure` | Hard rule violation (missing file, bad name, syntax error) — retries won't help without a fix |
203
+ | `max_retries_exhausted` | V3 reflexion (AC review) kept failing after N retries |
204
+ | `budget_exceeded` | Token/cost ceiling hit before validation passed |
205
+
206
+ ### False-positive V6 failures
207
+
208
+ If `validation.md` shows a `file_existence` failure for a file that **does** exist on disk, check whether the path in the error has a leading backtick (e.g., `` `backend/app/routers/admin.py ``). This happens when the agent wraps file paths in inline code formatting in markdown headers. The current version of the V6 checker strips backticks. If you see this failure in a run produced by an older version, the files are fine — re-run the story to get a clean pass.
209
+
210
+ ---
211
+
212
+ ## LangGraph Studio
213
+
214
+ Arcwright AI ships a [`langgraph.json`](langgraph.json) config so you can visualise and inspect the execution graph in [LangGraph Studio](https://smith.langchain.com/studio/).
215
+
216
+ ### Why a separate venv?
217
+
218
+ The main `.venv` uses **Python 3.14**. The `langgraph-api` package (required for `langgraph dev`) depends on `pyo3`-based Rust extensions that do not yet publish wheels for Python 3.14 and cannot be compiled without matching support. A separate Python 3.13 venv is used exclusively for Studio.
219
+
220
+ ### One-time setup
221
+
222
+ Ensure Python 3.13 is available (via Homebrew or pyenv), then:
223
+
224
+ ```bash
225
+ cd arcwright-ai/
226
+
227
+ # Create Studio venv with Python 3.13
228
+ python3.13 -m venv .venv-studio
229
+
230
+ # Install project + LangGraph Studio deps
231
+ .venv-studio/bin/pip install -e ".[dev]" "langgraph-cli[inmem]"
232
+ ```
233
+
234
+ ### Starting Studio
235
+
236
+ ```bash
237
+ cd arcwright-ai/
238
+ .venv-studio/bin/langgraph dev
239
+ ```
240
+
241
+ The server starts at `http://127.0.0.1:2024`. Open the Studio UI at:
242
+
243
+ ```
244
+ https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
245
+ ```
246
+
247
+ You'll see the `story_graph` with all nodes and conditional edges:
248
+
249
+ ```
250
+ START → preflight → budget_check ──(ok)──→ agent_dispatch → validate ──(success)──→ commit → finalize → END
251
+ │ │
252
+ (exceeded) (escalated)
253
+ └──────────────────────────────────────┴──→ finalize → END
254
+ ↑(retry)
255
+ validate ─┘ → budget_check
256
+ ```
257
+
258
+ > A free [LangSmith](https://smith.langchain.com) account is required to use the Studio UI. The local API server itself runs without one.
259
+
260
+ ---
261
+
262
+ ## Development
263
+
264
+ All development commands use the main `.venv` (Python 3.14):
265
+
266
+ ```bash
267
+ # Install dev dependencies
268
+ pip install -e ".[dev]"
269
+ # Prefer explicit venv invocation to avoid interpreter mismatch:
270
+ .venv/bin/pip install -e ".[dev]"
271
+
272
+ # Run tests
273
+ .venv/bin/python -m pytest -q
274
+
275
+ # Lint
276
+ .venv/bin/ruff check .
277
+ .venv/bin/ruff format --check .
278
+
279
+ # Type check
280
+ .venv/bin/python -m mypy --strict src/
281
+
282
+ # All quality gates in one pass
283
+ .venv/bin/ruff check . && .venv/bin/ruff format --check . && .venv/bin/python -m mypy --strict src/ && .venv/bin/python -m pytest -q
284
+ ```
285
+
286
+ ### Python version note
287
+
288
+ The project targets Python 3.11+ and is developed against 3.14. The `.venv-studio` venv (Python 3.13) is **only** for running `langgraph dev`. Do not use it for tests or type checking — results may differ.
289
+
290
+ ---
291
+
292
+ ## Troubleshooting
293
+
294
+ **`ModuleNotFoundError: No module named 'arcwright_ai'`**
295
+
296
+ The venv's editable install link may be stale, or may not have been processed correctly on Python 3.14. Re-install:
297
+
298
+ ```bash
299
+ cd arcwright-ai/
300
+ .venv/bin/pip install -e .
301
+ ```
302
+
303
+ This rewrites the `.pth` file. Verify with:
304
+
305
+ ```bash
306
+ .venv/bin/python -c "import arcwright_ai; print(arcwright_ai.__file__)"
307
+ ```
308
+
309
+ **`langgraph dev` fails with `Required package 'langgraph-api' is not installed`**
310
+
311
+ You're using the main `.venv` (Python 3.14). Use `.venv-studio` instead:
312
+
313
+ ```bash
314
+ .venv-studio/bin/langgraph dev
315
+ ```
316
+
317
+ **Story dispatched but files don't match what validation expected**
318
+
319
+ Check `.arcwright-ai/config.yaml` in the target project. The `methodology.artifacts_path` must point to the directory containing your BMAD planning artifacts (PRD, architecture, epics). Default is `_bmad-output`; adjust if your project uses `_spec/planning-artifacts` or another path.
320
+
321
+ **Dev agent File List is consistently incomplete or doesn't match `git diff` output after a BMAD update**
322
+
323
+ The dev-story workflow in this project includes a custom git diff audit step (Step 9 of `instructions.xml`) that was added to address a systemic issue where 67% of stories had File List discrepancies. This customization lives in `_bmad/bmm/workflows/4-implementation/dev-story/` — a directory that is gitignored and gets overwritten by BMAD framework updates.
324
+
325
+ If you have recently run a BMAD update and agent File Lists are again going unaudited, the customization was likely overwritten. See the **BMAD Workflow Customizations** section in the root [`README.md`](../README.md#bmad-workflow-customizations) for the exact changes to re-apply.
@@ -0,0 +1,292 @@
1
+ # Arcwright AI
2
+
3
+ Deterministic orchestration shell for autonomous AI agent execution.
4
+
5
+ Arcwright AI takes BMAD planning artifacts (PRD, Architecture, Epics, Stories) and autonomously executes them through Claude, enforcing validation gates, tracking decision provenance, and writing structured run artifacts after every execution.
6
+
7
+ ---
8
+
9
+ ## Table of Contents
10
+
11
+ - [Prerequisites](#prerequisites)
12
+ - [Installation](#installation)
13
+ - [Project Setup](#project-setup)
14
+ - [Running Stories](#running-stories)
15
+ - [Run Artifacts](#run-artifacts)
16
+ - [Understanding the Output](#understanding-the-output)
17
+ - [LangGraph Studio](#langgraph-studio)
18
+ - [Development](#development)
19
+ - [Troubleshooting](#troubleshooting)
20
+
21
+ ---
22
+
23
+ ## Prerequisites
24
+
25
+ - **Python 3.11+** (3.14 recommended; see [LangGraph Studio](#langgraph-studio) for the exception)
26
+ - **Claude API key** (Anthropic): `ARCWRIGHT_API_CLAUDE_API_KEY`
27
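+ - A project initialised with BMAD, with a planning-artifacts directory (e.g. `_bmad-output/` or `_spec/planning-artifacts/`; configured via `methodology.artifacts_path` in `config.yaml`) containing PRD, architecture, epics, and story files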
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ From the `arcwright-ai/` directory:
34
+
35
+ ```bash
36
+ python -m venv .venv
37
+ .venv/bin/pip install -e ".[dev]"
38
+ ```
39
+
40
+ Set your API key (add to your shell profile or `.env`):
41
+
42
+ ```bash
43
+ export ARCWRIGHT_API_CLAUDE_API_KEY="sk-ant-..."
44
+ ```
45
+
46
+ ---
47
+
48
+ ## Project Setup
49
+
50
+ Before dispatching stories, initialise Arcwright AI in your **target project** (the project whose stories you want to implement — not this repo):
51
+
52
+ ```bash
53
+ # From inside the target project root:
54
+ arcwright-ai init
55
+
56
+ # Or point explicitly:
57
+ arcwright-ai init --path /path/to/your/project
58
+ ```
59
+
60
+ This creates `.arcwright-ai/` with the following layout:
61
+
62
+ ```
63
+ .arcwright-ai/
64
+ ├── config.yaml ← project-level configuration (committed)
65
+ ├── runs/ ← execution artifacts (git-ignored)
66
+ ├── worktrees/ ← git worktrees (git-ignored)
67
+ └── tmp/ ← transient scratch space (git-ignored)
68
+ ```
69
+
70
+ **`config.yaml` defaults** (edit to suit your project):
71
+
72
+ ```yaml
73
+ model:
74
+ version: "claude-opus-4-6"
75
+
76
+ limits:
77
+ tokens_per_story: 200000
78
+ cost_per_run: 10.0
79
+ retry_budget: 3
80
+ timeout_per_story: 300
81
+
82
+ methodology:
83
+ artifacts_path: "_bmad-output" # where your BMAD planning docs live
84
+ type: "bmad"
85
+
86
+ scm:
87
+ branch_template: "arcwright-ai/{story_slug}"
88
+ ```
89
+
90
+ > **API key security**: Never put your API key in the project-level `.arcwright-ai/config.yaml` (that file is committed). Set it via the
91
+ > `ARCWRIGHT_API_CLAUDE_API_KEY` environment variable, or in the global
92
+ > `~/.arcwright-ai/config.yaml` (user-level, outside any repo).
93
+
94
+ Verify your setup:
95
+
96
+ ```bash
97
+ arcwright-ai validate-setup
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Running Stories
103
+
104
+ Dispatch a single story by its `epic.story` identifier (e.g., story 4 of epic 2 is `2.4`):
105
+
106
+ ```bash
107
+ # From inside the target project root:
108
+ arcwright-ai dispatch --story 2.4
109
+
110
+ # Dashes also work:
111
+ arcwright-ai dispatch --story 2-4
112
+ ```
113
+
114
+ The pipeline runs:
115
+
116
+ ```
117
+ preflight → budget_check → agent_dispatch → validate → commit → finalize
118
+ ```
119
+
120
+ Each node writes artifacts to `.arcwright-ai/runs/<run-id>/stories/<story-slug>/`.
121
+
122
+ **Exit codes:**
123
+
124
+ | Code | Meaning |
125
+ |------|---------|
126
+ | `0` | Story completed successfully |
127
+ | `1` | Unexpected error (configuration, I/O, etc.) |
128
+ | `2` | Story escalated (validation failed, could not auto-fix) |
129
+
130
+ ---
131
+
132
+ ## Run Artifacts
133
+
134
+ Every execution produces a run directory:
135
+
136
+ ```
137
+ .arcwright-ai/runs/<run-id>/
138
+ ├── run.yaml ← metadata: status, cost, story list
139
+ └── stories/<story-slug>/
140
+ ├── context-bundle.md ← assembled context injected into the agent
141
+ ├── agent-output.md ← raw output from Claude
142
+ ├── validation.md ← V6 invariant + V3 reflexion results and decision log
143
+ ├── halt-report.md ← populated only on escalation
144
+ └── summary.md ← produced by finalize node (success or halt)
145
+ ```
146
+
147
+ **Run ID format**: `YYYYMMDD-HHMMSS-<4-char-id>` (e.g. `20260305-022632-4b90`)
148
+
149
+ ### Reading a halt report
150
+
151
+ When a run escalates, check these files in order:
152
+
153
+ 1. **`halt-report.md`** — escalation reason, retry history, suggested fix
154
+ 2. **`validation.md`** — exact V6 invariant failures and V3 reflexion AC results
155
+ 3. **`agent-output.md`** — what Claude produced (verify files actually exist on disk before trusting V6 failures)
156
+
157
+ ---
158
+
159
+ ## Understanding the Output
160
+
161
+ ### `status: escalated` vs. failure
162
+
163
+ `escalated` means the pipeline itself ran without error, but the story could not pass validation — because of a hard invariant failure, an exhausted retry budget, or an exceeded token/cost budget (see the table below). It does **not** mean the agent crashed. The agent's work (files, code) is still on disk in the target project.
164
+
165
+ Escalation reasons:
166
+
167
+ | Reason | Meaning |
168
+ |--------|---------|
169
+ | `v6_invariant_failure` | Hard rule violation (missing file, bad name, syntax error) — retries won't help without a fix |
170
+ | `max_retries_exhausted` | V3 reflexion (AC review) kept failing after N retries |
171
+ | `budget_exceeded` | Token/cost ceiling hit before validation passed |
172
+
173
+ ### False-positive V6 failures
174
+
175
+ If `validation.md` shows a `file_existence` failure for a file that **does** exist on disk, check whether the path in the error has a leading backtick (e.g., `` `backend/app/routers/admin.py ``). This happens when the agent wraps file paths in inline code formatting in markdown headers. The current version of the V6 checker strips backticks. If you see this failure in a run produced by an older version, the files are fine — re-run the story to get a clean pass.
176
+
177
+ ---
178
+
179
+ ## LangGraph Studio
180
+
181
+ Arcwright AI ships a [`langgraph.json`](langgraph.json) config so you can visualise and inspect the execution graph in [LangGraph Studio](https://smith.langchain.com/studio/).
182
+
183
+ ### Why a separate venv?
184
+
185
+ The main `.venv` uses **Python 3.14**. The `langgraph-api` package (required for `langgraph dev`) depends on `pyo3`-based Rust extensions that do not yet publish wheels for Python 3.14 and cannot be compiled without matching support. A separate Python 3.13 venv is used exclusively for Studio.
186
+
187
+ ### One-time setup
188
+
189
+ Ensure Python 3.13 is available (via Homebrew or pyenv), then:
190
+
191
+ ```bash
192
+ cd arcwright-ai/
193
+
194
+ # Create Studio venv with Python 3.13
195
+ python3.13 -m venv .venv-studio
196
+
197
+ # Install project + LangGraph Studio deps
198
+ .venv-studio/bin/pip install -e ".[dev]" "langgraph-cli[inmem]"
199
+ ```
200
+
201
+ ### Starting Studio
202
+
203
+ ```bash
204
+ cd arcwright-ai/
205
+ .venv-studio/bin/langgraph dev
206
+ ```
207
+
208
+ The server starts at `http://127.0.0.1:2024`. Open the Studio UI at:
209
+
210
+ ```
211
+ https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
212
+ ```
213
+
214
+ You'll see the `story_graph` with all nodes and conditional edges:
215
+
216
+ ```
217
+ START → preflight → budget_check ──(ok)──→ agent_dispatch → validate ──(success)──→ commit → finalize → END
218
+ │ │
219
+ (exceeded) (escalated)
220
+ └──────────────────────────────────────┴──→ finalize → END
221
+ ↑(retry)
222
+ validate ─┘ → budget_check
223
+ ```
224
+
225
+ > A free [LangSmith](https://smith.langchain.com) account is required to use the Studio UI. The local API server itself runs without one.
226
+
227
+ ---
228
+
229
+ ## Development
230
+
231
+ All development commands use the main `.venv` (Python 3.14):
232
+
233
+ ```bash
234
+ # Install dev dependencies
235
+ pip install -e ".[dev]"
236
+ # Prefer explicit venv invocation to avoid interpreter mismatch:
237
+ .venv/bin/pip install -e ".[dev]"
238
+
239
+ # Run tests
240
+ .venv/bin/python -m pytest -q
241
+
242
+ # Lint
243
+ .venv/bin/ruff check .
244
+ .venv/bin/ruff format --check .
245
+
246
+ # Type check
247
+ .venv/bin/python -m mypy --strict src/
248
+
249
+ # All quality gates in one pass
250
+ .venv/bin/ruff check . && .venv/bin/ruff format --check . && .venv/bin/python -m mypy --strict src/ && .venv/bin/python -m pytest -q
251
+ ```
252
+
253
+ ### Python version note
254
+
255
+ The project targets Python 3.11+ and is developed against 3.14. The `.venv-studio` venv (Python 3.13) is **only** for running `langgraph dev`. Do not use it for tests or type checking — results may differ.
256
+
257
+ ---
258
+
259
+ ## Troubleshooting
260
+
261
+ **`ModuleNotFoundError: No module named 'arcwright_ai'`**
262
+
263
+ The venv's editable install link may be stale, or may not have been processed correctly on Python 3.14. Re-install:
264
+
265
+ ```bash
266
+ cd arcwright-ai/
267
+ .venv/bin/pip install -e .
268
+ ```
269
+
270
+ This rewrites the `.pth` file. Verify with:
271
+
272
+ ```bash
273
+ .venv/bin/python -c "import arcwright_ai; print(arcwright_ai.__file__)"
274
+ ```
275
+
276
+ **`langgraph dev` fails with `Required package 'langgraph-api' is not installed`**
277
+
278
+ You're using the main `.venv` (Python 3.14). Use `.venv-studio` instead:
279
+
280
+ ```bash
281
+ .venv-studio/bin/langgraph dev
282
+ ```
283
+
284
+ **Story dispatched but files don't match what validation expected**
285
+
286
+ Check `.arcwright-ai/config.yaml` in the target project. The `methodology.artifacts_path` must point to the directory containing your BMAD planning artifacts (PRD, architecture, epics). Default is `_bmad-output`; adjust if your project uses `_spec/planning-artifacts` or another path.
287
+
288
+ **Dev agent File List is consistently incomplete or doesn't match `git diff` output after a BMAD update**
289
+
290
+ The dev-story workflow in this project includes a custom git diff audit step (Step 9 of `instructions.xml`) that was added to address a systemic issue where 67% of stories had File List discrepancies. This customization lives in `_bmad/bmm/workflows/4-implementation/dev-story/` — a directory that is gitignored and gets overwritten by BMAD framework updates.
291
+
292
+ If you have recently run a BMAD update and agent File Lists are again going unaudited, the customization was likely overwritten. See the **BMAD Workflow Customizations** section in the root [`README.md`](../README.md#bmad-workflow-customizations) for the exact changes to re-apply.