@nomos-arc/arc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.claude/settings.local.json +10 -0
  2. package/.nomos-config.json +5 -0
  3. package/CLAUDE.md +108 -0
  4. package/LICENSE +190 -0
  5. package/README.md +569 -0
  6. package/dist/cli.js +21120 -0
  7. package/docs/auth/googel_plan.yaml +1093 -0
  8. package/docs/auth/google_task.md +235 -0
  9. package/docs/auth/hardened_blueprint.yaml +1658 -0
  10. package/docs/auth/red_team_report.yaml +336 -0
  11. package/docs/auth/session_state.yaml +162 -0
  12. package/docs/certificate/cer_enhance_plan.md +605 -0
  13. package/docs/certificate/certificate_report.md +338 -0
  14. package/docs/dev_overview.md +419 -0
  15. package/docs/feature_assessment.md +156 -0
  16. package/docs/how_it_works.md +78 -0
  17. package/docs/infrastructure/map.md +867 -0
  18. package/docs/init/master_plan.md +3581 -0
  19. package/docs/init/red_team_report.md +215 -0
  20. package/docs/init/report_phase_1a.md +304 -0
  21. package/docs/integrity-gate/enhance_drift.md +703 -0
  22. package/docs/integrity-gate/overview.md +108 -0
  23. package/docs/management/manger-task.md +99 -0
  24. package/docs/management/scafffold.md +76 -0
  25. package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
  26. package/docs/map/RED_TEAM_REPORT.md +159 -0
  27. package/docs/map/map_task.md +147 -0
  28. package/docs/map/semantic_graph_task.md +792 -0
  29. package/docs/map/semantic_master_plan.md +705 -0
  30. package/docs/phase7/TEAM_RED.md +249 -0
  31. package/docs/phase7/plan.md +1682 -0
  32. package/docs/phase7/task.md +275 -0
  33. package/docs/prompts/USAGE.md +312 -0
  34. package/docs/prompts/architect.md +165 -0
  35. package/docs/prompts/executer.md +190 -0
  36. package/docs/prompts/hardener.md +190 -0
  37. package/docs/prompts/red_team.md +146 -0
  38. package/docs/verification/goveranance-overview.md +396 -0
  39. package/docs/verification/governance-overview.md +245 -0
  40. package/docs/verification/verification-arc-ar.md +560 -0
  41. package/docs/verification/verification-architecture.md +560 -0
  42. package/docs/very_next.md +52 -0
  43. package/docs/whitepaper.md +89 -0
  44. package/overview.md +1469 -0
  45. package/package.json +63 -0
  46. package/src/adapters/__tests__/git.test.ts +296 -0
  47. package/src/adapters/__tests__/stdio.test.ts +70 -0
  48. package/src/adapters/git.ts +226 -0
  49. package/src/adapters/pty.ts +159 -0
  50. package/src/adapters/stdio.ts +113 -0
  51. package/src/cli.ts +83 -0
  52. package/src/commands/apply.ts +47 -0
  53. package/src/commands/auth.ts +301 -0
  54. package/src/commands/certificate.ts +89 -0
  55. package/src/commands/discard.ts +24 -0
  56. package/src/commands/drift.ts +116 -0
  57. package/src/commands/index.ts +78 -0
  58. package/src/commands/init.ts +121 -0
  59. package/src/commands/list.ts +75 -0
  60. package/src/commands/map.ts +55 -0
  61. package/src/commands/plan.ts +30 -0
  62. package/src/commands/review.ts +58 -0
  63. package/src/commands/run.ts +63 -0
  64. package/src/commands/search.ts +147 -0
  65. package/src/commands/show.ts +63 -0
  66. package/src/commands/status.ts +59 -0
  67. package/src/core/__tests__/budget.test.ts +213 -0
  68. package/src/core/__tests__/certificate.test.ts +385 -0
  69. package/src/core/__tests__/config.test.ts +191 -0
  70. package/src/core/__tests__/preflight.test.ts +24 -0
  71. package/src/core/__tests__/prompt.test.ts +358 -0
  72. package/src/core/__tests__/review.test.ts +161 -0
  73. package/src/core/__tests__/state.test.ts +362 -0
  74. package/src/core/auth/__tests__/manager.test.ts +166 -0
  75. package/src/core/auth/__tests__/server.test.ts +220 -0
  76. package/src/core/auth/gcp-projects.ts +160 -0
  77. package/src/core/auth/manager.ts +114 -0
  78. package/src/core/auth/server.ts +141 -0
  79. package/src/core/budget.ts +119 -0
  80. package/src/core/certificate.ts +502 -0
  81. package/src/core/config.ts +212 -0
  82. package/src/core/errors.ts +54 -0
  83. package/src/core/factory.ts +49 -0
  84. package/src/core/graph/__tests__/builder.test.ts +272 -0
  85. package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
  86. package/src/core/graph/__tests__/enricher.test.ts +299 -0
  87. package/src/core/graph/__tests__/parser.test.ts +200 -0
  88. package/src/core/graph/__tests__/pipeline.test.ts +202 -0
  89. package/src/core/graph/__tests__/renderer.test.ts +128 -0
  90. package/src/core/graph/__tests__/resolver.test.ts +185 -0
  91. package/src/core/graph/__tests__/scanner.test.ts +231 -0
  92. package/src/core/graph/__tests__/show.test.ts +134 -0
  93. package/src/core/graph/builder.ts +303 -0
  94. package/src/core/graph/constraints.ts +94 -0
  95. package/src/core/graph/contract-writer.ts +93 -0
  96. package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
  97. package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
  98. package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
  99. package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
  100. package/src/core/graph/drift/classifier.ts +165 -0
  101. package/src/core/graph/drift/comparator.ts +205 -0
  102. package/src/core/graph/drift/reporter.ts +77 -0
  103. package/src/core/graph/enricher.ts +251 -0
  104. package/src/core/graph/grammar-paths.ts +30 -0
  105. package/src/core/graph/html-template.ts +493 -0
  106. package/src/core/graph/map-schema.ts +137 -0
  107. package/src/core/graph/parser.ts +336 -0
  108. package/src/core/graph/pipeline.ts +209 -0
  109. package/src/core/graph/renderer.ts +92 -0
  110. package/src/core/graph/resolver.ts +195 -0
  111. package/src/core/graph/scanner.ts +145 -0
  112. package/src/core/logger.ts +46 -0
  113. package/src/core/orchestrator.ts +792 -0
  114. package/src/core/plan-file-manager.ts +66 -0
  115. package/src/core/preflight.ts +64 -0
  116. package/src/core/prompt.ts +173 -0
  117. package/src/core/review.ts +95 -0
  118. package/src/core/state.ts +294 -0
  119. package/src/core/worktree-coordinator.ts +77 -0
  120. package/src/search/__tests__/chunk-extractor.test.ts +339 -0
  121. package/src/search/__tests__/embedder-auth.test.ts +124 -0
  122. package/src/search/__tests__/embedder.test.ts +267 -0
  123. package/src/search/__tests__/graph-enricher.test.ts +178 -0
  124. package/src/search/__tests__/indexer.test.ts +518 -0
  125. package/src/search/__tests__/integration.test.ts +649 -0
  126. package/src/search/__tests__/query-engine.test.ts +334 -0
  127. package/src/search/__tests__/similarity.test.ts +78 -0
  128. package/src/search/__tests__/vector-store.test.ts +281 -0
  129. package/src/search/chunk-extractor.ts +167 -0
  130. package/src/search/embedder.ts +209 -0
  131. package/src/search/graph-enricher.ts +95 -0
  132. package/src/search/indexer.ts +483 -0
  133. package/src/search/lexical-searcher.ts +190 -0
  134. package/src/search/query-engine.ts +225 -0
  135. package/src/search/vector-store.ts +311 -0
  136. package/src/types/index.ts +572 -0
  137. package/src/utils/__tests__/ansi.test.ts +54 -0
  138. package/src/utils/__tests__/frontmatter.test.ts +79 -0
  139. package/src/utils/__tests__/sanitize.test.ts +229 -0
  140. package/src/utils/ansi.ts +19 -0
  141. package/src/utils/context.ts +44 -0
  142. package/src/utils/frontmatter.ts +27 -0
  143. package/src/utils/sanitize.ts +78 -0
  144. package/test/e2e/lifecycle.test.ts +330 -0
  145. package/test/fixtures/mock-planner-hang.ts +5 -0
  146. package/test/fixtures/mock-planner.ts +26 -0
  147. package/test/fixtures/mock-reviewer-bad.ts +8 -0
  148. package/test/fixtures/mock-reviewer-retry.ts +34 -0
  149. package/test/fixtures/mock-reviewer.ts +18 -0
  150. package/test/fixtures/sample-project/src/circular-a.ts +6 -0
  151. package/test/fixtures/sample-project/src/circular-b.ts +6 -0
  152. package/test/fixtures/sample-project/src/config.ts +15 -0
  153. package/test/fixtures/sample-project/src/main.ts +19 -0
  154. package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
  155. package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
  156. package/test/fixtures/sample-project/src/types.ts +14 -0
  157. package/test/fixtures/sample-project/src/utils/index.ts +14 -0
  158. package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
  159. package/tsconfig.json +20 -0
  160. package/vitest.config.ts +12 -0
@@ -0,0 +1,3581 @@
1
+ # Master Plan: arc CLI (nomos-arc.ai) — Phase 1a
2
+
3
+ > **AI-Executable Atomic Task Plan**
4
+ > Generated from `overview.md` (Phase 1 Architecture Specification).
5
+ > Each task is self-contained: an AI agent can check out a single task and complete it 100% without clarification.
6
+ > **Revision 3 — Hardening Pass. All P0 and P1 issues from the second red-team audit resolved. Orchestrator decomposed. Process safety hardened. Token/cost model corrected. No code should be written until this revision is structurally sound.**
7
+
8
+ ---
9
+
10
+ ## Scope Decision (Supervised-First Architecture)
11
+
12
+ > **Phase 1a is Supervised-Only.** Auto mode (headless PTY + Expect Logic) is deferred to Phase 1b. This eliminates the primary source of PTY fragility: rolling-buffer pattern matching and response_map heuristics. The PtyAdapter in Phase 1a is a pure Tee Stream — it pipes PTY output directly to the developer's terminal and captures a stripped copy for logging. The developer is the quality gate inside the session. The system relies on exit code (0 = success, non-0 = failure) to advance the state machine. E2E tests use a Mock Binary that modifies files and exits 0 — no TTY simulation needed.
13
+
14
+ ---
15
+
16
+ ## Execution Rules
17
+
18
+ 1. **Complete tasks in order within each milestone.** Cross-milestone dependencies are noted explicitly.
19
+ 2. **Never skip a verification command.** If it fails, the task is not done.
20
+ 3. **JSON is the source of truth.** The CLI must NEVER read from Markdown files for state — only JSON.
21
+ 4. **All file paths in this plan are relative to the project root** (the directory containing `.nomos-config.json`).
22
+ 5. **Stack:** Node.js 20+, TypeScript 5+, commander.js, node-pty, simple-git, proper-lockfile, Zod, Winston, esbuild, vitest.
23
+ 6. **ESM project.** `package.json` sets `"type": "module"`. Use `import` everywhere. Use `createRequire` only when importing JSON. Never use bare `require()`.
24
+ 7. **`simple-git` worktree commands.** `simple-git` does not expose `.worktree()` as a method. Always use `git.raw(['worktree', 'add', ...])` for worktree operations.
25
+ 8. **File locations.** All `tasks-management/` files (state, plans, logs) live in the **project root** directory. The worktree at `/tmp/nomos-worktrees/...` is only the `cwd` for subprocess execution. State is never written inside the worktree.
26
+ 9. **Prompt delivery.** The assembled prompt is passed as a CLI argument via the `-p` flag (e.g., `claude -p "$PROMPT"`), NOT written to PTY stdin. The `-p` flag is appended to `binary.args` at spawn time.
27
+ 10. **Argument safety (Shell Injection Prevention).** NEVER concatenate command arguments into a single shell string. Always pass `cmd` and `args` as separate parameters to `pty.spawn()` and `child_process.spawn()`. Never set `shell: true` on `spawn()` options. This ensures prompt content containing `$(...)`, backticks, or metacharacters is passed literally, not interpreted by a shell.
28
+ 11. **Schema versioning.** All persisted JSON state files include a `schema_version: number` field (starting at `1`). On read, if the stored version is older than the current one, run the corresponding migration function(s) before Zod validation. Maintain a `migrations` map keyed by the version being migrated *from* (e.g., `{ 1: migrateV1toV2 }` once a version 2 exists). The map is empty in Phase 1a, but the infrastructure must exist from day one.
29
+ 12. **Worktree validation before subprocess spawn.** Before spawning any subprocess, validate that `state.shadow_branch.worktree` exists on disk. If missing, attempt recovery: check if the git branch still exists → if yes, recreate worktree from existing branch (`git worktree add <path> <existing-branch>` without `-b`); if branch is also gone → transition to `failed` with reason `worktree_missing`.
30
+ 13. **Target branch verification before merge.** Before any `git merge` in `arc apply`, verify the project root is on the expected target branch (default: `main`). Abort with a clear error if the current branch doesn't match.
31
+ 14. **`commitToShadowBranch` file list.** This function ONLY commits plan output files: `tasks-management/plans/{taskId}-v{n}.diff` and, if it exists, `tasks-management/plans/{taskId}-v{n}.md`. It NEVER commits state JSON files (`tasks-management/state/`) — these are gitignored and machine-specific. If `config.git.include_logs` is `true`, also commit `tasks-management/logs/{taskId}-v{n}.log`. The orchestrator must build this explicit list and pass it to `commitToShadowBranch`. No wildcard adds. *(RTV-2 fix: resolves the explicit contradiction between Task 1.1 gitignore and Task 5.1 auto-commit description.)*
32
+
33
+ ---
34
+
35
+ ## Milestone 1: Project Scaffolding & Shared Utilities
36
+
37
+ ### [x] Task 1.1 — Package, TypeScript & Vitest Configuration *(Completed: 2026-04-03)*
38
+
39
+ **Component:** `package.json`, `tsconfig.json`, `vitest.config.ts`
40
+ **Objective:** Initialize the Node.js project with all Phase 1a dependencies, TypeScript strict mode, and vitest configured for ESM.
41
+
42
+ **Technical Instruction:**
43
+
44
+ 1. Run `npm init -y` and set the following in `package.json`:
45
+ - `"name": "nomos-arc"`, `"version": "0.1.0"`, `"type": "module"`
46
+ - `"engines": { "node": ">=20.0.0" }` *(L-1 fix: enforce minimum Node version)*
47
+ - `"bin": { "arc": "./dist/cli.js" }`
48
+ - `"scripts"`:
49
+ - `"build": "esbuild src/cli.ts --bundle --platform=node --target=node20 --outfile=dist/cli.js --format=esm --external:node-pty --external:simple-git --external:proper-lockfile --external:winston --external:gray-matter --banner:js='#!/usr/bin/env node'"`
50
+ - `"dev": "tsx src/cli.ts"`
51
+ - `"test": "vitest run"`
52
+ - `"test:watch": "vitest"`
53
+ - `"test:unit": "vitest run --dir src"`
54
+ - `"test:e2e": "vitest run --dir test"`
55
+ - `"lint": "tsc --noEmit"`
56
+ 2. Install production dependencies:
57
+ ```
58
+ npm install commander zod winston proper-lockfile simple-git node-pty gray-matter
59
+ ```
60
+ 3. Install dev dependencies:
61
+ ```
62
+ npm install -D typescript @types/node @types/proper-lockfile esbuild tsx vitest
63
+ ```
64
+ 4. Create `tsconfig.json`:
65
+ ```json
66
+ {
67
+ "compilerOptions": {
68
+ "target": "ES2022",
69
+ "module": "Node16",
70
+ "moduleResolution": "Node16",
71
+ "outDir": "dist",
72
+ "rootDir": "src",
73
+ "strict": true,
74
+ "esModuleInterop": true,
75
+ "skipLibCheck": true,
76
+ "forceConsistentCasingInFileNames": true,
77
+ "resolveJsonModule": true,
78
+ "declaration": true,
79
+ "declarationMap": true,
80
+ "sourceMap": true
81
+ },
82
+ "include": ["src/**/*"],
83
+ "exclude": ["node_modules", "dist", "test"]
84
+ }
85
+ ```
86
+
87
+ 5. Create `vitest.config.ts` at project root:
88
+ ```typescript
89
+ import { defineConfig } from 'vitest/config';
90
+
91
+ export default defineConfig({
92
+ test: {
93
+ include: [
94
+ 'src/**/__tests__/**/*.test.ts',
95
+ 'test/**/*.test.ts',
96
+ ],
97
+ testTimeout: 10000,
98
+ hookTimeout: 10000,
99
+ },
100
+ });
101
+ ```
102
+
103
+ 6. Create `.gitignore`:
104
+ ```gitignore
105
+ node_modules/
106
+ dist/
107
+ *.tmp
108
+ tasks-management/logs/
109
+ tasks-management/state/
110
+ tasks-management/state/*.lock
111
+ ```
112
+ - *(H-5 fix)* `tasks-management/state/` is gitignored because state files contain machine-specific absolute paths (`/tmp/nomos-worktrees/...`), runtime token counts, and timestamps. These are ephemeral and must never be version-controlled. If state needs to be shared across machines, a separate `arc export` command should strip machine-specific fields first — that's a Phase 2 concern.
113
+ - *(L-4 fix)* `tasks-management/state/*.lock` prevents `proper-lockfile` lock directories from being accidentally staged.
114
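+ - **Impact on `commitToShadowBranch`:** Since state files are gitignored, `commitToShadowBranch` ONLY commits plan diffs and (optionally) logs — NOT state JSONs. See Execution Rule #14 for the authoritative file list. Note that `tasks-management/logs/` is gitignored as well, so when `config.git.include_logs` is `true` the log file must be force-added (`git add -f`); a plain `git add` refuses paths matched by `.gitignore`.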
115
+
116
+ **Dependencies:** None.
117
+ **Definition of Done:** `npx tsc --noEmit` exits 0. `package.json` contains all listed dependencies. `vitest.config.ts` exists.
118
+ **Verification Command:** `npx tsc --noEmit && npx tsx -e "import { createRequire } from 'module'; const require = createRequire(import.meta.url); const p = require('./package.json'); console.assert(p.bin.arc, 'missing bin'); console.log('OK')"`
119
+
120
+ ---
121
+
122
+ ### [x] Task 1.2 — Directory Structure & Entry Point Stub *(Completed: 2026-04-03)*
123
+
124
+ **Component:** `src/` directory tree, `src/cli.ts`
125
+ **Objective:** Create the full source directory structure and a minimal CLI entry point that prints version info.
126
+
127
+ **Technical Instruction:**
128
+
129
+ 1. Create the following directories:
130
+ ```
131
+ src/
132
+ src/core/ # ConfigManager, StateManager, Logger
133
+ src/commands/ # One file per CLI command
134
+ src/adapters/ # PTY adapter, Stdio adapter, Git adapter
135
+ src/types/ # Shared TypeScript types/interfaces
136
+ src/utils/ # Pure utility functions (ANSI strip, sanitize, etc.)
137
+ test/
138
+ test/fixtures/ # Mock binaries, sample configs
139
+ test/e2e/ # End-to-end tests
140
+ ```
141
+
142
+ 2. Create `src/cli.ts` — the CLI entry point:
143
+ ```typescript
144
+ import { Command } from 'commander';
145
+ import { createRequire } from 'module';
146
+
147
+ const require = createRequire(import.meta.url);
148
+ const pkg = require('../package.json');
149
+
150
+ const program = new Command();
151
+
152
+ program
153
+ .name('arc')
154
+ .description('The Architect — AI Orchestrator CLI')
155
+ .version(pkg.version);
156
+
157
+ // Commands will be registered here by subsequent tasks.
158
+
159
+ program.parse();
160
+ ```
161
+
162
+ 3. Create `src/types/index.ts` with a placeholder export:
163
+ ```typescript
164
+ export type ExecutionMode = 'supervised' | 'auto' | 'dry-run';
165
+ ```
166
+
167
+ **Dependencies:** Task 1.1
168
+ **Definition of Done:** `npx tsx src/cli.ts --version` prints `0.1.0`. `npx tsc --noEmit` passes.
169
+ **Verification Command:** `npx tsx src/cli.ts --version`
170
+
171
+ ---
172
+
173
+ ### [x] Task 1.3 — Type Definitions *(Completed: 2026-04-03)*
174
+
175
+ **Component:** `src/types/index.ts`
176
+ **Objective:** Define all shared TypeScript interfaces and types used across the project.
177
+
178
+ **Technical Instruction:**
179
+
180
+ Replace `src/types/index.ts` with the complete type definitions below. All types are exported. No `any`.
181
+
182
+ ```typescript
183
+ // ─── Execution Modes ────────────────────────────────────────────────────────
184
+ export type ExecutionMode = 'supervised' | 'auto' | 'dry-run';
185
+
186
+ // ─── Config Types ────────────────────────────────────────────────────────────
187
+ export interface BinaryConfig {
188
+ cmd: string;
189
+ args: string[];
190
+ pty: boolean;
191
+ total_timeout_ms: number;
192
+ heartbeat_timeout_ms: number;
193
+ max_output_bytes: number;
194
+ usage_pattern: string | null;
195
+ }
196
+
197
+ export interface NomosConfig {
198
+ execution: {
199
+ default_mode: ExecutionMode;
200
+ shadow_branch_prefix: string;
201
+ worktree_base: string;
202
+ supervised_heartbeat_timeout_ms: number;
203
+ };
204
+ binaries: {
205
+ planner: BinaryConfig;
206
+ reviewer: BinaryConfig;
207
+ };
208
+ convergence: {
209
+ score_threshold: number;
210
+ max_iterations: number;
211
+ };
212
+ budget: {
213
+ max_tokens_per_task: number;
214
+ warn_at_percent: number;
215
+ cost_per_1k_tokens: Record<string, number | { input: number; output: number }>;
216
+ };
217
+ security: {
218
+ sanitize_patterns: string[];
219
+ entropy_threshold: number;
220
+ sanitize_on: ('input' | 'output')[];
221
+ safe_commands: string[];
222
+ redaction_label: string;
223
+ };
224
+ git: {
225
+ auto_commit: boolean;
226
+ include_logs: boolean;
227
+ commit_prefix: string;
228
+ sign_commits: boolean;
229
+ };
230
+ review: {
231
+ max_context_files: number; // max affected files to inject into reviewer prompt
232
+ };
233
+ logging: {
234
+ level: string;
235
+ retain_days: number;
236
+ };
237
+ }
238
+
239
+ // ─── Task State Types ─────────────────────────────────────────────────────────
240
+ export type TaskStatus =
241
+ | 'init'
242
+ | 'planning'
243
+ | 'pending_review'
244
+ | 'reviewing'
245
+ | 'refinement'
246
+ | 'approved'
247
+ | 'merged'
248
+ | 'discarded'
249
+ | 'failed'
250
+ | 'merge_conflict'
251
+ | 'stalled';
252
+
253
+ export interface ReviewIssue {
254
+ severity: 'high' | 'medium' | 'low';
255
+ category: 'security' | 'performance' | 'architecture' | 'correctness' | 'maintainability';
256
+ description: string;
257
+ suggestion: string;
258
+ }
259
+
260
+ export interface ReviewResult {
261
+ score: number;
262
+ mode: ExecutionMode;
263
+ issues: ReviewIssue[];
264
+ summary: string;
265
+ }
266
+
267
+ export interface HistoryEntry {
268
+ version: number;
269
+ step: 'planning' | 'reviewing';
270
+ mode: ExecutionMode;
271
+ binary: string;
272
+ started_at: string; // ISO 8601
273
+ completed_at: string; // ISO 8601
274
+ raw_output: string;
275
+ output_hash: string; // sha256:<hex>
276
+ input_tokens: number; // RT2-4.2 fix: separate input tokens (cheaper rate)
277
+ output_tokens: number; // RT2-4.2 fix: separate output tokens (expensive rate)
278
+ tokens_used: number; // total (input_tokens + output_tokens) — kept for backward compat
279
+ tokens_source: 'metered' | 'estimated'; // RTV-4 fix: explicit source so arc status can label correctly
280
+ rules_snapshot: string[];
281
+ review: ReviewResult | null;
282
+ }
283
+
284
+ export interface TaskState {
285
+ schema_version: number; // W5 fix: starts at 1, enables future migrations
286
+ task_id: string;
287
+ current_version: number;
288
+ // H-4 fix: locked_by removed. Process ownership is tracked via proper-lockfile
289
+ // file locks in StateManager.write(), not via an in-state JSON field.
290
+ meta: {
291
+ status: TaskStatus;
292
+ created_at: string; // ISO 8601
293
+ updated_at: string; // ISO 8601
294
+ approval_reason?: 'score_threshold' | 'max_iterations_reached'; // RTV-6 fix: enables arc run exit code 2
295
+ };
296
+ orchestration: {
297
+ planner_bin: string;
298
+ reviewer_bin: string;
299
+ };
300
+ shadow_branch: {
301
+ branch: string;
302
+ worktree: string; // absolute path, machine-specific
303
+ base_commit: string; // SHA of the commit the branch forked from
304
+ status: 'active' | 'merged' | 'discarded';
305
+ };
306
+ context: {
307
+ files: string[]; // context_files from task frontmatter
308
+ rules: string[]; // list of rule file names loaded
309
+ rules_hash: string; // sha256:<hex> of concatenated rule content
310
+ };
311
+ budget: {
312
+ tokens_used: number;
313
+ estimated_cost_usd: number;
314
+ };
315
+ history: HistoryEntry[];
316
+ }
317
+
318
+ export interface TaskFrontmatter {
319
+ title: string;
320
+ priority: 'high' | 'medium' | 'low';
321
+ context_files?: string[];
322
+ status?: TaskStatus;
323
+ }
324
+
325
+ // ─── Adapter Types ────────────────────────────────────────────────────────────
326
+ export interface ExecutionResult {
327
+ exitCode: number;
328
+ rawOutput: string;
329
+ strippedOutput: string;
330
+ duration_ms: number;
331
+ killed: boolean;
332
+ killReason?: 'heartbeat_timeout' | 'total_timeout' | 'terminate_action';
333
+ }
334
+
335
+ // PtySpawnOptions: used by PtyAdapter.execute()
336
+ export interface PtySpawnOptions {
337
+ cmd: string;
338
+ args: string[]; // includes '-p' and the assembled prompt
339
+ cwd: string; // worktree path
340
+ env: Record<string, string>;
341
+ mode: ExecutionMode;
342
+ heartbeat_timeout_ms: number;
343
+ total_timeout_ms: number;
344
+ max_output_bytes: number;
345
+ }
346
+
347
+ // StdioSpawnOptions: used by StdioAdapter.execute()
348
+ export interface StdioSpawnOptions {
349
+ cmd: string;
350
+ args: string[];
351
+ cwd: string;
352
+ env: Record<string, string>;
353
+ stdinData: string; // the assembled review prompt, piped to stdin
354
+ heartbeat_timeout_ms: number;
355
+ total_timeout_ms: number;
356
+ max_output_bytes: number;
357
+ }
358
+
359
+ // ─── Transport Interfaces ─────────────────────────────────────────────────────
360
+ // Formal interfaces for the planner and reviewer adapters.
361
+ // Implement these in any new adapter (e.g., SDKAdapter) to make it a drop-in
362
+ // replacement for PTY or Stdio without touching Orchestrator logic.
363
+ export interface PlannerTransport {
364
+ execute(options: PtySpawnOptions): Promise<ExecutionResult>;
365
+ }
366
+
367
+ export interface ReviewerTransport {
368
+ execute(options: StdioSpawnOptions): Promise<ExecutionResult>;
369
+ }
370
+
371
+ // ─── Orchestrator Helper Types ─────────────────────────────────────────────────
372
+ export interface StateTransitionOptions {
373
+ reason?: string; // failure reason (e.g., 'binary_not_found', 'execution_timeout')
374
+ version_increment?: boolean; // whether to bump current_version
375
+ history_entry?: HistoryEntry; // attach a new history entry
376
+ review_result?: ReviewResult; // attach review to latest history entry
377
+ approval_reason?: 'score_threshold' | 'max_iterations_reached'; // set meta.approval_reason
378
+ }
379
+ ```
380
+
381
+ **Note on `PtySpawnOptions` and `StdioSpawnOptions`:** These interfaces are defined here in `src/types/index.ts` so that all adapters share a single canonical definition. Tasks 3.1 and 3.2 import from `'../types/index.js'` (note the `.js` extension — required for Node16 ESM module resolution).
382
+
383
+ **Dependencies:** Task 1.2
384
+ **Definition of Done:** `npx tsc --noEmit` passes. All types from the architecture spec are represented. `ExecutionResult`, `PtySpawnOptions`, `StdioSpawnOptions`, `StateTransitionOptions`, `tokens_source`, and `approval_reason` are defined.
385
+ **Verification Command:** `npx tsc --noEmit`
386
+
387
+ ---
388
+
389
+ ### [x] Task 1.4 — Logger Service *(Completed: 2026-04-03)*
390
+
391
+ **Component:** `src/core/logger.ts`, `src/utils/ansi.ts`
392
+ **Objective:** Create a Winston-based logger that strips ANSI codes when writing to files, supports log levels, and uses the `[nomos:<level>]` prefix format.
393
+
394
+ **Technical Instruction:**
395
+
396
+ 1. Create `src/utils/ansi.ts`:
397
+ - Export `stripAnsi(input: string): string` that removes:
398
+ - CSI sequences: `/\x1b\[[0-9;]*[a-zA-Z]/g`
399
+ - OSC sequences: `/\x1b\][^\x07]*\x07/g`
400
+ - Remaining C0 control chars and DEL (`\x7F`), except `\t`, `\n`, and `\r`, which the character class leaves intact: `/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g`
401
+ - This function is reused by the PTY adapter, Stdio adapter, and logger.
402
+
403
+ 2. Create `src/core/logger.ts`:
404
+ - Use Winston with two transports:
405
+ - **Console transport:** Colorized, format: `[nomos:<level>] <message>`
406
+ - **File transport:** Writes to `{logDir}/nomos.log`, ANSI-stripped, JSON format.
407
+ - Export `createLogger(level: string, logDir?: string): winston.Logger`.
408
+ - The file transport applies `stripAnsi` from `src/utils/ansi.ts`.
409
+ - Log level defaults to `'info'`.
410
+
411
+ 3. Create `src/utils/__tests__/ansi.test.ts`:
412
+ - Test that `stripAnsi` removes 256-color codes, bold, reset, OSC sequences.
413
+ - Test that newlines and tabs are preserved.
414
+ - Test that plain text passes through unchanged.
415
+
416
+ **Dependencies:** Task 1.2
417
+ **Definition of Done:** Logger writes to console and file. ANSI stripping removes all escape sequences. Unit tests pass.
418
+ **Verification Command:** `npx vitest run src/utils/__tests__/ansi.test.ts`
419
+
420
+ ---
421
+
422
+ ### [x] Task 1.5 — ConfigManager (Zod Schema + Walk-up Discovery) *(Completed: 2026-04-03)*
423
+
424
+ **Component:** `src/core/config.ts`, `src/core/errors.ts`
425
+ **Objective:** Implement config loading with walk-up directory discovery, Zod schema validation, per-binary defaults, and a `getDefaultConfig()` factory.
426
+
427
+ **Technical Instruction:**
428
+
429
+ 1. Create `src/core/errors.ts`:
430
+ ```typescript
431
+ export type NomosErrorCode =
432
+ | 'config_not_found'
433
+ | 'config_invalid'
434
+ | 'binary_not_found'
435
+ | 'task_not_found'
436
+ | 'task_exists'
437
+ | 'invalid_transition'
438
+ | 'state_locked'
439
+ | 'convergence_failed'
440
+ | 'review_failed'
441
+ | 'budget_exceeded'
442
+ | 'no_tty'
443
+ | 'worktree_creation_failed'
444
+ | 'worktree_missing'
445
+ | 'worktree_unrecoverable'
446
+ | 'branch_exists'
447
+ | 'wrong_branch'
448
+ | 'dirty_working_tree'
449
+ | 'path_traversal'
450
+ | 'state_migration_failed'
451
+ | 'invalid_frontmatter'
452
+ | 'rules_missing'
453
+ | 'no_changes'
454
+ | 'secrets_detected' // RT2-5.1 fix: context_files contain secrets
455
+ | 'base_commit_unreachable'; // RT2-2.1 fix: baseCommit SHA orphaned after rebase
456
+
457
+ export class NomosError extends Error {
458
+ constructor(
459
+ public readonly code: NomosErrorCode,
460
+ message: string,
461
+ ) {
462
+ super(message);
463
+ this.name = 'NomosError';
464
+ }
465
+ }
466
+ ```
467
+
468
+ 2. Create `src/core/config.ts`:
469
+
470
+ **Zod Schema — define in this exact order to avoid forward references:**
471
+
472
+ ```typescript
473
+ import { z } from 'zod';
474
+ import * as fs from 'fs';
475
+ import * as path from 'path';
476
+ import { NomosError } from './errors.js';
477
+ import type { NomosConfig } from '../types/index.js';
478
+
479
+ // ── Per-binary defaults ──────────────────────────────────────────────────────
480
+ // IMPORTANT: planner and reviewer have DIFFERENT defaults.
481
+ // Using z.object().default({}) at each nested level performs shallow merge ONLY at
482
+ // the top sub-object level. To correctly deep-merge a partial user config (e.g., only
483
+ // providing `cmd`), every individual field must carry its own .default() value.
484
+ // This ensures `{ cmd: 'my-claude' }` correctly inherits pty: true, timeouts, etc.
485
+
486
+ const PlannerBinarySchema = z.object({
487
+ cmd: z.string().default('claude'),
488
+ args: z.array(z.string()).default([]),
489
+ pty: z.boolean().default(true),
490
+ total_timeout_ms: z.number().positive().default(300000),
491
+ heartbeat_timeout_ms: z.number().positive().default(120000),
492
+ max_output_bytes: z.number().positive().default(1048576),
493
+ usage_pattern: z.string().nullable().default('Tokens used:\\s*(\\d+)'),
494
+ });
495
+
496
+ const ReviewerBinarySchema = z.object({
497
+ cmd: z.string().default('codex'),
498
+ args: z.array(z.string()).default(['-q', '--full-auto']),
499
+ pty: z.boolean().default(false),
500
+ total_timeout_ms: z.number().positive().default(120000),
501
+ heartbeat_timeout_ms: z.number().positive().default(120000),
502
+ max_output_bytes: z.number().positive().default(524288),
503
+ usage_pattern: z.string().nullable().default(null),
504
+ });
505
+
506
+ export const NomosConfigSchema = z.object({
507
+ execution: z.object({
508
+ default_mode: z.enum(['supervised', 'dry-run']).default('supervised'),
509
+ shadow_branch_prefix: z.string().default('nomos/'),
510
+ worktree_base: z.string().default('/tmp/nomos-worktrees/'),
511
+ supervised_heartbeat_timeout_ms: z.number().positive().default(300000),
512
+ }).default({}),
513
+ binaries: z.object({
514
+ planner: PlannerBinarySchema.default({}),
515
+ reviewer: ReviewerBinarySchema.default({}),
516
+ }).default({}),
517
+ convergence: z.object({
518
+ score_threshold: z.number().min(0).max(1).default(0.9),
519
+ max_iterations: z.number().int().positive().default(3),
520
+ }).default({}),
521
+ budget: z.object({
522
+ max_tokens_per_task: z.number().positive().default(100000),
523
+ warn_at_percent: z.number().min(0).max(100).default(80),
524
+ // RT2-4.3 fix: cost_per_1k_tokens supports both flat number (backward compat,
525
+ // treated as output rate with input = rate/5) and { input, output } objects.
526
+ cost_per_1k_tokens: z.record(z.string(), z.union([
527
+ z.number(),
528
+ z.object({ input: z.number(), output: z.number() }),
529
+ ])).default({
530
+ claude: { input: 0.003, output: 0.015 },
531
+ codex: { input: 0.0005, output: 0.002 },
532
+ }),
533
+ }).default({}),
534
+ security: z.object({
535
+ sanitize_patterns: z.array(z.string()).default([]),
536
+ entropy_threshold: z.number().positive().default(4.5),
537
+ sanitize_on: z.array(z.enum(['input', 'output'])).default(['input']),
538
+ safe_commands: z.array(z.string()).default([]),
539
+ redaction_label: z.string().default('[REDACTED]'),
540
+ }).default({}),
541
+ git: z.object({
542
+ auto_commit: z.boolean().default(true),
543
+ include_logs: z.boolean().default(false),
544
+ commit_prefix: z.string().default('[nomos]'),
545
+ sign_commits: z.boolean().default(false),
546
+ }).default({}),
547
+ review: z.object({
548
+ max_context_files: z.number().int().positive().default(5),
549
+ // Budget note: each affected file adds ~50 lines to the reviewer prompt.
550
+ // At default 5 files this is roughly 250 extra lines (~1500 tokens).
551
+ // Tune downward if reviewer token costs are a concern.
552
+ }).default({}),
553
+ logging: z.object({
554
+ level: z.string().default('info'),
555
+ retain_days: z.number().positive().default(30),
556
+ }).default({}),
557
+ });
558
+ ```
559
+
560
+ **`getDefaultConfig()` — RTV-3 fix: This function was referenced in factory.ts but never defined.**
561
+ ```typescript
562
+ /**
563
+ * Returns a fully-defaulted NomosConfig by parsing an empty object through the Zod schema.
564
+ * Every field receives its default value. Used by `arc init` (no config file exists yet)
565
+ * and in tests that need a baseline config without a file on disk.
566
+ */
567
+ export function getDefaultConfig(): NomosConfig {
568
+ return NomosConfigSchema.parse({}) as NomosConfig;
569
+ }
570
+ ```
571
+
572
+ **`findConfigFile(startDir: string): string`:**
573
+ - Walk from `startDir` up to filesystem root.
574
+ - Look for `.nomos-config.json` in each directory.
575
+ - If found, return the absolute path. The directory becomes the **project root**.
576
+ - If not found at root, throw `NomosError('config_not_found', 'No .nomos-config.json found. Run: arc init to scaffold a new project.')`.
577
+ - Use `fs.realpathSync` to resolve symlinks.
578
+
579
+ **`loadConfig(startDir?: string): { config: NomosConfig; projectRoot: string }`:**
580
+ - Calls `findConfigFile(startDir ?? process.cwd())`.
581
+ - Reads and parses JSON.
582
+ - Validates with `NomosConfigSchema`. On error, throw `NomosError('config_invalid', ...)` with the Zod field path included.
583
+ - Returns `{ config, projectRoot }`.
584
+
585
+ 3. Create `src/core/__tests__/config.test.ts`:
586
+ - Test walk-up discovery: config in parent directory is found.
587
+ - Test minimal config `{ "binaries": { "planner": { "cmd": "my-claude" }, "reviewer": { "cmd": "my-codex" } } }` — verify `planner.pty === true`, `reviewer.pty === false`, `planner.heartbeat_timeout_ms === 120000`, `reviewer.heartbeat_timeout_ms === 120000`, `convergence.score_threshold === 0.9`.
588
+ - Test that providing only `planner.cmd` does NOT wipe other planner defaults like `pty`, `heartbeat_timeout_ms` (deep-merge validation).
589
+ - Test invalid config: wrong type for `score_threshold` produces `NomosError` with field path in message.
590
+ - Test missing config: throws `NomosError('config_not_found')`.
591
+ - Test `getDefaultConfig()` returns a valid `NomosConfig` with all fields populated.
592
+ - Use `fs.mkdtempSync` for isolated temp directories.
593
+
594
+ **Dependencies:** Task 1.3
595
+ **Definition of Done:** Walk-up discovery works from nested directories. Minimal config loads with all defaults applied (including different planner/reviewer defaults). Partial nested config deep-merges correctly (only provided fields override, others retain defaults). `getDefaultConfig()` is exported and returns a fully-valid config. Invalid config produces a Zod error with field path.
596
+ **Verification Command:** `npx vitest run src/core/__tests__/config.test.ts`
597
+
598
+ ---
599
+
600
+ ### [x] Task 1.6 — StateManager (Atomic JSON Writes + File Locking) *(Completed: 2026-04-03)*
601
+
602
+ **Component:** `src/core/state.ts`
603
+ **Objective:** Implement the state manager with atomic write-then-rename, file locking via `proper-lockfile`, state transition validation, transition metadata, and a Zod schema for `TaskState`.
604
+
605
+ **Technical Instruction:**
606
+
607
+ 1. Create `src/core/state.ts`:
608
+
609
+ **`TaskState` Zod Schema — define at the top of `state.ts` for use in `read()`. This is the authoritative Zod representation of `TaskState` from `src/types/index.ts`:**
610
+
611
+ ```typescript
612
+ import { z } from 'zod';
613
+ import * as fs from 'fs';
614
+ import * as path from 'path';
615
+ import * as lockfile from 'proper-lockfile';
616
+ import type { Logger } from 'winston';
617
+ import { NomosError } from './errors.js';
618
+ import type { TaskState, TaskStatus, StateTransitionOptions, HistoryEntry } from '../types/index.js';
619
+
620
+ const ReviewIssueSchema = z.object({
621
+ severity: z.enum(['high', 'medium', 'low']),
622
+ category: z.enum(['security', 'performance', 'architecture', 'correctness', 'maintainability']),
623
+ description: z.string().min(5),
624
+ suggestion: z.string().min(5),
625
+ });
626
+
627
+ const ReviewResultSchema = z.object({
628
+ score: z.number().min(0).max(1),
629
+ mode: z.enum(['supervised', 'auto', 'dry-run']),
630
+ issues: z.array(ReviewIssueSchema),
631
+ summary: z.string().min(10),
632
+ });
633
+
634
+ const HistoryEntrySchema = z.object({
635
+ version: z.number().int().nonnegative(),
636
+ step: z.enum(['planning', 'reviewing']),
637
+ mode: z.enum(['supervised', 'auto', 'dry-run']),
638
+ binary: z.string(),
639
+ started_at: z.string().datetime(),
640
+ completed_at: z.string().datetime(),
641
+ raw_output: z.string(),
642
+ output_hash: z.string(),
643
+ input_tokens: z.number().nonnegative(), // RT2-4.2 fix: separate input tokens
644
+ output_tokens: z.number().nonnegative(), // RT2-4.2 fix: separate output tokens
645
+ tokens_used: z.number().nonnegative(), // total (input + output)
646
+ tokens_source: z.enum(['metered', 'estimated']),
647
+ rules_snapshot: z.array(z.string()),
648
+ review: ReviewResultSchema.nullable(),
649
+ });
650
+
651
+ const TaskMetaSchema = z.object({
652
+ status: z.enum([
653
+ 'init', 'planning', 'pending_review', 'reviewing',
654
+ 'refinement', 'approved', 'merged', 'discarded',
655
+ 'failed', 'merge_conflict', 'stalled',
656
+ ]),
657
+ created_at: z.string().datetime(),
658
+ updated_at: z.string().datetime(),
659
+ approval_reason: z.enum(['score_threshold', 'max_iterations_reached']).optional(),
660
+ });
661
+
662
+ const TaskStateSchema = z.object({
663
+ schema_version: z.number().int().nonnegative(),
664
+ task_id: z.string(),
665
+ current_version: z.number().int().nonnegative(),
666
+ meta: TaskMetaSchema,
667
+ orchestration: z.object({
668
+ planner_bin: z.string(),
669
+ reviewer_bin: z.string(),
670
+ }),
671
+ shadow_branch: z.object({
672
+ branch: z.string(),
673
+ worktree: z.string(),
674
+ base_commit: z.string(),
675
+ status: z.enum(['active', 'merged', 'discarded']),
676
+ }),
677
+ context: z.object({
678
+ files: z.array(z.string()),
679
+ rules: z.array(z.string()),
680
+ rules_hash: z.string(),
681
+ }),
682
+ budget: z.object({
683
+ tokens_used: z.number().nonnegative(),
684
+ estimated_cost_usd: z.number().nonnegative(),
685
+ }),
686
+ history: z.array(HistoryEntrySchema),
687
+ });
688
+ ```
689
+
690
+ **`StateManager` class** with constructor `(stateDir: string, logger: Logger)`:
691
+
692
+ - **Schema migration infrastructure:**
693
+ ```typescript
694
+ const CURRENT_SCHEMA_VERSION = 1;
695
+ const migrations: Record<number, (state: any) => any> = {
696
+ // 0 → 1: files written before schema_version existed are treated as already v1-shaped (identity step).
697
+ 0: (state) => state, // Add later steps here, e.g. 1: migrateV1toV2,
698
+ };
699
+
700
+ function migrateState(raw: any): any {
701
+ let version = raw.schema_version ?? 0;
702
+ while (version < CURRENT_SCHEMA_VERSION) {
703
+ const migrator = migrations[version];
704
+ if (!migrator) throw new NomosError('state_migration_failed',
705
+ `No migration path from schema_version ${version} to ${CURRENT_SCHEMA_VERSION}`);
706
+ raw = migrator(raw);
707
+ version++;
708
+ }
709
+ raw.schema_version = CURRENT_SCHEMA_VERSION;
710
+ return raw;
711
+ }
712
+ ```
713
+
714
+ - **`read(taskId: string): Promise<TaskState>`**:
715
+ - Read from `stateDir/{taskId}.json`. If not found, throw `NomosError('task_not_found', ...)`.
716
+ - Parse raw JSON.
717
+ - Run `migrateState()` first, then validate with `TaskStateSchema.parse()`.
718
+ - Return typed `TaskState`.
719
+
720
+ - **`write(taskId: string, state: TaskState): Promise<void>`**:
721
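+ - Acquire lock: `lockfile.lock(filePath, { retries: { retries: 5, minTimeout: 200, maxTimeout: 5000 }, stale: 30000 })`. Note: `create()` calls `write()` before `{taskId}.json` exists, and `proper-lockfile` rejects a missing target while `realpath` is left at its default of `true` — so either add `realpath: false` to these options or create the file before locking.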
722
+ - **C2 fix:** `stale: 30000` auto-breaks locks older than 30s from crashed processes.
723
+ - Write to `{taskId}.json.tmp`.
724
+ - `fsync` the temp file descriptor.
725
+ - Atomic rename `{taskId}.json.tmp` → `{taskId}.json`.
726
+ - Release lock in `finally` block.
727
+
728
+ - **`create(taskId: string, initialState: TaskState): Promise<void>`**:
729
+ - Check if `{taskId}.json` exists → throw `NomosError('task_exists')` if so.
730
+ - Call `write()`.
731
+
732
+ - **`transition(taskId: string, newStatus: TaskStatus, options?: StateTransitionOptions): Promise<TaskState>`**:
733
+ - Read current state.
734
+ - Validate transition is in `VALID_TRANSITIONS[currentStatus]` — throw `NomosError('invalid_transition')` if not.
735
+ - Update `meta.status = newStatus` and `meta.updated_at = new Date().toISOString()`.
736
+ - If `options.reason`: store as a log entry (log at warn level with the reason string).
737
+ - If `options.version_increment`: `state.current_version++`.
738
+ - If `options.history_entry`: `state.history.push(options.history_entry)`.
739
+ - If `options.review_result`: attach to `state.history[state.history.length - 1].review`.
740
+ - If `options.approval_reason`: `state.meta.approval_reason = options.approval_reason`.
741
+ - Write and return updated state.
742
+
743
+ - **Valid transitions map:**
744
+ ```typescript
745
+ const VALID_TRANSITIONS: Record<TaskStatus, TaskStatus[]> = {
746
+ init: ['planning', 'discarded'],
747
+ planning: ['pending_review', 'failed', 'stalled', 'discarded'],
748
+ pending_review: ['reviewing', 'discarded'],
749
+ reviewing: ['refinement', 'approved', 'failed', 'discarded'],
750
+ refinement: ['planning', 'discarded'],
751
+ approved: ['merged', 'merge_conflict', 'discarded'],
752
+ merge_conflict: ['approved', 'discarded'],
753
+ stalled: ['planning', 'discarded'],
754
+ failed: ['planning', 'discarded'],
755
+ merged: [],
756
+ discarded: [],
757
+ };
758
+ ```
759
+
760
+ - **`cleanupTempFiles(stateDir: string): void`**:
761
+ - Scan for orphaned `.json.tmp` files and delete them with a warning log.
762
+ - **DO NOT** manually remove `.lock` files — `proper-lockfile`'s `stale: 30000` handles that. Manual removal races with ownership tracking.
763
+
764
+ - **`listTasks(): Promise<TaskState[]>`**:
765
+ - Read all `.json` files in `stateDir`, parse each (running migration), return array.
766
+
767
+ 2. Create `src/core/__tests__/state.test.ts`:
768
+ - Test atomic write survives simulated crash (write `.tmp`, don't rename, verify original intact).
769
+ - Test file lock prevents concurrent writes.
770
+ - Test valid state transitions: `init → planning → pending_review`.
771
+ - Test invalid transitions throw `NomosError('invalid_transition')`: `merged → planning`.
772
+ - Test transition with `version_increment: true` bumps `current_version`.
773
+ - Test transition with `history_entry` appends to history array.
774
+ - Test transition with `review_result` attaches to last history entry's `.review`.
775
+ - Test transition with `approval_reason` sets `meta.approval_reason`.
776
+ - Test `cleanupTempFiles` removes `.json.tmp` files.
777
+ - Test `cleanupTempFiles` does NOT remove `.lock` files.
778
+ - Test `stalled → planning`, `failed → planning`, `merge_conflict → approved` are valid.
779
+ - Test `read()` on a state file with missing `schema_version` (simulates pre-v1): `migrateState` sets it to `1`.
780
+ - **Note on stale lock test:** Do NOT write a 30-second wait. Instead, manually create a `.json.lock` file with a very old `mtime` (use `fs.utimesSync` to backdate by 60s) and verify the next `write()` call succeeds. This tests the stale behavior without waiting.
781
+ - Use temp directories for isolation.
782
+
783
+ **Dependencies:** Task 1.3, Task 1.4 (for logger), Task 1.5 (for NomosError)
784
+ **Definition of Done:** Atomic writes are crash-safe. `TaskStateSchema` Zod schema validates the full type. Lock contention retries 5 times. Stale locks (>30s) auto-break. Schema migration infrastructure exists. All valid transitions pass. All invalid transitions throw. `approval_reason` is set via `StateTransitionOptions`. `tokens_source` is preserved in history entries.
785
+ **Verification Command:** `npx vitest run src/core/__tests__/state.test.ts`
786
+
787
+ ---
788
+
789
+ ### [x] Task 1.7 — Input Sanitizer *(Completed: 2026-04-03)*
790
+
791
+ **Component:** `src/utils/sanitize.ts`
792
+ **Objective:** Implement the three-layer sanitization pipeline (pattern matching, entropy detection, file scanning) plus environment-variable sanitization.
793
+
794
+ **Technical Instruction:**
795
+
796
+ 1. Create `src/utils/sanitize.ts`:
797
+
798
+ ```typescript
799
+ import type { NomosConfig } from '../types/index.js';
800
+
801
+ // ── Pattern-based sanitization ───────────────────────────────────────────────
802
+ export function sanitizeByPatterns(
803
+ input: string,
804
+ patterns: string[],
805
+ label: string = '[REDACTED]',
806
+ ): { output: string; matches: string[] } {
807
+ const matches: string[] = [];
808
+ let output = input;
809
+ for (const pattern of patterns) {
810
+ const re = new RegExp(pattern, 'g');
811
+ output = output.replace(re, (match) => { matches.push(match); return label; });
812
+ }
813
+ return { output, matches };
814
+ }
815
+
816
+ // ── Shannon entropy ───────────────────────────────────────────────────────────
817
+ export function calculateEntropy(str: string): number {
817
+ const symbols = Array.from(str); // split by code point so an astral character (e.g. emoji) is one symbol
818
+ const freq = new Map<string, number>();
819
+ for (const ch of symbols) freq.set(ch, (freq.get(ch) ?? 0) + 1);
820
+ let entropy = 0; // Shannon entropy in bits per symbol; stays 0 for an empty string
821
+ for (const count of freq.values()) {
822
+ const p = count / symbols.length; // str.length counts UTF-16 units and would make the p values sum to < 1
823
+ entropy -= p * Math.log2(p);
824
+ }
825
+ return entropy;
826
+ }
828
+ export function detectHighEntropyStrings(input: string, threshold: number): string[] {
829
+ const candidates = input.match(/[a-zA-Z0-9_-]{32,}/g) ?? [];
830
+ return candidates.filter(s => calculateEntropy(s) >= threshold);
831
+ }
832
+
833
+ // ── PTY prompt sanitization ──────────────────────────────────────────────────
834
+ // Used to clean prompt content before it becomes a CLI argument.
835
+ // Even though the prompt is passed via -p flag (not PTY stdin), the content
836
+ // must be free of terminal escape sequences that could corrupt argument parsing.
837
+ export function sanitizeForPty(prompt: string): string {
838
+ return prompt
839
+ .replace(/\x1b\[[0-9;]*[A-Za-z]/g, '') // CSI sequences — must run first, while ESC (\x1b) is still present
840
+ .replace(/\x1b\][^\x07]*\x07/g, '') // OSC sequences (BEL-terminated) — also needs ESC and BEL intact
841
+ .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ''); // remaining C0 control chars and DEL; keeps \t \n \r
842
+ }
843
+
844
+ // ── File secret scanning ─────────────────────────────────────────────────────
845
+ // RT2-5.1 fix: Returns matching pattern strings (not just boolean) so the error
846
+ // message can tell the user WHICH patterns triggered. Empty array = no secrets found.
847
+ export function scanFileForSecrets(content: string, patterns: string[]): string[] {
848
+ return patterns.filter(p => new RegExp(p).test(content));
849
+ }
850
+
851
+ // ── Environment variable sanitization ─────────────────────────────────────────
852
+ // C3 fix: Match against env var NAMES only — never against 'key=value' strings.
853
+ // Matching 'key=value' causes false positives (e.g., pattern 'TOKEN' deletes 'COLORTERM').
854
+ const ALWAYS_DENY: RegExp[] = [
855
+ /^(ANTHROPIC|OPENAI|AWS|AZURE|GCP|GOOGLE|GITHUB|GITLAB|HUGGING_?FACE)_.*?(KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL)/i,
856
+ /^(DATABASE_URL|REDIS_URL|MONGO_URI|DB_PASSWORD)$/i,
857
+ /^(SSH_AUTH_SOCK|GPG_TTY)$/i,
858
+ ];
859
+
860
+ export function sanitizeEnv(
861
+ env: Record<string, string | undefined>,
862
+ denylist: string[],
863
+ ): Record<string, string> {
864
+ const compiledDeny = denylist.map(p => new RegExp(p, 'i'));
865
+ const result: Record<string, string> = {};
866
+ for (const [key, value] of Object.entries(env)) {
867
+ if (value === undefined) continue;
868
+ const denied =
869
+ ALWAYS_DENY.some(re => re.test(key)) ||
870
+ compiledDeny.some(re => re.test(key));
871
+ if (!denied) result[key] = value;
872
+ }
873
+ return result;
874
+ }
875
+ ```
876
+
877
+ 2. Create `src/utils/__tests__/sanitize.test.ts`:
878
+ - Test pattern matching catches `API_KEY=sk-abc123`, `Bearer eyJ...`, `-----BEGIN PRIVATE KEY-----`.
879
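+ - Test entropy detection flags a GitHub-style token shaped like `ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` (`ghp_` plus 36 chars, high entropy). The `x` characters are placeholders: the fixture must use genuinely mixed alphanumerics, because a literal run of `x` has near-zero entropy and would not reach the default `entropy_threshold` of 4.5.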
880
+ - Test entropy detection ignores `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa` (32 identical chars, low entropy).
881
+ - Test `sanitizeForPty` strips control chars but preserves newlines and tabs.
882
+ - Test `scanFileForSecrets` returns matching patterns array for `.env` content.
883
+ - Test `scanFileForSecrets` returns empty array when no secrets found.
884
+ - Test `sanitizeEnv` removes `OPENAI_API_KEY` and `AWS_SECRET_ACCESS_KEY`.
885
+ - Test `sanitizeEnv` keeps `COLORTERM`, `PATH`, `HOME`, `LANG`.
886
+ - Test `sanitizeEnv` with denylist `['CUSTOM_SECRET']` removes `CUSTOM_SECRET` but keeps `CUSTOM_VALUE`.
887
+
888
+ **Dependencies:** Task 1.2
889
+ **Definition of Done:** All three sanitization layers work independently. `sanitizeEnv` uses name-only matching. Unit tests cover each layer.
890
+ **Verification Command:** `npx vitest run src/utils/__tests__/sanitize.test.ts`
891
+
892
+ ---
893
+
894
+ ## Milestone 2: Git Worktree & Shadow Branching
895
+
896
+ ### [x] Task 2.1 — GitAdapter (Worktree + Shadow Branch + Diff + Merge) *(Completed: 2026-04-03)*
897
+
898
+ **Component:** `src/adapters/git.ts`
899
+ **Objective:** Implement Git worktree creation, removal, diff extraction, shadow branch commits, and merge-to-main using `simple-git`.
900
+
901
+ **Technical Instruction:**
902
+
903
+ 1. Create `src/adapters/git.ts`:
904
+
905
+ ```typescript
906
+ import simpleGit, { type SimpleGit } from 'simple-git';
907
+ import * as fs from 'fs';
908
+ import * as path from 'path';
909
+ import * as os from 'os';
910
+ import type { Logger } from 'winston';
911
+ import { NomosError } from '../core/errors.js';
912
+ import type { NomosConfig } from '../types/index.js';
913
+ ```
914
+
915
+ **`GitAdapter` class** with constructor `(projectRoot: string, config: NomosConfig, logger: Logger)`.
916
+ Initialize `git = simpleGit(projectRoot)`.
917
+
918
+ - **`resolveWorktreePath(taskId: string): string`**:
919
+ - Compute: `{worktree_base}/{projectName}/{taskId}/`
920
+ - `projectName = path.basename(projectRoot)`
921
+ - `worktree_base` from `config.execution.worktree_base` (default `/tmp/nomos-worktrees/`)
922
+ - On Windows: use `path.join(process.env.LOCALAPPDATA ?? os.tmpdir(), 'nomos-worktrees')` as base.
923
+
924
+ - **`resolveBranchName(taskId: string): string`**:
925
+ - Returns `${config.execution.shadow_branch_prefix}${taskId}`.
926
+
927
+ - **`createWorktree(taskId: string): Promise<{ branch: string; worktreePath: string; baseCommit: string }>`**:
928
+ - `branchName = this.resolveBranchName(taskId)`.
929
+ - `worktreePath = this.resolveWorktreePath(taskId)`.
930
+ - Ensure parent dir exists: `fs.mkdirSync(path.dirname(worktreePath), { recursive: true })`.
931
+ - Check branch doesn't exist: `const existing = await git.branch(['--list', branchName])`. If `existing.all.length > 0`, throw `NomosError('branch_exists', \`Branch "${branchName}" already exists. Run: arc discard ${taskId}\`)`.
932
+ - Run: `await git.raw(['worktree', 'add', worktreePath, '-b', branchName])`.
933
+ - Get base commit: `const baseCommit = (await git.revparse(['HEAD'])).trim()`.
934
+ - Return `{ branch: branchName, worktreePath, baseCommit }`.
935
+
936
+ - **`recoverWorktree(taskId: string, existingBranch: string): Promise<string>`** (W2 fix):
937
+ - `worktreePath = this.resolveWorktreePath(taskId)`.
938
+ - Check branch exists: `const existing = await git.branch(['--list', existingBranch])`. If branch is gone, throw `NomosError('worktree_unrecoverable', ...)`.
939
+ - Recreate without `-b`: `await git.raw(['worktree', 'add', worktreePath, existingBranch])`.
940
+ - **Note:** `recoverWorktree` does NOT check git user identity. That check lives in `commitToShadowBranch` and will be triggered when the orchestrator attempts to commit after recovery. No double-checking needed.
941
+ - Return `worktreePath`.
942
+
943
+ - **`removeWorktree(taskId: string, force: boolean = false): Promise<void>`**:
944
+ - `worktreePath = this.resolveWorktreePath(taskId)`.
945
+ - `branchName = this.resolveBranchName(taskId)`.
946
+ - `await git.raw(['worktree', 'remove', worktreePath, '--force'])`.
947
+ - `await git.branch([force ? '-D' : '-d', branchName])`.
948
+
949
+ - **`worktreeExists(taskId: string): boolean`**:
950
+ - `return fs.existsSync(this.resolveWorktreePath(taskId))`.
951
+
952
+ - **`grep(pattern: string, cwd: string, timeoutMs: number = 5000): Promise<string[]>`** *(RT2-2.1 fix: Ghost Method Resolution — this method was called in Task 5.1 but never defined)*:
953
+ - Execute `git grep -l` with a timeout to find files matching a regex pattern.
954
+ - Returns **relative paths** from `cwd` (not absolute paths).
955
+ ```typescript
956
+ async grep(pattern: string, cwd: string, timeoutMs: number = 5000): Promise<string[]> {
957
+ const searchGit = simpleGit(cwd);
958
+ try {
959
+ const result = await Promise.race([
960
+ searchGit.raw(['grep', '-l', '-E', '-e', pattern]), // -e: a pattern starting with "-" must not be parsed as an option
961
+ new Promise<never>((_, reject) =>
962
+ setTimeout(() => reject(new Error('grep timeout')), timeoutMs)
963
+ ),
964
+ ]);
965
+ // git grep -l prints one matching path per line; drop the empty trailing entry
966
+ return result.trim().split('\n').filter(Boolean);
967
+ } catch {
968
+ // Any rejection (no matches, timeout, or another git error) is treated as "no results"
969
+ return [];
970
+ }
971
+ }
972
+ ```
973
+
974
+ - **`getDiff(taskId: string, baseCommit: string): Promise<string>`**:
975
+ - **Must use `baseCommit` from state, NOT `HEAD~1`.** Multi-commit sessions need the full diff since branch point.
976
+ - **RT2-2.1 fix — `baseCommit` reachability pre-check:** Validate the SHA is still reachable before diffing. If anyone ran `git rebase` or `git push --force` between `arc init` and `arc plan`, the SHA becomes orphaned and `git diff` throws `fatal: unknown revision`.
977
+ ```typescript
978
+ const worktreePath = this.resolveWorktreePath(taskId);
979
+ const worktreeGit = simpleGit(worktreePath);
980
+
981
+ // RT2-2.1 fix: verify baseCommit is reachable before diff
982
+ try {
983
+ await worktreeGit.raw(['cat-file', '-t', baseCommit]);
984
+ } catch {
985
+ throw new NomosError('base_commit_unreachable',
986
+ `Base commit ${baseCommit.slice(0, 8)} is no longer reachable (likely due to rebase or force-push). ` +
987
+ `Run: arc discard ${taskId} && arc init ${taskId} to reinitialize.`);
988
+ }
989
+
990
+ const diff = await worktreeGit.diff([`${baseCommit}..HEAD`, '--', '.']);
991
+ return diff;
992
+ ```
993
+
994
+ - **`commitToShadowBranch(taskId: string, message: string, files: string[]): Promise<void>`**:
995
+ - **Per Execution Rule #14:** The caller (orchestrator) is responsible for passing only the correct files. This method does NOT filter — it trusts the list. However, it DOES enforce path safety.
996
+ ```typescript
997
+ const worktreePath = this.resolveWorktreePath(taskId);
998
+ const worktreeGit = simpleGit(worktreePath);
999
+
1000
+ for (const file of files) {
1001
+ const src = path.resolve(this.projectRoot, file);
1002
+ const dst = path.resolve(worktreePath, file);
1003
+ // W6: Path traversal defense
1004
+ if (!dst.startsWith(path.resolve(worktreePath) + path.sep) &&
1005
+ dst !== path.resolve(worktreePath)) {
1006
+ throw new NomosError('path_traversal', `File path "${file}" resolves outside worktree`);
1007
+ }
1008
+ if (!src.startsWith(path.resolve(this.projectRoot) + path.sep) &&
1009
+ src !== path.resolve(this.projectRoot)) {
1010
+ throw new NomosError('path_traversal', `File path "${file}" resolves outside project root`);
1011
+ }
1012
+ fs.mkdirSync(path.dirname(dst), { recursive: true });
1013
+ fs.copyFileSync(src, dst);
1014
+ }
1015
+
1016
+ await worktreeGit.add(files);
1017
+
1018
+ // M-3 fix: Check git identity before commit to avoid cryptic "Author identity unknown"
1019
+ // error in Docker/CI. This applies to the WORKTREE's git config, not the project root.
1020
+ const email = await worktreeGit.raw(['config', '--get', 'user.email']).catch(() => '');
1021
+ if (!email.trim()) {
1022
+ throw new NomosError('config_invalid',
1023
+ 'Git user identity not configured in the worktree or globally. Run:\n' +
1024
+ ' git config --global user.email "you@example.com"\n' +
1025
+ ' git config --global user.name "Your Name"');
1026
+ }
1027
+
1028
+ await worktreeGit.commit(message);
1029
+ ```
1030
+
1031
+ - **`mergeToMain(taskId: string, version: number, commitPrefix: string, targetBranch: string = 'main'): Promise<{ success: boolean; conflicts?: string[] }>`**:
1032
+ ```typescript
1033
+ // W3 fix: Target branch verification
1034
+ const currentBranch = (await git.branch()).current;
1035
+ if (currentBranch !== targetBranch) {
1036
+ throw new NomosError('wrong_branch',
1037
+ `Cannot apply: expected to be on "${targetBranch}" but currently on "${currentBranch}". ` +
1038
+ `Switch first: git checkout ${targetBranch}`);
1039
+ }
1040
+ // W7 fix: Dirty working tree check
1041
+ const status = await git.status();
1042
+ if (!status.isClean()) {
1043
+ throw new NomosError('dirty_working_tree',
1044
+ 'Cannot apply: working tree has uncommitted changes. Commit or stash them first.');
1045
+ }
1046
+ const branchName = this.resolveBranchName(taskId);
1047
+ const message = `${commitPrefix} apply(${taskId}): merge approved plan v${version}`;
1048
+ try {
1049
+ await git.merge([branchName, '--no-ff', '-m', message]);
1050
+ return { success: true };
1051
+ } catch {
1052
+ const mergeStatus = await git.status();
1053
+ const conflicts = mergeStatus.conflicted;
1054
+ await git.merge(['--abort']);
1055
+ return { success: false, conflicts };
1056
+ }
1057
+ ```
1058
+
1059
+ - **`isGitRepo(): Promise<boolean>`**: `return git.checkIsRepo()`.
1060
+ - **`getCurrentCommit(): Promise<string>`**: `return (await git.revparse(['HEAD'])).trim()`.
1061
+ - **`raw(args: string[]): Promise<string>`**: `return git.raw(args)`. *(RT2-6.1 fix: exposed for `--force` recovery handler in init.ts, which needs direct git worktree and branch commands without going through higher-level abstractions.)*
1062
+
1063
+ 2. Create `src/adapters/__tests__/git.test.ts`:
1064
+ - Setup: `git init` temp dir, configure `user.email` and `user.name`, initial commit.
1065
+ - Test worktree creation: branch exists, worktree path exists.
1066
+ - Test worktree removal: path and branch cleaned up.
1067
+ - Test `resolveWorktreePath` produces correct paths.
1068
+ - Test `getDiff` includes changes from multiple commits (not just HEAD~1): create worktree → make 2 commits → assert diff includes both.
1069
+ - Test `commitToShadowBranch` copies files and commits them.
1070
+ - Test `commitToShadowBranch` throws `path_traversal` for `../../etc/passwd`.
1071
+ - Test `mergeToMain` with clean merge.
1072
+ - Test `mergeToMain` with conflict returns `{ success: false, conflicts: [...] }`.
1073
+ - Test `mergeToMain` throws `wrong_branch` when on a different branch.
1074
+ - Test `mergeToMain` throws `dirty_working_tree` when changes are uncommitted.
1075
+ - Test `createWorktree` throws `branch_exists` when the branch already exists.
1076
+ - Test `recoverWorktree` recreates worktree from existing branch.
1077
+ - Test `recoverWorktree` throws `worktree_unrecoverable` when branch is also gone.
1078
+ - Test `grep` returns relative paths matching a pattern.
1079
+ - Test `grep` returns empty array when no matches found.
1080
+ - Test `grep` returns empty array on timeout (use an impossibly short timeout of 1ms on a large pattern if needed, or mock).
1081
+ - Test `getDiff` throws `NomosError('base_commit_unreachable')` when `baseCommit` SHA is unreachable (simulate by passing a fabricated SHA).
1082
+ - Use temp git repos. Clean up worktrees in `afterEach`.
1083
+
1084
+ **Dependencies:** Task 1.3, Task 1.5
1085
+ **Definition of Done:** All worktree operations use `git.raw()`. `getDiff` uses `baseCommit` with reachability pre-check. `grep()` is explicitly defined with a timeout and returns relative paths. Merge checks target branch and dirty tree. Path traversal blocked. `recoverWorktree` handles both recovery and unrecoverable cases.
1086
+ **Verification Command:** `npx vitest run src/adapters/__tests__/git.test.ts`
1087
+
1088
+ ---
1089
+
1090
+ ### [x] Task 2.2 — Pre-flight Binary Resolver *(Completed: 2026-04-03)*
1091
+
1092
+ **Component:** `src/core/preflight.ts`
1093
+ **Objective:** Implement the binary validation pre-flight.
1094
+
1095
+ **Technical Instruction:**
1096
+
1097
+ 1. Create `src/core/preflight.ts`:
1098
+
1099
+ - **`resolveBinary(cmd: string): Promise<string>`**:
1100
+ - If absolute path: check `fs.access(cmd, fs.constants.X_OK)`. Return as-is or throw.
1101
+ - If bare name: walk `process.env.PATH?.split(path.delimiter) ?? []`. On each entry, check `path.join(entry, cmd)` for executability (`fs.access(fullPath, fs.constants.X_OK)`).
1102
+ - On Windows: also try `.exe`, `.cmd`, `.bat` extensions.
1103
+ - Throw `NomosError('binary_not_found', \`Binary "${cmd}" not found in PATH\`)` if nothing found.
1104
+
1105
+ - **`runPreflight(config: NomosConfig, logger: Logger): Promise<{ planner: string; reviewer: string }>`**:
1106
+ - Resolve both `binaries.planner.cmd` and `binaries.reviewer.cmd`.
1107
+ - Optionally run `<binary> --version` with 5s timeout (`child_process.execFile`). Log at debug. Failure is non-fatal (log warning only).
1108
+ - Return resolved absolute paths.
1109
+
1110
+ 2. Create `src/core/__tests__/preflight.test.ts`:
1111
+ - Test `resolveBinary('node')` resolves successfully.
1112
+ - Test `resolveBinary('nonexistent-binary-xyz')` throws `binary_not_found`.
1113
+ - Test absolute path to non-existent file throws.
1114
+
1115
+ **Dependencies:** Task 1.3, Task 1.5
1116
+ **Definition of Done:** Binary resolution works. Missing binaries produce clear errors.
1117
+ **Verification Command:** `npx vitest run src/core/__tests__/preflight.test.ts`
1118
+
1119
+ ---
1120
+
1121
+ ## Milestone 3: Subprocess Adapters (PTY + Stdio)
1122
+
1123
+ ### [x] Task 3.1 — PtyAdapter (Supervised & Auto Mode) *(Completed: 2026-04-03)*
1124
+
1125
+ **Component:** `src/adapters/pty.ts`
1126
+ **Objective:** Implement the PTY subprocess adapter as a pure Tee Stream: pipe PTY output directly to the developer's terminal (`process.stdout`) and capture a stripped copy for logging. No pattern matching, no response_map, no Expect Logic. The developer handles all interaction inside the session. nomos-arc observes the exit code to advance the state machine. **RT2-3.1 Hardening:** Process group killing ensures all grandchildren (bash, editors) are terminated. stdin listener is cleaned up on all exit paths including rejection.
1127
+
1128
+ **Technical Instruction:**
1129
+
1130
+ 1. Create `src/adapters/pty.ts`:
1131
+
1132
+ ```typescript
1133
+ import * as pty from 'node-pty';
1134
+ import type { Logger } from 'winston';
1135
+ import { NomosError } from '../core/errors.js';
1136
+ import { stripAnsi } from '../utils/ansi.js';
1137
+ import type { PtySpawnOptions, ExecutionResult } from '../types/index.js';
1138
+ ```
1139
+
1140
+ **`PtyAdapter` class** with constructor `(logger: Logger)`.
1141
+
1142
+ **`execute(options: PtySpawnOptions): Promise<ExecutionResult>`**:
1143
+
1144
+ ```typescript
1145
/**
 * Runs `options.cmd` inside a pseudo-terminal as a pure tee stream.
 *
 * Every chunk the child prints is written straight to `process.stdout` and, up to
 * `options.max_output_bytes`, kept in memory for the returned result. Keystrokes read
 * from `process.stdin` are forwarded to the child. No pattern matching is performed.
 *
 * @param options Command, arguments, cwd/env, mode, timeouts and capture limit.
 * @returns Exit code, raw and ANSI-stripped captured output, duration, and whether
 *          (and why) the process was killed by a timeout.
 * @throws NomosError('no_tty') when supervised mode is requested without a TTY on stdin.
 */
async execute(options: PtySpawnOptions): Promise<ExecutionResult> {
  // C5 fix: TTY pre-check for supervised mode
  if (options.mode === 'supervised' && !process.stdin.isTTY) {
    throw new NomosError('no_tty',
      'Supervised mode requires an interactive terminal. ' +
      'Use --mode=auto for non-interactive environments (CI, vitest).');
  }

  // Grace period between SIGTERM and SIGKILL for a child that ignores SIGTERM.
  const KILL_GRACE_MS = 3000;

  const startTime = Date.now();
  const outputBuffer: string[] = [];
  let bytesBuffered = 0;
  let bytesDropped = 0;
  let overflowWarned = false;
  let killed = false;
  let killReason: ExecutionResult['killReason'];
  let exited = false;
  let killEscalationTimer: ReturnType<typeof setTimeout> | undefined;

  // Phase 1a: Tee Stream — no pattern matching, no response_map.
  // C1 fix: cmd and args always separate — never shell-interpolated.
  const proc = pty.spawn(options.cmd, options.args, {
    name: 'xterm-256color',
    cols: 120,
    rows: 40,
    cwd: options.cwd,
    env: options.env,
  });

  // Cuts `text` down to at most `maxBytes` UTF-8 bytes without splitting a character.
  const truncateUtf8 = (text: string, maxBytes: number): string => {
    const encoded = Buffer.from(text, 'utf8');
    if (encoded.length <= maxBytes) return text;
    let end = Math.max(0, maxBytes);
    // Step back while the first excluded byte is a continuation byte (10xxxxxx).
    while (end > 0 && (encoded[end] & 0xc0) === 0x80) end--;
    return encoded.subarray(0, end).toString('utf8');
  };

  // RT2-3.1 fix: signal the whole process group via the negative PID so grandchildren
  // (shells, editors) are terminated too.
  // NOTE(review): this relies on the PTY child leading its own process group
  // (pid === pgid), which is what forkpty()-based spawning is expected to give on
  // POSIX — confirm for the node-pty version in use.
  const signalGroup = (signal: 'SIGTERM' | 'SIGKILL') => {
    try {
      process.kill(-proc.pid, signal);
    } catch {
      // Group already gone, or negative PIDs unsupported on this platform:
      // fall back to node-pty's own kill.
      try {
        if (process.platform === 'win32') proc.kill();
        else proc.kill(signal);
      } catch {}
    }
  };

  const killProcessGroup = () => {
    signalGroup('SIGTERM');
    // Escalate once if the child is still alive after the grace period.
    if (killEscalationTimer === undefined) {
      killEscalationTimer = setTimeout(() => {
        if (!exited) signalGroup('SIGKILL');
      }, KILL_GRACE_MS);
    }
  };

  // RT2-3.1 fix: stdin cleanup lives in one helper so it runs on ALL exit paths
  // (normal exit, timeout kill, and unexpected throw).
  let stdinListener: ((data: Buffer) => void) | null = null;
  const cleanupStdin = () => {
    try { if (process.stdin.isTTY) process.stdin.setRawMode(false); } catch {}
    process.stdin.pause();
    if (stdinListener) {
      process.stdin.removeListener('data', stdinListener);
      stdinListener = null;
    }
  };

  return new Promise<ExecutionResult>((resolve, reject) => {
    let heartbeatTimer: ReturnType<typeof setTimeout> | undefined;
    let totalTimer: ReturnType<typeof setTimeout> | undefined;

    // RT2-3.1 fix: on an unexpected throw during setup, kill the process group,
    // restore stdin, and reject.
    try {
      // The heartbeat is re-armed on every output chunk; silence for
      // heartbeat_timeout_ms kills the child.
      const resetHeartbeat = () => {
        clearTimeout(heartbeatTimer);
        heartbeatTimer = setTimeout(() => {
          this.logger.warn(`Heartbeat timeout (${options.heartbeat_timeout_ms}ms). Killing process group.`);
          killProcessGroup();
          killed = true;
          killReason = 'heartbeat_timeout';
        }, options.heartbeat_timeout_ms);
      };

      totalTimer = setTimeout(() => {
        this.logger.warn(`Total timeout (${options.total_timeout_ms}ms). Killing process group.`);
        killProcessGroup();
        killed = true;
        killReason = 'total_timeout';
      }, options.total_timeout_ms);

      resetHeartbeat();

      proc.onData((data: string) => {
        // Forward to the developer in real time (all modes show output).
        process.stdout.write(data);

        // Capture for the log, measured in UTF-8 bytes.
        const bytes = Buffer.byteLength(data, 'utf8');
        const remaining = options.max_output_bytes - bytesBuffered;
        if (bytes <= remaining) {
          outputBuffer.push(data);
          bytesBuffered += bytes;
        } else {
          if (remaining > 0) {
            const kept = truncateUtf8(data, remaining);
            outputBuffer.push(kept);
            bytesDropped += bytes - Buffer.byteLength(kept, 'utf8');
            // Mark the buffer full so the capture stays a clean prefix of the output.
            bytesBuffered = options.max_output_bytes;
          } else {
            bytesDropped += bytes;
          }
          // E3 fix: warn exactly once, on the first dropped byte.
          if (!overflowWarned) {
            overflowWarned = true;
            this.logger.warn(
              `Output exceeded ${options.max_output_bytes} byte limit. ` +
              `Further output will not be captured. Consider increasing max_output_bytes in config.`
            );
          }
        }

        resetHeartbeat();
      });

      // Bidirectional piping: keystrokes go to the child. Raw mode is only
      // enabled when stdin is a TTY.
      if (process.stdin.isTTY) {
        process.stdin.setRawMode(true);
      }
      process.stdin.resume();
      stdinListener = (data: Buffer) => { proc.write(data.toString()); };
      process.stdin.on('data', stdinListener);

      proc.onExit(({ exitCode }) => {
        exited = true;
        clearTimeout(heartbeatTimer);
        clearTimeout(totalTimer);
        clearTimeout(killEscalationTimer);
        cleanupStdin();

        if (bytesDropped > 0) {
          this.logger.warn(`${bytesDropped} bytes of output were not captured.`);
        }

        const rawOutput = outputBuffer.join('');
        resolve({
          exitCode: exitCode ?? 1,
          rawOutput,
          strippedOutput: stripAnsi(rawOutput),
          duration_ms: Date.now() - startTime,
          killed,
          killReason,
        });
      });

    } catch (err) {
      clearTimeout(heartbeatTimer);
      clearTimeout(totalTimer);
      killProcessGroup();
      cleanupStdin();
      reject(err);
    }
  });
}
1288
+ ```
1289
+
1290
+ **Dependencies:** Task 1.3 (`PtySpawnOptions`, `ExecutionResult` — import from `'../types/index.js'`), Task 1.4 (logger, `stripAnsi`)
1291
+ **Definition of Done:** PTY spawns subprocess. Output piped to developer terminal in real-time (Tee Stream). ANSI stripped for log capture. Timeouts use process group killing (`-proc.pid`) to terminate all grandchildren. stdin listener cleaned up on ALL exit paths (normal, timeout, and unexpected throw). Supervised mode throws `no_tty` when stdin is not a TTY. Output overflow emits warning and tracks dropped bytes.
1292
+ **Verification Command:** `npx tsc --noEmit`
1293
+
1294
+ ---
1295
+
1296
+ ### [x] Task 3.2 — StdioAdapter (Non-PTY Subprocess for Reviewer) *(Completed: 2026-04-03)*
1297
+
1298
+ **Component:** `src/adapters/stdio.ts`
1299
+ **Objective:** Implement a non-PTY subprocess adapter for the reviewer binary with stdin piping, backpressure handling, and platform-aware process kill.
1300
+
1301
+ **Technical Instruction:**
1302
+
1303
+ 1. Create `src/adapters/stdio.ts`:
1304
+
1305
+ ```typescript
1306
+ import { spawn, type ChildProcess } from 'child_process';
1307
+ import type { Logger } from 'winston';
1308
+ import { stripAnsi } from '../utils/ansi.js';
1309
+ import { NomosError } from '../core/errors.js';
1310
+ import type { StdioSpawnOptions, ExecutionResult } from '../types/index.js';
1311
+ ```
1312
+
1313
+ **`StdioAdapter` class** with constructor `(logger: Logger)`.
1314
+
1315
+ **`execute(options: StdioSpawnOptions): Promise<ExecutionResult>`**:
1316
+
1317
+ ```typescript
1318
/**
 * Runs `options.cmd` as a plain (non-PTY) child process, writes `options.stdinData`
 * to its stdin, and collects stdout.
 *
 * @param options Command, arguments, cwd/env, stdin payload, timeouts and capture limit.
 * @returns Exit code, captured stdout (raw and ANSI-stripped), duration, and whether
 *          (and why) the process was killed by a timeout.
 * @throws NomosError('review_failed') (as a rejection) when the process cannot be
 *         spawned or its stdin cannot be written.
 */
async execute(options: StdioSpawnOptions): Promise<ExecutionResult> {
  // Grace period between SIGTERM and SIGKILL.
  const KILL_GRACE_MS = 3000;

  const startTime = Date.now();
  let killed = false;
  let killReason: ExecutionResult['killReason'];

  // C1 fix: shell is NEVER set to true — args passed directly, never interpolated
  const proc = spawn(options.cmd, options.args, {
    cwd: options.cwd,
    env: options.env,
    stdio: ['pipe', 'pipe', 'pipe'],
    shell: false,
  });

  return new Promise<ExecutionResult>((resolve, reject) => {
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    let bytesBuffered = 0;
    let stderrBytes = 0;
    let settled = false;

    let heartbeatTimer: ReturnType<typeof setTimeout> | undefined;
    let totalTimer: ReturnType<typeof setTimeout> | undefined;
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

    const clearTimers = () => {
      clearTimeout(heartbeatTimer);
      clearTimeout(totalTimer);
      clearTimeout(forceKillTimer);
    };

    // `ChildProcess.killed` only says a signal was SENT, so it cannot be used to
    // decide whether the child is still running. exitCode/signalCode can.
    const hasExited = (p: ChildProcess) => p.exitCode !== null || p.signalCode !== null;

    const killProcess = (p: ChildProcess) => {
      if (hasExited(p)) return;
      // W4 fix: platform-aware kill
      if (process.platform === 'win32') {
        if (p.pid === undefined) return;
        const killer = spawn('taskkill', ['/pid', String(p.pid), '/f', '/t'], { shell: false });
        // Without a listener a failed taskkill spawn would be an unhandled 'error' event.
        killer.on('error', (err) => {
          this.logger.warn(`taskkill failed: ${err.message}`);
        });
      } else {
        p.kill('SIGTERM');
        clearTimeout(forceKillTimer);
        forceKillTimer = setTimeout(() => {
          if (!hasExited(p)) p.kill('SIGKILL');
        }, KILL_GRACE_MS);
      }
    };

    // Rejects once; later settle attempts are ignored.
    const fail = (error: Error) => {
      if (settled) return;
      settled = true;
      clearTimeout(heartbeatTimer);
      clearTimeout(totalTimer);
      reject(error);
    };

    const resetHeartbeat = () => {
      clearTimeout(heartbeatTimer);
      heartbeatTimer = setTimeout(() => {
        killed = true;
        killReason = 'heartbeat_timeout';
        killProcess(proc);
      }, options.heartbeat_timeout_ms);
    };

    totalTimer = setTimeout(() => {
      killed = true;
      killReason = 'total_timeout';
      killProcess(proc);
    }, options.total_timeout_ms);

    resetHeartbeat();

    proc.stdout.on('data', (chunk: Buffer) => {
      resetHeartbeat();
      const remaining = options.max_output_bytes - bytesBuffered;
      if (remaining <= 0) return;
      // Keep only what still fits so the capture never exceeds max_output_bytes.
      const kept = chunk.length > remaining ? chunk.subarray(0, remaining) : chunk;
      stdoutChunks.push(kept);
      bytesBuffered += kept.length;
    });

    proc.stderr.on('data', (chunk: Buffer) => {
      // stderr is only used for the error log; cap it like stdout to bound memory.
      const remaining = options.max_output_bytes - stderrBytes;
      if (remaining <= 0) return;
      const kept = chunk.length > remaining ? chunk.subarray(0, remaining) : chunk;
      stderrChunks.push(kept);
      stderrBytes += kept.length;
    });

    // M-5 fix: surface stdin errors (e.g. broken pipe). The child is killed as well,
    // so a rejected call does not leave a process running.
    proc.stdin.on('error', (err) => {
      fail(new NomosError('review_failed',
        `Failed to write to reviewer stdin: ${err.message}`));
      killProcess(proc);
    });

    // write() returning false means the chunk was queued above the stream's
    // high-water mark; wait for 'drain' before closing stdin.
    const ok = proc.stdin.write(options.stdinData);
    if (!ok) {
      proc.stdin.once('drain', () => { proc.stdin.end(); });
    } else {
      proc.stdin.end();
    }

    proc.on('close', (exitCode) => {
      clearTimers();
      if (settled) return;
      settled = true;

      const rawOutput = Buffer.concat(stdoutChunks).toString('utf8');
      const stderrOutput = Buffer.concat(stderrChunks).toString('utf8');

      if (stderrOutput && (exitCode ?? 0) > 0) {
        this.logger.error(`Reviewer stderr: ${stderrOutput.trim()}`);
      }

      resolve({
        exitCode: exitCode ?? 1,
        rawOutput,
        strippedOutput: stripAnsi(rawOutput),
        duration_ms: Date.now() - startTime,
        killed,
        killReason,
      });
    });

    proc.on('error', (err) => {
      clearTimeout(forceKillTimer);
      fail(new NomosError('review_failed', `Reviewer process error: ${err.message}`));
    });
  });
}
1419
+ ```
1420
+
1421
+ 2. Create `src/adapters/__tests__/stdio.test.ts`:
1422
+ - Test `echo "hello"` returns output correctly.
1423
+ - Test stdin piping: spawn `cat`, pipe input, verify output matches.
1424
+ - Test `total_timeout_ms: 1000` kills a `sleep 60` process.
1425
+ - Test exit code propagation.
1426
+
1427
+ **Dependencies:** Task 1.3 (`StdioSpawnOptions`, `ExecutionResult` — import from `'../types/index.js'`), Task 1.4
1428
+ **Definition of Done:** StdioAdapter spawns non-PTY processes. Stdin piped with backpressure. Timeouts work. Platform-aware kill.
1429
+ **Verification Command:** `npx vitest run src/adapters/__tests__/stdio.test.ts`
1430
+
1431
+ ---
1432
+
1433
+ ### [x] Task 3.3 — FrontmatterParser *(Completed: 2026-04-03)*
1434
+
1435
+ **Component:** `src/utils/frontmatter.ts`
1436
+ **Objective:** Parse YAML frontmatter from task markdown files using `gray-matter`.
1437
+
1438
+ **Technical Instruction:**
1439
+
1440
+ 1. Create `src/utils/frontmatter.ts`:
1441
+
1442
+ ```typescript
1443
+ import * as fs from 'fs/promises';
1444
+ import matter from 'gray-matter';
1445
+ import { z } from 'zod';
1446
+ import { NomosError } from '../core/errors.js';
1447
+ import type { TaskFrontmatter } from '../types/index.js';
1448
+
1449
/** Shape a task file's YAML frontmatter must satisfy. */
const TaskFrontmatterSchema = z.object({
  title: z.string().min(1),
  priority: z.enum(['high', 'medium', 'low']),
  context_files: z.array(z.string()).optional(),
  status: z.string().optional(),
});

/**
 * Reads a task markdown file and splits it into validated frontmatter and body.
 *
 * @param filePath Path of the task file to read.
 * @returns The validated frontmatter and the markdown content that follows it.
 * @throws NomosError('invalid_frontmatter') when the frontmatter fails schema validation.
 */
export async function parseTaskFile(
  filePath: string,
): Promise<{ frontmatter: TaskFrontmatter; body: string }> {
  const fileText = await fs.readFile(filePath, 'utf8');
  const { data, content } = matter(fileText);

  const validation = TaskFrontmatterSchema.safeParse(data);
  if (validation.success) {
    return { frontmatter: validation.data as TaskFrontmatter, body: content };
  }

  throw new NomosError('invalid_frontmatter',
    `Invalid frontmatter in ${filePath}: ${validation.error.message}`);
}
1468
+ ```
1469
+
1470
+ 2. Create `src/utils/__tests__/frontmatter.test.ts`:
1471
+ - Test valid frontmatter parses correctly.
1472
+ - Test missing `title` throws `NomosError('invalid_frontmatter')`.
1473
+ - Test body content is returned without the frontmatter block.
1474
+ - Test file with no frontmatter throws.
1475
+
1476
+ **Dependencies:** Task 1.3
1477
+ **Definition of Done:** Frontmatter parsed and validated. Body extracted cleanly.
1478
+ **Verification Command:** `npx vitest run src/utils/__tests__/frontmatter.test.ts`
1479
+
1480
+ ---
1481
+
1482
+ ## Milestone 4: Prompt Engineering & Review Parsing
1483
+
1484
+ ### [x] Task 4.1 — PromptSynthesizer *(Completed: 2026-04-03)*
1485
+
1486
+ **Component:** `src/core/prompt.ts`
1487
+ **Objective:** Assemble the multi-layer planner prompt and reviewer prompt. All templates are defined inline in this task — no external document references.
1488
+
1489
+ **Technical Instruction:**
1490
+
1491
+ 1. Create `src/core/prompt.ts`:
1492
+
1493
+ ```typescript
1494
+ import * as fs from 'fs/promises';
1495
+ import * as path from 'path';
1496
+ import { createHash } from 'crypto';
1497
+ import { NomosError } from './errors.js';
1498
+ import type { ReviewIssue, ExecutionMode } from '../types/index.js';
1499
+ ```
1500
+
1501
+ **`PromptOptions`, `AffectedFileSnippet`, and `ReviewPromptOptions` interfaces** (these live in `src/types/index.ts`; Task 1.3 already defined the base list of exported interfaces, so append these three to it):
1502
+
1503
+ Add to `src/types/index.ts`:
1504
+ ```typescript
1505
/** Everything `assemblePrompt` needs to build the planner prompt. */
export interface PromptOptions {
  /** Text placed under the `[SYSTEM RULES]` heading. */
  globalRules: string;
  /** Text placed under `[DOMAIN RULES]`; the section is skipped when blank. */
  domainRules: string;
  /** Text placed under `[SESSION CONSTRAINTS]`, or null when there are none. */
  sessionRules: string | null;
  /** Markdown body of the task, placed under `[TASK REQUIREMENTS]`. */
  taskBody: string;
  /** BLK-3 fix: paths taken from the task frontmatter's `context_files`. */
  contextFiles: string[];
  /** Issues from the previous review round, or null on the first round. */
  previousFeedback: ReviewIssue[] | null;
  /** Version number the previous feedback refers to, or null. */
  previousVersion: number | null;
  /** Execution mode of the current run. */
  mode: ExecutionMode;
}

/** One file excerpt injected into the review prompt. */
export interface AffectedFileSnippet {
  /** Relative path from the project root. */
  file: string;
  /** Up to 50 lines of the file's content. */
  snippet: string;
}

/** Everything `assembleReviewPrompt` needs to build the reviewer prompt. */
export interface ReviewPromptOptions {
  /** Diff of the plan under review, placed under `[PLAN DIFF]`. */
  planDiff: string;
  /** Optional developer notes, placed under `[DEVELOPER NOTES]`. */
  planSummary: string | null;
  /** Text placed under `[SYSTEM RULES]`; skipped when blank. */
  globalRules: string;
  /** Text placed under `[DOMAIN RULES]`; skipped when blank. */
  domainRules: string;
  /** Execution mode; `'auto'` adds the zero-tolerance clause. */
  mode: ExecutionMode;
  /** Context injection: files referencing changed code. */
  affectedFileSnippets?: AffectedFileSnippet[];
}
1529
+ ```
1530
+
1531
+ **`assemblePrompt(options: PromptOptions): string`**:
1532
+
1533
+ ```typescript
1534
/**
 * Builds the planner prompt from its layers, in fixed order:
 * system rules, domain rules, session constraints, task requirements, context files,
 * previous review feedback, and the closing instruction. Sections are separated by a
 * blank line; optional sections are left out when they have no content.
 *
 * @param options Rule texts, task body, context file list and previous-review data.
 * @returns The assembled prompt text.
 */
export function assemblePrompt(options: PromptOptions): string {
  const sections: string[] = [];

  sections.push(`[SYSTEM RULES]\n${options.globalRules}`);

  if (options.domainRules.trim()) {
    sections.push(`[DOMAIN RULES]\n${options.domainRules}`);
  }

  // Treat a blank session file like an absent one, matching the domain-rules check,
  // so no empty heading is emitted.
  if (options.sessionRules !== null && options.sessionRules.trim()) {
    sections.push(`[SESSION CONSTRAINTS]\n${options.sessionRules}`);
  }

  sections.push(`[TASK REQUIREMENTS]\n${options.taskBody}`);

  // BLK-3 fix: contextFiles section
  if (options.contextFiles.length > 0) {
    const fileList = options.contextFiles.map(f => `- ${f}`).join('\n');
    sections.push(
      `[CONTEXT FILES]\n` +
      `The following files are relevant to this task. Read and understand them before planning:\n` +
      fileList
    );
  }

  if (options.previousFeedback !== null && options.previousFeedback.length > 0) {
    const issueLines = options.previousFeedback
      .map(i => `- [${i.severity}] ${i.description}: ${i.suggestion}`)
      .join('\n');
    // Interpolating a null version would print the literal text "vnull".
    const versionLabel = options.previousVersion !== null
      ? `v${options.previousVersion}`
      : 'the previous version';
    sections.push(
      `[PREVIOUS REVIEW FEEDBACK]\n` +
      `The following issues were identified in ${versionLabel} and MUST be addressed:\n` +
      issueLines
    );
  }

  sections.push(
    `[INSTRUCTION]\n` +
    `Generate a detailed implementation plan for the above task.\n` +
    `Output your plan in Markdown format.`
  );

  return sections.join('\n\n');
}
1578
+ ```
1579
+
1580
+ **`assembleReviewPrompt(options: ReviewPromptOptions): string`**:
1581
+
1582
+ The reviewer binary receives this prompt via stdin. It MUST respond with ONLY a JSON object — no prose, no markdown fences, just the raw JSON.
1583
+
1584
+ The required JSON schema (inline — no external reference):
1585
+ ```
1586
+ {
1587
+ "score": <number 0.0–1.0>,
1588
+ "summary": "<string, minimum 10 characters>",
1589
+ "issues": [
1590
+ {
1591
+ "severity": "<high|medium|low>",
1592
+ "category": "<security|performance|architecture|correctness|maintainability>",
1593
+ "description": "<string, minimum 5 characters>",
1594
+ "suggestion": "<string, minimum 5 characters>"
1595
+ }
1596
+ ]
1597
+ }
1598
+ ```
1599
+
1600
+ Scoring guide (included in the prompt so the reviewer applies it consistently):
1601
+ - 0.9–1.0: Excellent — approved for merge
1602
+ - 0.7–0.9: Good — minor issues, refinement optional
1603
+ - 0.5–0.7: Needs improvement — refinement required
1604
+ - 0.0–0.5: Significant problems — must rework
1605
+
1606
+ ```typescript
1607
/**
 * Builds the prompt sent to the reviewer binary on stdin.
 *
 * Section order: review request, plan diff, affected files (optional), developer
 * notes (optional), system rules (optional), domain rules (optional), zero-tolerance
 * clause (auto mode only), and the JSON-only instruction with the scoring guide.
 *
 * @param options Plan diff, optional notes/snippets, rule texts and execution mode.
 * @returns The assembled prompt, sections separated by a blank line.
 */
export function assembleReviewPrompt(options: ReviewPromptOptions): string {
  const { planDiff, planSummary, globalRules, domainRules, mode } = options;
  const affected = options.affectedFileSnippets;
  const parts: string[] = [];

  parts.push([
    '[REVIEW REQUEST]',
    'You are a code review expert. Review the following implementation plan diff.',
    'Evaluate quality, security, architecture, and correctness.',
  ].join('\n'));

  parts.push(`[PLAN DIFF]\n${planDiff}`);

  if (affected && affected.length > 0) {
    const rendered: string[] = [];
    for (const entry of affected) {
      rendered.push(`// ${entry.file}\n${entry.snippet}`);
    }
    const heading =
      '[AFFECTED FILES]\n' +
      'The following files reference code changed in this diff. ' +
      'Use them to assess side-effects and dependency impact:\n\n';
    parts.push(heading + rendered.join('\n\n---\n\n'));
  }

  if (planSummary) parts.push(`[DEVELOPER NOTES]\n${planSummary}`);
  if (globalRules.trim()) parts.push(`[SYSTEM RULES]\n${globalRules}`);
  if (domainRules.trim()) parts.push(`[DOMAIN RULES]\n${domainRules}`);

  if (mode === 'auto') {
    parts.push(
      '[ZERO-TOLERANCE CLAUSE]\n' +
      'You are operating in auto mode. Any high-severity security issue MUST result in ' +
      'a score below 0.5, regardless of other positive qualities.'
    );
  }

  // The schema and scoring guide are spelled out inline so the reviewer needs no
  // external reference.
  const instructionLines = [
    '[INSTRUCTION]',
    'Respond ONLY with a JSON object matching this exact schema. ' +
      'Do not include any other text, explanation, or markdown fences:',
    '{',
    ' "score": <number between 0.0 and 1.0>,',
    ' "summary": "<string, minimum 10 characters describing the overall quality>",',
    ' "issues": [',
    ' {',
    ' "severity": "<high|medium|low>",',
    ' "category": "<security|performance|architecture|correctness|maintainability>",',
    ' "description": "<string, minimum 5 characters>",',
    ' "suggestion": "<string, minimum 5 characters>"',
    ' }',
    ' ]',
    '}',
    '',
    'Scoring guide:',
    '- 0.9–1.0: Excellent, approved for merge',
    '- 0.7–0.9: Good, minor issues',
    '- 0.5–0.7: Needs improvement',
    '- 0.0–0.5: Significant problems requiring rework',
  ];
  parts.push(instructionLines.join('\n'));

  return parts.join('\n\n');
}
1675
+ ```
1676
+
1677
+ **`loadRules(projectRoot: string, taskId: string): Promise<{ global: string; domain: string; session: string | null; rulesHash: string; rulesList: string[] }>`**:
1678
+
1679
+ ```typescript
1680
/**
 * Loads the rule files for a task from `<projectRoot>/tasks-management/rules`.
 *
 * `global.md` is required; `backend.md` and `session/<taskId>.md` are optional.
 * Only a genuinely absent optional file is treated as "no rules": any other read
 * failure (permissions, path is a directory, …) is rethrown instead of silently
 * running without those rules.
 *
 * @param projectRoot Project root directory.
 * @param taskId Task identifier used to locate the session rules file.
 * @returns The rule texts, a `sha256:`-prefixed hash over their concatenation, and
 *          the list of rule files that contributed.
 * @throws NomosError('rules_missing') when `global.md` is absent or unreadable.
 */
export async function loadRules(projectRoot: string, taskId: string) {
  const rulesDir = path.join(projectRoot, 'tasks-management', 'rules');

  // True only for "the file (or a parent directory) does not exist".
  const isMissing = (err: unknown): boolean => {
    const code = (err as NodeJS.ErrnoException | null)?.code;
    return code === 'ENOENT' || code === 'ENOTDIR';
  };

  // Reads an optional file: null when absent, rethrows every other failure.
  const readOptional = async (file: string): Promise<string | null> => {
    try {
      return await fs.readFile(file, 'utf8');
    } catch (err) {
      if (isMissing(err)) return null;
      throw err;
    }
  };

  // global.md is required
  const globalPath = path.join(rulesDir, 'global.md');
  let globalContent: string;
  try {
    globalContent = await fs.readFile(globalPath, 'utf8');
  } catch (err) {
    if (isMissing(err)) {
      throw new NomosError('rules_missing',
        `Required rules file not found: ${globalPath}. ` +
        `Run: arc init to scaffold the project structure.`);
    }
    // The file exists but cannot be read — say so instead of claiming it is missing.
    const detail = err instanceof Error ? err.message : String(err);
    throw new NomosError('rules_missing',
      `Required rules file could not be read: ${globalPath} (${detail}).`);
  }

  // backend.md is optional
  const domainContent = (await readOptional(path.join(rulesDir, 'backend.md'))) ?? '';

  // session/{taskId}.md is optional
  const sessionContent = await readOptional(
    path.join(rulesDir, 'session', `${taskId}.md`));

  // The hash covers the plain concatenation of the three texts.
  const rulesHash = `sha256:${createHash('sha256')
    .update(globalContent + domainContent + (sessionContent ?? ''))
    .digest('hex')}`;

  const rulesList = [
    'global.md',
    ...(domainContent ? ['backend.md'] : []),
    ...(sessionContent !== null ? [`session/${taskId}.md`] : []),
  ];

  return { global: globalContent, domain: domainContent, session: sessionContent, rulesHash, rulesList };
}
1719
+ ```
1720
+
1721
+ 2. Update `src/types/index.ts` to add `PromptOptions`, `AffectedFileSnippet`, and `ReviewPromptOptions` as shown above.
1722
+
1723
+ 3. Create `src/core/__tests__/prompt.test.ts`:
1724
+ - Test all sections appear in correct order.
1725
+ - Test optional sections omitted when null/empty.
1726
+ - Test `contextFiles: ['src/auth.ts']` produces `[CONTEXT FILES]` section.
1727
+ - Test `contextFiles: []` omits `[CONTEXT FILES]` entirely.
1728
+ - Test previous feedback formatted as bullet list with version number.
1729
+ - Test review prompt includes the full JSON schema in `[INSTRUCTION]`.
1730
+ - Test review prompt in auto mode includes `[ZERO-TOLERANCE CLAUSE]`.
1731
+ - Test `loadRules` computes consistent hash (same input → same hash).
1732
+ - Test `loadRules` throws `rules_missing` when `global.md` is absent.
1733
+ - Test `loadRules` handles missing `backend.md` (empty string, no throw).
1734
+
1735
+ **Dependencies:** Task 1.3, Task 1.7
1736
+ **Definition of Done:** Assembled prompt matches template exactly. `contextFiles` included as section when non-empty. Review prompt includes inline JSON schema. `loadRules` uses `crypto.createHash` for SHA-256.
1737
+ **Verification Command:** `npx vitest run src/core/__tests__/prompt.test.ts`
1738
+
1739
+ ---
1740
+
1741
+ ### [x] Task 4.2 — ReviewParser (Validation Pipeline) *(Completed: 2026-04-03)*
1742
+
1743
+ **Component:** `src/core/review.ts`
1744
+ **Objective:** Implement the three-stage review validation pipeline (JSON extraction, schema validation, semantic validation).
1745
+
1746
+ **Technical Instruction:**
1747
+
1748
+ 1. Create `src/core/review.ts`:
1749
+
1750
+ ```typescript
1751
+ import { z } from 'zod';
1752
+ import type { ReviewResult, ExecutionMode } from '../types/index.js';
1753
+
1754
// Stage 1 — JSON extraction

/**
 * Finds the first brace-balanced `{...}` block at or after `from`.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes are only
 * interpreted inside strings. A `}` seen while no block is open is skipped, so stray
 * closing braces in surrounding prose cannot unbalance the scan.
 *
 * @returns The block text and the index just past it, or null when none is found.
 */
function extractFirstJsonBlock(raw: string, from = 0): { text: string; end: number } | null {
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = from; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') { inString = true; continue; }
    if (ch === '{') {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === '}' && depth > 0) {
      depth--;
      if (depth === 0) return { text: raw.slice(start, i + 1), end: i + 1 };
    }
  }
  return null;
}

/** Parses `text` and returns it only when it is a JSON object (not array/primitive/null). */
function parseJsonObject(text: string): object | null {
  try {
    const value: unknown = JSON.parse(text);
    return typeof value === 'object' && value !== null && !Array.isArray(value) ? value : null;
  } catch {
    return null;
  }
}

/**
 * Extracts a JSON object from reviewer output using three strategies in order:
 * a direct parse, the contents of the first markdown code fence, and finally each
 * brace-balanced block in turn until one parses.
 *
 * @param raw Raw reviewer output.
 * @returns The first JSON object found, or null when there is none.
 */
export function extractJson(raw: string): object | null {
  // Try 1: direct parse
  const direct = parseJsonObject(raw);
  if (direct) return direct;

  // Try 2: markdown fence extraction
  const fenceMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenceMatch) {
    const fenced = parseJsonObject(fenceMatch[1].trim());
    if (fenced) return fenced;
  }

  // Try 3: brace-depth extraction. Keep scanning past blocks that are balanced but
  // not valid JSON (e.g. a "{placeholder}" in the preamble).
  let from = 0;
  while (from < raw.length) {
    const found = extractFirstJsonBlock(raw, from);
    if (!found) break;
    const parsed = parseJsonObject(found.text);
    if (parsed) return parsed;
    from = found.end;
  }
  return null;
}
1790
+
1791
// Stage 2 — Schema validation

/** Shape of a single entry in the reviewer's `issues` array. */
const ReviewIssueSchema = z.object({
  severity: z.enum(['high', 'medium', 'low']),
  category: z.enum(['security', 'performance', 'architecture', 'correctness', 'maintainability']),
  description: z.string().min(5),
  suggestion: z.string().min(5),
});

/** Shape of the reviewer's JSON reply. Scores up to 2 are accepted here; clamping happens in stage 3. */
const ReviewResultSchema = z.object({
  score: z.number().min(0).max(2),
  summary: z.string().min(10),
  issues: z.array(ReviewIssueSchema),
});

/**
 * Validates a parsed reviewer reply against the schema and tags it with the run's mode.
 *
 * RT2-4.2 fix: the mode comes from the caller rather than being hardcoded to 'auto'.
 *
 * @param obj Parsed JSON from the reviewer.
 * @param mode Execution mode of the current run.
 * @returns The validated result with `mode` attached, or null when validation fails.
 */
export function validateReviewSchema(obj: object, mode: ExecutionMode): ReviewResult | null {
  const parsed = ReviewResultSchema.safeParse(obj);
  return parsed.success ? ({ ...parsed.data, mode } as ReviewResult) : null;
}
1811
+
1812
// Stage 3 — Semantic validation

/**
 * Clamps `review.score` into [0, 1] in place, then checks that score and issues agree.
 *
 * @param review Schema-validated review; its `score` may be modified.
 * @returns `{ valid: true }`, or `{ valid: false, reason }` when a score below 0.5 has
 *          no issues or a score of 0.9 or more carries a high-severity issue.
 */
export function semanticValidation(review: ReviewResult): { valid: boolean; reason?: string } {
  // Clamp score
  if (review.score > 1.0) {
    review.score = 1.0;
  } else if (review.score < 0.0) {
    review.score = 0.0;
  }

  const lowWithoutIssues = review.score < 0.5 && review.issues.length === 0;
  if (lowWithoutIssues) {
    return { valid: false, reason: 'Low score must have supporting issues.' };
  }

  if (review.score >= 0.9) {
    for (const issue of review.issues) {
      if (issue.severity === 'high') {
        return { valid: false, reason: 'High severity issues cannot pass with score >= 0.9.' };
      }
    }
  }

  return { valid: true };
}
1829
+
1830
/**
 * Runs the three-stage review pipeline on raw reviewer output: JSON extraction,
 * schema validation, then semantic validation.
 *
 * RT2-4.2 fix: `mode` is threaded through the whole pipeline.
 *
 * @param raw Raw text produced by the reviewer.
 * @param mode Execution mode of the current run.
 * @returns `{ result }` on success, or `{ result: null, error }` naming the failed stage.
 */
export function parseReviewOutput(
  raw: string,
  mode: ExecutionMode,
): { result: ReviewResult | null; error?: string } {
  const candidate = extractJson(raw);
  if (!candidate) {
    return { result: null, error: 'Stage 1 failed: no JSON found in reviewer output.' };
  }

  const review = validateReviewSchema(candidate, mode);
  if (!review) {
    return { result: null, error: 'Stage 2 failed: JSON does not match ReviewResult schema.' };
  }

  const verdict = semanticValidation(review);
  return verdict.valid
    ? { result: review }
    : { result: null, error: `Stage 3 failed: ${verdict.reason}` };
}
1843
+ ```
1844
+
1845
+ 2. Create `src/core/__tests__/review.test.ts`:
1846
+ - Test clean JSON parses.
1847
+ - Test JSON wrapped in markdown fences extracted.
1848
+ - Test JSON with preamble text ("Here is my review:\n{...}") extracted.
1849
+ - Test `{ score: 1.5 }` is clamped to 1.0.
1850
+ - Test `{ score: 0.3, summary: "...", issues: [] }` rejected (low score, no issues).
1851
+ - Test `{ score: 0.95, ..., issues: [{ severity: "high", ... }] }` rejected (a high-severity issue is incompatible with a score ≥ 0.9).
1852
+ - Test completely invalid output returns null with error string.
1853
+ - **RT2-4.2 fix:** Test `parseReviewOutput(raw, 'supervised')` produces `result.mode === 'supervised'`.
1854
+ - **RT2-4.2 fix:** Test `validateReviewSchema(obj, 'dry-run')` produces `mode === 'dry-run'` (not 'auto').
1855
+
1856
+ **Dependencies:** Task 1.3
1857
+ **Definition of Done:** All three stages work. Edge cases covered. `mode` is passed through from caller — never hardcoded.
1858
+ **Verification Command:** `npx vitest run src/core/__tests__/review.test.ts`
1859
+
1860
+ ---
1861
+
1862
+ ### [x] Task 4.3 — Token & Cost Tracker *(Completed: 2026-04-03)*
1863
+
1864
+ **Component:** `src/core/budget.ts`
1865
+ **Objective:** Implement token estimation, cost tracking, and budget enforcement. **RT2-4.3 Hardening:** Separate input/output token tracking with distinct pricing rates. Normalize `binaryCmd` to basename for reliable cost map lookup.
1866
+
1867
+ **Technical Instruction:**
1868
+
1869
+ 1. Create `src/core/budget.ts`:
1870
+
1871
+ ```typescript
1872
+ import * as path from 'path';
1873
+ import type { TaskState, NomosConfig } from '../types/index.js';
1874
+
1875
// RT2-4.3 fix: token counts are kept separately for input and output so that each
// can be priced at its own rate.
export interface TokenEstimate {
  input_tokens: number;
  output_tokens: number;
  total: number;
}

/**
 * Rough token estimate: one token per four characters, rounded up, computed
 * separately for the prompt (input) and the output.
 *
 * The heuristic is approximate; the original notes it underestimates for CJK text.
 *
 * @param prompt Text sent to the binary.
 * @param output Text produced by the binary.
 * @returns Estimated input and output token counts and their sum.
 */
export function estimateTokens(prompt: string, output: string): TokenEstimate {
  const approximate = (text: string): number => Math.ceil(text.length / 4);

  const estimate: TokenEstimate = {
    input_tokens: approximate(prompt),
    output_tokens: approximate(output),
    total: 0,
  };
  estimate.total = estimate.input_tokens + estimate.output_tokens;
  return estimate;
}
1897
+
1898
/**
 * Extracts token usage from the binary's output using the configured regex pattern.
 *
 * RT2-4.3 fix: when the pattern captures two groups (e.g.
 * "Input: (\d+).*Output: (\d+)") they are read as input and output counts. With a
 * single group the value is treated as a total and split 90/10 input/output as a
 * heuristic for cost estimation.
 *
 * @param output Text produced by the binary.
 * @param usagePattern Regex source from configuration, or null when not configured.
 * @returns The parsed counts, or null when the pattern is null, is not a valid
 *          regular expression, does not match, or captures non-numeric text.
 */
export function parseTokensFromOutput(
  output: string,
  usagePattern: string | null,
): TokenEstimate | null {
  if (!usagePattern) return null;

  // The pattern comes from configuration; a malformed one must not crash the run.
  let matcher: RegExp;
  try {
    matcher = new RegExp(usagePattern);
  } catch {
    return null;
  }

  const match = output.match(matcher);
  if (!match?.[1]) return null;

  if (match[2]) {
    // Pattern captured both input and output groups
    const input_tokens = parseInt(match[1], 10);
    const output_tokens = parseInt(match[2], 10);
    if (isNaN(input_tokens) || isNaN(output_tokens)) return null;
    return { input_tokens, output_tokens, total: input_tokens + output_tokens };
  }

  // Single group — total only. Apply 90/10 heuristic split for cost estimation.
  const total = parseInt(match[1], 10);
  if (isNaN(total)) return null;
  const input_tokens = Math.round(total * 0.9);
  const output_tokens = total - input_tokens;
  return { input_tokens, output_tokens, total };
}
1928
+
1929
/**
 * RT2-4.3: cost calculation with separate input/output rates.
 *
 * Lookup: costMap keys are normally the basename of the binary command
 * (e.g. 'claude', 'codex'), so '/usr/local/bin/claude' resolves to 'claude'.
 * If the basename is not a key, the raw `binaryCmd` is tried as a fallback so
 * maps keyed by the full command keep working. Only the map's OWN keys are
 * considered: a command named like an inherited Object member ('constructor',
 * 'toString', ...) yields 0 instead of a NaN cost.
 *
 * Rate structure: values are `{ input, output }` rates per 1K tokens. For
 * backward compatibility a plain number is treated as the output rate, with
 * the input rate set to 1/5th of it.
 *
 * @param tokens    Separate input/output token counts.
 * @param binaryCmd Command used to launch the binary (bare name or path).
 * @param costMap   Per-binary rates per 1K tokens.
 * @returns Cost rounded to 6 decimal places; 0 when the binary has no entry.
 */
export function calculateCost(
  tokens: TokenEstimate,
  binaryCmd: string,
  costMap: Record<string, number | { input: number; output: number }>,
): number {
  const hasOwn = (candidate: string): boolean =>
    Object.prototype.hasOwnProperty.call(costMap, candidate);

  // Basename first (the documented key form), then the raw command.
  const key = [path.basename(binaryCmd), binaryCmd].find(hasOwn);
  if (key === undefined) return 0;
  const rateEntry = costMap[key];

  let inputRate: number;
  let outputRate: number;
  if (typeof rateEntry === 'number') {
    // Backward compat: plain number = output rate; input = 1/5th.
    outputRate = rateEntry;
    inputRate = rateEntry / 5;
  } else {
    inputRate = rateEntry.input;
    outputRate = rateEntry.output;
  }

  const inputCost = (tokens.input_tokens / 1000) * inputRate;
  const outputCost = (tokens.output_tokens / 1000) * outputRate;
  // Round to micro-dollars to keep floating-point noise out of stored totals.
  return Math.round((inputCost + outputCost) * 1_000_000) / 1_000_000;
}
1966
+
1967
/**
 * Compares a task's consumed tokens with the configured per-task budget.
 *
 * @param state  Task state; reads `state.budget.tokens_used` and `state.task_id`.
 * @param config Configuration; reads `config.budget.max_tokens_per_task` and
 *               `config.budget.warn_at_percent`.
 * @returns `{ allowed: false, error }` once usage reaches the limit,
 *          `{ allowed: true, warning }` once usage reaches the warning
 *          percentage, and `{ allowed: true }` otherwise.
 */
export function checkBudget(
  state: TaskState,
  config: NomosConfig,
): { allowed: boolean; warning?: string; error?: string } {
  const used = state.budget.tokens_used;
  const cap = config.budget.max_tokens_per_task;
  const taskId = state.task_id;

  // Hard stop: the budget is exhausted.
  if (used >= cap) {
    const error = [
      `Token budget exceeded for task "${taskId}". `,
      `Used: ${used} / ${cap}. `,
      `Run: arc plan ${taskId} --extend-budget to increase limit.`,
    ].join('');
    return { allowed: false, error };
  }

  // Soft warning: usage has crossed the configured percentage of the cap.
  const warnFrom = (config.budget.warn_at_percent / 100) * cap;
  if (used >= warnFrom) {
    const percentUsed = Math.round((used / cap) * 100);
    const warning = `Task "${taskId}" at ${percentUsed}% of token budget (${used} / ${cap}).`;
    return { allowed: true, warning };
  }

  return { allowed: true };
}
1991
+ ```
1992
+
1993
+ 2. Create `src/core/__tests__/budget.test.ts`:
1994
+ - Test estimation: 4000-char prompt + 4000-char output returns `{ input_tokens: 1000, output_tokens: 1000, total: 2000 }`.
1995
+ - Test `parseTokensFromOutput("Tokens used: 12345", "Tokens used:\\s*(\\d+)")` returns `{ input_tokens: 11111, output_tokens: 1234, total: 12345 }` (90/10 split).
1996
+ - Test `parseTokensFromOutput("Input: 5000 Output: 1000", "Input:\\s*(\\d+).*Output:\\s*(\\d+)")` returns `{ input_tokens: 5000, output_tokens: 1000, total: 6000 }`.
1997
+ - Test `parseTokensFromOutput` returns null when pattern is null or doesn't match.
1998
+ - Test budget check blocks at limit (100000/100000).
1999
+ - Test budget check warns at 80% (80000/100000).
2000
+ - Test budget check allows below threshold.
2001
+ - **RT2-4.3 fix:** Test `calculateCost({ input_tokens: 900, output_tokens: 100, total: 1000 }, 'claude', { claude: 0.015 })` uses split rates (not flat `0.015` for both) and returns `0.0042` (input: 0.9 × 0.003 + output: 0.1 × 0.015).
2002
+ - **RT2-4.3 fix:** Test `calculateCost` with explicit `{ input: 0.003, output: 0.015 }` rate entry.
2003
+ - **RT2-4.3 fix:** Test `calculateCost` normalizes `/usr/local/bin/claude` → `'claude'` for map lookup.
2004
+ - **RT2-4.3 fix:** Test `calculateCost` normalizes `npx` → `'npx'` — returns 0 when not in map.
2005
+ - Test `calculateCost` for unknown cmd returns 0.
2006
+
2007
+ **Dependencies:** Task 1.3
2008
+ **Definition of Done:** Token estimation returns separate `input_tokens`/`output_tokens`. `calculateCost` applies distinct input/output rates. `binaryCmd` normalized to basename for reliable cost map lookup. `parseTokensFromOutput` supports both single-group and two-group capture patterns. Budget enforcement unchanged.
2009
+ **Verification Command:** `npx vitest run src/core/__tests__/budget.test.ts`
2010
+
2011
+ ---
2012
+
2013
+ ## Milestone 5: Orchestrator Core
2014
+
2015
+ ### [x] Task 5.1 — Orchestrator State Machine *(Completed: 2026-04-03)*
2016
+
2017
+ **Component:** `src/core/orchestrator.ts`, `src/core/plan-file-manager.ts`, `src/core/worktree-coordinator.ts`
2018
+ **Objective:** Implement the central orchestrator that coordinates state, git, pty, stdio, prompt, and review components. **RT2-5.1 Hardening:** The Orchestrator is decomposed into three classes: `PlanFileManager` (reads/writes plan files, logs, diffs), `WorktreeCoordinator` (manages plan/review/commit lifecycle for git worktrees), and the `Orchestrator` itself (pure state machine logic + delegation). Addresses SIGINT race condition (RTV-5), explicit commit file list (Execution Rule #14), `approval_reason` tracking (RTV-6), context injection precision, `run()` loop counter divergence, and `context_files` secret scanning.
2019
+
2020
+ **Technical Instruction:**
2021
+
2022
+ 1. **Create `src/core/plan-file-manager.ts`** *(RT2-5.1 fix: extracted from Orchestrator to eliminate direct file I/O in the state machine)*:
2023
+
2024
+ ```typescript
2025
+ import * as fs from 'fs';
2026
+ import * as path from 'path';
2027
+ import type { Logger } from 'winston';
2028
+
2029
/**
 * Manages all file I/O for plan artifacts: logs, diffs, plan summaries.
 * The Orchestrator delegates ALL file reads/writes to this class.
 * It NEVER touches state JSON (that's StateManager's job).
 *
 * Every path is resolved under `<projectRoot>/tasks-management/`.
 */
export class PlanFileManager {
  constructor(
    private projectRoot: string,
    private logger: Logger,
  ) {}

  /** Builds an absolute path under `<projectRoot>/tasks-management/`. */
  private resolve(...segments: string[]): string {
    return path.join(this.projectRoot, 'tasks-management', ...segments);
  }

  /** Writes `content` to `filePath`, creating parent directories as needed. */
  private writeArtifact(filePath: string, content: string): void {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, content);
  }

  /** Saves the planner's raw output to `logs/<taskId>-v<version>-raw.log`. */
  saveRawLog(taskId: string, version: number, content: string): void {
    this.writeArtifact(this.resolve('logs', `${taskId}-v${version}-raw.log`), content);
  }

  /** Saves the stripped planner output to `logs/<taskId>-v<version>.log`. */
  saveStrippedLog(taskId: string, version: number, content: string): void {
    this.writeArtifact(this.resolve('logs', `${taskId}-v${version}.log`), content);
  }

  /** Saves the plan diff to `plans/<taskId>-v<version>.diff`. */
  saveDiff(taskId: string, version: number, diff: string): void {
    this.writeArtifact(this.resolve('plans', `${taskId}-v${version}.diff`), diff);
  }

  /** Saves the reviewer's raw output to `logs/<taskId>-v<version>-review-raw.log`. */
  saveReviewRawLog(taskId: string, version: number, content: string): void {
    this.writeArtifact(this.resolve('logs', `${taskId}-v${version}-review-raw.log`), content);
  }

  /**
   * Reads `plans/<taskId>-v<version>.diff` as UTF-8.
   * Throws the underlying `fs` error when the file does not exist.
   */
  loadDiff(taskId: string, version: number): string {
    return fs.readFileSync(this.resolve('plans', `${taskId}-v${version}.diff`), 'utf8');
  }

  /** Reads the optional `plans/<taskId>-v<version>.md`; null when absent. */
  loadPlanSummary(taskId: string, version: number): string | null {
    const filePath = this.resolve('plans', `${taskId}-v${version}.md`);
    return fs.existsSync(filePath) ? fs.readFileSync(filePath, 'utf8') : null;
  }

  /** Returns the list of relative paths to commit per Execution Rule #14 */
  getCommitFileList(taskId: string, version: number, includeLogs: boolean): string[] {
    const files = [`tasks-management/plans/${taskId}-v${version}.diff`];
    if (includeLogs) {
      files.push(`tasks-management/logs/${taskId}-v${version}.log`);
    }
    return files;
  }

  /**
   * Removes `rules/session/<taskId>.md` if it exists.
   * `force: true` makes a missing file a no-op in a single call, avoiding the
   * check-then-delete race of `existsSync` followed by `unlinkSync`.
   */
  deleteSessionRule(taskId: string): void {
    fs.rmSync(this.resolve('rules', 'session', `${taskId}.md`), { force: true });
  }
}
2091
+ ```
2092
+
2093
+ 2. **Create `src/core/worktree-coordinator.ts`** *(RT2-5.1 fix: extracted from Orchestrator to isolate Git lifecycle operations)*:
2094
+
2095
+ ```typescript
2096
+ import type { Logger } from 'winston';
2097
+ import type { NomosConfig } from '../types/index.js';
2098
+ import { GitAdapter } from '../adapters/git.js';
2099
+ import { NomosError } from './errors.js';
2100
+
2101
/**
 * Coordinates the Git worktree lifecycle: creation, validation,
 * recovery, diff extraction, commits, and merges.
 * The Orchestrator delegates ALL git operations to this class.
 */
export class WorktreeCoordinator {
  constructor(
    private gitAdapter: GitAdapter,
    private config: NomosConfig,
    private logger: Logger,
  ) {}

  /** Creates the worktree for `taskId` via the git adapter. */
  async createWorktree(taskId: string) {
    return this.gitAdapter.createWorktree(taskId);
  }

  /**
   * Ensures the task's worktree is present, attempting recovery from `branch`
   * when it is missing.
   *
   * @throws NomosError('worktree_missing') when recovery fails. The underlying
   *         cause is logged before the error is thrown.
   */
  async ensureWorktreeExists(taskId: string, branch: string): Promise<void> {
    if (this.gitAdapter.worktreeExists(taskId)) return;
    try {
      await this.gitAdapter.recoverWorktree(taskId, branch);
      this.logger.info(`Worktree recovered for task "${taskId}".`);
    } catch (err) {
      // Keep the root cause visible: the NomosError below carries only the
      // user-facing remedy, so the original failure is logged here.
      const cause = err instanceof Error ? err.message : String(err);
      this.logger.error(`Worktree recovery failed for task "${taskId}": ${cause}`);
      throw new NomosError('worktree_missing',
        `Worktree for task "${taskId}" is missing and unrecoverable. ` +
        `Run: arc discard ${taskId} && arc init ${taskId}`);
    }
  }

  /** Returns the diff for `taskId` relative to `baseCommit`. */
  async getDiff(taskId: string, baseCommit: string): Promise<string> {
    return this.gitAdapter.getDiff(taskId, baseCommit);
  }

  /**
   * Commits `files` to the task's shadow branch with the message
   * `<commit_prefix> <kind>(<taskId>): v<version>`.
   * Does nothing when `config.git.auto_commit` is off.
   */
  private async commitIfEnabled(
    kind: 'plan' | 'review',
    taskId: string,
    version: number,
    files: string[],
  ): Promise<void> {
    if (!this.config.git.auto_commit) return;
    await this.gitAdapter.commitToShadowBranch(
      taskId,
      `${this.config.git.commit_prefix} ${kind}(${taskId}): v${version}`,
      files,
    );
  }

  /** Commits plan artifacts for the given version (when auto-commit is on). */
  async commitPlanFiles(taskId: string, version: number, files: string[]): Promise<void> {
    await this.commitIfEnabled('plan', taskId, version, files);
  }

  /** Commits review artifacts for the given version (when auto-commit is on). */
  async commitReviewFiles(taskId: string, version: number, files: string[]): Promise<void> {
    await this.commitIfEnabled('review', taskId, version, files);
  }

  /** Merges the task's work to main, passing the configured commit prefix. */
  async mergeToMain(taskId: string, version: number) {
    return this.gitAdapter.mergeToMain(
      taskId, version, this.config.git.commit_prefix,
    );
  }

  /** Removes the task's worktree; `force` is forwarded to the adapter. */
  async removeWorktree(taskId: string, force: boolean) {
    return this.gitAdapter.removeWorktree(taskId, force);
  }

  /** Reports whether the task's worktree exists. */
  worktreeExists(taskId: string): boolean {
    return this.gitAdapter.worktreeExists(taskId);
  }

  /** Search for files matching a pattern. Used by context injection. */
  async grep(pattern: string, cwd: string, timeoutMs?: number): Promise<string[]> {
    return this.gitAdapter.grep(pattern, cwd, timeoutMs);
  }
}
2173
+ ```
2174
+
2175
+ 3. **Create `src/core/orchestrator.ts`** *(the Orchestrator is now a pure state machine that delegates)*:
2176
+
2177
+ ```typescript
2178
+ import * as fs from 'fs';
2179
+ import * as path from 'path';
2180
+ import { createInterface } from 'readline';
2181
+ import { createHash } from 'crypto';
2182
+ import type { Logger } from 'winston';
2183
+ import type {
2184
+ NomosConfig, TaskState, TaskStatus, ExecutionMode,
2185
+ HistoryEntry, BinaryConfig,
2186
+ } from '../types/index.js';
2187
+ import { StateManager } from './state.js';
2188
+ import { PlanFileManager } from './plan-file-manager.js';
2189
+ import { WorktreeCoordinator } from './worktree-coordinator.js';
2190
+ import { GitAdapter } from '../adapters/git.js';
2191
+ import { PtyAdapter } from '../adapters/pty.js';
2192
+ import { StdioAdapter } from '../adapters/stdio.js';
2193
+ import { runPreflight } from './preflight.js';
2194
+ import { loadRules, assemblePrompt, assembleReviewPrompt } from './prompt.js';
2195
+ import { parseTaskFile } from '../utils/frontmatter.js';
2196
+ import { sanitizeForPty, sanitizeByPatterns, sanitizeEnv, scanFileForSecrets } from '../utils/sanitize.js';
2197
+ import { parseReviewOutput } from './review.js';
2198
+ import { parseTokensFromOutput, estimateTokens, calculateCost, checkBudget } from './budget.js';
2199
+ import { NomosError } from './errors.js';
2200
+ ```
2201
+
2202
+ **`Orchestrator` class:**
2203
+
2204
+ ```typescript
2205
+ export class Orchestrator {
2206
+ private fileManager: PlanFileManager;
2207
+ private worktreeCoord: WorktreeCoordinator;
2208
+
2209
+ constructor(
2210
+ private config: NomosConfig,
2211
+ private projectRoot: string,
2212
+ private stateManager: StateManager,
2213
+ private gitAdapter: GitAdapter,
2214
+ private ptyAdapter: PtyAdapter,
2215
+ private stdioAdapter: StdioAdapter,
2216
+ private logger: Logger,
2217
+ ) {
2218
+ // RT2-5.1 fix: Orchestrator no longer handles file I/O or Git lifecycle directly.
2219
+ // It delegates to PlanFileManager and WorktreeCoordinator.
2220
+ this.fileManager = new PlanFileManager(projectRoot, logger);
2221
+ this.worktreeCoord = new WorktreeCoordinator(gitAdapter, config, logger);
2222
+ }
2223
+
2224
+ private get stateDir(): string {
2225
+ return path.join(this.projectRoot, 'tasks-management', 'state');
2226
+ }
2227
+
2228
+ private getAdapter(binary: BinaryConfig) {
2229
+ return binary.pty ? this.ptyAdapter : this.stdioAdapter;
2230
+ }
2231
+ ```
2232
+
2233
+ **`initProject(): Promise<void>`**:
2234
+ - *(M-2 fix)* Check git repo first. If not: throw `NomosError('config_invalid', 'Not a git repository. Run: git init && git commit --allow-empty -m "initial" first.')`.
2235
+ - Check if `.nomos-config.json` already exists → error if so.
2236
+ - Create directory structure: `tasks-management/{tasks,state,plans,logs,rules,rules/session}`.
2237
+ - Generate default `.nomos-config.json` with content `JSON.stringify({ binaries: { planner: { cmd: 'claude' }, reviewer: { cmd: 'codex' } } }, null, 2)`.
2238
+ - Generate `tasks-management/rules/global.md` with the following content (inline — no external reference):
2239
+ ```markdown
2240
+ # Global Engineering Standards
2241
+
2242
+ These rules apply to all tasks. The AI must follow them without exception.
2243
+
2244
+ ## Code Quality
2245
+ - Write clean, readable, well-structured code
2246
+ - Follow SOLID principles; avoid code duplication
2247
+ - No `any` types in TypeScript
2248
+ - All public functions must have input validation
2249
+
2250
+ ## Security
2251
+ - Never hardcode credentials, keys, or tokens
2252
+ - Sanitize all user inputs before processing
2253
+ - Use parameterized queries for database operations
2254
+ - Apply principle of least privilege
2255
+
2256
+ ## Error Handling
2257
+ - All async functions must handle Promise rejections
2258
+ - Errors must include context: what failed, why, how to recover
2259
+ - Never silently swallow errors with empty catch blocks
2260
+
2261
+ ## Testing
2262
+ - Every new function must have corresponding unit tests
2263
+ - Test edge cases and failure paths, not just happy paths
2264
+ - Integration tests for any component touching external systems
2265
+
2266
+ ## Documentation
2267
+ - Functions must be self-documenting through clear naming
2268
+ - Add comments only where logic is non-obvious
2269
+ ```
2270
+ - Generate `tasks-management/rules/backend.md` with placeholder backend rules.
2271
+ - Append `tasks-management/logs/` to `.gitignore` (create if missing).
2272
+ - Never overwrite existing files.
2273
+
2274
+ **`initTask(taskId: string): Promise<TaskState>`**:
2275
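+ - Validate task ID: `/^[a-z0-9][a-z0-9-]{1,48}[a-z0-9]$/` (3–50 characters; lowercase letters, digits, and hyphens; must start and end with a letter or digit).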
2276
+ - Check task doesn't already exist.
2277
+ - Build initial `TaskState`:
2278
+ ```typescript
2279
+ const initialState: TaskState = {
2280
+ schema_version: 1,
2281
+ task_id: taskId,
2282
+ current_version: 0,
2283
+ meta: {
2284
+ status: 'init',
2285
+ created_at: new Date().toISOString(),
2286
+ updated_at: new Date().toISOString(),
2287
+ },
2288
+ orchestration: {
2289
+ planner_bin: this.config.binaries.planner.cmd,
2290
+ reviewer_bin: this.config.binaries.reviewer.cmd,
2291
+ },
2292
+ shadow_branch: { branch: '', worktree: '', base_commit: '', status: 'active' },
2293
+ context: { files: [], rules: [], rules_hash: '' },
2294
+ budget: { tokens_used: 0, estimated_cost_usd: 0 },
2295
+ history: [],
2296
+ };
2297
+ ```
2298
+ - **C4 fix — Atomic init:**
2299
+ ```typescript
2300
+ await this.stateManager.create(taskId, initialState);
2301
+ try {
2302
+ const { branch, worktreePath, baseCommit } = await this.worktreeCoord.createWorktree(taskId);
2303
+ initialState.shadow_branch = { branch, worktree: worktreePath, base_commit: baseCommit, status: 'active' };
2304
+ await this.stateManager.write(taskId, initialState);
2305
+ } catch (err) {
2306
+ // H-3 fix: log rollback failures rather than swallowing silently
2307
+ await fs.promises.unlink(path.join(this.stateDir, `${taskId}.json`)).catch((e: Error) => {
2308
+ this.logger.error(
2309
+ `Failed to rollback state file for task "${taskId}": ${e.message}. ` +
2310
+ `Manual cleanup required: rm ${path.join(this.stateDir, `${taskId}.json`)}`
2311
+ );
2312
+ });
2313
+ throw new NomosError('worktree_creation_failed',
2314
+ `Failed to create worktree: ${(err as Error).message}`);
2315
+ }
2316
+ ```
2317
+ - Create task markdown template at `tasks-management/tasks/{taskId}.md` with frontmatter.
2318
+ - Auto-commit if `config.git.auto_commit` (using Execution Rule #14 file list).
2319
+ - Return final state.
2320
+
2321
+ **`plan(taskId: string, mode: ExecutionMode): Promise<TaskState>`**:
2322
+ - Read state. Validate status allows `planning`: `['init', 'refinement', 'failed', 'stalled']`.
2323
+ - Transition to `planning` with `version_increment: true`.
2324
+ - Run preflight.
2325
+ - Load rules.
2326
+ - Parse task file.
2327
+ - **Validate `context_files`:** For each path in `frontmatter.context_files ?? []`:
2328
+ 1. Check `fs.existsSync(path.join(projectRoot, file))`. If any missing, throw with exact path.
2329
+ 2. **RT2-5.1 fix — Secret scanning:** Run `scanFileForSecrets(path.join(projectRoot, file), config.security)` on each file. If secrets detected, throw `NomosError('secrets_detected', \`Context file "${file}" contains potential secrets (matched patterns: ${matches.join(', ')}). Remove secrets or exclude this file from context_files.\`)`. This prevents `.env.local`, `credentials.json`, etc. from flowing through the external AI model. `sanitizeByPatterns` only runs on the assembled prompt string — it does NOT protect file content read during execution.
2330
+ - Get previous feedback: if current state was in `refinement`, extract issues from last history entry with `step: 'reviewing'`.
2331
+ - Assemble prompt with `contextFiles: frontmatter.context_files ?? []`.
2332
+ - Sanitize prompt (escape sequences + secrets).
2333
+ - Check budget — transition to `failed` with `reason: 'budget_exceeded'` if blocked.
2334
+ - **Dry-run mode:** If `mode === 'dry-run'`, print assembled prompt + shadow branch info to stdout. Transition back to original pre-planning status. Return state. *(Dry-run exits here — no subprocess is spawned.)*
2335
+ - Build args: `[...config.binaries.planner.args, '-p', sanitizedPrompt]`.
2336
+ - Sanitize env via `sanitizeEnv`.
2337
+ - **W2 fix — Validate worktree (via WorktreeCoordinator):**
2338
+ ```typescript
2339
+ try {
2340
+ await this.worktreeCoord.ensureWorktreeExists(taskId, state.shadow_branch.branch);
2341
+ } catch {
2342
+ await this.stateManager.transition(taskId, 'failed', { reason: 'worktree_missing' });
2343
+ throw new NomosError('worktree_missing',
2344
+ `Worktree for task "${taskId}" is missing and unrecoverable. ` +
2345
+ `Run: arc discard ${taskId} && arc init ${taskId}`);
2346
+ }
2347
+ ```
2348
+ - **Spawn planner (H-1 fix — SIGINT race resolution):**
2349
+ ```typescript
2350
+ let result;
2351
+ try {
2352
+ result = await this.ptyAdapter.execute({
2353
+ cmd: resolvedPlannerPath,
2354
+ args: plannerArgs,
2355
+ cwd: state.shadow_branch.worktree,
2356
+ env,
2357
+ mode,
2358
+ heartbeat_timeout_ms: mode === 'supervised'
2359
+ ? this.config.execution.supervised_heartbeat_timeout_ms
2360
+ : this.config.binaries.planner.heartbeat_timeout_ms,
2361
+ total_timeout_ms: this.config.binaries.planner.total_timeout_ms,
2362
+ max_output_bytes: this.config.binaries.planner.max_output_bytes,
2363
+ });
2364
+ } catch (err) {
2365
+ // SIGINT propagation — process.exitCode is already 130 (set by cli.ts SIGINT handler).
2366
+ // We still run the state transition so the task is recoverable on next run.
2367
+ await this.stateManager.transition(taskId, 'stalled', { reason: 'interrupted' });
2368
+ throw err;
2369
+ }
2370
+ ```
2371
+ - On `result.killed`: transition to `stalled` (heartbeat/total timeout) or `failed`. Return.
2372
+ - On `result.exitCode !== 0`: transition to `failed`. Return.
2373
+ - On success:
2374
+ - Save logs via PlanFileManager: `this.fileManager.saveRawLog(taskId, version, result.rawOutput)` and `this.fileManager.saveStrippedLog(taskId, version, result.strippedOutput)`.
2375
+ - Get diff via WorktreeCoordinator. **If `base_commit_unreachable` is thrown, transition to `failed` before re-throwing** (same pattern as the `no_changes` guard — state must not be left stranded in `planning`):
2376
+ ```typescript
2377
+ let diff: string;
2378
+ try {
2379
+ diff = await this.worktreeCoord.getDiff(taskId, state.shadow_branch.base_commit);
2380
+ } catch (err) {
2381
+ if (err instanceof NomosError && err.code === 'base_commit_unreachable') {
2382
+ await this.stateManager.transition(taskId, 'failed', { reason: 'base_commit_unreachable' });
2383
+ }
2384
+ throw err;
2385
+ }
2386
+ ```
2387
+ - **H-6 fix — Guard empty diff:**
2388
+ ```typescript
2389
+ if (!diff.trim()) {
2390
+ await this.stateManager.transition(taskId, 'failed', { reason: 'no_changes' });
2391
+ throw new NomosError('no_changes',
2392
+ `Planner exited successfully but made no file changes. ` +
2393
+ `Verify the planner is writing to CWD: ${state.shadow_branch.worktree}`);
2394
+ }
2395
+ ```
2396
+ - Save diff via PlanFileManager: `this.fileManager.saveDiff(taskId, version, diff)`.
2397
+ - Parse tokens (metered first, fallback to estimated). Set `tokens_source` accordingly.
2398
+ **RT2-4.3 integration:** Both `parseTokensFromOutput` and `estimateTokens` now return `TokenEstimate` with separate `input_tokens`/`output_tokens`. Store both in `HistoryEntry`.
2399
+ - Compute output hash.
2400
+ - Build `HistoryEntry` (include `tokens_source`, `input_tokens`, `output_tokens`).
2401
+ - Transition to `pending_review` with history entry attached.
2402
+ - Update budget using `calculateCost(tokenEstimate, binaryCmd, config.budget.cost_per_1k_tokens)`.
2403
+ - **Auto-commit per Execution Rule #14 (via WorktreeCoordinator):**
2404
+ ```typescript
2405
+ const filesToCommit = this.fileManager.getCommitFileList(
2406
+ taskId, version, this.config.git.include_logs,
2407
+ );
2408
+ await this.worktreeCoord.commitPlanFiles(taskId, version, filesToCommit);
2409
+ ```
2410
+ - Return updated state.
2411
+
2412
+ **`review(taskId: string, mode: ExecutionMode, runtimeMaxVersion?: number): Promise<TaskState>`**:
2413
+ - Read state. Validate status is `pending_review`.
2414
+ - Transition to `reviewing`.
2415
+ - Load diff via PlanFileManager: `const diff = this.fileManager.loadDiff(taskId, version)`.
2416
+ - Load optional plan summary via PlanFileManager: `const summary = this.fileManager.loadPlanSummary(taskId, version)`.
2417
+ - Load rules.
2418
+ - **Context Injection (pre-review import-graph scan) — RT2-5.1 fix: Full Relative Path Import Scan:**
2419
+ Extract the **file paths** changed in the diff (not symbol names — avoids false
2420
+ positives from generic names like `handle`, `data`, `init`). Then search for
2421
+ `import`/`require` statements that reference those exact **relative file paths**
2422
+ (not just the basename). This is critical: if the changed file is `src/utils/index.ts`,
2423
+ the previous basename-only regex matched **every** `from './index'` in the entire repo,
2424
+ hitting the `max_context_files` cap with irrelevant noise on any project using barrel exports.
2425
+
2426
+ **The fix:** Use the full relative path (without extension) in the grep pattern.
2427
+ For `src/utils/index.ts`, the pattern matches `from '...src/utils/index'` or
2428
+ `from '...src/utils'` (directory import). For a non-index file like `src/core/state.ts`,
2429
+ it matches `from '...src/core/state'`. The 5-second timeout on `gitAdapter.grep()` prevents
2430
+ runaway scans on large repos.
2431
+
2432
+ This step is **fail-safe**: any error logs a warning and proceeds with empty snippets.
2433
+ ```typescript
2434
+ // Context Injection — full relative path import-graph scan, fail-safe, never throws
2435
+ let affectedFileSnippets: AffectedFileSnippet[] = [];
2436
+ try {
2437
+ const changedPaths = extractChangedFilePaths(diff); // parse '--- a/' lines
2438
+ if (changedPaths.length > 0) {
2439
+ const dependentFiles: string[] = [];
2440
+ for (const changedFile of changedPaths) {
2441
+ // RT2-5.1 fix: Use FULL relative path (without extension) instead of basename.
2442
+ // This prevents 'index.ts', 'utils.ts', 'types.ts' from matching every barrel export.
2443
+ const withoutExt = changedFile.replace(/\.[^.]+$/, '');
2444
+ // For index files, also match the directory import pattern (e.g., from './utils')
2445
+ const isIndex = path.basename(withoutExt) === 'index';
2446
+ const dirPath = isIndex ? path.dirname(changedFile) : null;
2447
+
2448
+ // Pattern: import from path containing the full relative path (minus extension)
2449
+ const pattern = dirPath
2450
+ ? `from\\s+['"][^'"]*(?:${escapeRegex(withoutExt)}|${escapeRegex(dirPath)})['"]|require\\(['"][^'"]*(?:${escapeRegex(withoutExt)}|${escapeRegex(dirPath)})['"]\\)`
2451
+ : `from\\s+['"][^'"]*${escapeRegex(withoutExt)}['"]|require\\(['"][^'"]*${escapeRegex(withoutExt)}['"]\\)`;
2452
+
2453
+ // RT2-2.1 fix: grep() now has a 5s timeout built in
2454
+ const matches = await this.worktreeCoord.grep(pattern, this.projectRoot);
2455
+ dependentFiles.push(...matches.filter(f => !changedPaths.includes(f)));
2456
+ }
2457
+ const uniqueFiles = [...new Set(dependentFiles)]
2458
+ .slice(0, this.config.review.max_context_files);
2459
+ affectedFileSnippets = uniqueFiles.map(file => ({
2460
+ file,
2461
+ snippet: readFirst50Lines(path.join(this.projectRoot, file)),
2462
+ }));
2463
+ }
2464
+ } catch (err) {
2465
+ this.logger.warn(`Context injection scan failed (non-fatal): ${err}`);
2466
+ }
2467
+ ```
2468
+ Helper `extractChangedFilePaths(diff: string): string[]` lives in `src/utils/context.ts`:
2469
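+ - Collect the captured path from every line matching `/^--- a\/(.+)$/`. Newly added files appear as `--- /dev/null`, never match this pattern, and are therefore excluded.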
2470
+ - Return unique relative file paths present in the diff.
2471
+ - No symbol extraction — file paths only.
2472
+
2473
+ Helper `escapeRegex(str: string): string` lives in `src/utils/context.ts`:
2474
+ - Escapes special regex characters in the path for safe use in grep patterns.
2475
+ - `return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');`
2476
+ - Assemble review prompt with `affectedFileSnippets`.
2477
+ - Sanitize env.
2478
+ - Spawn reviewer via `StdioAdapter` (reviewer has `pty: false`).
2479
+ - Parse review output via `parseReviewOutput(result.strippedOutput, mode)` *(RT2-4.2 fix: pass actual mode)*.
2480
+ - **Retry once on parse failure:**
2481
+ ```typescript
2482
+ if (!parsed.result) {
2483
+ this.logger.warn(`Review parse failed: ${parsed.error}. Retrying once.`);
2484
+ const retryPrompt = assembleReviewPrompt(options) + '\n\n' +
2485
+ `[RETRY INSTRUCTION]\nYour previous response was not valid JSON matching the required schema. ` +
2486
+ `Respond ONLY with the JSON object, no other text.\n\n` +
2487
+ `Previous invalid response:\n${result.strippedOutput.slice(0, 500)}`;
2488
+ // Spawn a NEW subprocess for the retry — the previous process has exited
2489
+ const retryResult = await this.stdioAdapter.execute({ ...spawnOptions, stdinData: retryPrompt });
2490
+ parsed = parseReviewOutput(retryResult.strippedOutput, mode);
2491
+ if (!parsed.result) {
2492
+ this.fileManager.saveReviewRawLog(taskId, version, result.rawOutput);
2493
+ await this.stateManager.transition(taskId, 'failed', { reason: 'review_failed' });
2494
+ return this.stateManager.read(taskId); // M-7 fix: return state, don't throw
2495
+ }
2496
+ }
2497
+ ```
2498
+ - Determine `approval_reason` and next status:
2499
+ ```typescript
2500
+ // RTV-6 fix: track how approved status was reached so arc run can distinguish exit codes
2501
+ const score = parsed.result.score;
2502
+ let nextStatus: TaskStatus;
2503
+ let approvalReason: 'score_threshold' | 'max_iterations_reached' | undefined;
2504
+
2505
+ if (score >= this.config.convergence.score_threshold) {
2506
+ nextStatus = 'approved';
2507
+ approvalReason = 'score_threshold';
2508
+ } else {
2509
+ // RT2-5.1 fix: use runtimeMaxVersion (from run()) when available, otherwise
2510
+ // fall back to config.max_iterations. This prevents the divergence where
2511
+ // manually-incremented current_version immediately triggers max_iterations_reached.
2512
+ const maxVersion = runtimeMaxVersion ?? this.config.convergence.max_iterations;
2513
+ if (state.current_version >= maxVersion) {
2514
+ nextStatus = 'approved';
2515
+ approvalReason = 'max_iterations_reached';
2516
+ this.logger.warn(`Max iterations reached (version ${state.current_version} >= ${maxVersion}) without convergence.`);
2517
+ } else {
2518
+ nextStatus = 'refinement';
2519
+ }
2520
+ }
2521
+ ```
2522
+ - Transition to next status, attaching review result and `approval_reason` (if set).
2523
+ - **W1 fix — Budget check after review:**
2524
+ ```typescript
2525
+ // M-4 fix: re-read state after review (review() wrote state; local var is stale)
2526
+ const updatedState = await this.stateManager.read(taskId);
2527
+ const budgetCheck = checkBudget(updatedState, this.config);
2528
+ if (budgetCheck.warning) this.logger.warn(budgetCheck.warning);
2529
+ ```
2530
+ - Auto-commit per Execution Rule #14.
2531
+ - Return updated state. *(M-7 fix: NEVER throw for convergence miss — return state and let CLI decide exit code.)*
2532
+
2533
+ **`run(taskId: string, mode: ExecutionMode, maxIterations?: number): Promise<TaskState>`**:
2534
+ ```typescript
2535
const limit = maxIterations ?? this.config.convergence.max_iterations;

// RT2-5.1 fix: Loop counter divergence — the previous implementation used a local
// counter `i` while review() checked state.current_version against config.max_iterations.
// These are different counters. If a user ran `arc plan` + `arc review` 3 times manually
// then called `arc run --iterations 10`, review() would immediately fire
// 'max_iterations_reached' because current_version was already 3.
//
// Fix: run() reads the STARTING version and counts only iterations IT performs.
// It passes its own runtime limit into review() via a runtimeMaxVersion override
// so review()'s convergence check is synchronized with run()'s loop.
const startState = await this.stateManager.read(taskId);
const startVersion = startState.current_version;
// Loop-invariant: computed once, so review() uses OUR limit, not config.max_iterations.
const runtimeMaxVersion = startVersion + limit;

let i = 0;
while (i < limit) {
  const planned = await this.plan(taskId, mode);
  // plan() RETURNS (does not throw) when the planner was killed, exited non-zero,
  // or ran in dry-run mode, leaving the task in a status other than 'pending_review'.
  // review() only accepts 'pending_review', so stop here instead of calling it
  // against a state it will reject.
  if (planned.meta.status !== 'pending_review') break;

  await this.review(taskId, mode, runtimeMaxVersion);
  // M-4 fix: always re-read after async operations
  const state = await this.stateManager.read(taskId);
  if (state.meta.status === 'approved' || state.meta.status === 'failed') break;

  // W1 fix: budget guard before next iteration
  const budgetCheck = checkBudget(state, this.config);
  if (!budgetCheck.allowed) {
    this.logger.error(budgetCheck.error);
    break;
  }

  if (mode === 'supervised') {
    const answer = await this.promptUser(
      `Plan v${state.current_version} done. Score: ${state.history[state.history.length - 1]?.review?.score ?? 'N/A'}. ` +
      `Budget: ${state.budget.tokens_used}/${this.config.budget.max_tokens_per_task} tokens. Continue? [Y/n] `
    );
    if (answer.trim().toLowerCase() === 'n') break;
  } else {
    // W1 fix: 2s backoff in auto mode so user can Ctrl+C between iterations
    await new Promise(resolve => setTimeout(resolve, 2000));
  }
  i++;
}
// Always hand back the persisted state, whichever way the loop ended.
return this.stateManager.read(taskId);
2579
+ ```
2580
+
2581
+ **`promptUser(question: string): Promise<string>`** (private):
2582
+ ```typescript
2583
+ // W8 fix: restore stdin state defensively before readline
2584
+ private async promptUser(question: string): Promise<string> {
2585
+ try { if (process.stdin.isTTY) process.stdin.setRawMode(false); } catch {}
2586
+ process.stdin.resume();
2587
+ const rl = createInterface({ input: process.stdin, output: process.stdout });
2588
+ return new Promise((resolve) => {
2589
+ rl.question(question, (answer) => { rl.close(); resolve(answer); });
2590
+ });
2591
+ }
2592
+ ```
2593
+
2594
+ **`apply(taskId: string): Promise<void>`**:
2595
+ - Read state. Guard: status must be `approved` or `merge_conflict` *(M-1 fix)*.
2596
+ - Delete session rule via PlanFileManager: `this.fileManager.deleteSessionRule(taskId)`.
2597
+ - Merge shadow branch via WorktreeCoordinator: `this.worktreeCoord.mergeToMain(taskId, state.current_version)`. Enforces W3 (target branch) and W7 (dirty tree) internally.
2598
+ - If conflicts: transition to `merge_conflict`. Print conflict list. Return (do not throw).
2599
+ - On success: `this.worktreeCoord.removeWorktree(taskId, false)`. Transition to `merged`.
2600
+
2601
+ **`discard(taskId: string): Promise<void>`**:
2602
+ - Read state. Guard: not terminal (`merged` or `discarded`).
2603
+ - Delete session rule via PlanFileManager: `this.fileManager.deleteSessionRule(taskId)`.
2604
+ - `this.worktreeCoord.removeWorktree(taskId, true)` (force delete branch).
2605
+ - Transition to `discarded`.
2606
+
2607
+ **`status(taskId: string): Promise<TaskState>`**:
2608
+ - Read and return current state. Never throws for valid task IDs.
2609
+
2610
+ **Dependencies:** Tasks 1.4, 1.5, 1.6, 1.7, 2.1, 2.2, 3.1, 3.2, 3.3, 4.1, 4.2, 4.3
2611
+ **Definition of Done:** Orchestrator is decomposed: `PlanFileManager` handles all file I/O, `WorktreeCoordinator` handles all git lifecycle operations, Orchestrator is a pure state machine. Planner uses PtyAdapter, reviewer uses StdioAdapter. `commitToShadowBranch` called via WorktreeCoordinator with explicit plan-file-only list from PlanFileManager (never state JSON). `approval_reason` set in `review()` when status transitions to `approved`. `review()` accepts optional `runtimeMaxVersion` to prevent `run()` loop counter divergence. `review()` returns state — never throws for convergence miss (M-7). SIGINT catch block transitions to `stalled` then re-throws. Context injection uses full relative path import scan (not basename). `context_files` scanned for secrets before validation. `parseReviewOutput` called with actual `mode`. Token tracking uses `TokenEstimate` with separate `input_tokens`/`output_tokens`.
2612
+ **Verification Command:** `npx tsc --noEmit`
2613
+
2614
+ ---
2615
+
2616
+ ## Milestone 6: CLI Commands & Entry Point
2617
+
2618
+ ### [x] Task 6.1 — CLI Factory & `arc init` Command *(Completed: 2026-04-03)*
2619
+
2620
+ **Component:** `src/commands/init.ts`, `src/core/factory.ts`, update `src/cli.ts`
2621
+ **Objective:** Implement the orchestrator factory (lazy initialization) and the `arc init` command.
2622
+
2623
+ **Technical Instruction:**
2624
+
2625
+ 1. Create `src/core/factory.ts`:
2626
+
2627
+ ```typescript
2628
+ import * as path from 'path';
2629
+ import { createLogger } from './logger.js';
2630
+ import { loadConfig, NomosConfigSchema } from './config.js';
2631
+ import { StateManager } from './state.js';
2632
+ import { GitAdapter } from '../adapters/git.js';
2633
+ import { PtyAdapter } from '../adapters/pty.js';
2634
+ import { StdioAdapter } from '../adapters/stdio.js';
2635
+ import { Orchestrator } from './orchestrator.js';
2636
+ import type { NomosConfig } from '../types/index.js';
2637
+
2638
+ export async function createOrchestrator(options?: {
2639
+ skipConfig?: boolean;
2640
+ projectRoot?: string; // E5 fix: explicit root avoids process.chdir() in tests
2641
+ }): Promise<{ orchestrator: Orchestrator; config: NomosConfig; projectRoot: string }> {
2642
+
2643
+ if (options?.skipConfig) {
2644
+ // arc init (project scaffold) — no config file exists yet.
2645
+ // RTV-3 fix: use NomosConfigSchema.parse({}) instead of the previously-undefined getDefaultConfig()
2646
+ const projectRoot = options.projectRoot ?? process.cwd();
2647
+ const config = NomosConfigSchema.parse({}) as NomosConfig;
2648
+ const logger = createLogger('info');
2649
+ const stateDir = path.join(projectRoot, 'tasks-management', 'state');
2650
+ const stateManager = new StateManager(stateDir, logger);
2651
+ const gitAdapter = new GitAdapter(projectRoot, config, logger);
2652
+ const ptyAdapter = new PtyAdapter(logger);
2653
+ const stdioAdapter = new StdioAdapter(logger);
2654
+ return {
2655
+ orchestrator: new Orchestrator(config, projectRoot, stateManager, gitAdapter, ptyAdapter, stdioAdapter, logger),
2656
+ config,
2657
+ projectRoot,
2658
+ };
2659
+ }
2660
+
2661
+ const { config, projectRoot } = loadConfig(options?.projectRoot);
2662
+ const logger = createLogger(config.logging.level, path.join(projectRoot, 'tasks-management', 'logs'));
2663
+ logger.debug(`Project root: ${projectRoot}`);
2664
+ const stateDir = path.join(projectRoot, 'tasks-management', 'state');
2665
+ const stateManager = new StateManager(stateDir, logger);
2666
+ // M-6 fix: clean orphaned .tmp files from crashed writes on startup
2667
+ stateManager.cleanupTempFiles(stateDir);
2668
+ const gitAdapter = new GitAdapter(projectRoot, config, logger);
2669
+ const ptyAdapter = new PtyAdapter(logger);
2670
+ const stdioAdapter = new StdioAdapter(logger);
2671
+ return {
2672
+ orchestrator: new Orchestrator(config, projectRoot, stateManager, gitAdapter, ptyAdapter, stdioAdapter, logger),
2673
+ config,
2674
+ projectRoot,
2675
+ };
2676
+ }
2677
+ ```
2678
+
2679
+ 2. Create `src/commands/init.ts`:
2680
+
2681
+ ```typescript
2682
+ import * as fs from 'fs';
2683
+ import * as path from 'path';
2684
+ import type { Command } from 'commander';
2685
+ import { createOrchestrator } from '../core/factory.js';
2686
+ import { GitAdapter } from '../adapters/git.js';
2687
+ import { createLogger } from '../core/logger.js';
2688
+ import { NomosError } from '../core/errors.js';
2689
+
2690
+ export function registerInitCommand(program: Command): void {
2691
+ program
2692
+ .command('init [task]')
2693
+ .description('Initialize a new project (no task) or a new task (with task name)')
2694
+ .option('--force', 'Force re-initialize: prune zombie worktrees, delete task branch, and reset state')
2695
+ .action(async (task?: string, options?: { force?: boolean }) => {
2696
+ try {
2697
+ if (!task) {
2698
+ // Project initialization — no config exists yet
2699
+ const { orchestrator } = await createOrchestrator({ skipConfig: true });
2700
+ await orchestrator.initProject();
2701
+ console.log(
2702
+ "Project initialized. Next steps:\n" +
2703
+ " 1. Edit tasks-management/rules/global.md with your engineering standards.\n" +
2704
+ " 2. Run: arc init <task-name> to create your first task."
2705
+ );
2706
+ } else if (options?.force) {
2707
+ // Force Recovery — zombie worktree cleanup + state reset
2708
+ // GAP-3 fix: handles SIGKILL mid-init, crash mid-worktree-creation,
2709
+ // or any state where git metadata and filesystem are out of sync.
2710
+ //
2711
+ // RT2-6.1 fix: The previous 5-step sequence had a fatal ordering bug.
2712
+ // Step 1 was `git worktree prune`, which only removes metadata for worktrees
2713
+ // whose directories NO LONGER EXIST. In the most common crash scenario
2714
+ // (SIGKILL mid-creation), the directory STILL EXISTS, so prune is a no-op.
2715
+ // Then branch deletion fails because git says the branch is "checked out"
2716
+ // in the still-registered worktree.
2717
+ //
2718
+ // Correct sequence: unlock → remove directory → prune metadata → delete branch.
2719
+ const { orchestrator, config, projectRoot } = await createOrchestrator();
2720
+ // RT2-6.1 fix: Use GitAdapter (imported above from '../adapters/git.js'), not raw simpleGit.
2721
+ // The previous code called `simpleGit(projectRoot)` but simpleGit was never
2722
+ // imported in init.ts — a compile-time error hidden by the --help verification.
2723
+ const gitAdapter = new GitAdapter(projectRoot, config,
2724
+ createLogger(config.logging.level));
2725
+
2726
+ const branchName = `${config.execution.shadow_branch_prefix}${task}`;
2727
+ const worktreePath = path.join(
2728
+ config.execution.worktree_base,
2729
+ path.basename(projectRoot),
2730
+ task,
2731
+ );
2732
+
2733
+ // Step 1: Unlock the worktree if it was locked (prevents removal)
2734
+ try {
2735
+ await gitAdapter.raw(['worktree', 'unlock', worktreePath]);
2736
+ } catch {
2737
+ // Worktree may not exist or not be locked — not an error
2738
+ }
2739
+
2740
+ // Step 2: Remove the worktree directory from filesystem
2741
+ // This MUST happen before prune — prune only cleans metadata for
2742
+ // directories that no longer exist on disk.
2743
+ if (fs.existsSync(worktreePath)) {
2744
+ fs.rmSync(worktreePath, { recursive: true, force: true });
2745
+ }
2746
+
2747
+ // Step 3: Prune git worktree metadata (now that directory is gone)
2748
+ await gitAdapter.raw(['worktree', 'prune']);
2749
+
2750
+ // Step 4: Force-delete the task's shadow branch
2751
+ // This now succeeds because the worktree referencing it was pruned.
2752
+ try {
2753
+ await gitAdapter.raw(['branch', '-D', branchName]);
2754
+ } catch {
2755
+ // Branch may not exist — not an error
2756
+ }
2757
+
2758
+ // Step 5: Remove stale state file
2759
+ const statePath = path.join(projectRoot, 'tasks-management', 'state', `${task}.json`);
2760
+ if (fs.existsSync(statePath)) {
2761
+ fs.rmSync(statePath);
2762
+ }
2763
+
2764
+ // Step 6: Re-initialize the task cleanly
2765
+ await orchestrator.initTask(task);
2766
+ console.log(
2767
+ `[nomos:force] Zombie worktree and stale state cleared for '${task}'.\n` +
2768
+ `Task re-initialized.\n` +
2769
+ ` Edit: tasks-management/tasks/${task}.md\n` +
2770
+ ` Then: arc plan ${task}`
2771
+ );
2772
+ } else {
2773
+ // Task initialization — config MUST exist.
2774
+ // RTV-8: If config is missing, loadConfig() throws NomosError('config_not_found').
2775
+ // The error handler below catches this and provides a targeted recovery message.
2776
+ const { orchestrator } = await createOrchestrator();
2777
+ await orchestrator.initTask(task);
2778
+ console.log(
2779
+ `Task '${task}' initialized.\n` +
2780
+ ` Edit: tasks-management/tasks/${task}.md\n` +
2781
+ ` Then: arc plan ${task}`
2782
+ );
2783
+ }
2784
+ } catch (err) {
2785
+ if (err instanceof NomosError) {
2786
+ if (err.code === 'config_not_found') {
2787
+ // RTV-8 fix: specific message when arc init <task> is run without a project
2788
+ console.error(
2789
+ `[nomos:error] No project found. To create a task, you must first initialize a project.\n` +
2790
+ ` Run: arc init (to scaffold a new project in the current directory)\n` +
2791
+ ` Then: arc init ${task}`
2792
+ );
2793
+ } else {
2794
+ console.error(`[nomos:error] ${err.message}`);
2795
+ }
2796
+ process.exit(1);
2797
+ }
2798
+ console.error(`[nomos:error] Unexpected error: ${err}`);
2799
+ process.exit(1);
2800
+ }
2801
+ });
2802
+ }
2803
+ ```
2804
+
2805
+ 3. Update `src/cli.ts` to import and register the init command.
2806
+
2807
+ **Dependencies:** Task 5.1
2808
+ **Definition of Done:** Factory uses `NomosConfigSchema.parse({})` (not undefined `getDefaultConfig()`). `arc init <task>` without a project produces a helpful multi-line error. `arc init <task> --force` clears zombie worktrees and resets state in **6 deterministic steps** (unlock → rm directory → prune → branch -D → rm state → re-init). **RT2-6.1 fix:** `simpleGit` is NOT imported directly — uses `GitAdapter` from factory. The `--force` sequence survives the exact crash scenario it's designed for (SIGKILL mid-creation where directory exists but git metadata is incomplete). Factory calls `cleanupTempFiles` on startup. `projectRoot` option supported for test isolation.
2809
+ **Verification Command:** `npx tsx src/cli.ts init --help`
2810
+
2811
+ ---
2812
+
2813
+ ### [x] Task 6.2 — `arc plan` Command *(Completed: 2026-04-03)*
2814
+
2815
+ **Component:** `src/commands/plan.ts`, update `src/cli.ts`
2816
+
2817
+ **Technical Instruction:**
2818
+
2819
+ 1. Create `src/commands/plan.ts`:
2820
+ - Register `program.command('plan <task>')`.
2821
+ - Add `--mode <mode>` option.
2822
+ - Create orchestrator via factory. Resolve mode: flag > `config.execution.default_mode`.
2823
+ - Call `orchestrator.plan(task, mode)`.
2824
+ - Print: `"Plan v${version} saved to tasks-management/plans/${task}-v${version}.diff"`.
2825
+ - Exit code 1 on error.
2826
+
2827
+ 2. Update `src/cli.ts` to register the plan command.
2828
+
2829
+ **Dependencies:** Task 6.1
2830
+ **Definition of Done:** `arc plan <task>` spawns the planner in the correct mode and persists results.
2831
+ **Verification Command:** `npx tsx src/cli.ts plan --help`
2832
+
2833
+ ---
2834
+
2835
+ ### [x] Task 6.3 — `arc review` Command *(Completed: 2026-04-03)*
2836
+
2837
+ **Component:** `src/commands/review.ts`, update `src/cli.ts`
2838
+
2839
+ **Technical Instruction:**
2840
+
2841
+ 1. Create `src/commands/review.ts`:
2842
+ - Register `program.command('review <task>')`.
2843
+ - Add `--mode` option.
2844
+ - Call `orchestrator.review(task, mode)`.
2845
+ - Read final state and print review summary:
2846
+ ```
2847
+ Review complete (v${version}):
2848
+ Score: ${score}
2849
+ Status: ${status}
2850
+ Issues: ${issues.length} (${highCount} high, ${medCount} medium, ${lowCount} low)
2851
+ Summary: ${summary}
2852
+ ```
2853
+ - *(M-7 fix)* Exit codes from state inspection, NOT exception catching:
2854
+ ```typescript
2855
+ const state = await orchestrator.review(task, mode);
2856
+ const lastReview = state.history[state.history.length - 1]?.review;
2857
+ // ... print summary ...
2858
+ if (state.meta.status === 'approved') {
2859
+ process.exit(0);
2860
+ } else if (state.meta.status === 'refinement') {
2861
+ process.exit(2); // expected non-success — not an error
2862
+ } else {
2863
+ process.exit(1); // failed, or other unexpected state
2864
+ }
2865
+ ```
2866
+
2867
+ 2. Update `src/cli.ts`.
2868
+
2869
+ **Dependencies:** Task 6.1
2870
+ **Definition of Done:** Exit code logic in CLI layer. `review()` returning `refinement` status exits 2, not throwing.
2871
+ **Verification Command:** `npx tsx src/cli.ts review --help`
2872
+
2873
+ ---
2874
+
2875
+ ### [x] Task 6.4 — `arc run` Command *(Completed: 2026-04-03)*
2876
+
2877
+ **Component:** `src/commands/run.ts`, update `src/cli.ts`
2878
+
2879
+ **Technical Instruction:**
2880
+
2881
+ 1. Create `src/commands/run.ts`:
2882
+ - Register `program.command('run <task>')`.
2883
+ - Add `--mode` and `--iterations <n>` (parseInt) options.
2884
+ - Call `orchestrator.run(task, mode, iterations)`.
2885
+ - Read final state and print summary.
2886
+ - **Exit code determination (RTV-6 fix — replaces the removed `NomosError('convergence_failed')`):**
2887
+ ```typescript
2888
+ const finalState = await orchestrator.run(task, mode, iterations);
2889
+ // ... print summary ...
2890
+
2891
+ if (finalState.meta.status === 'approved') {
2892
+ if (finalState.meta.approval_reason === 'max_iterations_reached') {
2893
+ // Approved because the iteration limit was hit, not because the score threshold was met — signal as non-convergence
2894
+ console.error(
2895
+ `[nomos:warn] Max iterations reached without meeting score threshold. ` +
2896
+ `Final score: ${finalState.history[finalState.history.length - 1]?.review?.score ?? 'N/A'}. ` +
2897
+ `Consider increasing max_iterations or adjusting score_threshold in config.`
2898
+ );
2899
+ process.exit(2); // expected non-convergence
2900
+ }
2901
+ process.exit(0); // genuine convergence
2902
+ } else {
2903
+ process.exit(1); // error (failed state, budget exceeded, etc.)
2904
+ }
2905
+ ```
2906
+ - **Note:** `NomosError('convergence_failed')` is NOT thrown anywhere. Exit code 2 is set by inspecting `meta.approval_reason`. This is the M-7 pattern applied to `run`.
2907
+
2908
+ 2. Update `src/cli.ts`.
2909
+
2910
+ **Dependencies:** Task 6.2, Task 6.3
2911
+ **Definition of Done:** Exit code 0 = genuine convergence. Exit code 2 = max iterations forced approval. Exit code 1 = actual error. No `NomosError('convergence_failed')` in the codebase.
2912
+ **Verification Command:** `npx tsx src/cli.ts run --help`
2913
+
2914
+ ---
2915
+
2916
+ ### [x] Task 6.5 — `arc status`, `arc apply`, `arc discard`, `arc list` Commands *(Completed: 2026-04-03)*
2917
+
2918
+ **Component:** `src/commands/status.ts`, `apply.ts`, `discard.ts`, `list.ts`, update `src/cli.ts`
2919
+
2920
+ **Technical Instruction:**
2921
+
2922
+ 1. **`arc status <task>`** (`src/commands/status.ts`):
2923
+ - Call `orchestrator.status(task)`.
2924
+ - Print formatted summary. **RTV-4 fix — `hasMeteredTokens` derivation:**
2925
+ ```typescript
2926
+ const state = await orchestrator.status(task);
2927
+ // Derive hasMeteredTokens from history entries
2928
+ const hasMeteredTokens = state.history.some(h => h.tokens_source === 'metered');
2929
+ const lastReview = state.history
2930
+ .slice()
2931
+ .reverse()
2932
+ .find(h => h.review !== null)?.review;
2933
+
2934
+ console.log(`
2935
+ Task: ${state.task_id}
2936
+ Status: ${state.meta.status}
2937
+ Version: ${state.current_version}
2938
+ Last Score: ${lastReview?.score?.toFixed(2) ?? 'N/A'}
2939
+ Shadow Branch: ${state.shadow_branch.branch}
2940
+ Worktree: ${state.shadow_branch.worktree}
2941
+ Tokens Used: ${state.budget.tokens_used} (${hasMeteredTokens ? 'metered' : '~estimated'})
2942
+ Estimated Cost: $${state.budget.estimated_cost_usd.toFixed(4)}
2943
+ `.trim());
2944
+ ```
2945
+ - **RTV-7 fix — Recovery hints per status:**
2946
+ ```typescript
2947
+ const recoveryHints: Partial<Record<typeof state.meta.status, string>> = {
2948
+ init: `Run: arc plan ${task}`,
2949
+ planning: `(in progress — if stuck, run: arc discard ${task} && arc init ${task})`,
2950
+ pending_review:`Run: arc review ${task}`,
2951
+ reviewing: `(in progress — if stuck, run: arc discard ${task} && arc init ${task})`,
2952
+ refinement: `Run: arc plan ${task} (to address review feedback)`,
2953
+ approved: `Run: arc apply ${task} (to merge) or: arc discard ${task}`,
2954
+ merge_conflict:`Resolve conflicts manually, then: arc apply ${task}`,
2955
+ stalled: `Run: arc plan ${task} (to retry from stalled state)`,
2956
+ failed: `Run: arc plan ${task} (to retry from failed state)`,
2957
+ merged: `(terminal — task complete)`,
2958
+ discarded: `(terminal — task discarded)`,
2959
+ };
2960
+ const hint = recoveryHints[state.meta.status];
2961
+ if (hint) console.log(`\nNext step: ${hint}`);
2962
+ ```
2963
+ - Always exit code 0.
2964
+
2965
+ 2. **`arc apply <task>`** (`src/commands/apply.ts`):
2966
+ - Call `orchestrator.apply(task)`.
2967
+ - On success: `"Task '${task}' merged to main. Shadow branch cleaned up."`, exit 0.
2968
+ - On merge conflict (returned from apply without throw): print conflict list, exit 3 *(corrected from 1 — merge conflict is recoverable, not an error)*.
2969
+ - On `NomosError('invalid_transition')`: print current status + next steps, exit 1.
2970
+ - **Exit code 3** = merge conflict requiring manual resolution. Add to the project exit code matrix (see Task 6.6).
2971
+
2972
+ 3. **`arc discard <task>`** (`src/commands/discard.ts`):
2973
+ - Call `orchestrator.discard(task)`.
2974
+ - Print `"Task '${task}' discarded."`, exit 0.
2975
+
2976
+ 4. **`arc list`** (`src/commands/list.ts`) — E6 enhancement:
2977
+ - Call `stateManager.listTasks()` (access via the returned `orchestrator` internals or add a public `listTasks()` method to Orchestrator).
2978
+ - Print table:
2979
+ ```
2980
+ Task Status Version Last Score Tokens
2981
+ ─────────────── ────────────── ─────── ────────── ──────
2982
+ auth-refactor approved 3 0.92 12500
2983
+ fix-login-bug planning 1 N/A 3200
2984
+ ```
2985
+ - No tasks: `"No tasks found. Run: arc init <task-name> to create one."`, exit 0.
2986
+ - Always exit code 0.
2987
+
2988
+ 5. Update `src/cli.ts` to register all four commands.
2989
+
2990
+ **Dependencies:** Task 5.1
2991
+ **Definition of Done:** `arc status` derives `hasMeteredTokens` from `history[].tokens_source`. Status output includes recovery hint for every non-terminal status. `arc apply` uses exit code 3 for merge conflict. `arc list` works.
2992
+ **Verification Command:** `npx tsx src/cli.ts status --help && npx tsx src/cli.ts apply --help && npx tsx src/cli.ts discard --help && npx tsx src/cli.ts list --help`
2993
+
2994
+ ---
2995
+
2996
+ ### [x] Task 6.6 — Exit Code Enforcement & Build Configuration *(Completed: 2026-04-03)*
2997
+
2998
+ **Component:** `src/cli.ts`, `package.json`
2999
+ **Objective:** Enforce deterministic exit codes and configure the esbuild production build.
3000
+
3001
+ **Exit Code Matrix (complete reference):**
3002
+ | Code | Meaning | Set by |
3003
+ |------|---------|--------|
3004
+ | 0 | Success | commands on normal completion |
3005
+ | 1 | Error | any `NomosError` or unexpected exception |
3006
+ | 2 | Expected non-success | `arc review` (refinement), `arc run` (max iterations) |
3007
+ | 3 | Merge conflict | `arc apply` (conflict requires manual resolution) |
3008
+ | 130 | SIGINT (Ctrl+C) | SIGINT handler |
3009
+
3010
+ **Technical Instruction:**
3011
+
3012
+ 1. Update `src/cli.ts` with the complete final version:
3013
+
3014
+ ```typescript
3015
+ import { Command } from 'commander';
3016
+ import { createRequire } from 'module';
3017
+ import { NomosError } from './core/errors.js';
3018
+ import { registerInitCommand } from './commands/init.js';
3019
+ import { registerPlanCommand } from './commands/plan.js';
3020
+ import { registerReviewCommand } from './commands/review.js';
3021
+ import { registerRunCommand } from './commands/run.js';
3022
+ import { registerStatusCommand } from './commands/status.js';
3023
+ import { registerApplyCommand } from './commands/apply.js';
3024
+ import { registerDiscardCommand } from './commands/discard.js';
3025
+ import { registerListCommand } from './commands/list.js';
3026
+
3027
+ const require = createRequire(import.meta.url);
3028
+ const pkg = require('../package.json');
3029
+
3030
+ const program = new Command();
3031
+ program.name('arc').description('The Architect — AI Orchestrator CLI').version(pkg.version);
3032
+
3033
+ // RTV-5 fix: SIGINT handler uses process.exitCode (not process.exit) to avoid racing
3034
+ // with the orchestrator's state-transition catch block.
3035
+ //
3036
+ // Flow on Ctrl+C:
3037
+ // 1. SIGINT fires → stdin restored, process.exitCode = 130
3038
+ // 2. Child process receives SIGINT → PTY onExit fires → ptyAdapter.execute() resolves
3039
+ // 3. Orchestrator's catch block runs: await stateManager.transition(taskId, 'stalled', ...)
3040
+ // 4. State written successfully (no race — process.exit() was NOT called yet)
3041
+ // 5. catch block re-throws error → main() catches it
3042
+ // 6. main() sees process.exitCode === 130 → calls process.exit(130)
3043
+ //
3044
+ // This is a non-destructive exit: state is persisted, task is recoverable via arc plan.
3045
+ process.on('SIGINT', () => {
3046
+ process.exitCode = 130; // mark the intended exit code
3047
+ try {
3048
+ if (process.stdin.isTTY) process.stdin.setRawMode(false);
3049
+ } catch {}
3050
+ process.stdin.pause();
3051
+ // Do NOT call process.exit() here. Let the orchestrator's catch block complete first.
3052
+ });
3053
+
3054
+ [
3055
+ registerInitCommand,
3056
+ registerPlanCommand,
3057
+ registerReviewCommand,
3058
+ registerRunCommand,
3059
+ registerStatusCommand,
3060
+ registerApplyCommand,
3061
+ registerDiscardCommand,
3062
+ registerListCommand,
3063
+ ].forEach(fn => fn(program));
3064
+
3065
+ async function main() {
3066
+ try {
3067
+ await program.parseAsync();
3068
+ } catch (err) {
3069
+ // RTV-5: If SIGINT was received, honour the 130 exit code
3070
+ if (process.exitCode === 130) {
3071
+ console.error('\n[nomos:warn] Interrupted. Task state preserved (stalled).');
3072
+ process.exit(130);
3073
+ }
3074
+ if (err instanceof NomosError) {
3075
+ console.error(`[nomos:error] ${err.message}`);
3076
+ process.exit(1);
3077
+ }
3078
+ console.error(`[nomos:error] Unexpected error: ${err}`);
3079
+ process.exit(1);
3080
+ }
3081
+ }
3082
+
3083
+ main();
3084
+ ```
3085
+
3086
+ 2. Verify the `build` script in `package.json` has all necessary externals:
3087
+ ```json
3088
+ "build": "esbuild src/cli.ts --bundle --platform=node --target=node20 --outfile=dist/cli.js --format=esm --external:node-pty --external:simple-git --external:proper-lockfile --external:winston --external:gray-matter --banner:js='#!/usr/bin/env node'"
3089
+ ```
3090
+ **Why these externals:**
3091
+ - `node-pty`: native C++ addon, cannot be bundled.
3092
+ - `simple-git`: spawns `git` binary, has dynamic requires.
3093
+ - `proper-lockfile`: uses native fs operations.
3094
+ - `winston`: transport plugins loaded dynamically.
3095
+ - `gray-matter`: has optional dependencies.
3096
+
3097
+ 3. Test the build: `npm run build && node dist/cli.js --version`.
3098
+
3099
+ **Dependencies:** Tasks 6.1–6.5
3100
+ **Definition of Done:** SIGINT handler sets `process.exitCode = 130` (not `process.exit(130)`). `main()` checks `process.exitCode === 130` before other error handling. Exit codes are deterministic per the matrix. `npm run build` produces a working binary.
3101
+ **Verification Command:** `npm run build && node dist/cli.js --version`
3102
+
3103
+ ---
3104
+
3105
+ ## Milestone 7: End-to-End Testing
3106
+
3107
+ ### [x] Task 7.1 — Mock Binaries *(Completed: 2026-04-03)*
3108
+
3109
+ **Component:** `test/fixtures/mock-planner.ts`, `test/fixtures/mock-reviewer.ts`, etc.
3110
+ **Objective:** Create mock binaries that simulate planner and reviewer behavior for E2E tests without requiring real AI API keys.
3111
+
3112
+ **Technical Instruction:**
3113
+
3114
+ 1. Create `test/fixtures/mock-planner.ts`:
3115
+ ```typescript
3116
+ #!/usr/bin/env node
3117
+ // Simulates: claude -p "<prompt>" running inside a worktree
3118
+ // Reads prompt from -p arg, creates a file, commits it, prints token usage.
3119
+ import simpleGit from 'simple-git';
3120
+ import * as fs from 'fs';
3121
+ import * as path from 'path';
3122
+
3123
+ const pIdx = process.argv.indexOf('-p');
3124
+ const prompt = pIdx !== -1 ? process.argv[pIdx + 1] : '';
3125
+
3126
+ // Write ANSI-colored output (to test ANSI stripping in PtyAdapter)
3127
+ process.stdout.write('\x1b[32m✓\x1b[0m Generating plan...\n');
3128
+ process.stdout.write(`Received prompt (${prompt.length} chars)\n`);
3129
+
3130
+ // Create a file in the current working directory (the worktree)
3131
+ const outputFile = path.join(process.cwd(), 'src', 'mock-output.ts');
3132
+ fs.mkdirSync(path.dirname(outputFile), { recursive: true });
3133
+ fs.writeFileSync(outputFile, `// mock implementation\nexport const result = 'plan';\n`);
3134
+
3135
+ // L-5 fix: commit using simple-git, not shell git commands
3136
+ const git = simpleGit(process.cwd());
3137
+ await git.add('.');
3138
+ await git.commit('mock planner: add implementation');
3139
+
3140
+ process.stdout.write('Tokens used: 5000\n');
3141
+ process.exit(0);
3142
+ ```
3143
+
3144
+ 2. Create `test/fixtures/mock-reviewer.ts`:
3145
+ ```typescript
3146
+ #!/usr/bin/env node
3147
+ // Simulates: codex -q --full-auto receiving review prompt via stdin
3148
+ let input = '';
3149
+ process.stdin.on('data', (chunk: Buffer) => { input += chunk.toString(); });
3150
+ process.stdin.on('end', () => {
3151
+ const review = {
3152
+ score: 0.92,
3153
+ summary: "Plan is well-structured with clear implementation steps and good error handling.",
3154
+ issues: [{
3155
+ severity: "low",
3156
+ category: "maintainability",
3157
+ description: "Consider adding more inline documentation.",
3158
+ suggestion: "Add JSDoc comments to exported functions."
3159
+ }]
3160
+ };
3161
+ process.stdout.write(JSON.stringify(review));
3162
+ process.exit(0);
3163
+ });
3164
+ ```
3165
+
3166
+ 3. Create `test/fixtures/mock-planner-hang.ts`:
3167
+ ```typescript
3168
+ #!/usr/bin/env node
3169
+ // Writes initial output then hangs — used to test heartbeat/total timeout
3170
+ process.stdout.write('Starting plan...\n');
3171
+ // Never exits — timeout test relies on this hanging indefinitely
3172
+ setInterval(() => {}, 60000);
3173
+ ```
3174
+
3175
+ 4. Create `test/fixtures/mock-reviewer-bad.ts`:
3176
+ ```typescript
3177
+ #!/usr/bin/env node
3178
+ // Always returns invalid JSON — used to test review retry and failure handling
3179
+ let input = '';
3180
+ process.stdin.on('data', (chunk: Buffer) => { input += chunk.toString(); });
3181
+ process.stdin.on('end', () => {
3182
+ process.stdout.write('I cannot review this because reasons');
3183
+ process.exit(0);
3184
+ });
3185
+ ```
3186
+
3187
+ 5. Create `test/fixtures/mock-reviewer-retry.ts`:
3188
+ ```typescript
3189
+ #!/usr/bin/env node
3190
+ // First call: returns invalid JSON. Second call (retry): returns valid JSON.
3191
+ // Uses a temp file as an invocation counter — since each retry is a NEW subprocess,
3192
+ // in-process state cannot be used. The counter file lives in the OS temp directory (os.tmpdir()).
3193
+ import * as fs from 'fs';
3194
+ import * as path from 'path';
3195
+ import * as os from 'os';
3196
+
3197
+ const counterFile = path.join(os.tmpdir(), 'nomos-mock-reviewer-retry-count.txt');
3198
+
3199
+ let input = '';
3200
+ process.stdin.on('data', (chunk: Buffer) => { input += chunk.toString(); });
3201
+ process.stdin.on('end', () => {
3202
+ // Read and increment call counter
3203
+ let count = 0;
3204
+ try { count = parseInt(fs.readFileSync(counterFile, 'utf8'), 10); } catch {}
3205
+ count++;
3206
+ fs.writeFileSync(counterFile, String(count));
3207
+
3208
+ if (count === 1) {
3209
+ // First invocation: return bad JSON
3210
+ process.stdout.write('I cannot review this because reasons');
3211
+ } else {
3212
+ // Second invocation (retry): clean up counter and return valid JSON
3213
+ try { fs.unlinkSync(counterFile); } catch {}
3214
+ const review = {
3215
+ score: 0.85,
3216
+ summary: "Plan looks good after retry. Implementation steps are clear.",
3217
+ issues: []
3218
+ };
3219
+ process.stdout.write(JSON.stringify(review));
3220
+ }
3221
+ process.exit(0);
3222
+ });
3223
+ ```
3224
+ **Note for test setup:** Tests using `mock-reviewer-retry.ts` must delete the counter file (`path.join(os.tmpdir(), 'nomos-mock-reviewer-retry-count.txt')`, referred to as `counterFile` below) in `beforeEach`/`afterEach` to prevent state from leaking between test runs: `try { fs.unlinkSync(counterFile); } catch {}`.
3225
+
3226
+ **Dependencies:** None.
3227
+ **Definition of Done:** All mock binaries are syntactically valid TypeScript. Mock planner reads `-p` arg, creates a file, commits via `simple-git`. Mock reviewer reads stdin, returns valid JSON. Retry mock uses temp file counter that tests clean up.
3228
+ **Verification Command:** `echo "test prompt" | npx tsx test/fixtures/mock-reviewer.ts`
3229
+
3230
+ ---
3231
+
3232
+ ### [x] Task 7.2 — E2E Test: Full Lifecycle *(Completed: 2026-04-03)*
3233
+
3234
+ **Component:** `test/e2e/lifecycle.test.ts`
3235
+ **Objective:** Test the complete `init → plan → review → apply` cycle with mock binaries. All tests use `mode: 'auto'` (BLK-1 fix — supervised mode requires a TTY, which is unavailable in vitest/CI).
3236
+
3237
+ **Technical Instruction:**
3238
+
3239
+ 1. Create `test/e2e/lifecycle.test.ts`:
3240
+
3241
+ **Setup:**
3242
+ ```typescript
3243
+ import * as fs from 'fs';
3244
+ import * as path from 'path';
3245
+ import * as os from 'os';
3246
+ import { describe, it, beforeEach, afterEach, expect, vi } from 'vitest';
3247
+ import simpleGit from 'simple-git';
3248
+ import { createOrchestrator } from '../../src/core/factory.js';
3249
+ import { NomosError } from '../../src/core/errors.js';
3250
+ import type { Orchestrator } from '../../src/core/orchestrator.js';
3251
+
3252
+ // E5 fix: derive fixture paths from import.meta.url (`__dirname` is not defined in ESM).
3253
+ // fixturesDir is in the REPO, not in tempDir. These are separate locations.
3254
+ const fixturesDir = path.resolve(new URL(import.meta.url).pathname, '../../fixtures');
3255
+
3256
+ vi.setConfig({ testTimeout: 60000 });
3257
+
3258
+ let tempDir: string;
3259
+ let orchestrator: Orchestrator;
3260
+
3261
+ beforeEach(async () => {
3262
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nomos-e2e-'));
3263
+ // E5 fix: NEVER use process.chdir() — it affects global state and breaks parallel tests.
3264
+ // Pass tempDir as projectRoot to createOrchestrator.
3265
+ const git = simpleGit(tempDir);
3266
+ await git.init();
3267
+ await git.addConfig('user.email', 'test@test.com');
3268
+ await git.addConfig('user.name', 'Test');
3269
+ fs.writeFileSync(path.join(tempDir, 'README.md'), '# Test Project');
3270
+ fs.mkdirSync(path.join(tempDir, 'src'));
3271
+ await git.add('.');
3272
+ await git.commit('initial commit');
3273
+ });
3274
+
3275
+ afterEach(() => {
3276
+ fs.rmSync(tempDir, { recursive: true, force: true });
3277
+ });
3278
+ ```
3279
+
3280
+ **Test 1: Full happy path:**
3281
+ ```typescript
3282
+ it('runs the full init → plan → review → apply lifecycle', async () => {
3283
+ // 1. Initialize project
3284
+ const { orchestrator: initOrch } = await createOrchestrator({
3285
+ skipConfig: true, projectRoot: tempDir
3286
+ });
3287
+ await initOrch.initProject();
3288
+ expect(fs.existsSync(path.join(tempDir, 'tasks-management/rules/global.md'))).toBe(true);
3289
+ expect(fs.existsSync(path.join(tempDir, '.nomos-config.json'))).toBe(true);
3290
+
3291
+ // 2. Modify config to use mock binaries
3292
+ const config = {
3293
+ binaries: {
3294
+ planner: {
3295
+ cmd: 'npx',
3296
+ args: ['tsx', path.join(fixturesDir, 'mock-planner.ts')],
3297
+ pty: true,
3298
+ total_timeout_ms: 30000,
3299
+ heartbeat_timeout_ms: 20000,
3300
+ max_output_bytes: 1048576,
3301
+ usage_pattern: 'Tokens used:\\s*(\\d+)',
3302
+ },
3303
+ reviewer: {
3304
+ cmd: 'npx',
3305
+ args: ['tsx', path.join(fixturesDir, 'mock-reviewer.ts')],
3306
+ pty: false,
3307
+ total_timeout_ms: 30000,
3308
+ heartbeat_timeout_ms: 20000,
3309
+ max_output_bytes: 524288,
3310
+ usage_pattern: null,
3311
+ },
3312
+ },
3313
+ };
3314
+ fs.writeFileSync(
3315
+ path.join(tempDir, '.nomos-config.json'),
3316
+ JSON.stringify(config, null, 2)
3317
+ );
3318
+
3319
+ // 3. Reload orchestrator with real config
3320
+ const { orchestrator } = await createOrchestrator({ projectRoot: tempDir });
3321
+
3322
+ // 4. Init task
3323
+ await orchestrator.initTask('my-task');
3324
+ expect(fs.existsSync(path.join(tempDir, 'tasks-management/tasks/my-task.md'))).toBe(true);
3325
+ expect(fs.existsSync(path.join(tempDir, 'tasks-management/state/my-task.json'))).toBe(true);
3326
+
3327
+ // 5. Plan
3328
+ await orchestrator.plan('my-task', 'auto');
3329
+ const stateAfterPlan = await orchestrator.status('my-task');
3330
+ expect(stateAfterPlan.meta.status).toBe('pending_review');
3331
+ expect(stateAfterPlan.history).toHaveLength(1);
3332
+ expect(stateAfterPlan.history[0].tokens_source).toBe('metered'); // RTV-4
3333
+
3334
+ // 6. Review
3335
+ await orchestrator.review('my-task', 'auto');
3336
+ const stateAfterReview = await orchestrator.status('my-task');
3337
+ expect(stateAfterReview.meta.status).toBe('approved');
3338
+ expect(stateAfterReview.meta.approval_reason).toBe('score_threshold'); // RTV-6
3339
+ expect(stateAfterReview.history[stateAfterReview.history.length - 1]?.review?.score).toBe(0.92);
3340
+
3341
+ // 7. Apply
3342
+ await orchestrator.apply('my-task');
3343
+ const finalState = await orchestrator.status('my-task');
3344
+ expect(finalState.meta.status).toBe('merged');
3345
+ });
3346
+ ```
3347
+
3348
+ **Test 2: Subprocess timeout:**
3349
+ - Config: `mock-planner-hang.ts` with `heartbeat_timeout_ms: 2000`.
3350
+ - Call `orchestrator.plan(...)`.
3351
+ - Verify state status is `stalled` or `failed`.
3352
+
3353
+ **Test 3: Budget guard:**
3354
+ - Set `max_tokens_per_task: 1000` in config.
3355
+ - Mock planner reports `Tokens used: 5000`.
3356
+ - First plan succeeds (records 5000 tokens).
3357
+ - Second plan call throws/transitions to `failed` with `reason: 'budget_exceeded'`.
3358
+
3359
+ **Test 4: Discard lifecycle:**
3360
+ - `initTask('task2')` → `discard('task2')`.
3361
+ - Verify: worktree removed, branch deleted, state is `discarded`.
3362
+
3363
+ **Test 5: Apply guard:**
3364
+ - `initTask('task3')`.
3365
+ - Call `orchestrator.apply('task3')` directly (skip plan/review).
3366
+ - Verify: throws `NomosError('invalid_transition')` with message about status being `init`.
3367
+
3368
+ **Test 6: Supervised mode TTY guard (BLK-1):**
3369
+ - Call `orchestrator.plan('task-tty', 'supervised')` in vitest (no TTY).
3370
+ - Verify: throws `NomosError` with code `no_tty`.
3371
+
3372
+ **Test 7: Review retry on bad JSON:**
3373
+ - Config: `mock-reviewer-bad.ts`.
3374
+ - After plan, call `orchestrator.review(...)`.
3375
+ - Verify: state is `failed` with reason `review_failed`.
3376
+ - Verify: `tasks-management/logs/{task}-v{n}-review-raw.log` exists.
3377
+
3378
+ **Test 8: Review retry succeeds on second attempt:**
3379
+ - Config: `mock-reviewer-retry.ts`.
3380
+ - Delete counter file in `beforeEach` cleanup: `try { fs.unlinkSync(counterFile); } catch {}`.
3381
+ - After plan, call `orchestrator.review(...)`.
3382
+ - Verify: state is `approved` or `refinement` (valid JSON was returned on retry).
3383
+
3384
+ **Test 9: `approval_reason` is `max_iterations_reached` when iterations exhausted (RTV-6):**
3385
+ - Set `convergence.max_iterations: 1` and `convergence.score_threshold: 0.99` in config.
3386
+ - Mock reviewer returns `score: 0.85` (below threshold).
3387
+ - Run one plan + one review iteration.
3388
+ - Verify: state is `approved` AND `meta.approval_reason === 'max_iterations_reached'`.
3389
+
3390
+ 2. Configure vitest timeout per file: `vi.setConfig({ testTimeout: 60000 })` at the top.
3391
+
3392
+ **Dependencies:** Task 7.1, Tasks 6.1–6.6
3393
+ **Definition of Done:** All 9 E2E tests pass. Full lifecycle verified. `tokens_source: 'metered'` verified when usage pattern matches. `approval_reason` set correctly. Review retry works with temp-file-based mock. Supervised TTY guard verified. Tests isolated via temp dirs — no `process.chdir()`.
3394
+ **Verification Command:** `npx vitest run test/e2e/lifecycle.test.ts --testTimeout 60000`
3395
+
3396
+ ---
3397
+
3398
+ ## Task Summary
3399
+
3400
+ | Milestone | Tasks | Description |
3401
+ |-----------|-------|-------------|
3402
+ | **1: Scaffolding** | 1.1–1.7 | Package config, types (with `tokens_source`, `approval_reason`, `input_tokens`/`output_tokens`, `PtySpawnOptions`, `StdioSpawnOptions`, `PromptOptions` all in `src/types/index.ts`), logger, config (full Zod schema with per-binary defaults + `getDefaultConfig()` + split cost rates), state (TaskState Zod schema, transition metadata, schema migration, no manual lock cleanup), sanitizer |
3403
+ | **2: Git** | 2.1–2.2 | Worktree manager (`git.raw()`, recovery, path traversal, merge pre-checks, git identity, **`grep()` with timeout**, **`baseCommit` reachability check**, `raw()` exposed), preflight binary resolver |
3404
+ | **3: Adapters** | 3.1–3.3 | PtyAdapter (pure Tee Stream — passthrough to terminal + log capture, TTY guard, arg safety, **process group killing**, **stdin leak fix**), StdioAdapter (non-PTY, platform-aware kill, backpressure), frontmatter parser |
3405
+ | **4: Prompt & Review** | 4.1–4.3 | Prompt assembler (inline templates, no overview.md references, `contextFiles`), review parser (**mode parameter, no hardcoding**), budget tracker (**split input/output rates, basename normalization**) |
3406
+ | **5: Orchestrator** | 5.1 | **Decomposed**: PlanFileManager (file I/O), WorktreeCoordinator (git lifecycle), Orchestrator (pure state machine). Full relative path import scan, `context_files` secret scanning, `run()` loop counter fix, `runtimeMaxVersion` threading, separate input/output token tracking, all prior fixes integrated |
3407
+ | **6: Commands** | 6.1–6.6 | Factory (`NomosConfigSchema.parse({})`, not ghost function), init command (targeted error for missing project, **6-step --force recovery**), exit codes (3 for merge conflict), SIGINT handler, build |
3408
+ | **7: E2E Testing** | 7.1–7.2 | Mock binaries (temp-file retry counter, no shell git), 9 lifecycle tests (tokens_source, approval_reason, retry success verified) |
3409
+
3410
+ **Total: 24 atomic tasks.**
3411
+
3412
+ ---
3413
+
3414
+ ## Dependency Graph
3415
+
3416
+ ```
3417
+ 1.1 (package + vitest config + .gitignore)
3418
+ └── 1.2 (dirs + cli stub)
3419
+ └── 1.3 (types: ALL interfaces including PtySpawnOptions, StdioSpawnOptions, PromptOptions,
3420
+ │ tokens_source, approval_reason, input_tokens, output_tokens)
3421
+ ├── 1.4 (logger + ansi.ts)
3422
+ ├── 1.5 (config: full Zod schema + getDefaultConfig() + split cost rates)
3423
+ ├── 1.6 (state: TaskState Zod schema + transition metadata + input/output tokens)
3424
+ ├── 1.7 (sanitize + scanFileForSecrets)
3425
+ ├── 2.1 (git: git.raw() worktrees, baseCommit diff + reachability check,
3426
+ │ grep() with timeout, raw() exposed, explicit commit file list)
3427
+ ├── 2.2 (preflight)
3428
+ ├── 3.1 (PtyAdapter: process group killing, stdin leak fix, TTY guard)
3429
+ ├── 3.2 (StdioAdapter: non-PTY for reviewer)
3430
+ ├── 3.3 (frontmatter)
3431
+ ├── 4.1 (prompt: inline templates, contextFiles)
3432
+ ├── 4.2 (review parser: mode parameter, no hardcoding)
3433
+ └── 4.3 (budget: split input/output rates, basename normalization)
3434
+
3435
+ ├── 5.1a (plan-file-manager: extracted file I/O)
3436
+ ├── 5.1b (worktree-coordinator: extracted git lifecycle)
3437
+ └── 5.1 (orchestrator: pure state machine, full relative path import scan,
3438
+ │ context_files secret scan, runtimeMaxVersion threading)
3439
+
3440
+ └── 6.1 (factory + init command: 6-step --force recovery)
3441
+ ├── 6.2 (plan)
3442
+ ├── 6.3 (review)
3443
+ ├── 6.4 (run: approval_reason exit code)
3444
+ ├── 6.5 (status/apply/discard/list: hasMeteredTokens, recovery hints)
3445
+ └── 6.6 (exit codes + SIGINT handler + build)
3446
+
3447
+ ├── 7.1 (mock binaries: temp-file retry counter)
3448
+ └── 7.2 (E2E: 9 tests, approval_reason, tokens_source verified)
3449
+ ```
3450
+
3451
+ ---
3452
+
3453
+ ## Gaps Closed — Original Revision
3454
+
3455
+ | # | Gap | How it was fixed |
3456
+ |---|-----|-----------------|
3457
+ | 1 | No non-PTY adapter for reviewer | Task 3.2 (StdioAdapter) added |
3458
+ | 2 | Prompt delivery contradiction (stdin vs `-p` flag) | Execution Rule #9 |
3459
+ | 3 | Supervised mode one-directional | Task 3.1 rewritten with bidirectional piping |
3460
+ | 4 | ESM `require()` in verification command | Task 1.1 uses `npx tsx -e "import { createRequire }..."` |
3461
+ | 5 | `getDiff` uses `HEAD~1` | Task 2.1 uses `git diff ${baseCommit}..HEAD` |
3462
+ | 6 | `simple-git` has no `.worktree()` | Execution Rule #7, `git.raw(['worktree', ...])` |
3463
+ | 7 | File location ambiguity | Execution Rule #8 |
3464
+ | 8 | State transition lacks metadata | Task 1.3 adds `StateTransitionOptions` |
3465
+ | 9 | Missing `vitest.config.ts` | Task 1.1 creates it |
3466
+ | 10 | `arc run` supervised — no readline | Task 5.1 `promptUser()` helper |
3467
+ | 11 | esbuild externals incomplete | Task 6.6 lists all externals with rationale |
3468
+ | 12 | Missing `.gitignore` | Task 1.1 |
3469
+ | 13 | E2E test invocation method unclear | Task 7.2: import orchestrator directly |
3470
+ | 14 | `stalled`/`merge_conflict` dead-end states | Task 1.6 valid transitions map |
3471
+ | 15 | `crypto` import for SHA-256 | Task 4.1 explicit import |
3472
+
3473
+ ---
3474
+
3475
+ ## Gaps Closed — Red-Team Review (Pass 1)
3476
+
3477
+ | ID | Category | Gap | How it was fixed |
3478
+ |----|----------|-----|-----------------|
3479
+ | C1 | CRITICAL | Shell injection | Execution Rule #10, `shell: false` explicit |
3480
+ | C2 | CRITICAL | `SIGKILL` stale lock | Task 1.6 `stale: 30000` |
3481
+ | C3 | CRITICAL | Env regex false positives | Task 1.7 `sanitizeEnv` name-only matching |
3482
+ | C4 | CRITICAL | TOCTOU race on worktree | Task 5.1 state-first ordering with rollback |
3483
+ | C5 | CRITICAL | `setRawMode(true)` crash on non-TTY | Task 3.1 `no_tty` guard |
3484
+ | W1 | WARNING | Token loop no backoff/budget | Task 5.1 2s backoff + mid-loop budget check |
3485
+ | W2 | WARNING | Missing worktree crashes PTY | Execution Rule #12, `recoverWorktree()` |
3486
+ | W3 | WARNING | Merge into wrong branch | Execution Rule #13, `mergeToMain` branch check |
3487
+ | W4 | WARNING | SIGTERM not supported on Windows | Task 3.2 platform-aware kill |
3488
+ | W5 | WARNING | No schema versioning | Execution Rule #11, `schema_version` field, migration map |
3489
+ | W6 | WARNING | Path traversal in commit | Task 2.1 path validation in `commitToShadowBranch` |
3490
+ | W7 | WARNING | Merge with dirty working tree | Task 2.1 `status.isClean()` check |
3491
+ | W8 | WARNING | readline conflicts with PTY stdin | Task 5.1 `promptUser` defensive stdin restoration |
3492
+ | E3 | ENHANCEMENT | Output buffer overflow silent | Task 3.1 warning + `bytesDropped` |
3493
+ | E4 | ENHANCEMENT | Token estimation labeled as exact | Task 6.5 `~estimated` label |
3494
+ | E5 | ENHANCEMENT | E2E tests use `process.chdir()` | Task 7.2 `projectRoot` explicit |
3495
+ | E6 | ENHANCEMENT | No `arc list` | Task 6.5 includes `arc list` |
3496
+ | BLK-1 | P0 | E2E tests fail in CI (TTY) | Task 7.2 uses Mock Binary (writes file + exits 0) — no TTY simulation needed |
3497
+ | BLK-2 | P0 | Auto mode non-functional | Auto mode deferred to Phase 1b (see GAP-5). Phase 1a is supervised-only. |
3498
+ | BLK-3 | P0 | context_files never reach AI | Task 4.1 `[CONTEXT FILES]` section, Task 5.1 passes them |
3499
+ | H-1 | P1 | No SIGINT handler | Task 6.6 SIGINT handler |
3500
+ | H-2 | P1 | 30s heartbeat kills LLMs | Task 1.5 `heartbeat_timeout_ms: 120000` |
3501
+ | H-3 | P1 | Rollback swallows unlink failure | Task 5.1 logs rollback errors |
3502
+ | H-4 | P1 | `locked_by` dead code | Task 1.3 removes it |
3503
+ | H-5 | P1 | State files committed to main | Task 1.1 `tasks-management/state/` gitignored |
3504
+ | H-6 | P1 | Empty diff not guarded | Task 5.1 empty diff → `failed` with `no_changes` |
3505
+ | M-1 | P2 | `merge_conflict` no recovery | Task 5.1 `apply()` accepts `merge_conflict` |
3506
+ | M-2 | P2 | `arc init` no git repo check | Task 5.1 `initProject()` calls `isGitRepo()` |
3507
+ | M-3 | P2 | Git commit fails without identity | Task 2.1 identity check before commit |
3508
+ | M-4 | P2 | Budget check uses stale state | Task 5.1 re-reads state after `review()` |
3509
+ | M-5 | P2 | StdioAdapter no stdin backpressure | Task 3.2 drain event + error handler |
3510
+ | M-6 | P2 | `cleanupTempFiles` never called | Task 6.1 factory calls it on startup |
3511
+ | M-7 | P2 | `arc review` throws for refinement | Task 5.1 returns state; CLI checks status |
3512
+ | L-1 | P3 | No `engines` field | Task 1.1 `"engines": { "node": ">=20.0.0" }` |
3513
+ | L-2 | P3 | Token estimation misleading | Task 4.3 JSDoc + `~estimated` label |
3514
+ | L-3 | P3 | Dual stale lock mechanisms | Task 1.6 trusts `proper-lockfile`, no manual cleanup |
3515
+ | L-4 | P3 | Lock files not gitignored | Task 1.1 `tasks-management/state/*.lock` |
3516
+ | L-5 | P3 | Mock planner uses shell git | Task 7.1 uses `simple-git` API |
3517
+
3518
+ ---
3519
+
3520
+ ## Gaps Closed — Red-Team Review (Pass 2 — Final)
3521
+
3522
+ | ID | Severity | Gap Found | How it was fixed |
3523
+ |----|----------|-----------|-----------------|
3524
+ | RTV-1 | CRITICAL | Response map chunk boundary split — `regex.test(data)` on individual PTY chunks silently fails for multi-char patterns split across events | Execution Rule #15 + Task 3.1 rolling 512-byte `matchBuffer`. Phase 1b.1 no longer covers chunk boundary (fixed here). |
3525
+ | RTV-2 | CRITICAL | `commitToShadowBranch` explicit contradiction — Task 1.1 says no state JSON, Task 5.1 says "state + plans + logs" | Execution Rule #14 added as authoritative single source. Task 5.1 orchestrator now builds an explicit file list (diff + optionally logs). "state +" removed everywhere. |
3526
+ | RTV-3 | CRITICAL | `getDefaultConfig()` ghost function — called in factory.ts, never defined anywhere | Task 1.5 exports `getDefaultConfig(): NomosConfig { return NomosConfigSchema.parse({}); }`. Task 6.1 factory uses `NomosConfigSchema.parse({})` directly with explicit comment. |
3527
+ | RTV-4 | CRITICAL | `hasMeteredTokens` undeclared variable in `arc status` | `tokens_source: 'metered' \| 'estimated'` added to `HistoryEntry` in Task 1.3. Task 1.6 Zod schema includes it. Task 4.3 `parseTokensFromOutput` implies `'metered'`, `estimateTokens` implies `'estimated'`. Task 6.5 derives `hasMeteredTokens` from `history.some(h => h.tokens_source === 'metered')`. |
3528
+ | RTV-5 | CRITICAL | SIGINT race — `process.exit(130)` in handler races with `await stateManager.transition()` in catch block, preventing state write | Task 6.6 SIGINT handler uses `process.exitCode = 130` (not `process.exit()`). `main()` checks `process.exitCode === 130` after catch. State transition completes before process exits. Full flow documented in code comments. |
3529
+ | RTV-6 | CRITICAL | Convergence approval indistinguishable — forced-by-max-iterations vs genuine score threshold both set status `approved`, `arc run` can't emit exit code 2 | `approval_reason?: 'score_threshold' \| 'max_iterations_reached'` added to `TaskState.meta` (Task 1.3). Set in `review()` (Task 5.1). Task 6.4 `arc run` checks `meta.approval_reason` for exit code determination. `NomosError('convergence_failed')` removed from codebase entirely. |
3530
+ | RTV-7 | CRITICAL | `TaskState` Zod schema never defined — Task 1.6 `read()` validates against it but it doesn't exist | Complete `TaskStateSchema` Zod definition provided inline in Task 1.6, covering all fields including `tokens_source`, `approval_reason`, ISO datetime validation. |
3531
+ | RTV-8 | CONTEXTUAL | `arc init <task>` without a project gives generic config_not_found error | Task 6.1 `registerInitCommand` catches `NomosError('config_not_found')` and prints a specific 3-line recovery message. |
3532
+ | RTV-9 | CONTEXTUAL | `PtySpawnOptions` and `StdioSpawnOptions` assigned to no canonical file | Both interfaces moved to `src/types/index.ts` in Task 1.3. Tasks 3.1 and 3.2 import from `'../types/index.js'` (`.js` extension required for Node16 ESM). |
3533
+ | RTV-10 | CONTEXTUAL | `PromptOptions` and `ReviewPromptOptions` not in types file | Added to `src/types/index.ts` update in Task 4.1. |
3534
+ | RTV-11 | CONTEXTUAL | Review prompt template references overview.md Section 17 (not self-contained) | Complete review prompt template inlined in Task 4.1 — JSON schema, scoring guide, all section headers, Zero-Tolerance clause. No external document references remain. |
3535
+ | RTV-12 | CONTEXTUAL | `rules/global.md` template content from "Section 14" (not self-contained) | Full `rules/global.md` content inlined in Task 5.1 `initProject()`. |
3536
+ | RTV-13 | CONTEXTUAL | mock-reviewer-retry.ts stateless — can't detect it's a second invocation | Task 7.1 uses a temp file counter. Counter file path documented. Tests instructed to delete it in `beforeEach`/`afterEach`. |
3537
+ | RTV-14 | CONTEXTUAL | `arc run` exit code 2 — three conflicting instructions (Task 6.4 says throw, M-7 says don't throw, Task 6.6 says removed) | Task 6.4 rewritten to use `meta.approval_reason` check. No `NomosError('convergence_failed')` anywhere. Consistent with M-7 pattern. |
3538
+ | RTV-15 | CONTEXTUAL | `arc apply` exit code 1 for merge conflict conflicts with recoverable state model | Task 6.5 `arc apply` uses exit code 3 for merge conflict. Task 6.6 exit code matrix documents all 5 codes (0, 1, 2, 3, 130). |
3539
+ | RTV-16 | CONTEXTUAL | Stale lock test requires 30s wait | Task 1.6 test note specifies using `fs.utimesSync` to backdate lock file mtime by 60s — no sleep required. |
3540
+ | RTV-17 | CONTEXTUAL | `recoverWorktree` git identity ambiguity | Task 2.1 documents that recovery itself needs no identity check — `commitToShadowBranch` (called later) handles it. No double-checking. |
3541
+ | RTV-18 | CONTEXTUAL | E2E test `<absolute-path>` placeholder unresolved | Task 7.2 setup defines `fixturesDir = path.resolve(new URL(import.meta.url).pathname, '../../fixtures')`. All mock binary paths use this. Full config object shown in test. |
3542
+ | GAP-1 | HIGH | Reviewer context gap — reviewer receives only `.diff` with no visibility into files that reference changed code, triggering hallucination risk on side-effect assessment | Context Injection step added to `review()` in Task 5.1. **Import-graph scan** (not symbol grep): `extractChangedFilePaths()` parses `--- a/` lines from diff; for each changed file, ripgrep finds files with `import`/`require` statements targeting that exact path; zero false positives from generic symbol names. Up to `config.review.max_context_files` snippets injected as `[AFFECTED FILES]` section. Fail-safe: scan errors log a warning and proceed. `AffectedFileSnippet` type added to Task 1.3. `max_context_files` config field added to Task 1.5. |
3543
+ | GAP-4 | HIGH | Zombie worktree recovery — SIGKILL or crash mid-init leaves git metadata referencing non-existent worktrees; subsequent `arc init <task>` fails with `branch already exists` | `arc init <task> --force` added to Task 6.1: 5-step deterministic cleanup: (1) `git worktree prune`, (2) force-delete shadow branch, (3) `fs.rmSync` worktree path, (4) delete stale `state.json`, (5) re-initialize cleanly. Safe to run after any crash scenario. Superseded in Pass 3 by the 6-step sequence (see RT2-6.1a). |
3544
+ | GAP-2 | MEDIUM | PTY adapter not formally typed — swapping PTY for SDK later would require touching Orchestrator logic | `PlannerTransport` and `ReviewerTransport` interfaces added to Task 1.3. `PtyAdapter` implements `PlannerTransport`, `StdioAdapter` implements `ReviewerTransport`. A future `SDKAdapter` implements the same interface with zero Orchestrator changes. |
3545
+ | GAP-3 | LOW | Token cost of context injection unbound — injecting many large files could silently spike reviewer token spend | `review.max_context_files` defaults to 5 (≈250 lines / ≈1500 tokens overhead). Comment in Zod schema documents the trade-off. Tunable per project in `.nomos-config.json`. |
3546
+ | GAP-5 | CRITICAL | Auto mode PTY Expect Logic — rolling-buffer pattern matching is heuristic and breaks on any claude-code prompt text change | Auto mode deferred to Phase 1b. PtyAdapter in Phase 1a is a pure Tee Stream (passthrough + log capture). ResponseMapEntry type, response_map config fields, and auto_flags removed entirely. E2E tests use Mock Binary (writes file + exits 0) instead of TTY simulation. |
3547
+
3548
+ ---
3549
+
3550
+ ## Gaps Closed — Red-Team Audit (Pass 3 — Hardening)
3551
+
3552
+ | ID | Priority | Issue | How it was fixed |
3553
+ |----|----------|-------|-----------------|
3554
+ | RT2-6.1a | P0 | `--force` recovery sequence leaves git metadata intact when directory still exists — `git worktree prune` is a no-op, branch deletion fails | Task 6.1 rewritten: 6-step sequence — unlock → rm directory → prune metadata → branch -D → rm state → re-init. Prune now runs AFTER directory removal so it actually cleans metadata. |
3555
+ | RT2-6.1b | P0 | `simpleGit` not imported in `init.ts` — compile error hidden by `--help` verification path | Task 6.1 uses `GitAdapter` (already available from factory) instead of raw `simpleGit`. `raw()` method exposed on GitAdapter. |
3556
+ | RT2-4.2 | P0 | `validateReviewSchema` hardcodes `mode: 'auto'` — corrupts history from first supervised run | Task 4.2 `validateReviewSchema` and `parseReviewOutput` accept actual `ExecutionMode` as parameter. Task 5.1 `review()` passes through the real mode. |
3557
+ | RT2-5.1a | P0 | Planner commit contract undefined — does Claude Code commit, or does nomos-arc? | Documented in Execution Rule #14 and Task 5.1: the planner binary commits its own work within the worktree. nomos-arc only commits plan artifacts (diffs, logs) to the shadow branch. |
3558
+ | RT2-5.1b | P1 | Import-graph scan explodes on `index`, `utils`, `types` basenames — every barrel export matches | Task 5.1 context injection rewritten: uses full relative path (minus extension) in grep pattern. For `index.ts` files, also matches directory import pattern. `gitAdapter.grep()` has built-in 5s timeout. |
3559
+ | RT2-4.3a | P1 | Token estimation ignores input/output price differential — cost estimates off by ~3x | Task 1.3 `HistoryEntry` now has `input_tokens`/`output_tokens` fields. Task 4.3 `estimateTokens` and `parseTokensFromOutput` return `TokenEstimate`. `calculateCost` applies distinct input/output rates. Config `cost_per_1k_tokens` supports `{ input, output }` rate objects. |
3560
+ | RT2-4.3b | P1 | `calculateCost` key mismatch for absolute paths or `npx` commands — cost tracking silently stops | Task 4.3 `calculateCost` normalizes `binaryCmd` to `path.basename()` before map lookup. |
3561
+ | RT2-5.1c | P1 | `run()` loop counter `i` diverges from `state.current_version` — `--iterations` flag has unpredictable behavior | Task 5.1 `run()` reads starting version, computes `runtimeMaxVersion = startVersion + limit`, passes it to `review()`. `review()` uses `runtimeMaxVersion` instead of `config.max_iterations` when provided. |
3562
+ | RT2-2.1a | P1 | `baseCommit` SHA not validated for reachability — `git rebase` orphans it, `getDiff` throws `fatal: unknown revision` | Task 2.1 `getDiff()` runs `git cat-file -t` pre-check. Throws `NomosError('base_commit_unreachable')` with clear recovery instructions. |
3563
+ | RT2-2.1b | P1 | `grep()` ghost method — called in Task 5.1 but never defined on GitAdapter | Task 2.1 explicitly defines `grep(pattern, cwd, timeoutMs)` with 5s default timeout, returns relative paths, catches timeout and no-match gracefully. |
3564
+ | RT2-5.1d | P1 | `context_files` not scanned for secrets — `.env.local` flows through external AI model | Task 5.1 `plan()` runs `scanFileForSecrets()` on each context file before validation. Throws `NomosError('secrets_detected')` if matches found. |
3565
+ | RT2-3.1a | P2 | PTY process kill only terminates direct child — grandchildren (bash, editors) survive | Task 3.1 uses process group killing: `-proc.pid` with `process.kill()` sends SIGTERM to entire process group. Falls back to direct `proc.kill()`. |
3566
+ | RT2-3.1b | P2 | PTY stdin listener leaks on rejection — erratic terminal for remainder of process lifetime | Task 3.1 extracts `cleanupStdin()` helper called on ALL exit paths. Promise body wrapped in try/catch with explicit kill + cleanup + reject on unexpected throw. |
3567
+ | RT2-5.1e | P2 | Orchestrator is a God Object importing 15+ modules — untestable as a unit | Task 5.1 decomposed: `PlanFileManager` (file I/O), `WorktreeCoordinator` (git lifecycle), Orchestrator (pure state machine). Each component independently testable. |
3568
+
3569
+ ---
3570
+
3571
+ ## Phase 1b Tasks (Queued — Do Not Start Until Phase 1a Ships)
3572
+
3573
+ These tasks are documented here for reference but are **not part of the current execution plan**:
3574
+
3575
+ - **1b.1:** Auto-mode PTY response_map enhancements — ~~chunk boundary matching~~ (fixed in Phase 1a, RTV-1). Phase 1b.1 now covers only: response_map hot-reload (config file watch), per-pattern verbose logging, and response_map test coverage for real-world interactive CLI patterns.
3576
+ - **1b.2:** Dry-run mode completion — the `if (mode === 'dry-run')` branch in `orchestrator.plan()` is stubbed in Phase 1a (prints prompt + exits). Phase 1b.2 adds: diff preview, config audit output, sandbox estimation of token cost without calling AI.
3577
+ - **1b.3:** Zero-Tolerance reviewer clause injection already in the review prompt template (Task 4.1). Phase 1b.3: Add configurable per-task severity override.
3578
+ - **1b.4:** Rules hash enforcement (block stale plans in auto mode if rules changed since last plan).
3579
+ - **1b.5:** `arc log <task>` command — tail and format the log files.
3580
+ - **1b.6:** Session rule creation via `--session-rule` flag on `arc plan`.
3581
+ - **1b.7:** Enhanced dual-stream logging (raw + stripped written in parallel via streams).