@nathapp/nax 0.36.0 → 0.36.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/nax.js +543 -154
  2. package/package.json +1 -1
  3. package/src/agents/claude-decompose.ts +3 -3
  4. package/src/cli/constitution.ts +0 -92
  5. package/src/constitution/generator.ts +0 -33
  6. package/src/constitution/index.ts +2 -1
  7. package/src/constitution/loader.ts +1 -13
  8. package/src/context/builder.ts +1 -2
  9. package/src/context/elements.ts +1 -12
  10. package/src/context/index.ts +2 -1
  11. package/src/context/test-scanner.ts +1 -1
  12. package/src/execution/dry-run.ts +1 -1
  13. package/src/execution/escalation/escalation.ts +5 -3
  14. package/src/execution/escalation/tier-escalation.ts +41 -4
  15. package/src/execution/iteration-runner.ts +5 -0
  16. package/src/execution/parallel-executor.ts +293 -9
  17. package/src/execution/parallel.ts +40 -21
  18. package/src/execution/pipeline-result-handler.ts +3 -2
  19. package/src/execution/runner.ts +13 -3
  20. package/src/interaction/chain.ts +17 -1
  21. package/src/metrics/tracker.ts +8 -4
  22. package/src/metrics/types.ts +2 -0
  23. package/src/pipeline/event-bus.ts +1 -1
  24. package/src/pipeline/stages/completion.ts +1 -1
  25. package/src/pipeline/stages/execution.ts +23 -1
  26. package/src/pipeline/stages/verify.ts +8 -1
  27. package/src/pipeline/subscribers/reporters.ts +3 -3
  28. package/src/pipeline/types.ts +4 -0
  29. package/src/plugins/types.ts +1 -1
  30. package/src/prd/types.ts +2 -0
  31. package/src/prompts/builder.ts +13 -6
  32. package/src/prompts/sections/conventions.ts +5 -7
  33. package/src/prompts/sections/isolation.ts +7 -7
  34. package/src/prompts/sections/role-task.ts +64 -64
  35. package/src/review/orchestrator.ts +11 -1
  36. package/src/routing/strategies/llm-prompts.ts +1 -1
  37. package/src/routing/strategies/llm.ts +3 -3
  38. package/src/tdd/index.ts +2 -3
  39. package/src/tdd/isolation.ts +0 -13
  40. package/src/tdd/orchestrator.ts +5 -0
  41. package/src/tdd/prompts.ts +1 -231
  42. package/src/tdd/session-runner.ts +2 -0
  43. package/src/tdd/types.ts +2 -1
  44. package/src/tdd/verdict.ts +20 -2
  45. package/src/verification/crash-detector.ts +34 -0
  46. package/src/verification/orchestrator-types.ts +8 -1
  47. package/src/verification/parser.ts +0 -10
  48. package/src/verification/rectification-loop.ts +2 -51
  49. package/src/worktree/dispatcher.ts +0 -59
@@ -2,213 +2,6 @@ import type { RectificationConfig } from "../config";
2
2
  import type { TestFailure } from "../execution/test-output-parser";
3
3
  import type { UserStory } from "../prd";
4
4
  import { createRectificationPrompt } from "../verification/rectification";
5
- import type { TddSessionRole } from "./types";
6
-
7
- /**
8
- * Prompt to build the TDD agent's role definition
9
- */
10
- export function buildTddRolePrompt(
11
- role: TddSessionRole,
12
- story: UserStory,
13
- config?: { projectRoot: string },
14
- currentBranch?: string,
15
- ): string {
16
- const common = `You are a TDD agent (role: ${role}) working on the story: "${story.title}".${config ? `\nProject root: ${config.projectRoot}` : ""}${currentBranch ? `\nCurrent branch: ${currentBranch}` : ""}
17
-
18
- STORY DESCRIPTION:
19
- ${story.description}
20
-
21
- ACCEPTANCE CRITERIA:
22
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
23
-
24
- ---
25
- `;
26
-
27
- if (role === "test-writer") {
28
- return `${common}
29
- YOUR TASK: Write ONLY test files for this story.
30
- - Use the existing test framework (Bun test).
31
- - Tests must fail because the feature is not implemented yet.
32
- - Do NOT modify any existing source files.
33
- - Do NOT implement the feature.
34
- - Name tests consistently (e.g., test/*.test.ts).
35
-
36
- IMPORTANT: Only write new test files or update existing ones. Do NOT touch src/*.`;
37
- }
38
-
39
- if (role === "implementer") {
40
- return `${common}
41
- YOUR TASK: Implement the feature to make the tests pass.
42
- - Read the tests in the current branch.
43
- - Modify source files in src/ as needed.
44
- - Do NOT modify test files unless there is a bug in the tests.
45
- - Run tests frequently to check progress.
46
- - Goal: All tests pass.`;
47
- }
48
-
49
- // Verifier
50
- return `${common}
51
- YOUR TASK: Verify the implementation and tests.
52
- - Ensure all tests pass.
53
- - Check that the implementation meets all acceptance criteria.
54
- - Fix any minor bugs or missing edge cases.
55
- - Do NOT change the behavior unless it violates the criteria.
56
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.
57
- - Goal: High-quality implementation and passing tests.`;
58
- }
59
-
60
- /**
61
- * Prompt to build the verifier's verification instructions (Session 3)
62
- */
63
- export function buildVerifierPrompt(story: UserStory): string {
64
- return `# Session 3: Verify — "${story.title}"
65
-
66
- STORY:
67
- ${story.description}
68
-
69
- ACCEPTANCE CRITERIA:
70
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
71
-
72
- ---
73
-
74
- ## TASKS
75
-
76
- 1. Run all tests and verify they pass.
77
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.
78
- 2. Review the implementation for quality and correctness.
79
- 3. Check that the implementation meets all acceptance criteria.
80
- 4. Check if test files were modified by the implementer (make sure they are legitimate fixes, NOT just loosening assertions to mask bugs).
81
- 5. If any issues exist, fix them minimally — do NOT refactor.
82
-
83
- ---
84
-
85
- ## IMPORTANT — Write Verdict File
86
-
87
- After completing your verification, you **MUST** write a verdict file at the **project root**:
88
-
89
- **File:** \`.nax-verifier-verdict.json\`
90
-
91
- Set \`approved: true\` when ALL of these conditions are met:
92
- - All tests pass
93
- - Implementation is clean and follows conventions
94
- - All acceptance criteria met
95
- - Any test modifications by implementer are legitimate fixes
96
-
97
- Set \`approved: false\` when ANY of these conditions are true:
98
- - Tests are failing and you cannot fix them
99
- - The implementer loosened test assertions to mask bugs
100
- - Critical acceptance criteria are not met
101
- - Code quality is poor (security issues, severe bugs, etc.)
102
-
103
- **Full JSON schema example** (fill in all fields with real values):
104
-
105
- \`\`\`json
106
- {
107
- "version": 1,
108
- "approved": true,
109
- "tests": {
110
- "allPassing": true,
111
- "passCount": 42,
112
- "failCount": 0
113
- },
114
- "testModifications": {
115
- "detected": false,
116
- "files": [],
117
- "legitimate": true,
118
- "reasoning": "No test files were modified by the implementer"
119
- },
120
- "acceptanceCriteria": {
121
- "allMet": true,
122
- "criteria": [
123
- { "criterion": "Example criterion", "met": true }
124
- ]
125
- },
126
- "quality": {
127
- "rating": "good",
128
- "issues": []
129
- },
130
- "fixes": [],
131
- "reasoning": "All tests pass, implementation is clean, all acceptance criteria are met."
132
- }
133
- \`\`\`
134
-
135
- **Field notes:**
136
- - \`quality.rating\` must be one of: \`"good"\`, \`"acceptable"\`, \`"poor"\`
137
- - \`testModifications.files\` — list any test files the implementer changed
138
- - \`fixes\` — list any fixes you applied yourself during this verification session
139
- - \`reasoning\` — brief summary of your overall assessment
140
-
141
- When done, commit any fixes with message: "fix: verify and adjust ${story.title}"`;
142
- }
143
-
144
- /**
145
- * Prompt for a test-writer session (single-session lite variant)
146
- */
147
- export function buildTestWriterPrompt(story: UserStory, contextMarkdown?: string): string {
148
- const contextSection = contextMarkdown ? `\n\n---\n\n${contextMarkdown}` : "";
149
- return `# Test Writer — "${story.title}"
150
-
151
- Your role: Write failing tests ONLY. Do NOT implement any source code.
152
-
153
- STORY:
154
- ${story.description}
155
-
156
- ACCEPTANCE CRITERIA:
157
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
158
-
159
- RULES:
160
- - Only create or modify files in the test/ directory.
161
- - Tests must fail (feature not implemented yet).
162
- - Use Bun test (describe/test/expect).
163
- - Cover all acceptance criteria.
164
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.${contextSection}`;
165
- }
166
-
167
- /**
168
- * Prompt for a test-writer lite session (no isolation enforcement)
169
- */
170
- export function buildTestWriterLitePrompt(story: UserStory, contextMarkdown?: string): string {
171
- const contextSection = contextMarkdown ? `\n\n---\n\n${contextMarkdown}` : "";
172
- return `# Test Writer (Lite) — "${story.title}"
173
-
174
- Your role: Write failing tests. You MAY read source files and MAY import from source files to ensure correct types/interfaces. You may create minimal stubs in src/ if needed to make imports work, but do NOT implement real logic.
175
-
176
- STORY:
177
- ${story.description}
178
-
179
- ACCEPTANCE CRITERIA:
180
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
181
-
182
- RULES:
183
- - Primarily CREATE test files in the test/ directory.
184
- - Stub-only src/ files are allowed (empty exports, no logic).
185
- - Tests must fail for the right reasons (feature not implemented).
186
- - Use Bun test (describe/test/expect).
187
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.${contextSection}`;
188
- }
189
-
190
- /**
191
- * Prompt for an implementer session
192
- */
193
- export function buildImplementerPrompt(story: UserStory, contextMarkdown?: string): string {
194
- const contextSection = contextMarkdown ? `\n\n---\n\n${contextMarkdown}` : "";
195
- return `# Implementer — "${story.title}"
196
-
197
- Your role: Make all failing tests pass.
198
-
199
- STORY:
200
- ${story.description}
201
-
202
- ACCEPTANCE CRITERIA:
203
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
204
-
205
- RULES:
206
- - Implement source code in src/ to make tests pass.
207
- - Do NOT modify test files.
208
- - Run tests frequently to track progress.
209
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.
210
- - Goal: all tests green.${contextSection}`;
211
- }
212
5
 
213
6
  /**
214
7
  * Build implementer rectification prompt (v0.11)
@@ -219,7 +12,7 @@ RULES:
219
12
  export function buildImplementerRectificationPrompt(
220
13
  failures: TestFailure[],
221
14
  story: UserStory,
222
- contextMarkdown?: string,
15
+ _contextMarkdown?: string,
223
16
  config?: RectificationConfig,
224
17
  ): string {
225
18
  // Reuse the existing rectification prompt builder from R2
@@ -227,29 +20,6 @@ export function buildImplementerRectificationPrompt(
227
20
  return createRectificationPrompt(failures, story, config);
228
21
  }
229
22
 
230
- /**
231
- * Prompt for an implementer lite session (combined test + implement)
232
- */
233
- export function buildImplementerLitePrompt(story: UserStory, contextMarkdown?: string): string {
234
- const contextSection = contextMarkdown ? `\n\n---\n\n${contextMarkdown}` : "";
235
- return `# Implementer (Lite) — "${story.title}"
236
-
237
- Your role: Write tests AND implement the feature in a single session.
238
-
239
- STORY:
240
- ${story.description}
241
-
242
- ACCEPTANCE CRITERIA:
243
- ${story.acceptanceCriteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}
244
-
245
- RULES:
246
- - Write tests first (test/ directory), then implement (src/ directory).
247
- - All tests must pass by the end.
248
- - Use Bun test (describe/test/expect).
249
- - When running tests, run ONLY test files related to your changes (e.g. \`bun test ./test/specific.test.ts\`). NEVER run \`bun test\` without a file filter — full suite output will flood your context window and cause failures.
250
- - Goal: all tests green, all criteria met.${contextSection}`;
251
- }
252
-
253
23
  /**
254
24
  * Build rectification prompt for retry after test failures
255
25
  *
@@ -83,6 +83,7 @@ export async function runTddSession(
83
83
  contextMarkdown?: string,
84
84
  lite = false,
85
85
  skipIsolation = false,
86
+ constitution?: string,
86
87
  ): Promise<TddSessionResult> {
87
88
  const startTime = Date.now();
88
89
 
@@ -101,6 +102,7 @@ export async function runTddSession(
101
102
  .withLoader(workdir, config)
102
103
  .story(story)
103
104
  .context(contextMarkdown)
105
+ .constitution(constitution)
104
106
  .build();
105
107
  break;
106
108
  case "verifier":
package/src/tdd/types.ts CHANGED
@@ -12,7 +12,8 @@ export type FailureCategory =
12
12
  /** Verifier explicitly rejected the implementation */
13
13
  | "verifier-rejected"
14
14
  /** Greenfield project with no test files — TDD not applicable (BUG-010) */
15
- | "greenfield-no-tests";
15
+ | "greenfield-no-tests"
16
+ | "runtime-crash";
16
17
 
17
18
  /** Isolation verification result */
18
19
  export interface IsolationCheck {
@@ -126,7 +126,12 @@ export function coerceVerdict(obj: Record<string, unknown>): VerifierVerdict | n
126
126
  try {
127
127
  // Determine approval status
128
128
  const verdictStr = String(obj.verdict ?? "").toUpperCase();
129
- const approved = verdictStr === "PASS" || verdictStr === "APPROVED" || obj.approved === true;
129
+ const approved =
130
+ verdictStr === "PASS" ||
131
+ verdictStr === "APPROVED" ||
132
+ verdictStr.startsWith("VERIFIED") ||
133
+ verdictStr.includes("ALL ACCEPTANCE CRITERIA MET") ||
134
+ obj.approved === true;
130
135
 
131
136
  // Parse test results from verification_summary or top-level
132
137
  let passCount = 0;
@@ -252,13 +257,26 @@ export async function readVerdict(workdir: string): Promise<VerifierVerdict | nu
252
257
  return null;
253
258
  }
254
259
 
260
+ // Read as text first so we can log raw content on parse failure
261
+ let rawText: string;
262
+ try {
263
+ rawText = await file.text();
264
+ } catch (readErr) {
265
+ logger.warn("tdd", "Failed to read verifier verdict file", {
266
+ path: verdictPath,
267
+ error: String(readErr),
268
+ });
269
+ return null;
270
+ }
271
+
255
272
  let parsed: unknown;
256
273
  try {
257
- parsed = await file.json();
274
+ parsed = JSON.parse(rawText);
258
275
  } catch (parseErr) {
259
276
  logger.warn("tdd", "Verifier verdict file is not valid JSON — ignoring", {
260
277
  path: verdictPath,
261
278
  error: String(parseErr),
279
+ rawContent: rawText.slice(0, 1000),
262
280
  });
263
281
  return null;
264
282
  }
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Runtime Crash Detector — BUG-070
3
+ *
4
+ * Detects Bun runtime crashes in test output so they can be classified as
5
+ * RUNTIME_CRASH rather than TEST_FAILURE, preventing spurious tier escalation.
6
+ *
7
+ * Implementation: substring match against CRASH_PATTERNS — returns true
8
+ * when any known crash marker appears anywhere in the runner output.
9
+ */
10
+
11
+ /**
12
+ * Known patterns emitted by the Bun runtime before any test results
13
+ * when a crash occurs (segfault, panic, etc.).
14
+ */
15
+ export const CRASH_PATTERNS = [
16
+ "panic(main thread)",
17
+ "Segmentation fault",
18
+ "Bun has crashed",
19
+ "oh no: Bun has crashed",
20
+ ] as const;
21
+
22
+ /**
23
+ * Detect whether the given test runner output contains a Bun runtime crash.
24
+ *
25
+ * Returns true if any known crash pattern is found in the output.
26
+ * These patterns are emitted by Bun itself before any test result lines.
27
+ *
28
+ * @param output - Raw stdout/stderr from the test runner
29
+ */
30
+ export function detectRuntimeCrash(output: string | undefined | null): boolean {
31
+ // Guard: absent/empty output cannot contain a crash marker
32
+ if (!output) return false;
33
+ return CRASH_PATTERNS.some((pattern) => output.includes(pattern));
34
+ }
@@ -50,7 +50,14 @@ export interface StructuredTestFailure {
50
50
  // Result
51
51
  // ---------------------------------------------------------------------------
52
52
 
53
- export type VerifyStatus = "PASS" | "TEST_FAILURE" | "TIMEOUT" | "BUILD_ERROR" | "SKIPPED" | "ASSET_CHECK_FAILED";
53
+ export type VerifyStatus =
54
+ | "PASS"
55
+ | "TEST_FAILURE"
56
+ | "TIMEOUT"
57
+ | "BUILD_ERROR"
58
+ | "SKIPPED"
59
+ | "ASSET_CHECK_FAILED"
60
+ | "RUNTIME_CRASH";
54
61
 
55
62
  export interface VerifyResult {
56
63
  success: boolean;
@@ -216,15 +216,5 @@ export function parseTestOutput(output: string, exitCode: number): TestOutputAna
216
216
  return result;
217
217
  }
218
218
 
219
- /**
220
- * Calculate early escalation threshold for environmental failures.
221
- *
222
- * Environmental failures should escalate faster: after ceil(tier.attempts / 2)
223
- * instead of the full tier budget.
224
- */
225
- export function getEnvironmentalEscalationThreshold(tierAttempts: number, divisor = 2): number {
226
- return Math.ceil(tierAttempts / divisor);
227
- }
228
-
229
219
  // Re-export types for consumers that import from this module
230
220
  export type { TestFailure, TestSummary } from "./types";
@@ -10,12 +10,10 @@
10
10
  import { getAgent } from "../agents";
11
11
  import type { NaxConfig } from "../config";
12
12
  import { resolveModel } from "../config";
13
- import { appendProgress } from "../execution/progress";
14
13
  import { parseBunTestOutput } from "../execution/test-output-parser";
15
14
  import { getSafeLogger } from "../logger";
16
- import type { StoryMetrics } from "../metrics";
17
- import type { PRD, StructuredFailure, UserStory } from "../prd";
18
- import { getExpectedFiles, savePRD } from "../prd";
15
+ import type { UserStory } from "../prd";
16
+ import { getExpectedFiles } from "../prd";
19
17
  import { type RectificationState, createRectificationPrompt, shouldRetryRectification } from "./rectification";
20
18
  import { fullSuite as runVerification } from "./runners";
21
19
 
@@ -147,50 +145,3 @@ export async function runRectificationLoop(opts: RectificationLoopOptions): Prom
147
145
 
148
146
  return false;
149
147
  }
150
-
151
- export interface RevertStoriesOptions {
152
- prd: PRD;
153
- prdPath: string;
154
- story: UserStory;
155
- storiesToExecute: UserStory[];
156
- allStoryMetrics: StoryMetrics[];
157
- featureDir?: string;
158
- diagnosticContext: string;
159
- countsTowardEscalation: boolean;
160
- priorFailure?: StructuredFailure;
161
- }
162
-
163
- /** Revert stories to pending on verification failure and save PRD. */
164
- export async function revertStoriesOnFailure(opts: RevertStoriesOptions): Promise<PRD> {
165
- const storyIds = new Set(opts.storiesToExecute.map((s) => s.id));
166
-
167
- for (let i = opts.allStoryMetrics.length - 1; i >= 0; i--) {
168
- if (storyIds.has(opts.allStoryMetrics[i].storyId)) opts.allStoryMetrics.splice(i, 1);
169
- }
170
-
171
- opts.prd.userStories = opts.prd.userStories.map((s) =>
172
- storyIds.has(s.id)
173
- ? {
174
- ...s,
175
- priorErrors: [...(s.priorErrors || []), opts.diagnosticContext],
176
- priorFailures: opts.priorFailure ? [...(s.priorFailures || []), opts.priorFailure] : s.priorFailures,
177
- status: "pending" as const,
178
- passes: false,
179
- }
180
- : s,
181
- );
182
-
183
- if (opts.countsTowardEscalation) {
184
- opts.prd.userStories = opts.prd.userStories.map((s) =>
185
- s.id === opts.story.id ? { ...s, attempts: s.attempts + 1 } : s,
186
- );
187
- }
188
-
189
- await savePRD(opts.prd, opts.prdPath);
190
-
191
- if (opts.featureDir) {
192
- await appendProgress(opts.featureDir, opts.story.id, "failed", `${opts.story.title} -- ${opts.diagnosticContext}`);
193
- }
194
-
195
- return opts.prd;
196
- }
@@ -1,65 +1,6 @@
1
- import type { UserStory } from "../prd/types";
2
- import type { WorktreeManager } from "./manager";
3
-
4
1
  export interface DispatchResult {
5
2
  storyId: string;
6
3
  success: boolean;
7
4
  worktreePath: string;
8
5
  error?: string;
9
6
  }
10
-
11
- export class ParallelDispatcher {
12
- constructor(
13
- private worktreeManager: WorktreeManager,
14
- private runPipeline: (args: { workdir: string; story: UserStory }) => Promise<boolean>,
15
- ) {}
16
-
17
- async dispatch(projectRoot: string, stories: UserStory[], maxConcurrency: number): Promise<DispatchResult[]> {
18
- const results: DispatchResult[] = [];
19
- const independentBatches = this.getBatches(stories);
20
-
21
- for (const batch of independentBatches) {
22
- const batchPromises = batch.map(async (story) => {
23
- const worktreePath = `${projectRoot}/.nax-wt/${story.id}`;
24
- try {
25
- await this.worktreeManager.create(projectRoot, story.id);
26
- const success = await this.runPipeline({ workdir: worktreePath, story });
27
- return { storyId: story.id, success, worktreePath };
28
- } catch (err) {
29
- return {
30
- storyId: story.id,
31
- success: false,
32
- worktreePath,
33
- error: err instanceof Error ? err.message : String(err),
34
- };
35
- }
36
- });
37
-
38
- const batchResults = await pLimit(maxConcurrency, batchPromises);
39
- results.push(...batchResults);
40
- }
41
-
42
- return results;
43
- }
44
-
45
- private getBatches(stories: UserStory[]): UserStory[][] {
46
- // TODO: Implement dependency-aware batching
47
- return [stories];
48
- }
49
- }
50
-
51
- // Helper for concurrency limiting (Simplified p-limit)
52
- async function pLimit<T>(concurrency: number, promises: Promise<T>[]): Promise<T[]> {
53
- const results: T[] = [];
54
- const executing: Promise<void>[] = [];
55
- for (const p of promises) {
56
- const e = p.then((r) => {
57
- results.push(r);
58
- executing.splice(executing.indexOf(e), 1);
59
- });
60
- executing.push(e);
61
- if (executing.length >= concurrency) await Promise.race(executing);
62
- }
63
- await Promise.all(executing);
64
- return results;
65
- }