@nathapp/nax 0.49.6 → 0.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +59 -0
  3. package/dist/nax.js +415 -106
  4. package/package.json +2 -1
  5. package/src/acceptance/generator.ts +48 -7
  6. package/src/cli/config-descriptions.ts +6 -0
  7. package/src/cli/plan.ts +46 -13
  8. package/src/config/defaults.ts +3 -0
  9. package/src/config/runtime-types.ts +21 -0
  10. package/src/config/schemas.ts +23 -0
  11. package/src/config/test-strategy.ts +17 -16
  12. package/src/config/types.ts +1 -0
  13. package/src/context/builder.ts +25 -0
  14. package/src/context/parent-context.ts +39 -0
  15. package/src/decompose/apply.ts +20 -14
  16. package/src/execution/escalation/tier-escalation.ts +1 -1
  17. package/src/execution/escalation/tier-outcome.ts +2 -2
  18. package/src/execution/iteration-runner.ts +3 -0
  19. package/src/execution/lifecycle/run-completion.ts +4 -0
  20. package/src/execution/lifecycle/run-initialization.ts +47 -13
  21. package/src/execution/lifecycle/run-regression.ts +5 -1
  22. package/src/execution/parallel-coordinator.ts +3 -3
  23. package/src/execution/pipeline-result-handler.ts +30 -1
  24. package/src/execution/runner-completion.ts +1 -0
  25. package/src/execution/sequential-executor.ts +19 -0
  26. package/src/hooks/types.ts +2 -0
  27. package/src/pipeline/event-bus.ts +9 -1
  28. package/src/pipeline/runner.ts +13 -1
  29. package/src/pipeline/stages/autofix.ts +10 -2
  30. package/src/pipeline/stages/prompt.ts +4 -2
  31. package/src/pipeline/stages/rectify.ts +1 -0
  32. package/src/pipeline/stages/routing.ts +10 -2
  33. package/src/pipeline/subscribers/events-writer.ts +14 -0
  34. package/src/pipeline/subscribers/hooks.ts +14 -0
  35. package/src/pipeline/types.ts +2 -0
  36. package/src/prd/index.ts +24 -1
  37. package/src/prd/schema.ts +8 -0
  38. package/src/prd/types.ts +11 -0
  39. package/src/precheck/checks-git.ts +3 -0
  40. package/src/prompts/builder.ts +19 -0
  41. package/src/prompts/sections/hermetic.ts +41 -0
  42. package/src/prompts/sections/index.ts +1 -0
  43. package/src/routing/router.ts +1 -1
  44. package/src/tdd/session-runner.ts +3 -0
  45. package/src/utils/git.ts +23 -0
  46. package/src/verification/rectification-loop.ts +11 -3
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.49.6",
3
+ "version": "0.50.1",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,6 +12,7 @@
12
12
  "build": "bun build bin/nax.ts --outdir dist --target bun --define \"GIT_COMMIT=\\\"$(git rev-parse --short HEAD)\\\"\"",
13
13
  "typecheck": "bun x tsc --noEmit",
14
14
  "lint": "bun x biome check src/ bin/",
15
+ "release": "bun scripts/release.ts",
15
16
  "test": "CI=1 NAX_SKIP_PRECHECK=1 bun test test/ --timeout=60000",
16
17
  "test:watch": "CI=1 bun test --watch",
17
18
  "test:unit": "CI=1 NAX_SKIP_PRECHECK=1 bun test ./test/unit/ --timeout=60000",
@@ -114,6 +114,18 @@ IMPORTANT: Output raw TypeScript code only. Do NOT use markdown code fences (\`\
114
114
  });
115
115
  const testCode = extractTestCode(rawOutput);
116
116
 
117
+ if (!testCode) {
118
+ logger.warn("acceptance", "LLM returned non-code output for acceptance tests — falling back to skeleton", {
119
+ outputPreview: rawOutput.slice(0, 200),
120
+ });
121
+ const skeletonCriteria: AcceptanceCriterion[] = refinedCriteria.map((c, i) => ({
122
+ id: `AC-${i + 1}`,
123
+ text: c.refined,
124
+ lineNumber: i + 1,
125
+ }));
126
+ return { testCode: generateSkeletonTests(options.featureName, skeletonCriteria), criteria: skeletonCriteria };
127
+ }
128
+
117
129
  const refinedJsonContent = JSON.stringify(
118
130
  refinedCriteria.map((c, i) => ({
119
131
  acId: `AC-${i + 1}`,
@@ -306,6 +318,16 @@ export async function generateAcceptanceTests(
306
318
  // Extract test code from output
307
319
  const testCode = extractTestCode(output);
308
320
 
321
+ if (!testCode) {
322
+ logger.warn("acceptance", "LLM returned non-code output for acceptance tests — falling back to skeleton", {
323
+ outputPreview: output.slice(0, 200),
324
+ });
325
+ return {
326
+ testCode: generateSkeletonTests(options.featureName, criteria),
327
+ criteria,
328
+ };
329
+ }
330
+
309
331
  return {
310
332
  testCode,
311
333
  criteria,
@@ -328,21 +350,40 @@ export async function generateAcceptanceTests(
328
350
  * @param output - Agent stdout
329
351
  * @returns Extracted test code
330
352
  */
331
- function extractTestCode(output: string): string {
353
+ function extractTestCode(output: string): string | null {
354
+ let code: string | undefined;
355
+
332
356
  // Try to extract from markdown code fence
333
357
  const fenceMatch = output.match(/```(?:typescript|ts)?\s*([\s\S]*?)\s*```/);
334
358
  if (fenceMatch) {
335
- return fenceMatch[1].trim();
359
+ code = fenceMatch[1].trim();
336
360
  }
337
361
 
338
362
  // If no fence, try to find import statement and take everything from there
339
- const importMatch = output.match(/import\s+{[\s\S]+/);
340
- if (importMatch) {
341
- return importMatch[0].trim();
363
+ if (!code) {
364
+ const importMatch = output.match(/import\s+{[\s\S]+/);
365
+ if (importMatch) {
366
+ code = importMatch[0].trim();
367
+ }
368
+ }
369
+
370
+ // If no fence and no import, try to find describe() block
371
+ if (!code) {
372
+ const describeMatch = output.match(/describe\s*\([\s\S]+/);
373
+ if (describeMatch) {
374
+ code = describeMatch[0].trim();
375
+ }
376
+ }
377
+
378
+ if (!code) return null;
379
+
380
+ // Validate: extracted code must contain at least one test-like keyword
381
+ const hasTestKeyword = /\b(?:describe|test|it|expect)\s*\(/.test(code);
382
+ if (!hasTestKeyword) {
383
+ return null;
342
384
  }
343
385
 
344
- // Fall back to full output
345
- return output.trim();
386
+ return code;
346
387
  }
347
388
 
348
389
  /**
@@ -209,4 +209,10 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = {
209
209
  "agent.protocol": "Protocol for agent communication: 'acp' | 'cli' (default: 'acp')",
210
210
  "agent.maxInteractionTurns":
211
211
  "Max turns in multi-turn interaction loop when interactionBridge is active (default: 10)",
212
+ // Testing
213
+ testing: "Hermetic test enforcement configuration (ENH-010)",
214
+ "testing.hermetic":
215
+ "Inject hermetic test requirement into prompts — never call real external services in tests (default: true)",
216
+ "testing.externalBoundaries": "Project-specific CLI tools/clients to mock (e.g. ['claude', 'acpx', 'redis'])",
217
+ "testing.mockGuidance": "Project-specific mocking guidance injected verbatim into the prompt",
212
218
  };
package/src/cli/plan.ts CHANGED
@@ -395,15 +395,18 @@ function buildCodebaseContext(scan: CodebaseScan): string {
395
395
  /**
396
396
  * Build the full planning prompt sent to the LLM.
397
397
  *
398
+ * Structured as 3 explicit steps (ENH-006):
399
+ * Step 1: Understand the spec
400
+ * Step 2: Analyze codebase (existing) or architecture decisions (greenfield)
401
+ * Step 3: Generate implementation stories from analysis
402
+ *
398
403
  * Includes:
399
- * - Spec content
400
- * - Codebase context
401
- * - Output schema (exact prd.json JSON structure)
402
- * - Complexity classification guide
403
- * - Test strategy guide
404
+ * - Spec content + codebase context
405
+ * - Output schema with analysis + contextFiles fields
406
+ * - Complexity + test strategy guides
404
407
  * - MW-007: Monorepo hint and package list when packages are detected
405
408
  */
406
- function buildPlanningPrompt(
409
+ export function buildPlanningPrompt(
407
410
  specContent: string,
408
411
  codebaseContext: string,
409
412
  outputFilePath?: string,
@@ -423,14 +426,48 @@ function buildPlanningPrompt(
423
426
 
424
427
  return `You are a senior software architect generating a product requirements document (PRD) as JSON.
425
428
 
429
+ ## Step 1: Understand the Spec
430
+
431
+ Read the spec carefully. Identify the goal, scope, constraints, and what "done" looks like.
432
+
426
433
  ## Spec
427
434
 
428
435
  ${specContent}
429
436
 
437
+ ## Step 2: Analyze
438
+
439
+ Examine the codebase context below.
440
+
441
+ If the codebase has existing code (refactoring, enhancement, bug fix):
442
+ - Which existing files need modification?
443
+ - Which files import from or depend on them?
444
+ - What tests cover the affected code?
445
+ - What are the risks (breaking changes, backward compatibility)?
446
+ - What is the migration path?
447
+
448
+ If this is a greenfield project (empty or minimal codebase):
449
+ - What is the target architecture?
450
+ - What are the key technical decisions (framework, patterns, conventions)?
451
+ - What should be built first (dependency order)?
452
+
453
+ Record ALL findings in the "analysis" field of the output JSON. This analysis is provided to every implementation agent as context — be thorough.
454
+
430
455
  ## Codebase Context
431
456
 
432
457
  ${codebaseContext}${monorepoHint}
433
458
 
459
+ ## Step 3: Generate Implementation Stories
460
+
461
+ Based on your Step 2 analysis, create stories that produce CODE CHANGES.
462
+
463
+ ${GROUPING_RULES}
464
+
465
+ For each story, set "contextFiles" to the key source files the agent should read before implementing (max 5 per story). Use your Step 2 analysis to identify the most relevant files. Leave empty for greenfield stories with no existing files to reference.
466
+
467
+ ${COMPLEXITY_GUIDE}
468
+
469
+ ${TEST_STRATEGY_GUIDE}
470
+
434
471
  ## Output Schema
435
472
 
436
473
  Generate a JSON object with this exact structure (no markdown, no explanation — JSON only):
@@ -438,6 +475,7 @@ Generate a JSON object with this exact structure (no markdown, no explanation
438
475
  {
439
476
  "project": "string — project name",
440
477
  "feature": "string — feature name",
478
+ "analysis": "string — your Step 2 analysis: key files, impact areas, risks, architecture decisions, migration notes. All implementation agents will receive this.",
441
479
  "branchName": "string — git branch (e.g. feat/my-feature)",
442
480
  "createdAt": "ISO 8601 timestamp",
443
481
  "updatedAt": "ISO 8601 timestamp",
@@ -447,13 +485,14 @@ Generate a JSON object with this exact structure (no markdown, no explanation
447
485
  "title": "string — concise story title",
448
486
  "description": "string — detailed description of the story",
449
487
  "acceptanceCriteria": ["string — each AC line"],
488
+ "contextFiles": ["string — key source files the agent should read (max 5, relative paths)"],
450
489
  "tags": ["string — routing tags, e.g. feature, security, api"],
451
490
  "dependencies": ["string — story IDs this story depends on"],${workdirField}
452
491
  "status": "pending",
453
492
  "passes": false,
454
493
  "routing": {
455
494
  "complexity": "simple | medium | complex | expert",
456
- "testStrategy": "test-after | tdd-simple | three-session-tdd | three-session-tdd-lite",
495
+ "testStrategy": "tdd-simple | three-session-tdd-lite | three-session-tdd | test-after",
457
496
  "reasoning": "string — brief classification rationale"
458
497
  },
459
498
  "escalations": [],
@@ -462,12 +501,6 @@ Generate a JSON object with this exact structure (no markdown, no explanation
462
501
  ]
463
502
  }
464
503
 
465
- ${COMPLEXITY_GUIDE}
466
-
467
- ${TEST_STRATEGY_GUIDE}
468
-
469
- ${GROUPING_RULES}
470
-
471
504
  ${
472
505
  outputFilePath
473
506
  ? `Write the PRD JSON directly to this file path: ${outputFilePath}\nDo NOT output the JSON to the conversation. Write the file, then reply with a brief confirmation.`
@@ -211,4 +211,7 @@ export const DEFAULT_CONFIG: NaxConfig = {
211
211
  maxRetries: 2,
212
212
  model: "balanced",
213
213
  },
214
+ testing: {
215
+ hermetic: true,
216
+ },
214
217
  };
@@ -430,6 +430,25 @@ export interface DecomposeConfig {
430
430
  model: ModelTier;
431
431
  }
432
432
 
433
+ /** Hermetic test enforcement configuration (ENH-010) */
434
+ export interface TestingConfig {
435
+ /**
436
+ * When true (default), nax injects a hermetic test requirement into all code-writing prompts.
437
+ * Instructs the AI to mock all I/O boundaries and never call real external services in tests.
438
+ */
439
+ hermetic: boolean;
440
+ /**
441
+ * Project-specific external boundaries to mock (e.g. ["claude", "acpx", "redis", "grpc"]).
442
+ * Injected into the hermetic requirement section so the AI knows which project tools to mock.
443
+ */
444
+ externalBoundaries?: string[];
445
+ /**
446
+ * Project-specific mocking guidance injected verbatim into the prompt.
447
+ * E.g. "Use injectable deps for CLI spawning, ioredis-mock for Redis"
448
+ */
449
+ mockGuidance?: string;
450
+ }
451
+
433
452
  /** Full nax configuration */
434
453
  export interface NaxConfig {
435
454
  /** Schema version */
@@ -476,6 +495,8 @@ export interface NaxConfig {
476
495
  decompose?: DecomposeConfig;
477
496
  /** Agent protocol settings (ACP-003) */
478
497
  agent?: AgentConfig;
498
+ /** Hermetic test enforcement settings (ENH-010) */
499
+ testing?: TestingConfig;
479
500
  /** Generate settings */
480
501
  generate?: GenerateConfig;
481
502
  }
@@ -362,6 +362,28 @@ export const PromptsConfigSchema = z.object({
362
362
  .optional(),
363
363
  });
364
364
 
365
+ const TestingConfigSchema = z.object({
366
+ /**
367
+ * When true (default), nax injects a hermetic test requirement into all code-writing prompts.
368
+ * Instructs the AI to mock all I/O boundaries (HTTP, CLI spawning, databases, etc.)
369
+ * and never invoke real external processes or services during test execution.
370
+ * Set to false only if your project requires real integration calls in tests.
371
+ */
372
+ hermetic: z.boolean().default(true),
373
+ /**
374
+ * Project-specific external boundaries the AI should watch for and mock.
375
+ * E.g. ["claude", "acpx", "redis", "grpc"] — any CLI tools, clients, or services
376
+ * the project uses that should never be called from tests.
377
+ */
378
+ externalBoundaries: z.array(z.string()).optional(),
379
+ /**
380
+ * Project-specific guidance on how to mock external dependencies.
381
+ * Injected verbatim into the hermetic requirement section of the prompt.
382
+ * E.g. "Use injectable deps for CLI spawning, ioredis-mock for Redis"
383
+ */
384
+ mockGuidance: z.string().optional(),
385
+ });
386
+
365
387
  const DecomposeConfigSchema = z.object({
366
388
  trigger: z.enum(["auto", "confirm", "disabled"]).default("auto"),
367
389
  maxAcceptanceCriteria: z.number().int().min(1).default(6),
@@ -395,6 +417,7 @@ export const NaxConfigSchema = z
395
417
  precheck: PrecheckConfigSchema.optional(),
396
418
  prompts: PromptsConfigSchema.optional(),
397
419
  decompose: DecomposeConfigSchema.optional(),
420
+ testing: TestingConfigSchema.optional(),
398
421
  })
399
422
  .refine((data) => data.version === 1, {
400
423
  message: "Invalid version: expected 1",
@@ -40,31 +40,32 @@ export function resolveTestStrategy(raw: string | undefined): TestStrategy {
40
40
 
41
41
  export const COMPLEXITY_GUIDE = `## Complexity Classification Guide
42
42
 
43
- - simple: ≤50 LOC, single-file change, purely additive, no new dependencies → test-after
44
- - medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → tdd-simple
43
+ - simple: ≤50 LOC, single-file change, purely additive, no new dependencies → tdd-simple
44
+ - medium: 50–200 LOC, 2–5 files, standard patterns, clear requirements → three-session-tdd-lite
45
45
  - complex: 200–500 LOC, multiple modules, new abstractions or integrations → three-session-tdd
46
- - expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd-lite
46
+ - expert: 500+ LOC, architectural changes, cross-cutting concerns, high risk → three-session-tdd
47
47
 
48
48
  ### Security Override
49
49
 
50
50
  Security-critical functions (authentication, cryptography, tokens, sessions, credentials,
51
- password hashing, access control) must be classified at MINIMUM "medium" complexity
52
- regardless of LOC count. These require at minimum "tdd-simple" test strategy.`;
51
+ password hashing, access control) must use three-session-tdd regardless of complexity.`;
53
52
 
54
53
  export const TEST_STRATEGY_GUIDE = `## Test Strategy Guide
55
54
 
56
- - test-after: Simple changes with well-understood behavior. Write tests after implementation in a single session.
57
- - tdd-simple: Medium complexity. Write failing tests first, then implement to pass them all in one session.
58
- - three-session-tdd: Complex stories. 3 sessions: (1) test-writer writes failing tests — no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
59
- - three-session-tdd-lite: Expert/high-risk stories. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may add missing coverage or replace stubs, (3) verifier confirms correctness.`;
55
+ - tdd-simple: Simple stories (≤50 LOC). Write failing tests first, then implement to pass them — all in one session.
56
+ - three-session-tdd-lite: Medium stories, or complex stories involving UI/CLI/integration. 3 sessions: (1) test-writer writes failing tests and may create minimal src/ stubs for imports, (2) implementer makes tests pass and may replace stubs, (3) verifier confirms correctness.
57
+ - three-session-tdd: Complex/expert stories or security-critical code. 3 sessions with strict isolation: (1) test-writer writes failing tests — no src/ changes allowed, (2) implementer makes them pass without modifying test files, (3) verifier confirms correctness.
58
+ - test-after: Only when explicitly configured (tddStrategy: "off"). Write tests after implementation. Not auto-assigned.`;
60
59
 
61
- export const GROUPING_RULES = `## Grouping Rules
60
+ export const GROUPING_RULES = `## Story Rules
62
61
 
62
+ - Every story must produce code changes verifiable by tests or review.
63
+ - NEVER create stories for analysis, planning, documentation, or migration plans.
64
+ Your analysis belongs in the "analysis" field, not in a story.
65
+ - NEVER create stories whose primary purpose is writing tests, achieving coverage
66
+ targets, or running validation/regression suites. Each story's testStrategy
67
+ handles test creation as part of implementation. Testing is a built-in pipeline
68
+ stage, not a user story. No exceptions.
63
69
  - Combine small, related tasks into a single "simple" or "medium" story.
64
- - Do NOT create separate stories for every single file or function unless complex.
65
- - Do NOT create standalone stories purely for test coverage or testing.
66
- Each story's testStrategy already handles testing (tdd-simple writes tests first,
67
- three-session-tdd uses separate test-writer session, test-after writes tests after).
68
- Only create a dedicated test story for unique integration/E2E test logic that spans
69
- multiple stories and cannot be covered by individual story test strategies.
70
+ Do NOT create separate stories for every single file or function unless complex.
70
71
  - Aim for coherent units of value. Maximum recommended stories: 10-15 per feature.`;
@@ -51,6 +51,7 @@ export type {
51
51
  StorySizeGateConfig,
52
52
  TddConfig,
53
53
  TestCoverageConfig,
54
+ TestingConfig,
54
55
  AdaptiveRoutingConfig,
55
56
  AgentConfig,
56
57
  } from "./runtime-types";
@@ -21,6 +21,7 @@ import {
21
21
  createStoryContext,
22
22
  createTestCoverageContext,
23
23
  } from "./elements";
24
+ import { getParentOutputFiles } from "./parent-context";
24
25
  import { generateTestCoverageSummary } from "./test-scanner";
25
26
  import type { BuiltContext, ContextBudget, ContextElement, StoryContext } from "./types";
26
27
 
@@ -115,6 +116,18 @@ export async function buildContext(storyContext: StoryContext, budget: ContextBu
115
116
  // Add current story (high priority)
116
117
  elements.push(createStoryContext(currentStory, 80));
117
118
 
119
+ // ENH-006: Inject planning analysis from prd.analysis (priority 88 — above story, below errors)
120
+ if (prd.analysis) {
121
+ const analysisContent = `The following analysis was performed during the planning phase. Use it to understand the codebase context before implementing:\n\n${prd.analysis}`;
122
+ elements.push({
123
+ type: "planning-analysis",
124
+ label: "Planning Analysis",
125
+ content: analysisContent,
126
+ priority: 88,
127
+ tokens: estimateTokens(analysisContent),
128
+ });
129
+ }
130
+
118
131
  // Add dependency stories (medium priority)
119
132
  addDependencyElements(elements, currentStory, prd);
120
133
 
@@ -199,6 +212,18 @@ async function addFileElements(
199
212
 
200
213
  let contextFiles = getContextFiles(story);
201
214
 
215
+ // ENH-005: Inject parent output files for context chaining
216
+ const parentFiles = getParentOutputFiles(story, storyContext.prd?.userStories ?? []);
217
+ if (parentFiles.length > 0) {
218
+ const logger = getLogger();
219
+ logger.info("context", "Injecting parent output files for context chaining", {
220
+ storyId: story.id,
221
+ parentFiles,
222
+ });
223
+ // Merge with existing contextFiles (don't replace — parent files are supplementary)
224
+ contextFiles = [...new Set([...contextFiles, ...parentFiles])];
225
+ }
226
+
202
227
  // Auto-detect contextFiles if empty and enabled (BUG-006)
203
228
  if (
204
229
  contextFiles.length === 0 &&
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Parent output file resolution for context chaining (ENH-005).
3
+ *
4
+ * When a story has dependencies, its parent stories' outputFiles are injected
5
+ * as additional contextFiles so agents have targeted context from prior work.
6
+ */
7
+
8
+ import type { UserStory } from "../prd/types";
9
+
10
+ const MAX_PARENT_FILES = 10;
11
+
12
+ const NOISE_PATTERNS = [
13
+ /\.test\.(ts|js|tsx|jsx)$/,
14
+ /\.spec\.(ts|js|tsx|jsx)$/,
15
+ /package-lock\.json$/,
16
+ /bun\.lockb?$/,
17
+ /\.gitignore$/,
18
+ /^nax\//,
19
+ ];
20
+
21
+ /**
22
+ * Get output files from direct parent stories (dependencies[]).
23
+ * Only direct parents — no transitive resolution (keep simple, extend later).
24
+ * Returns deduped list, filtered of noise, capped at MAX_PARENT_FILES.
25
+ */
26
+ export function getParentOutputFiles(story: UserStory, allStories: UserStory[]): string[] {
27
+ if (!story.dependencies || story.dependencies.length === 0) return [];
28
+
29
+ const parentFiles: string[] = [];
30
+ for (const depId of story.dependencies) {
31
+ const parent = allStories.find((s) => s.id === depId);
32
+ if (parent?.outputFiles) {
33
+ parentFiles.push(...parent.outputFiles);
34
+ }
35
+ }
36
+
37
+ const unique = [...new Set(parentFiles)];
38
+ return unique.filter((f) => !NOISE_PATTERNS.some((p) => p.test(f))).slice(0, MAX_PARENT_FILES);
39
+ }
@@ -21,23 +21,29 @@ export function applyDecomposition(prd: PRD, result: DecomposeResult): void {
21
21
  const originalIndex = prd.userStories.findIndex((s) => s.id === parentStoryId);
22
22
  if (originalIndex === -1) return;
23
23
 
24
+ const parentStory = prd.userStories[originalIndex];
25
+
24
26
  // Mark original story as decomposed
25
- prd.userStories[originalIndex].status = "decomposed";
27
+ parentStory.status = "decomposed";
26
28
 
27
29
  // Convert substories to UserStory format with parentStoryId attached
28
- const newStories = subStories.map((sub): UserStory & { parentStoryId: string } => ({
29
- id: sub.id,
30
- title: sub.title,
31
- description: sub.description,
32
- acceptanceCriteria: sub.acceptanceCriteria,
33
- tags: sub.tags,
34
- dependencies: sub.dependencies,
35
- status: "pending",
36
- passes: false,
37
- escalations: [],
38
- attempts: 0,
39
- parentStoryId: sub.parentStoryId,
40
- }));
30
+ // ENH-008: Inherit workdir from parent so sub-stories run in the same package scope
31
+ const newStories = subStories.map(
32
+ (sub): UserStory => ({
33
+ id: sub.id,
34
+ title: sub.title,
35
+ description: sub.description,
36
+ acceptanceCriteria: sub.acceptanceCriteria,
37
+ tags: sub.tags,
38
+ dependencies: sub.dependencies,
39
+ status: "pending",
40
+ passes: false,
41
+ escalations: [],
42
+ attempts: 0,
43
+ parentStoryId: sub.parentStoryId,
44
+ ...(parentStory.workdir !== undefined && { workdir: parentStory.workdir }),
45
+ }),
46
+ );
41
47
 
42
48
  // Insert substories immediately after the original story
43
49
  prd.userStories.splice(originalIndex + 1, 0, ...newStories);
@@ -150,7 +150,7 @@ export async function preIterationTierCheck(
150
150
  });
151
151
 
152
152
  const failedPrd = { ...prd };
153
- markStoryFailed(failedPrd, story.id);
153
+ markStoryFailed(failedPrd, story.id, undefined, undefined);
154
154
  await savePRD(failedPrd, prdPath);
155
155
 
156
156
  if (featureDir) {
@@ -56,7 +56,7 @@ export async function handleNoTierAvailable(
56
56
 
57
57
  // Outcome is "fail"
58
58
  const failedPrd = { ...ctx.prd };
59
- markStoryFailed(failedPrd, ctx.story.id, failureCategory);
59
+ markStoryFailed(failedPrd, ctx.story.id, failureCategory, undefined);
60
60
  await savePRD(failedPrd, ctx.prdPath);
61
61
 
62
62
  logger?.error("execution", "Story failed - execution failed", {
@@ -119,7 +119,7 @@ export async function handleMaxAttemptsReached(
119
119
 
120
120
  // Outcome is "fail"
121
121
  const failedPrd = { ...ctx.prd };
122
- markStoryFailed(failedPrd, ctx.story.id, failureCategory);
122
+ markStoryFailed(failedPrd, ctx.story.id, failureCategory, undefined);
123
123
  await savePRD(failedPrd, ctx.prdPath);
124
124
 
125
125
  logger?.error("execution", "Story failed - max attempts reached", {
@@ -27,6 +27,8 @@ export interface IterationResult {
27
27
  prdDirty: boolean;
28
28
  finalAction?: string;
29
29
  reason?: string;
30
+ /** Set when finalAction === "decomposed" — number of sub-stories created */
31
+ subStoryCount?: number;
30
32
  }
31
33
 
32
34
  export async function runIteration(
@@ -146,6 +148,7 @@ export async function runIteration(
146
148
  prdDirty: r.prdDirty,
147
149
  finalAction: pipelineResult.finalAction,
148
150
  reason: pipelineResult.reason,
151
+ subStoryCount: pipelineResult.subStoryCount,
149
152
  };
150
153
  }
151
154
 
@@ -15,6 +15,7 @@ import { getSafeLogger } from "../../logger";
15
15
  import type { StoryMetrics } from "../../metrics";
16
16
  import { saveRunMetrics } from "../../metrics";
17
17
  import { pipelineEventBus } from "../../pipeline/event-bus";
18
+ import type { AgentGetFn } from "../../pipeline/types";
18
19
  import { countStories, isComplete, isStalled } from "../../prd";
19
20
  import type { PRD } from "../../prd";
20
21
  import type { StatusWriter } from "../status-writer";
@@ -45,6 +46,8 @@ export interface RunCompletionOptions {
45
46
  hooksConfig?: HooksConfig;
46
47
  /** Whether the run used sequential (non-parallel) execution. Defaults to true. */
47
48
  isSequential?: boolean;
49
+ /** Protocol-aware agent resolver (ACP wiring). Falls back to static getAgent when absent. */
50
+ agentGetFn?: AgentGetFn;
48
51
  }
49
52
 
50
53
  export interface RunCompletionResult {
@@ -120,6 +123,7 @@ export async function handleRunCompletion(options: RunCompletionOptions): Promis
120
123
  config,
121
124
  prd,
122
125
  workdir,
126
+ agentGetFn: options.agentGetFn,
123
127
  });
124
128
 
125
129
  logger?.info("regression", "Deferred regression gate completed", {