@nathapp/nax 0.50.1 → 0.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -522,12 +522,16 @@ The hermetic requirement is injected into all code-writing prompts (test-writer,
522
522
 
523
523
  ### Configuration
524
524
 
525
+ Configured under `quality.testing` — supports **per-package override** in monorepos.
526
+
525
527
  ```json
526
528
  {
527
- "testing": {
528
- "hermetic": true,
529
- "externalBoundaries": ["claude", "acpx", "redis", "grpc"],
530
- "mockGuidance": "Use injectable deps for CLI spawning, ioredis-mock for Redis"
529
+ "quality": {
530
+ "testing": {
531
+ "hermetic": true,
532
+ "externalBoundaries": ["claude", "acpx", "redis", "grpc"],
533
+ "mockGuidance": "Use injectable deps for CLI spawning, ioredis-mock for Redis"
534
+ }
531
535
  }
532
536
  }
533
537
  ```
@@ -540,7 +544,9 @@ The hermetic requirement is injected into all code-writing prompts (test-writer,
540
544
 
541
545
  > **Tip:** `externalBoundaries` and `mockGuidance` complement `context.md`. nax provides the rule ("mock all I/O"), while `context.md` provides project-specific knowledge ("use `ioredis-mock` for Redis"). Use both for best results.
542
546
 
543
- > **Opt-out:** Set `testing.hermetic: false` if your project requires real integration calls (e.g. live database tests against a local dev container).
547
+ > **Monorepo:** Each package can override `quality.testing` in its own `packages/<name>/nax/config.json`. For example, `packages/api` can specify Redis boundaries while `packages/web` specifies HTTP-only.
548
+
549
+ > **Opt-out:** Set `quality.testing.hermetic: false` if your project requires real integration calls (e.g. live database tests against a local dev container).
544
550
 
545
551
  ---
546
552
 
package/dist/nax.js CHANGED
@@ -17678,7 +17678,7 @@ var init_zod = __esm(() => {
17678
17678
  });
17679
17679
 
17680
17680
  // src/config/schemas.ts
17681
- var TokenPricingSchema, ModelDefSchema, ModelEntrySchema, ModelMapSchema, ModelTierSchema, TierConfigSchema, AutoModeConfigSchema, RectificationConfigSchema, RegressionGateConfigSchema, SmartTestRunnerConfigSchema, SMART_TEST_RUNNER_DEFAULT, smartTestRunnerFieldSchema, ExecutionConfigSchema, QualityConfigSchema, TddConfigSchema, ConstitutionConfigSchema, AnalyzeConfigSchema, ReviewConfigSchema, PlanConfigSchema, AcceptanceConfigSchema, TestCoverageConfigSchema, ContextAutoDetectConfigSchema, ContextConfigSchema, AdaptiveRoutingConfigSchema, LlmRoutingConfigSchema, RoutingConfigSchema, OptimizerConfigSchema, PluginConfigEntrySchema, HooksConfigSchema, InteractionConfigSchema, StorySizeGateConfigSchema, AgentConfigSchema, PrecheckConfigSchema, PromptsConfigSchema, TestingConfigSchema, DecomposeConfigSchema, NaxConfigSchema;
17681
+ var TokenPricingSchema, ModelDefSchema, ModelEntrySchema, ModelMapSchema, ModelTierSchema, TierConfigSchema, AutoModeConfigSchema, RectificationConfigSchema, RegressionGateConfigSchema, SmartTestRunnerConfigSchema, SMART_TEST_RUNNER_DEFAULT, smartTestRunnerFieldSchema, ExecutionConfigSchema, QualityConfigSchema, TddConfigSchema, ConstitutionConfigSchema, AnalyzeConfigSchema, ReviewConfigSchema, PlanConfigSchema, AcceptanceConfigSchema, TestCoverageConfigSchema, ContextAutoDetectConfigSchema, ContextConfigSchema, AdaptiveRoutingConfigSchema, LlmRoutingConfigSchema, RoutingConfigSchema, OptimizerConfigSchema, PluginConfigEntrySchema, HooksConfigSchema, InteractionConfigSchema, StorySizeGateConfigSchema, AgentConfigSchema, PrecheckConfigSchema, PromptsConfigSchema, DecomposeConfigSchema, NaxConfigSchema;
17682
17682
  var init_schemas3 = __esm(() => {
17683
17683
  init_zod();
17684
17684
  TokenPricingSchema = exports_external.object({
@@ -17818,7 +17818,12 @@ var init_schemas3 = __esm(() => {
17818
17818
  "SENTRY_AUTH_TOKEN",
17819
17819
  "DATADOG_API_KEY"
17820
17820
  ]),
17821
- environmentalEscalationDivisor: exports_external.number().min(1).max(10).default(2)
17821
+ environmentalEscalationDivisor: exports_external.number().min(1).max(10).default(2),
17822
+ testing: exports_external.object({
17823
+ hermetic: exports_external.boolean().default(true),
17824
+ externalBoundaries: exports_external.array(exports_external.string()).optional(),
17825
+ mockGuidance: exports_external.string().optional()
17826
+ }).optional()
17822
17827
  });
17823
17828
  TddConfigSchema = exports_external.object({
17824
17829
  maxRetries: exports_external.number().int().nonnegative(),
@@ -17869,7 +17874,8 @@ var init_schemas3 = __esm(() => {
17869
17874
  refinement: exports_external.boolean().default(true),
17870
17875
  redGate: exports_external.boolean().default(true),
17871
17876
  testStrategy: exports_external.enum(["unit", "component", "cli", "e2e", "snapshot"]).optional(),
17872
- testFramework: exports_external.string().min(1, "acceptance.testFramework must be non-empty").optional()
17877
+ testFramework: exports_external.string().min(1, "acceptance.testFramework must be non-empty").optional(),
17878
+ timeoutMs: exports_external.number().int().min(30000).max(3600000).default(1800000)
17873
17879
  });
17874
17880
  TestCoverageConfigSchema = exports_external.object({
17875
17881
  enabled: exports_external.boolean().default(true),
@@ -17965,11 +17971,6 @@ var init_schemas3 = __esm(() => {
17965
17971
  message: "Role must be one of: test-writer, implementer, verifier, single-session, tdd-simple"
17966
17972
  }), exports_external.string().min(1, "Override path must be non-empty")).optional()
17967
17973
  });
17968
- TestingConfigSchema = exports_external.object({
17969
- hermetic: exports_external.boolean().default(true),
17970
- externalBoundaries: exports_external.array(exports_external.string()).optional(),
17971
- mockGuidance: exports_external.string().optional()
17972
- });
17973
17974
  DecomposeConfigSchema = exports_external.object({
17974
17975
  trigger: exports_external.enum(["auto", "confirm", "disabled"]).default("auto"),
17975
17976
  maxAcceptanceCriteria: exports_external.number().int().min(1).default(6),
@@ -18000,8 +18001,7 @@ var init_schemas3 = __esm(() => {
18000
18001
  agent: AgentConfigSchema.optional(),
18001
18002
  precheck: PrecheckConfigSchema.optional(),
18002
18003
  prompts: PromptsConfigSchema.optional(),
18003
- decompose: DecomposeConfigSchema.optional(),
18004
- testing: TestingConfigSchema.optional()
18004
+ decompose: DecomposeConfigSchema.optional()
18005
18005
  }).refine((data) => data.version === 1, {
18006
18006
  message: "Invalid version: expected 1",
18007
18007
  path: ["version"]
@@ -18118,7 +18118,10 @@ var init_defaults = __esm(() => {
18118
18118
  "SENTRY_AUTH_TOKEN",
18119
18119
  "DATADOG_API_KEY"
18120
18120
  ],
18121
- environmentalEscalationDivisor: 2
18121
+ environmentalEscalationDivisor: 2,
18122
+ testing: {
18123
+ hermetic: true
18124
+ }
18122
18125
  },
18123
18126
  tdd: {
18124
18127
  maxRetries: 2,
@@ -18161,7 +18164,8 @@ var init_defaults = __esm(() => {
18161
18164
  testPath: "acceptance.test.ts",
18162
18165
  model: "fast",
18163
18166
  refinement: true,
18164
- redGate: true
18167
+ redGate: true,
18168
+ timeoutMs: 1800000
18165
18169
  },
18166
18170
  context: {
18167
18171
  fileInjection: "disabled",
@@ -18206,9 +18210,6 @@ var init_defaults = __esm(() => {
18206
18210
  maxSubstoryComplexity: "medium",
18207
18211
  maxRetries: 2,
18208
18212
  model: "balanced"
18209
- },
18210
- testing: {
18211
- hermetic: true
18212
18213
  }
18213
18214
  };
18214
18215
  });
@@ -18726,32 +18727,48 @@ async function generateFromPRD(_stories, refinedCriteria, options) {
18726
18727
  }
18727
18728
  const criteriaList = refinedCriteria.map((c, i) => `AC-${i + 1}: ${c.refined}`).join(`
18728
18729
  `);
18729
- const strategyInstructions = buildStrategyInstructions(options.testStrategy, options.testFramework);
18730
- const prompt = `You are a test engineer. Generate acceptance tests for the "${options.featureName}" feature based on the refined acceptance criteria below.
18730
+ const frameworkOverrideLine = options.testFramework ? `
18731
+ [FRAMEWORK OVERRIDE: Use ${options.testFramework} as the test framework regardless of what you detect.]` : "";
18732
+ const basePrompt = `You are a senior test engineer. Your task is to generate a complete acceptance test file for the "${options.featureName}" feature.
18731
18733
 
18732
- CODEBASE CONTEXT:
18733
- ${options.codebaseContext}
18734
+ ## Step 1: Understand and Classify the Acceptance Criteria
18735
+
18736
+ Read each AC below and classify its verification type:
18737
+ - **file-check**: Verify by reading source files (e.g. "no @nestjs/jwt imports", "file exists", "module registered", "uses registerAs pattern")
18738
+ - **runtime-check**: Load and invoke code directly, assert on return values or behavior
18739
+ - **integration-check**: Requires a running service (e.g. HTTP endpoint returns 200, 11th request returns 429, database query succeeds)
18734
18740
 
18735
- ACCEPTANCE CRITERIA (refined):
18741
+ ACCEPTANCE CRITERIA:
18736
18742
  ${criteriaList}
18737
18743
 
18738
- ${strategyInstructions}Generate a complete acceptance.test.ts file using bun:test framework. Each AC maps to exactly one test named "AC-N: <description>".
18744
+ ## Step 2: Explore the Project
18739
18745
 
18740
- Structure example (do NOT wrap in markdown fences \u2014 output raw TypeScript only):
18746
+ Before writing any tests, examine the project to understand:
18747
+ 1. **Language and test framework** \u2014 check dependency manifests (package.json, go.mod, Gemfile, pyproject.toml, Cargo.toml, build.gradle, etc.) to identify the language and test runner
18748
+ 2. **Existing test patterns** \u2014 read 1-2 existing test files to understand import style, describe/test/it conventions, and available helpers
18749
+ 3. **Project structure** \u2014 identify relevant source directories to determine correct import or load paths
18741
18750
 
18742
- import { describe, test, expect } from "bun:test";
18751
+ ${frameworkOverrideLine}
18743
18752
 
18744
- describe("${options.featureName} - Acceptance Tests", () => {
18745
- test("AC-1: <description>", async () => {
18746
- // Test implementation
18747
- });
18748
- });
18753
+ ## Step 3: Generate the Acceptance Test File
18754
+
18755
+ Write the complete acceptance test file using the framework identified in Step 2.
18749
18756
 
18750
- IMPORTANT: Output raw TypeScript code only. Do NOT use markdown code fences (\`\`\`typescript or \`\`\`). Start directly with the import statement.`;
18757
+ Rules:
18758
+ - **One test per AC**, named exactly "AC-N: <description>"
18759
+ - **file-check ACs** \u2192 read source files using the language's standard file I/O, assert with string or regex checks. Do not start the application.
18760
+ - **runtime-check ACs** \u2192 load or import the module directly and invoke it, assert on the return value or observable side effects
18761
+ - **integration-check ACs** \u2192 use the language's HTTP client or existing test helpers; add a clear setup block (beforeAll/setup/TestMain/etc.) explaining what must be running
18762
+ - **NEVER use placeholder assertions** \u2014 no always-passing or always-failing stubs, no TODO comments as the only content, no empty test bodies
18763
+ - Every test MUST have real assertions that PASS when the feature is correctly implemented and FAIL when it is broken
18764
+ - Output raw code only \u2014 no markdown fences, start directly with the language's import or package declaration`;
18765
+ const prompt = basePrompt;
18751
18766
  logger.info("acceptance", "Generating tests from PRD refined criteria", { count: refinedCriteria.length });
18752
- const rawOutput = await _generatorPRDDeps.adapter.complete(prompt, {
18767
+ const rawOutput = await (options.adapter ?? _generatorPRDDeps.adapter).complete(prompt, {
18753
18768
  model: options.modelDef.model,
18754
- config: options.config
18769
+ config: options.config,
18770
+ timeoutMs: options.config?.acceptance?.timeoutMs ?? 1800000,
18771
+ workdir: options.workdir
18755
18772
  });
18756
18773
  const testCode = extractTestCode(rawOutput);
18757
18774
  if (!testCode) {
@@ -18775,40 +18792,6 @@ IMPORTANT: Output raw TypeScript code only. Do NOT use markdown code fences (\`\
18775
18792
  await _generatorPRDDeps.writeFile(join2(options.featureDir, "acceptance-refined.json"), refinedJsonContent);
18776
18793
  return { testCode, criteria };
18777
18794
  }
18778
- function buildStrategyInstructions(strategy, framework) {
18779
- switch (strategy) {
18780
- case "component": {
18781
- const fw = framework ?? "ink-testing-library";
18782
- if (fw === "react") {
18783
- return `TEST STRATEGY: component (react)
18784
- Import render and screen from @testing-library/react. Render the component and use screen.getByText to assert on output.
18785
-
18786
- `;
18787
- }
18788
- return `TEST STRATEGY: component (ink-testing-library)
18789
- Import render from ink-testing-library. Render the component and use lastFrame() to assert on output.
18790
-
18791
- `;
18792
- }
18793
- case "cli":
18794
- return `TEST STRATEGY: cli
18795
- Use Bun.spawn to run the binary. Read stdout and assert on the text output.
18796
-
18797
- `;
18798
- case "e2e":
18799
- return `TEST STRATEGY: e2e
18800
- Use fetch() against http://localhost to call the running service. Assert on response body using response.text() or response.json().
18801
-
18802
- `;
18803
- case "snapshot":
18804
- return `TEST STRATEGY: snapshot
18805
- Render the component and use toMatchSnapshot() to capture and compare snapshots.
18806
-
18807
- `;
18808
- default:
18809
- return "";
18810
- }
18811
- }
18812
18795
  function parseAcceptanceCriteria(specContent) {
18813
18796
  const criteria = [];
18814
18797
  const lines = specContent.split(`
@@ -18832,46 +18815,38 @@ function parseAcceptanceCriteria(specContent) {
18832
18815
  function buildAcceptanceTestPrompt(criteria, featureName, codebaseContext) {
18833
18816
  const criteriaList = criteria.map((ac) => `${ac.id}: ${ac.text}`).join(`
18834
18817
  `);
18835
- return `You are a test engineer. Generate acceptance tests for the "${featureName}" feature based on the acceptance criteria below.
18818
+ return `You are a senior test engineer. Your task is to generate a complete acceptance test file for the "${featureName}" feature.
18836
18819
 
18837
- CODEBASE CONTEXT:
18838
- ${codebaseContext}
18820
+ ## Step 1: Understand and Classify the Acceptance Criteria
18821
+
18822
+ Read each AC below and classify its verification type:
18823
+ - **file-check**: Verify by reading source files (e.g. "no @nestjs/jwt imports", "file exists", "module registered", "uses registerAs pattern")
18824
+ - **runtime-check**: Load and invoke code directly, assert on return values or behavior
18825
+ - **integration-check**: Requires a running service (e.g. HTTP endpoint returns 200, 11th request returns 429, database query succeeds)
18839
18826
 
18840
18827
  ACCEPTANCE CRITERIA:
18841
18828
  ${criteriaList}
18842
18829
 
18843
- Generate a complete acceptance.test.ts file using bun:test framework. Follow these rules:
18830
+ ## Step 2: Explore the Project
18844
18831
 
18845
- 1. **One test per AC**: Each acceptance criterion maps to exactly one test
18846
- 2. **Test observable behavior only**: No implementation details, only user-facing behavior
18847
- 3. **Independent tests**: No shared state between tests
18848
- 4. **Real-implementation**: Tests should use real implementations without mocking (test observable behavior, not internal units)
18849
- 5. **Clear test names**: Use format "AC-N: <description>" for test names
18850
- 6. **Async where needed**: Use async/await for operations that may be asynchronous
18832
+ Before writing any tests, examine the project to understand:
18833
+ 1. **Language and test framework** \u2014 check dependency manifests (package.json, go.mod, Gemfile, pyproject.toml, Cargo.toml, build.gradle, etc.) to identify the language and test runner
18834
+ 2. **Existing test patterns** \u2014 read 1-2 existing test files to understand import style, describe/test/it conventions, and available helpers
18835
+ 3. **Project structure** \u2014 identify relevant source directories to determine correct import or load paths
18851
18836
 
18852
- Use this structure:
18853
18837
 
18854
- \`\`\`typescript
18855
- import { describe, test, expect } from "bun:test";
18838
+ ## Step 3: Generate the Acceptance Test File
18856
18839
 
18857
- describe("${featureName} - Acceptance Tests", () => {
18858
- test("AC-1: <description>", async () => {
18859
- // Test implementation
18860
- });
18840
+ Write the complete acceptance test file using the framework identified in Step 2.
18861
18841
 
18862
- test("AC-2: <description>", async () => {
18863
- // Test implementation
18864
- });
18865
- });
18866
- \`\`\`
18867
-
18868
- **Important**:
18869
- - Import the feature code being tested
18870
- - Set up any necessary test fixtures
18871
- - Use expect() assertions to verify behavior
18872
- - Clean up resources if needed (close connections, delete temp files)
18873
-
18874
- Respond with ONLY the TypeScript test code (no markdown code fences, no explanation).`;
18842
+ Rules:
18843
+ - **One test per AC**, named exactly "AC-N: <description>"
18844
+ - **file-check ACs** \u2192 read source files using the language's standard file I/O, assert with string or regex checks. Do not start the application.
18845
+ - **runtime-check ACs** \u2192 load or import the module directly and invoke it, assert on the return value or observable side effects
18846
+ - **integration-check ACs** \u2192 use the language's HTTP client or existing test helpers; add a clear setup block (beforeAll/setup/TestMain/etc.) explaining what must be running
18847
+ - **NEVER use placeholder assertions** \u2014 no always-passing or always-failing stubs, no TODO comments as the only content, no empty test bodies
18848
+ - Every test MUST have real assertions that PASS when the feature is correctly implemented and FAIL when it is broken
18849
+ - Output raw code only \u2014 no markdown fences, start directly with the language's import or package declaration`;
18875
18850
  }
18876
18851
  async function generateAcceptanceTests(adapter, options) {
18877
18852
  const logger = getLogger();
@@ -18888,7 +18863,9 @@ async function generateAcceptanceTests(adapter, options) {
18888
18863
  try {
18889
18864
  const output = await adapter.complete(prompt, {
18890
18865
  model: options.modelDef.model,
18891
- config: options.config
18866
+ config: options.config,
18867
+ timeoutMs: options.config?.acceptance?.timeoutMs ?? 1800000,
18868
+ workdir: options.workdir
18892
18869
  });
18893
18870
  const testCode = extractTestCode(output);
18894
18871
  if (!testCode) {
@@ -20827,7 +20804,8 @@ function mergePackageConfig(root, packageOverride) {
20827
20804
  commands: {
20828
20805
  ...root.quality.commands,
20829
20806
  ...packageOverride.quality?.commands
20830
- }
20807
+ },
20808
+ testing: packageOverride.quality?.testing !== undefined ? { ...root.quality.testing, ...packageOverride.quality.testing } : root.quality.testing
20831
20809
  },
20832
20810
  context: {
20833
20811
  ...root.context,
@@ -22351,7 +22329,7 @@ var package_default;
22351
22329
  var init_package = __esm(() => {
22352
22330
  package_default = {
22353
22331
  name: "@nathapp/nax",
22354
- version: "0.50.1",
22332
+ version: "0.50.3",
22355
22333
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
22356
22334
  type: "module",
22357
22335
  bin: {
@@ -22425,8 +22403,8 @@ var init_version = __esm(() => {
22425
22403
  NAX_VERSION = package_default.version;
22426
22404
  NAX_COMMIT = (() => {
22427
22405
  try {
22428
- if (/^[0-9a-f]{6,10}$/.test("5ff4e09"))
22429
- return "5ff4e09";
22406
+ if (/^[0-9a-f]{6,10}$/.test("684b48b"))
22407
+ return "684b48b";
22430
22408
  } catch {}
22431
22409
  try {
22432
22410
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -24180,7 +24158,105 @@ ${stderr}`;
24180
24158
  };
24181
24159
  });
24182
24160
 
24161
+ // src/agents/shared/validation.ts
24162
+ function validateAgentForTier(agent, tier) {
24163
+ return agent.capabilities.supportedTiers.includes(tier);
24164
+ }
24165
+ function validateAgentFeature(agent, feature) {
24166
+ return agent.capabilities.features.has(feature);
24167
+ }
24168
+ function describeAgentCapabilities(agent) {
24169
+ const tiers = agent.capabilities.supportedTiers.join(",");
24170
+ const features = Array.from(agent.capabilities.features).join(",");
24171
+ const maxTokens = agent.capabilities.maxContextTokens;
24172
+ return `${agent.name}: tiers=[${tiers}], maxTokens=${maxTokens}, features=[${features}]`;
24173
+ }
24174
+
24175
+ // src/agents/shared/version-detection.ts
24176
+ async function getAgentVersion(binaryName) {
24177
+ try {
24178
+ const proc = _versionDetectionDeps.spawn([binaryName, "--version"], {
24179
+ stdout: "pipe",
24180
+ stderr: "pipe"
24181
+ });
24182
+ const exitCode = await proc.exited;
24183
+ if (exitCode !== 0) {
24184
+ return null;
24185
+ }
24186
+ const stdout = await new Response(proc.stdout).text();
24187
+ const versionLine = stdout.trim().split(`
24188
+ `)[0];
24189
+ const versionMatch = versionLine.match(/v?(\d+\.\d+(?:\.\d+)?(?:[-+][\w.]+)?)/);
24190
+ if (versionMatch) {
24191
+ return versionMatch[0];
24192
+ }
24193
+ return versionLine || null;
24194
+ } catch {
24195
+ return null;
24196
+ }
24197
+ }
24198
+ async function getAgentVersions() {
24199
+ const agents = await getInstalledAgents();
24200
+ const agentsByName = new Map(agents.map((a) => [a.name, a]));
24201
+ const { ALL_AGENTS: ALL_AGENTS2 } = await Promise.resolve().then(() => (init_registry(), exports_registry));
24202
+ const versions2 = await Promise.all(ALL_AGENTS2.map(async (agent) => {
24203
+ const version2 = agentsByName.has(agent.name) ? await getAgentVersion(agent.binary) : null;
24204
+ return {
24205
+ name: agent.name,
24206
+ displayName: agent.displayName,
24207
+ version: version2,
24208
+ installed: agentsByName.has(agent.name)
24209
+ };
24210
+ }));
24211
+ return versions2;
24212
+ }
24213
+ var _versionDetectionDeps;
24214
+ var init_version_detection = __esm(() => {
24215
+ init_registry();
24216
+ _versionDetectionDeps = {
24217
+ spawn(cmd, opts) {
24218
+ return Bun.spawn(cmd, opts);
24219
+ }
24220
+ };
24221
+ });
24222
+
24223
+ // src/agents/index.ts
24224
+ var exports_agents = {};
24225
+ __export(exports_agents, {
24226
+ validateAgentForTier: () => validateAgentForTier,
24227
+ validateAgentFeature: () => validateAgentFeature,
24228
+ parseTokenUsage: () => parseTokenUsage,
24229
+ getInstalledAgents: () => getInstalledAgents,
24230
+ getAllAgentNames: () => getAllAgentNames,
24231
+ getAgentVersions: () => getAgentVersions,
24232
+ getAgentVersion: () => getAgentVersion,
24233
+ getAgent: () => getAgent,
24234
+ formatCostWithConfidence: () => formatCostWithConfidence,
24235
+ estimateCostFromTokenUsage: () => estimateCostFromTokenUsage,
24236
+ estimateCostFromOutput: () => estimateCostFromOutput,
24237
+ estimateCostByDuration: () => estimateCostByDuration,
24238
+ estimateCost: () => estimateCost,
24239
+ describeAgentCapabilities: () => describeAgentCapabilities,
24240
+ checkAgentHealth: () => checkAgentHealth,
24241
+ MODEL_PRICING: () => MODEL_PRICING,
24242
+ CompleteError: () => CompleteError,
24243
+ ClaudeCodeAdapter: () => ClaudeCodeAdapter,
24244
+ COST_RATES: () => COST_RATES
24245
+ });
24246
+ var init_agents = __esm(() => {
24247
+ init_types2();
24248
+ init_claude();
24249
+ init_registry();
24250
+ init_cost();
24251
+ init_version_detection();
24252
+ });
24253
+
24183
24254
  // src/pipeline/stages/acceptance-setup.ts
24255
+ var exports_acceptance_setup = {};
24256
+ __export(exports_acceptance_setup, {
24257
+ acceptanceSetupStage: () => acceptanceSetupStage,
24258
+ _acceptanceSetupDeps: () => _acceptanceSetupDeps
24259
+ });
24184
24260
  import path5 from "path";
24185
24261
  var _acceptanceSetupDeps, acceptanceSetupStage;
24186
24262
  var init_acceptance_setup = __esm(() => {
@@ -24232,6 +24308,8 @@ ${stderr}` };
24232
24308
  if (!fileExists) {
24233
24309
  const allCriteria = ctx.prd.userStories.flatMap((s) => s.acceptanceCriteria);
24234
24310
  totalCriteria = allCriteria.length;
24311
+ const { getAgent: getAgent2 } = await Promise.resolve().then(() => (init_agents(), exports_agents));
24312
+ const agent = (ctx.agentGetFn ?? getAgent2)(ctx.config.autoMode.defaultAgent);
24235
24313
  let refinedCriteria;
24236
24314
  if (ctx.config.acceptance.refinement) {
24237
24315
  refinedCriteria = await _acceptanceSetupDeps.refine(allCriteria, {
@@ -24259,7 +24337,8 @@ ${stderr}` };
24259
24337
  modelDef: resolveModel(ctx.config.models[ctx.config.acceptance.model ?? "fast"]),
24260
24338
  config: ctx.config,
24261
24339
  testStrategy: ctx.config.acceptance.testStrategy,
24262
- testFramework: ctx.config.acceptance.testFramework
24340
+ testFramework: ctx.config.acceptance.testFramework,
24341
+ adapter: agent ?? undefined
24263
24342
  });
24264
24343
  await _acceptanceSetupDeps.writeFile(testPath, result.testCode);
24265
24344
  }
@@ -24281,99 +24360,6 @@ ${stderr}` };
24281
24360
  };
24282
24361
  });
24283
24362
 
24284
- // src/agents/shared/validation.ts
24285
- function validateAgentForTier(agent, tier) {
24286
- return agent.capabilities.supportedTiers.includes(tier);
24287
- }
24288
- function validateAgentFeature(agent, feature) {
24289
- return agent.capabilities.features.has(feature);
24290
- }
24291
- function describeAgentCapabilities(agent) {
24292
- const tiers = agent.capabilities.supportedTiers.join(",");
24293
- const features = Array.from(agent.capabilities.features).join(",");
24294
- const maxTokens = agent.capabilities.maxContextTokens;
24295
- return `${agent.name}: tiers=[${tiers}], maxTokens=${maxTokens}, features=[${features}]`;
24296
- }
24297
-
24298
- // src/agents/shared/version-detection.ts
24299
- async function getAgentVersion(binaryName) {
24300
- try {
24301
- const proc = _versionDetectionDeps.spawn([binaryName, "--version"], {
24302
- stdout: "pipe",
24303
- stderr: "pipe"
24304
- });
24305
- const exitCode = await proc.exited;
24306
- if (exitCode !== 0) {
24307
- return null;
24308
- }
24309
- const stdout = await new Response(proc.stdout).text();
24310
- const versionLine = stdout.trim().split(`
24311
- `)[0];
24312
- const versionMatch = versionLine.match(/v?(\d+\.\d+(?:\.\d+)?(?:[-+][\w.]+)?)/);
24313
- if (versionMatch) {
24314
- return versionMatch[0];
24315
- }
24316
- return versionLine || null;
24317
- } catch {
24318
- return null;
24319
- }
24320
- }
24321
- async function getAgentVersions() {
24322
- const agents = await getInstalledAgents();
24323
- const agentsByName = new Map(agents.map((a) => [a.name, a]));
24324
- const { ALL_AGENTS: ALL_AGENTS2 } = await Promise.resolve().then(() => (init_registry(), exports_registry));
24325
- const versions2 = await Promise.all(ALL_AGENTS2.map(async (agent) => {
24326
- const version2 = agentsByName.has(agent.name) ? await getAgentVersion(agent.binary) : null;
24327
- return {
24328
- name: agent.name,
24329
- displayName: agent.displayName,
24330
- version: version2,
24331
- installed: agentsByName.has(agent.name)
24332
- };
24333
- }));
24334
- return versions2;
24335
- }
24336
- var _versionDetectionDeps;
24337
- var init_version_detection = __esm(() => {
24338
- init_registry();
24339
- _versionDetectionDeps = {
24340
- spawn(cmd, opts) {
24341
- return Bun.spawn(cmd, opts);
24342
- }
24343
- };
24344
- });
24345
-
24346
- // src/agents/index.ts
24347
- var exports_agents = {};
24348
- __export(exports_agents, {
24349
- validateAgentForTier: () => validateAgentForTier,
24350
- validateAgentFeature: () => validateAgentFeature,
24351
- parseTokenUsage: () => parseTokenUsage,
24352
- getInstalledAgents: () => getInstalledAgents,
24353
- getAllAgentNames: () => getAllAgentNames,
24354
- getAgentVersions: () => getAgentVersions,
24355
- getAgentVersion: () => getAgentVersion,
24356
- getAgent: () => getAgent,
24357
- formatCostWithConfidence: () => formatCostWithConfidence,
24358
- estimateCostFromTokenUsage: () => estimateCostFromTokenUsage,
24359
- estimateCostFromOutput: () => estimateCostFromOutput,
24360
- estimateCostByDuration: () => estimateCostByDuration,
24361
- estimateCost: () => estimateCost,
24362
- describeAgentCapabilities: () => describeAgentCapabilities,
24363
- checkAgentHealth: () => checkAgentHealth,
24364
- MODEL_PRICING: () => MODEL_PRICING,
24365
- CompleteError: () => CompleteError,
24366
- ClaudeCodeAdapter: () => ClaudeCodeAdapter,
24367
- COST_RATES: () => COST_RATES
24368
- });
24369
- var init_agents = __esm(() => {
24370
- init_types2();
24371
- init_claude();
24372
- init_registry();
24373
- init_cost();
24374
- init_version_detection();
24375
- });
24376
-
24377
24363
  // src/pipeline/event-bus.ts
24378
24364
  class PipelineEventBus {
24379
24365
  subscribers = new Map;
@@ -27659,13 +27645,13 @@ async function runTddSession(role, agent, story, config2, workdir, modelTier, be
27659
27645
  } else {
27660
27646
  switch (role) {
27661
27647
  case "test-writer":
27662
- prompt = await PromptBuilder.for("test-writer", { isolation: lite ? "lite" : "strict" }).withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.testing).build();
27648
+ prompt = await PromptBuilder.for("test-writer", { isolation: lite ? "lite" : "strict" }).withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
27663
27649
  break;
27664
27650
  case "implementer":
27665
- prompt = await PromptBuilder.for("implementer", { variant: lite ? "lite" : "standard" }).withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.testing).build();
27651
+ prompt = await PromptBuilder.for("implementer", { variant: lite ? "lite" : "standard" }).withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
27666
27652
  break;
27667
27653
  case "verifier":
27668
- prompt = await PromptBuilder.for("verifier").withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.testing).build();
27654
+ prompt = await PromptBuilder.for("verifier").withLoader(workdir, config2).story(story).context(contextMarkdown).constitution(constitution).testCommand(config2.quality?.commands?.test).hermeticConfig(config2.quality?.testing).build();
27669
27655
  break;
27670
27656
  }
27671
27657
  }
@@ -28788,11 +28774,11 @@ var init_prompt = __esm(() => {
28788
28774
  const effectiveConfig = ctx.effectiveConfig ?? ctx.config;
28789
28775
  let prompt;
28790
28776
  if (isBatch) {
28791
- const builder = PromptBuilder.for("batch").withLoader(ctx.workdir, ctx.config).stories(ctx.stories).context(ctx.contextMarkdown).constitution(ctx.constitution?.content).testCommand(effectiveConfig.quality?.commands?.test).hermeticConfig(effectiveConfig.testing);
28777
+ const builder = PromptBuilder.for("batch").withLoader(ctx.workdir, ctx.config).stories(ctx.stories).context(ctx.contextMarkdown).constitution(ctx.constitution?.content).testCommand(effectiveConfig.quality?.commands?.test).hermeticConfig(effectiveConfig.quality?.testing);
28792
28778
  prompt = await builder.build();
28793
28779
  } else {
28794
28780
  const role = "tdd-simple";
28795
- const builder = PromptBuilder.for(role).withLoader(ctx.workdir, ctx.config).story(ctx.story).context(ctx.contextMarkdown).constitution(ctx.constitution?.content).testCommand(effectiveConfig.quality?.commands?.test).hermeticConfig(effectiveConfig.testing);
28781
+ const builder = PromptBuilder.for(role).withLoader(ctx.workdir, ctx.config).story(ctx.story).context(ctx.contextMarkdown).constitution(ctx.constitution?.content).testCommand(effectiveConfig.quality?.commands?.test).hermeticConfig(effectiveConfig.quality?.testing);
28796
28782
  prompt = await builder.build();
28797
28783
  }
28798
28784
  ctx.prompt = prompt;
@@ -32201,9 +32187,13 @@ var init_crash_recovery = __esm(() => {
32201
32187
  // src/execution/lifecycle/acceptance-loop.ts
32202
32188
  var exports_acceptance_loop = {};
32203
32189
  __export(exports_acceptance_loop, {
32204
- runAcceptanceLoop: () => runAcceptanceLoop
32190
+ runAcceptanceLoop: () => runAcceptanceLoop,
32191
+ isStubTestFile: () => isStubTestFile
32205
32192
  });
32206
32193
  import path14 from "path";
32194
+ function isStubTestFile(content) {
32195
+ return /expect\s*\(\s*true\s*\)\s*\.\s*toBe\s*\(\s*(?:false|true)\s*\)/.test(content);
32196
+ }
32207
32197
  async function loadSpecContent(featureDir) {
32208
32198
  if (!featureDir)
32209
32199
  return "";
@@ -32337,6 +32327,25 @@ async function runAcceptanceLoop(ctx) {
32337
32327
  }), ctx.workdir);
32338
32328
  return buildResult(false, prd, totalCost, iterations, storiesCompleted, prdDirty);
32339
32329
  }
32330
+ if (ctx.featureDir) {
32331
+ const testPath = path14.join(ctx.featureDir, "acceptance.test.ts");
32332
+ const testFile = Bun.file(testPath);
32333
+ if (await testFile.exists()) {
32334
+ const testContent = await testFile.text();
32335
+ if (isStubTestFile(testContent)) {
32336
+ logger?.warn("acceptance", "Stub tests detected \u2014 re-generating acceptance tests");
32337
+ const { unlink: unlink3 } = await import("fs/promises");
32338
+ await unlink3(testPath);
32339
+ const { acceptanceSetupStage: acceptanceSetupStage2 } = await Promise.resolve().then(() => (init_acceptance_setup(), exports_acceptance_setup));
32340
+ await acceptanceSetupStage2.execute(acceptanceContext);
32341
+ const newContent = await Bun.file(testPath).text();
32342
+ if (isStubTestFile(newContent)) {
32343
+ logger?.error("acceptance", "Acceptance test generation failed after retry \u2014 manual implementation required");
32344
+ return buildResult(false, prd, totalCost, iterations, storiesCompleted, prdDirty);
32345
+ }
32346
+ }
32347
+ }
32348
+ }
32340
32349
  logger?.info("acceptance", "Generating fix stories...");
32341
32350
  const fixStories = await generateAndAddFixStories(ctx, failures, prd);
32342
32351
  if (!fixStories) {
@@ -69630,6 +69639,7 @@ var FIELD_DESCRIPTIONS = {
69630
69639
  "acceptance.maxRetries": "Max retry loops for fix stories",
69631
69640
  "acceptance.generateTests": "Generate acceptance tests during analyze",
69632
69641
  "acceptance.testPath": "Path to acceptance test file (relative to feature dir)",
69642
+ "acceptance.timeoutMs": "Timeout for acceptance test generation in milliseconds (default: 1800000 = 30 min)",
69633
69643
  context: "Context injection configuration",
69634
69644
  "context.fileInjection": "Mode: 'disabled' (default, MCP-aware agents pull context on-demand) | 'keyword' (legacy git-grep injection for non-MCP agents). Set context.fileInjection in config.",
69635
69645
  "context.testCoverage": "Test coverage context settings",
@@ -69678,10 +69688,10 @@ var FIELD_DESCRIPTIONS = {
69678
69688
  agent: "Agent protocol configuration (ACP-003)",
69679
69689
  "agent.protocol": "Protocol for agent communication: 'acp' | 'cli' (default: 'acp')",
69680
69690
  "agent.maxInteractionTurns": "Max turns in multi-turn interaction loop when interactionBridge is active (default: 10)",
69681
- testing: "Hermetic test enforcement configuration (ENH-010)",
69682
- "testing.hermetic": "Inject hermetic test requirement into prompts \u2014 never call real external services in tests (default: true)",
69683
- "testing.externalBoundaries": "Project-specific CLI tools/clients to mock (e.g. ['claude', 'acpx', 'redis'])",
69684
- "testing.mockGuidance": "Project-specific mocking guidance injected verbatim into the prompt"
69691
+ "quality.testing": "Hermetic test enforcement \u2014 per-package overridable (ENH-010)",
69692
+ "quality.testing.hermetic": "Inject hermetic test requirement into prompts \u2014 never call real external services in tests (default: true)",
69693
+ "quality.testing.externalBoundaries": "Project-specific CLI tools/clients to mock (e.g. ['claude', 'acpx', 'redis'])",
69694
+ "quality.testing.mockGuidance": "Project-specific mocking guidance injected verbatim into the prompt"
69685
69695
  };
69686
69696
 
69687
69697
  // src/cli/config-diff.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.50.1",
3
+ "version": "0.50.3",
4
4
  "description": "AI Coding Agent Orchestrator — loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -82,35 +82,53 @@ export async function generateFromPRD(
82
82
 
83
83
  const criteriaList = refinedCriteria.map((c, i) => `AC-${i + 1}: ${c.refined}`).join("\n");
84
84
 
85
- const strategyInstructions = buildStrategyInstructions(options.testStrategy, options.testFramework);
85
+ const frameworkOverrideLine = options.testFramework
86
+ ? `\n[FRAMEWORK OVERRIDE: Use ${options.testFramework} as the test framework regardless of what you detect.]`
87
+ : "";
86
88
 
87
- const prompt = `You are a test engineer. Generate acceptance tests for the "${options.featureName}" feature based on the refined acceptance criteria below.
89
+ const basePrompt = `You are a senior test engineer. Your task is to generate a complete acceptance test file for the "${options.featureName}" feature.
88
90
 
89
- CODEBASE CONTEXT:
90
- ${options.codebaseContext}
91
+ ## Step 1: Understand and Classify the Acceptance Criteria
91
92
 
92
- ACCEPTANCE CRITERIA (refined):
93
+ Read each AC below and classify its verification type:
94
+ - **file-check**: Verify by reading source files (e.g. "no @nestjs/jwt imports", "file exists", "module registered", "uses registerAs pattern")
95
+ - **runtime-check**: Load and invoke code directly, assert on return values or behavior
96
+ - **integration-check**: Requires a running service (e.g. HTTP endpoint returns 200, 11th request returns 429, database query succeeds)
97
+
98
+ ACCEPTANCE CRITERIA:
93
99
  ${criteriaList}
94
100
 
95
- ${strategyInstructions}Generate a complete acceptance.test.ts file using bun:test framework. Each AC maps to exactly one test named "AC-N: <description>".
101
+ ## Step 2: Explore the Project
96
102
 
97
- Structure example (do NOT wrap in markdown fences — output raw TypeScript only):
103
+ Before writing any tests, examine the project to understand:
104
+ 1. **Language and test framework** — check dependency manifests (package.json, go.mod, Gemfile, pyproject.toml, Cargo.toml, build.gradle, etc.) to identify the language and test runner
105
+ 2. **Existing test patterns** — read 1-2 existing test files to understand import style, describe/test/it conventions, and available helpers
106
+ 3. **Project structure** — identify relevant source directories to determine correct import or load paths
98
107
 
99
- import { describe, test, expect } from "bun:test";
108
+ ${frameworkOverrideLine}
100
109
 
101
- describe("${options.featureName} - Acceptance Tests", () => {
102
- test("AC-1: <description>", async () => {
103
- // Test implementation
104
- });
105
- });
110
+ ## Step 3: Generate the Acceptance Test File
111
+
112
+ Write the complete acceptance test file using the framework identified in Step 2.
106
113
 
107
- IMPORTANT: Output raw TypeScript code only. Do NOT use markdown code fences (\`\`\`typescript or \`\`\`). Start directly with the import statement.`;
114
+ Rules:
115
+ - **One test per AC**, named exactly "AC-N: <description>"
116
+ - **file-check ACs** → read source files using the language's standard file I/O, assert with string or regex checks. Do not start the application.
117
+ - **runtime-check ACs** → load or import the module directly and invoke it, assert on the return value or observable side effects
118
+ - **integration-check ACs** → use the language's HTTP client or existing test helpers; add a clear setup block (beforeAll/setup/TestMain/etc.) explaining what must be running
119
+ - **NEVER use placeholder assertions** — no always-passing or always-failing stubs, no TODO comments as the only content, no empty test bodies
120
+ - Every test MUST have real assertions that PASS when the feature is correctly implemented and FAIL when it is broken
121
+ - Output raw code only — no markdown fences, start directly with the language's import or package declaration`;
122
+
123
+ const prompt = basePrompt;
108
124
 
109
125
  logger.info("acceptance", "Generating tests from PRD refined criteria", { count: refinedCriteria.length });
110
126
 
111
- const rawOutput = await _generatorPRDDeps.adapter.complete(prompt, {
127
+ const rawOutput = await (options.adapter ?? _generatorPRDDeps.adapter).complete(prompt, {
112
128
  model: options.modelDef.model,
113
129
  config: options.config,
130
+ timeoutMs: options.config?.acceptance?.timeoutMs ?? 1800000,
131
+ workdir: options.workdir,
114
132
  });
115
133
  const testCode = extractTestCode(rawOutput);
116
134
 
@@ -143,26 +161,6 @@ IMPORTANT: Output raw TypeScript code only. Do NOT use markdown code fences (\`\
143
161
  return { testCode, criteria };
144
162
  }
145
163
 
146
- function buildStrategyInstructions(strategy?: string, framework?: string): string {
147
- switch (strategy) {
148
- case "component": {
149
- const fw = framework ?? "ink-testing-library";
150
- if (fw === "react") {
151
- return "TEST STRATEGY: component (react)\nImport render and screen from @testing-library/react. Render the component and use screen.getByText to assert on output.\n\n";
152
- }
153
- return "TEST STRATEGY: component (ink-testing-library)\nImport render from ink-testing-library. Render the component and use lastFrame() to assert on output.\n\n";
154
- }
155
- case "cli":
156
- return "TEST STRATEGY: cli\nUse Bun.spawn to run the binary. Read stdout and assert on the text output.\n\n";
157
- case "e2e":
158
- return "TEST STRATEGY: e2e\nUse fetch() against http://localhost to call the running service. Assert on response body using response.text() or response.json().\n\n";
159
- case "snapshot":
160
- return "TEST STRATEGY: snapshot\nRender the component and use toMatchSnapshot() to capture and compare snapshots.\n\n";
161
- default:
162
- return "";
163
- }
164
- }
165
-
166
164
  export function parseAcceptanceCriteria(specContent: string): AcceptanceCriterion[] {
167
165
  const criteria: AcceptanceCriterion[] = [];
168
166
  const lines = specContent.split("\n");
@@ -218,46 +216,38 @@ export function buildAcceptanceTestPrompt(
218
216
  ): string {
219
217
  const criteriaList = criteria.map((ac) => `${ac.id}: ${ac.text}`).join("\n");
220
218
 
221
- return `You are a test engineer. Generate acceptance tests for the "${featureName}" feature based on the acceptance criteria below.
219
+ return `You are a senior test engineer. Your task is to generate a complete acceptance test file for the "${featureName}" feature.
220
+
221
+ ## Step 1: Understand and Classify the Acceptance Criteria
222
222
 
223
- CODEBASE CONTEXT:
224
- ${codebaseContext}
223
+ Read each AC below and classify its verification type:
224
+ - **file-check**: Verify by reading source files (e.g. "no @nestjs/jwt imports", "file exists", "module registered", "uses registerAs pattern")
225
+ - **runtime-check**: Load and invoke code directly, assert on return values or behavior
226
+ - **integration-check**: Requires a running service (e.g. HTTP endpoint returns 200, 11th request returns 429, database query succeeds)
225
227
 
226
228
  ACCEPTANCE CRITERIA:
227
229
  ${criteriaList}
228
230
 
229
- Generate a complete acceptance.test.ts file using bun:test framework. Follow these rules:
230
-
231
- 1. **One test per AC**: Each acceptance criterion maps to exactly one test
232
- 2. **Test observable behavior only**: No implementation details, only user-facing behavior
233
- 3. **Independent tests**: No shared state between tests
234
- 4. **Real-implementation**: Tests should use real implementations without mocking (test observable behavior, not internal units)
235
- 5. **Clear test names**: Use format "AC-N: <description>" for test names
236
- 6. **Async where needed**: Use async/await for operations that may be asynchronous
237
-
238
- Use this structure:
231
+ ## Step 2: Explore the Project
239
232
 
240
- \`\`\`typescript
241
- import { describe, test, expect } from "bun:test";
233
+ Before writing any tests, examine the project to understand:
234
+ 1. **Language and test framework** — check dependency manifests (package.json, go.mod, Gemfile, pyproject.toml, Cargo.toml, build.gradle, etc.) to identify the language and test runner
235
+ 2. **Existing test patterns** — read 1-2 existing test files to understand import style, describe/test/it conventions, and available helpers
236
+ 3. **Project structure** — identify relevant source directories to determine correct import or load paths
242
237
 
243
- describe("${featureName} - Acceptance Tests", () => {
244
- test("AC-1: <description>", async () => {
245
- // Test implementation
246
- });
247
238
 
248
- test("AC-2: <description>", async () => {
249
- // Test implementation
250
- });
251
- });
252
- \`\`\`
239
+ ## Step 3: Generate the Acceptance Test File
253
240
 
254
- **Important**:
255
- - Import the feature code being tested
256
- - Set up any necessary test fixtures
257
- - Use expect() assertions to verify behavior
258
- - Clean up resources if needed (close connections, delete temp files)
241
+ Write the complete acceptance test file using the framework identified in Step 2.
259
242
 
260
- Respond with ONLY the TypeScript test code (no markdown code fences, no explanation).`;
243
+ Rules:
244
+ - **One test per AC**, named exactly "AC-N: <description>"
245
+ - **file-check ACs** → read source files using the language's standard file I/O, assert with string or regex checks. Do not start the application.
246
+ - **runtime-check ACs** → load or import the module directly and invoke it, assert on the return value or observable side effects
247
+ - **integration-check ACs** → use the language's HTTP client or existing test helpers; add a clear setup block (beforeAll/setup/TestMain/etc.) explaining what must be running
248
+ - **NEVER use placeholder assertions** — no always-passing or always-failing stubs, no TODO comments as the only content, no empty test bodies
249
+ - Every test MUST have real assertions that PASS when the feature is correctly implemented and FAIL when it is broken
250
+ - Output raw code only — no markdown fences, start directly with the language's import or package declaration`;
261
251
  }
262
252
 
263
253
  /**
@@ -313,6 +303,8 @@ export async function generateAcceptanceTests(
313
303
  const output = await adapter.complete(prompt, {
314
304
  model: options.modelDef.model,
315
305
  config: options.config,
306
+ timeoutMs: options.config?.acceptance?.timeoutMs ?? 1800000,
307
+ workdir: options.workdir,
316
308
  });
317
309
 
318
310
  // Extract test code from output
@@ -4,6 +4,7 @@
4
4
  * Types for generating acceptance tests from spec.md acceptance criteria.
5
5
  */
6
6
 
7
+ import type { AgentAdapter } from "../agents/types";
7
8
  import type { AcceptanceTestStrategy, ModelDef, ModelTier, NaxConfig } from "../config/schema";
8
9
 
9
10
  /**
@@ -94,6 +95,8 @@ export interface GenerateFromPRDOptions {
94
95
  testStrategy?: AcceptanceTestStrategy;
95
96
  /** Test framework for component/snapshot strategies (e.g. 'ink-testing-library', 'react') */
96
97
  testFramework?: string;
98
+ /** Agent adapter to use for test generation — overrides _generatorPRDDeps.adapter */
99
+ adapter?: AgentAdapter;
97
100
  }
98
101
 
99
102
  export interface GenerateAcceptanceTestsOptions {
@@ -141,6 +141,7 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = {
141
141
  "acceptance.maxRetries": "Max retry loops for fix stories",
142
142
  "acceptance.generateTests": "Generate acceptance tests during analyze",
143
143
  "acceptance.testPath": "Path to acceptance test file (relative to feature dir)",
144
+ "acceptance.timeoutMs": "Timeout for acceptance test generation in milliseconds (default: 1800000 = 30 min)",
144
145
 
145
146
  // Context
146
147
  context: "Context injection configuration",
@@ -209,10 +210,10 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = {
209
210
  "agent.protocol": "Protocol for agent communication: 'acp' | 'cli' (default: 'acp')",
210
211
  "agent.maxInteractionTurns":
211
212
  "Max turns in multi-turn interaction loop when interactionBridge is active (default: 10)",
212
- // Testing
213
- testing: "Hermetic test enforcement configuration (ENH-010)",
214
- "testing.hermetic":
213
+ // quality.testing (ENH-010) — per-package overridable
214
+ "quality.testing": "Hermetic test enforcement — per-package overridable (ENH-010)",
215
+ "quality.testing.hermetic":
215
216
  "Inject hermetic test requirement into prompts — never call real external services in tests (default: true)",
216
- "testing.externalBoundaries": "Project-specific CLI tools/clients to mock (e.g. ['claude', 'acpx', 'redis'])",
217
- "testing.mockGuidance": "Project-specific mocking guidance injected verbatim into the prompt",
217
+ "quality.testing.externalBoundaries": "Project-specific CLI tools/clients to mock (e.g. ['claude', 'acpx', 'redis'])",
218
+ "quality.testing.mockGuidance": "Project-specific mocking guidance injected verbatim into the prompt",
218
219
  };
@@ -121,6 +121,9 @@ export const DEFAULT_CONFIG: NaxConfig = {
121
121
  "DATADOG_API_KEY",
122
122
  ],
123
123
  environmentalEscalationDivisor: 2,
124
+ testing: {
125
+ hermetic: true,
126
+ },
124
127
  },
125
128
  tdd: {
126
129
  maxRetries: 2,
@@ -165,6 +168,7 @@ export const DEFAULT_CONFIG: NaxConfig = {
165
168
  model: "fast" as const,
166
169
  refinement: true,
167
170
  redGate: true,
171
+ timeoutMs: 1800000,
168
172
  },
169
173
  context: {
170
174
  fileInjection: "disabled",
@@ -211,7 +215,4 @@ export const DEFAULT_CONFIG: NaxConfig = {
211
215
  maxRetries: 2,
212
216
  model: "balanced",
213
217
  },
214
- testing: {
215
- hermetic: true,
216
- },
217
218
  };
@@ -15,7 +15,7 @@ import type { NaxConfig } from "./schema";
15
15
  * - execution: smartTestRunner, regressionGate (deep), verificationTimeoutSeconds
16
16
  * - review: enabled, checks, commands (deep), pluginMode
17
17
  * - acceptance: enabled, generateTests, testPath
18
- * - quality: requireTests, requireTypecheck, requireLint, commands (deep)
18
+ * - quality: requireTests, requireTypecheck, requireLint, commands (deep), testing (deep)
19
19
  * - context: testCoverage (deep)
20
20
  *
21
21
  * All other sections (models, autoMode, routing, agent, generate, tdd,
@@ -89,6 +89,11 @@ export function mergePackageConfig(root: NaxConfig, packageOverride: Partial<Nax
89
89
  ...root.quality.commands,
90
90
  ...packageOverride.quality?.commands,
91
91
  },
92
+ // ENH-010: deep-merge testing config so per-package overrides work
93
+ testing:
94
+ packageOverride.quality?.testing !== undefined
95
+ ? { ...root.quality.testing, ...packageOverride.quality.testing }
96
+ : root.quality.testing,
92
97
  },
93
98
  context: {
94
99
  ...root.context,
@@ -160,6 +160,8 @@ export interface QualityConfig {
160
160
  stripEnvVars: string[];
161
161
  /** Divisor for environmental failure early escalation (default: 2 = half the tier budget) */
162
162
  environmentalEscalationDivisor: number;
163
+ /** Hermetic test enforcement settings (ENH-010). Supports per-package override. */
164
+ testing?: TestingConfig;
163
165
  }
164
166
 
165
167
  /** TDD config */
@@ -260,6 +262,8 @@ export interface AcceptanceConfig {
260
262
  testStrategy?: AcceptanceTestStrategy;
261
263
  /** Test framework for acceptance tests (default: auto-detect) */
262
264
  testFramework?: string;
265
+ /** Timeout for acceptance test generation in milliseconds (default: 1800000 = 30 min) */
266
+ timeoutMs: number;
263
267
  }
264
268
 
265
269
  /** Optimizer config (v0.10) */
@@ -495,8 +499,6 @@ export interface NaxConfig {
495
499
  decompose?: DecomposeConfig;
496
500
  /** Agent protocol settings (ACP-003) */
497
501
  agent?: AgentConfig;
498
- /** Hermetic test enforcement settings (ENH-010) */
499
- testing?: TestingConfig;
500
502
  /** Generate settings */
501
503
  generate?: GenerateConfig;
502
504
  }
@@ -175,6 +175,29 @@ const QualityConfigSchema = z.object({
175
175
  "DATADOG_API_KEY",
176
176
  ]),
177
177
  environmentalEscalationDivisor: z.number().min(1).max(10).default(2),
178
+ testing: z
179
+ .object({
180
+ /**
181
+ * When true (default), nax injects a hermetic test requirement into all code-writing prompts.
182
+ * Instructs the AI to mock all I/O boundaries (HTTP, CLI spawning, databases, etc.)
183
+ * and never invoke real external processes or services during test execution.
184
+ * Set to false only if your project requires real integration calls in tests.
185
+ */
186
+ hermetic: z.boolean().default(true),
187
+ /**
188
+ * Project-specific external boundaries the AI should watch for and mock.
189
+ * E.g. ["claude", "acpx", "redis", "grpc"] — any CLI tools, clients, or services
190
+ * the project uses that should never be called from tests.
191
+ */
192
+ externalBoundaries: z.array(z.string()).optional(),
193
+ /**
194
+ * Project-specific guidance on how to mock external dependencies.
195
+ * Injected verbatim into the hermetic requirement section of the prompt.
196
+ * E.g. "Use injectable deps for CLI spawning, ioredis-mock for Redis"
197
+ */
198
+ mockGuidance: z.string().optional(),
199
+ })
200
+ .optional(),
178
201
  });
179
202
 
180
203
  const TddConfigSchema = z.object({
@@ -234,6 +257,7 @@ export const AcceptanceConfigSchema = z.object({
234
257
  redGate: z.boolean().default(true),
235
258
  testStrategy: z.enum(["unit", "component", "cli", "e2e", "snapshot"]).optional(),
236
259
  testFramework: z.string().min(1, "acceptance.testFramework must be non-empty").optional(),
260
+ timeoutMs: z.number().int().min(30000).max(3600000).default(1800000),
237
261
  });
238
262
 
239
263
  const TestCoverageConfigSchema = z.object({
@@ -362,28 +386,6 @@ export const PromptsConfigSchema = z.object({
362
386
  .optional(),
363
387
  });
364
388
 
365
- const TestingConfigSchema = z.object({
366
- /**
367
- * When true (default), nax injects a hermetic test requirement into all code-writing prompts.
368
- * Instructs the AI to mock all I/O boundaries (HTTP, CLI spawning, databases, etc.)
369
- * and never invoke real external processes or services during test execution.
370
- * Set to false only if your project requires real integration calls in tests.
371
- */
372
- hermetic: z.boolean().default(true),
373
- /**
374
- * Project-specific external boundaries the AI should watch for and mock.
375
- * E.g. ["claude", "acpx", "redis", "grpc"] — any CLI tools, clients, or services
376
- * the project uses that should never be called from tests.
377
- */
378
- externalBoundaries: z.array(z.string()).optional(),
379
- /**
380
- * Project-specific guidance on how to mock external dependencies.
381
- * Injected verbatim into the hermetic requirement section of the prompt.
382
- * E.g. "Use injectable deps for CLI spawning, ioredis-mock for Redis"
383
- */
384
- mockGuidance: z.string().optional(),
385
- });
386
-
387
389
  const DecomposeConfigSchema = z.object({
388
390
  trigger: z.enum(["auto", "confirm", "disabled"]).default("auto"),
389
391
  maxAcceptanceCriteria: z.number().int().min(1).default(6),
@@ -417,7 +419,6 @@ export const NaxConfigSchema = z
417
419
  precheck: PrecheckConfigSchema.optional(),
418
420
  prompts: PromptsConfigSchema.optional(),
419
421
  decompose: DecomposeConfigSchema.optional(),
420
- testing: TestingConfigSchema.optional(),
421
422
  })
422
423
  .refine((data) => data.version === 1, {
423
424
  message: "Invalid version: expected 1",
@@ -55,6 +55,11 @@ export interface AcceptanceLoopResult {
55
55
  prdDirty: boolean;
56
56
  }
57
57
 
58
+ export function isStubTestFile(content: string): boolean {
59
+ // Detect skeleton stubs: expect(true).toBe(false) or expect(true).toBe(true) in test bodies
60
+ return /expect\s*\(\s*true\s*\)\s*\.\s*toBe\s*\(\s*(?:false|true)\s*\)/.test(content);
61
+ }
62
+
58
63
  /** Load spec.md content for AC text */
59
64
  async function loadSpecContent(featureDir?: string): Promise<string> {
60
65
  if (!featureDir) return "";
@@ -243,6 +248,30 @@ export async function runAcceptanceLoop(ctx: AcceptanceLoopContext): Promise<Acc
243
248
  return buildResult(false, prd, totalCost, iterations, storiesCompleted, prdDirty);
244
249
  }
245
250
 
251
+ // Check for stub test file before generating fix stories
252
+ if (ctx.featureDir) {
253
+ const testPath = path.join(ctx.featureDir, "acceptance.test.ts");
254
+ const testFile = Bun.file(testPath);
255
+ if (await testFile.exists()) {
256
+ const testContent = await testFile.text();
257
+ if (isStubTestFile(testContent)) {
258
+ logger?.warn("acceptance", "Stub tests detected — re-generating acceptance tests");
259
+ const { unlink } = await import("node:fs/promises");
260
+ await unlink(testPath);
261
+ const { acceptanceSetupStage } = await import("../../pipeline/stages/acceptance-setup");
262
+ await acceptanceSetupStage.execute(acceptanceContext);
263
+ const newContent = await Bun.file(testPath).text();
264
+ if (isStubTestFile(newContent)) {
265
+ logger?.error(
266
+ "acceptance",
267
+ "Acceptance test generation failed after retry — manual implementation required",
268
+ );
269
+ return buildResult(false, prd, totalCost, iterations, storiesCompleted, prdDirty);
270
+ }
271
+ }
272
+ }
273
+ }
274
+
246
275
  // Generate and add fix stories
247
276
  logger?.info("acceptance", "Generating fix stories...");
248
277
  const fixStories = await generateAndAddFixStories(ctx, failures, prd);
@@ -82,6 +82,9 @@ export const acceptanceSetupStage: PipelineStage = {
82
82
  const allCriteria: string[] = ctx.prd.userStories.flatMap((s) => s.acceptanceCriteria);
83
83
  totalCriteria = allCriteria.length;
84
84
 
85
+ const { getAgent } = await import("../../agents");
86
+ const agent = (ctx.agentGetFn ?? getAgent)(ctx.config.autoMode.defaultAgent);
87
+
85
88
  let refinedCriteria: RefinedCriterion[];
86
89
 
87
90
  if (ctx.config.acceptance.refinement) {
@@ -113,6 +116,7 @@ export const acceptanceSetupStage: PipelineStage = {
113
116
  config: ctx.config,
114
117
  testStrategy: ctx.config.acceptance.testStrategy,
115
118
  testFramework: ctx.config.acceptance.testFramework,
119
+ adapter: agent ?? undefined,
116
120
  });
117
121
 
118
122
  await _acceptanceSetupDeps.writeFile(testPath, result.testCode);
@@ -45,7 +45,7 @@ export const promptStage: PipelineStage = {
45
45
  .context(ctx.contextMarkdown)
46
46
  .constitution(ctx.constitution?.content)
47
47
  .testCommand(effectiveConfig.quality?.commands?.test)
48
- .hermeticConfig(effectiveConfig.testing);
48
+ .hermeticConfig(effectiveConfig.quality?.testing);
49
49
  prompt = await builder.build();
50
50
  } else {
51
51
  // Both test-after and tdd-simple use the tdd-simple prompt (RED/GREEN/REFACTOR)
@@ -56,7 +56,7 @@ export const promptStage: PipelineStage = {
56
56
  .context(ctx.contextMarkdown)
57
57
  .constitution(ctx.constitution?.content)
58
58
  .testCommand(effectiveConfig.quality?.commands?.test)
59
- .hermeticConfig(effectiveConfig.testing);
59
+ .hermeticConfig(effectiveConfig.quality?.testing);
60
60
  prompt = await builder.build();
61
61
  }
62
62
 
@@ -133,7 +133,7 @@ export async function runTddSession(
133
133
  .context(contextMarkdown)
134
134
  .constitution(constitution)
135
135
  .testCommand(config.quality?.commands?.test)
136
- .hermeticConfig(config.testing)
136
+ .hermeticConfig(config.quality?.testing)
137
137
  .build();
138
138
  break;
139
139
  case "implementer":
@@ -143,7 +143,7 @@ export async function runTddSession(
143
143
  .context(contextMarkdown)
144
144
  .constitution(constitution)
145
145
  .testCommand(config.quality?.commands?.test)
146
- .hermeticConfig(config.testing)
146
+ .hermeticConfig(config.quality?.testing)
147
147
  .build();
148
148
  break;
149
149
  case "verifier":
@@ -153,7 +153,7 @@ export async function runTddSession(
153
153
  .context(contextMarkdown)
154
154
  .constitution(constitution)
155
155
  .testCommand(config.quality?.commands?.test)
156
- .hermeticConfig(config.testing)
156
+ .hermeticConfig(config.quality?.testing)
157
157
  .build();
158
158
  break;
159
159
  }