@nathapp/nax 0.67.0-canary.6 → 0.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/nax.js +625 -336
  2. package/package.json +1 -1
package/dist/nax.js CHANGED
@@ -17156,7 +17156,8 @@ var init_schemas_infra = __esm(() => {
17156
17156
  PromptsConfigSchema = exports_external.object({
17157
17157
  overrides: exports_external.record(exports_external.string().refine((key) => ["no-test", "test-writer", "implementer", "verifier", "single-session", "tdd-simple"].includes(key), {
17158
17158
  message: "Role must be one of: no-test, test-writer, implementer, verifier, single-session, tdd-simple"
17159
- }), exports_external.string().min(1, "Override path must be non-empty")).optional()
17159
+ }), exports_external.string().min(1, "Override path must be non-empty")).optional(),
17160
+ behavioralGuardrails: exports_external.enum(["off", "lite", "strict"]).default("lite")
17160
17161
  });
17161
17162
  ProjectProfileSchema = exports_external.object({
17162
17163
  language: exports_external.enum(["typescript", "javascript", "go", "rust", "python", "ruby", "java", "kotlin", "php"]).optional(),
@@ -17550,7 +17551,7 @@ var init_schemas3 = __esm(() => {
17550
17551
  maxReplanAttempts: 3
17551
17552
  }
17552
17553
  }),
17553
- prompts: PromptsConfigSchema.optional(),
17554
+ prompts: PromptsConfigSchema.default({ behavioralGuardrails: "lite" }),
17554
17555
  generate: GenerateConfigSchema.optional(),
17555
17556
  project: ProjectProfileSchema.optional(),
17556
17557
  debate: DebateConfigSchema.optional().default(() => ({
@@ -20717,6 +20718,12 @@ var init_adapter = __esm(() => {
20717
20718
  displayName: "Gemini CLI (ACP)",
20718
20719
  supportedTiers: ["fast", "balanced", "powerful"],
20719
20720
  maxContextTokens: 1e6
20721
+ },
20722
+ opencode: {
20723
+ binary: "opencode",
20724
+ displayName: "opencode (ACP)",
20725
+ supportedTiers: ["fast", "balanced", "powerful"],
20726
+ maxContextTokens: 128000
20720
20727
  }
20721
20728
  };
20722
20729
  DEFAULT_ENTRY = {
@@ -22594,6 +22601,15 @@ var init_conventions = __esm(() => {
22594
22601
  ]);
22595
22602
  DEFAULT_SEPARATED_TEST_DIRS = Object.freeze(["test/unit", "test/integration"]);
22596
22603
  });
22604
+
22605
+ // src/test-runners/classifier.ts
22606
// Builds a predicate that reports whether a path matches any of the resolved
// test-file regexes.
//
// resolved: object whose `regex` property is an array of RegExp; no other
//           property is read here.
// Returns:  (path) => boolean. When `regex` is empty the predicate always
//           returns false.
function createTestFileClassifier(resolved) {
  const { regex } = resolved;
  if (regex.length === 0) {
    return () => false;
  }
  return (path) =>
    regex.some((re) => {
      // RegExp#test advances lastIndex on global/sticky regexes, so a reused
      // regex could report a different answer for the same path on the next
      // call. Resetting makes every classification start from index 0.
      re.lastIndex = 0;
      return re.test(path);
    });
}
22612
+
22597
22613
  // src/test-runners/detect/cache.ts
22598
22614
  function cachePath(workdir) {
22599
22615
  return `${workdir}/.nax/cache/test-patterns.json`;
@@ -24150,6 +24166,32 @@ var init_ac_parser = __esm(() => {
24150
24166
  });
24151
24167
 
24152
24168
  // src/test-runners/index.ts
24169
// Export namespace object for src/test-runners/index.ts.
// Each entry maps an export name to a zero-argument arrow function that
// returns the binding of the same name.
// NOTE(review): `__export` is defined outside this view — presumably it
// installs these functions as getters on `exports_test_runners`; confirm.
+ var exports_test_runners = {};
24170
+ __export(exports_test_runners, {
24171
+ resolveTestFilePatterns: () => resolveTestFilePatterns,
24172
+ resolveReviewExcludePatterns: () => resolveReviewExcludePatterns,
24173
+ parseTestOutput: () => parseTestOutput,
24174
+ parseTestFailures: () => parseTestFailures,
24175
+ parseBunTestOutput: () => parseBunTestOutput,
24176
+ isTestFileByPatterns: () => isTestFileByPatterns,
24177
+ isTestFile: () => isTestFile,
24178
+ globsToTestRegex: () => globsToTestRegex,
24179
+ globsToPathspec: () => globsToPathspec,
24180
+ formatFailureSummary: () => formatFailureSummary,
24181
+ findPackageDir: () => findPackageDir,
24182
+ extractTestDirs: () => extractTestDirs,
24183
+ detectTestFilePatterns: () => detectTestFilePatterns,
24184
+ detectManifestFrameworksFromPackageJson: () => detectManifestFrameworksFromPackageJson,
24185
+ detectFramework: () => detectFramework,
24186
+ createTestFileClassifier: () => createTestFileClassifier,
24187
+ buildTestFrameworkHint: () => buildTestFrameworkHint,
24188
+ analyzeTestExitCode: () => analyzeTestExitCode,
24189
+ _resolverDeps: () => _resolverDeps,
24190
+ DEFAULT_TS_DERIVE_SUFFIXES: () => DEFAULT_TS_DERIVE_SUFFIXES,
24191
+ DEFAULT_TEST_FILE_PATTERNS: () => DEFAULT_TEST_FILE_PATTERNS,
24192
+ DEFAULT_SEPARATED_TEST_DIRS: () => DEFAULT_SEPARATED_TEST_DIRS,
24193
+ DEFAULT_SCAN_TEST_DIRS: () => DEFAULT_SCAN_TEST_DIRS
24194
+ });
24153
24195
  var init_test_runners = __esm(() => {
24154
24196
  init_conventions();
24155
24197
  init_detect2();
@@ -29046,9 +29088,7 @@ function buildIsolationSection(roleOrMode, mode, testCommand) {
29046
29088
 
29047
29089
  ${buildTestFilterRule(testCmd)}`;
29048
29090
  if (role === "no-test") {
29049
- return `${header}
29050
-
29051
- isolation scope: Implement changes in src/ and other non-test directories. Do NOT create or modify any files in the test/ directory.${footer}`;
29091
+ return "";
29052
29092
  }
29053
29093
  if (role === "test-writer") {
29054
29094
  const m = mode ?? "strict";
@@ -29082,13 +29122,14 @@ isolation scope: You may modify both src/ and test/ files. Write failing tests F
29082
29122
  }
29083
29123
 
29084
29124
  // src/prompts/sections/role-task.ts
29085
- function buildRoleTaskSection(roleOrVariant, variant, testCommand, isolation, noTestJustification) {
29125
+ function buildRoleTaskSection(roleOrVariant, variant, testCommand, isolation, noTestJustification, storyId) {
29086
29126
  if ((roleOrVariant === "standard" || roleOrVariant === "lite") && variant === undefined) {
29087
- return buildRoleTaskSection("implementer", roleOrVariant, testCommand, isolation);
29127
+ return buildRoleTaskSection("implementer", roleOrVariant, testCommand, isolation, noTestJustification, storyId);
29088
29128
  }
29089
29129
  const role = roleOrVariant;
29090
29130
  const testCmd = testCommand ?? "";
29091
29131
  const frameworkHint = buildTestFrameworkHint(testCmd);
29132
+ const commitMsg = storyId ? `feat(${storyId}): <description>` : "feat: <description>";
29092
29133
  if (role === "no-test") {
29093
29134
  const justification = noTestJustification ?? "No behavioral changes \u2014 tests not required";
29094
29135
  return `# Role: Implementer (No Tests)
@@ -29099,7 +29140,7 @@ Instructions:
29099
29140
  - Implement the change as described in the story
29100
29141
  - Do NOT create or modify test files
29101
29142
  - Justification for no tests: ${justification}
29102
- - When done, stage and commit ALL changed files with: git commit -m 'feat: <description>'
29143
+ - When done, stage and commit ALL changed files with: git commit -m '${commitMsg}'
29103
29144
  - Goal: change implemented, no test files created or modified, all changes committed`;
29104
29145
  }
29105
29146
  if (role === "implementer") {
@@ -29107,62 +29148,78 @@ Instructions:
29107
29148
  if (v === "standard") {
29108
29149
  return `# Role: Implementer
29109
29150
 
29110
- Your task: make failing tests pass.
29151
+ Your task: make the failing tests pass by writing real source code.
29111
29152
 
29112
- Instructions:
29113
- - Implement source code in src/ to make tests pass
29114
- - Do NOT modify test files \u2014 three narrow lint/contract/sibling exceptions exist; see the escape valve section in the rectification prompt if you encounter one
29115
- - Run tests frequently to track progress
29116
- - When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
29117
- - Goal: all tests green, all changes committed`;
29153
+ Workflow:
29154
+ 1. Read every failing test in scope. The tests are the contract \u2014 understand what each one asserts before editing source.
29155
+ 2. Run the scoped test files once to establish the baseline (which fail, which pass, and why).
29156
+ 3. Implement source code in the package's source location (the project context names it).
29157
+ 4. After each meaningful change, re-run only the scoped test files \u2014 never the full suite.
29158
+ 5. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
29159
+
29160
+ Rules:
29161
+ - Do NOT modify test files. Three narrow exceptions: (a) a lint-only fix to a test, (b) a contract drift where the test imports a removed/renamed symbol, (c) a sibling test file rename forced by your source change. Name which exception applies in the commit body before editing any test file.
29162
+ - Goal: every acceptance criterion covered by at least one passing test; all changes committed.`;
29118
29163
  }
29119
29164
  return `# Role: Implementer (Lite)
29120
29165
 
29121
- Your task: Make the failing tests pass AND add any missing test coverage.
29166
+ Your task: make the failing tests pass AND fill any test coverage gaps an earlier session left.
29122
29167
 
29123
- Context: A test-writer session has already created test files with failing tests and possibly minimal stubs in src/. Your job is to make those tests pass by implementing the real logic.
29168
+ Context: A test-writer session has already created tests and may have added minimal stubs in the package's source location. Your job is to (a) replace stubs with real implementations and (b) confirm every AC has test coverage before committing.
29124
29169
 
29125
- Instructions:
29126
- - Start by running the existing tests to see what's failing
29127
- - Implement source code in src/ to make all failing tests pass
29128
- - You MAY add additional tests if you find gaps in coverage
29129
- - Replace any stubs with real implementations
29170
+ Workflow:
29171
+ 1. Run the existing scoped tests to see which fail and why (assertion failure vs import error).
29172
+ 2. Read each failing test. Note which ACs they cover and which they DON'T.
29173
+ 3. Replace stubs with real implementations. A stub is one of: a type-only declaration, a function returning a placeholder/throwing "not implemented", or a const placeholder.
29174
+ 4. If any AC has no test, add one before implementing \u2014 do not implement uncovered behavior.
29175
+ 5. Re-run only the scoped test files after each meaningful change.
29176
+ 6. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
29177
+
29178
+ Rules:
29179
+ - Three test-modification exceptions apply (lint-only fix, contract drift, sibling rename). Name the exception in the commit body before editing any test the test-writer wrote.
29130
29180
  - ${frameworkHint}
29131
- - When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
29132
- - Goal: all tests green, all criteria met, all changes committed`;
29181
+ - Goal: every AC has at least one passing test; all stubs replaced with real logic; all changes committed.`;
29133
29182
  }
29134
29183
  if (role === "test-writer") {
29135
29184
  if (isolation === "lite") {
29136
29185
  return `# Role: Test-Writer (Lite)
29137
29186
 
29138
- Your task: Write failing tests for the feature. You may create minimal stubs to support imports.
29187
+ Your task: write failing tests AND minimal stubs that let the tests compile.
29139
29188
 
29140
29189
  Context: You are session 1 of a multi-session workflow. An implementer will follow to make your tests pass.
29141
29190
 
29142
- Instructions:
29143
- - Create test files in test/ directory that cover all acceptance criteria
29144
- - Tests must fail initially (RED phase) \u2014 do NOT implement real logic
29191
+ Workflow:
29192
+ 1. Re-read the acceptance criteria above.
29193
+ 2. Create test files in the location the project uses for tests.
29194
+ 3. Create stubs in the package's source location so the tests can import and compile. A stub is one of: a type/interface declaration, a function returning a placeholder/throwing "not implemented" (no more than 3 lines of body), or a const placeholder. If a stub body needs real logic, you have crossed into implementer territory \u2014 stop.
29195
+ 4. For each AC: at least one success-path test and one boundary/failure-path test.
29196
+ 5. Run the new test files. Confirm tests compile (stubs work) AND fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
29197
+
29198
+ Rules:
29199
+ - Stubs are NOT implementations. The implementer in the next session writes real logic.
29200
+ - Each test name describes ONE behavior. Use AC IDs in test names when available (e.g. \`it('AC4: throws Division by zero when b === 0')\`).
29201
+ - Assert on observable outputs.
29145
29202
  - ${frameworkHint}
29146
- - You MAY read src/ files and import types/interfaces from them
29147
- - You MAY create minimal stubs in src/ (type definitions, empty functions) so tests can import and compile
29148
- - Write clear test names that document expected behavior
29149
- - Focus on behavior, not implementation details
29150
- - Goal: comprehensive failing test suite with compilable imports, ready for implementation`;
29203
+ - Goal: comprehensive failing test suite that compiles, with stubs \u22643 lines each, ready for implementation.`;
29151
29204
  }
29152
29205
  return `# Role: Test-Writer
29153
29206
 
29154
- Your task: Write comprehensive failing tests for the feature.
29207
+ Your task: write failing tests that pin down every acceptance criterion. An implementer will follow.
29155
29208
 
29156
- Context: You are session 1 of a multi-session workflow. An implementer will follow to make your tests pass.
29209
+ Context: You are session 1 of a multi-session workflow.
29157
29210
 
29158
- Instructions:
29159
- - Create test files in test/ directory that cover all acceptance criteria
29160
- - Tests must fail initially (RED phase) \u2014 the feature is not yet implemented
29161
- - Do NOT create or modify any files in src/
29211
+ Workflow:
29212
+ 1. Re-read the acceptance criteria above.
29213
+ 2. Create test files in the location the project uses for tests (project context names it).
29214
+ 3. For each AC: write at least one test for the success path AND at least one for a boundary/failure path (zero, empty, negative, missing, throws). ACs worded as "throws X" require a test asserting the throw.
29215
+ 4. Run the new test files. Confirm every test fails with an ASSERTION failure \u2014 NOT an import error, compile error, or runtime crash before assertion. A test that errors before reaching its assertion does not prove the behavior is missing.
29216
+
29217
+ Rules:
29218
+ - Do NOT create or modify any source files. Read source for types/interfaces only.
29219
+ - Each test name describes ONE behavior; each test asserts ONE behavior. When the AC has a number or ID, prefix the test name (e.g. \`it('AC4: throws Division by zero when b === 0')\`).
29220
+ - Assert on observable outputs (return values, thrown errors, file contents, log output, boundary state). Do not assert on private helpers, internal call counts, or implementation-level mocks unless the AC requires it.
29162
29221
  - ${frameworkHint}
29163
- - Write clear test names that document expected behavior
29164
- - Focus on behavior, not implementation details
29165
- - Goal: comprehensive failing test suite ready for implementation`;
29222
+ - Goal: every AC has at least one failing test that fails at assertion time and clearly documents what the implementer must build.`;
29166
29223
  }
29167
29224
  if (role === "verifier") {
29168
29225
  return `# Role: Verifier
@@ -29183,45 +29240,59 @@ Instructions:
29183
29240
  if (role === "single-session") {
29184
29241
  return `# Role: Single-Session
29185
29242
 
29186
- Your task: Write tests AND implement the feature in a single focused session.
29243
+ Your task: write tests AND implement the feature in one session.
29187
29244
 
29188
- Instructions:
29189
- - Phase 1: Write comprehensive tests (test/ directory)
29190
- - Phase 2: Implement to make all tests pass (src/ directory)
29245
+ Workflow:
29246
+ 1. Read the acceptance criteria. For each AC, plan one success-path test and one boundary/failure test.
29247
+ 2. Create test files in the location the project uses for tests. Cover every AC.
29248
+ 3. Run the tests to confirm they fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
29249
+ 4. Implement source code in the package's source location to make the tests pass.
29250
+ 5. After each meaningful change, re-run only the scoped test files \u2014 never the full suite.
29251
+ 6. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
29252
+
29253
+ Rules:
29254
+ - Each test name describes ONE behavior; use AC IDs when available.
29255
+ - Assert on observable outputs.
29191
29256
  - ${frameworkHint}
29192
- - Run tests frequently throughout implementation
29193
- - When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
29194
- - Goal: all tests passing, all changes committed, full story complete`;
29257
+ - Goal: every AC has at least one passing test; all changes committed.`;
29195
29258
  }
29196
29259
  if (role === "batch") {
29197
- const verifyCmdLine = testCmd ? ` - Verify all tests pass: ${testCmd}` : " - Verify all tests pass using your project's test command";
29260
+ const verifyCmdLine = testCmd ? ` - Re-run only the scoped test files after each meaningful change: ${testCmd}` : " - Re-run only the scoped test files after each meaningful change";
29198
29261
  return `# Role: Batch Implementer
29199
29262
 
29200
- Your task: Implement each story in order using TDD \u2014 write tests first, then implement, then verify.
29263
+ Your task: implement each story in order using TDD \u2014 write tests first, then implement, then commit per story.
29201
29264
 
29202
- Instructions:
29203
- - Process each story in order (Story 1, Story 2, \u2026)
29204
- - For each story:
29205
- - Write failing tests FIRST covering the acceptance criteria
29206
- - Run tests to confirm they fail (RED phase)
29207
- - Implement the minimum code to make tests pass (GREEN phase)
29208
- ${verifyCmdLine}
29209
- - Commit the story with its story ID in the commit message: git commit -m 'feat(<story-id>): <description>'
29265
+ Per-story workflow (RED \u2192 GREEN):
29266
+ 1. RED \u2014 write failing tests in the location the project uses for tests covering the story's ACs (success + boundary).
29267
+ 2. RED \u2014 run the new test files. Confirm assertion failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
29268
+ 3. GREEN \u2014 implement source code in the package's source location.
29269
+ 4. GREEN \u2014 re-run only the scoped test files after each meaningful change.
29270
+ 5. Commit the story with its ID: \`git commit -m 'feat(<story-id>): <description>'\`.
29271
+
29272
+ Rules:
29273
+ - One commit per story \u2014 never bundle stories.
29274
+ - Process stories in order (Story 1, Story 2, \u2026).
29275
+ - Each test name describes ONE behavior; use AC IDs when available.
29210
29276
  - ${frameworkHint}
29211
- - Do NOT commit multiple stories together \u2014 each story gets its own commit
29212
- - Goal: all stories implemented, all tests passing, each story committed with its story ID`;
29277
+ ${verifyCmdLine}
29278
+ - Goal: every story implemented with passing tests; one commit per story tagged with the story ID.`;
29213
29279
  }
29214
29280
  return `# Role: TDD-Simple
29215
29281
 
29216
- Your task: Write failing tests FIRST, then implement to make them pass.
29282
+ Your task: write failing tests FIRST, then implement in one session.
29217
29283
 
29218
- Instructions:
29219
- - RED phase: Write failing tests FIRST for the acceptance criteria
29220
- - RED phase: Run the tests to confirm they fail
29221
- - GREEN phase: Implement the minimum code to make tests pass
29222
- - REFACTOR phase: Refactor while keeping tests green
29223
- - When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
29224
- - Goal: all tests passing, feature complete, all changes committed`;
29284
+ Workflow (RED \u2192 GREEN \u2192 REFACTOR):
29285
+ 1. RED \u2014 write failing tests in the location the project uses for tests covering every AC (success + boundary).
29286
+ 2. RED \u2014 run the tests. Confirm they fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
29287
+ 3. GREEN \u2014 implement minimum source code in the package's source location to make the tests pass.
29288
+ 4. GREEN \u2014 re-run only the scoped test files after each meaningful change.
29289
+ 5. REFACTOR \u2014 clean up while keeping tests green. No new behavior; no expanded scope.
29290
+ 6. Stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
29291
+
29292
+ Rules:
29293
+ - Each test name describes ONE behavior; use AC IDs when available.
29294
+ - ${frameworkHint}
29295
+ - Goal: every AC covered by passing tests; refactor complete; all changes committed.`;
29225
29296
  }
29226
29297
  var init_role_task = __esm(() => {
29227
29298
  init_test_runners();
@@ -29433,11 +29504,12 @@ function buildSelfVerificationSection(role, input) {
29433
29504
  `- packageDir: \`${input.packageDir}\``,
29434
29505
  input.language ? `- language: \`${input.language}\`` : "- language: unknown",
29435
29506
  roleSpecificLine(role),
29436
- "- Scope: changed files from this turn (`CHANGED`) inside this package.",
29507
+ "- Scope: focus first on changed files from this turn (`CHANGED`) inside this package.",
29437
29508
  commandLine("lint", input.lintCommand),
29438
29509
  commandLine("typecheck", input.typecheckCommand),
29439
29510
  "- If a configured check fails on files in CHANGED: fix and rerun.",
29440
- "- If a configured check fails only on files outside CHANGED: do not edit those files; report them under PRE_EXISTING_FAILURES.",
29511
+ "- If a configured check fails outside CHANGED but the smallest package-local fix is required to satisfy this story's acceptance criteria, you MAY make that fix and rerun.",
29512
+ "- Otherwise, do not edit unrelated sibling files; report them under PRE_EXISTING_FAILURES.",
29441
29513
  "",
29442
29514
  "End your response with exactly this block:",
29443
29515
  "```text",
@@ -29452,6 +29524,97 @@ function buildSelfVerificationSection(role, input) {
29452
29524
  }
29453
29525
  var CHECK_HEADER = "# Self-Verification Gate";
29454
29526
 
29527
+ // src/prompts/sections/behavioral-guardrails.ts
29528
// Selects the "Behavioral Guardrails" prompt section for a role and level.
//
// role:   prompt role; "verifier" and "no-test" get no section.
// level:  guardrail level; "off" disables the section for every role.
// _variant, _isolation: accepted but not read in this function.
// Returns: null when the section is disabled; otherwise the result of the
//          test-writer builder (role "test-writer"), the combined builder
//          (roles "single-session", "tdd-simple", "batch"), or the
//          implementer builder (every other role).
+ function buildBehavioralGuardrailsSection(role, level, _variant, _isolation) {
29529
+ if (level === "off" || role === "verifier" || role === "no-test") {
29530
+ return null;
29531
+ }
29532
+ if (role === "test-writer") {
29533
+ return buildTestWriterGuardrails(level);
29534
+ }
29535
+ if (role === "single-session" || role === "tdd-simple" || role === "batch") {
29536
+ return buildCombinedGuardrails(level);
29537
+ }
29538
+ return buildImplementerGuardrails(level);
29539
+ }
29540
// Builds the guardrails section for the test-writer role.
// The section is a heading, a blank line, and two bullets (Simplicity,
// Surgical). When level is exactly "strict" a third "State Assumptions"
// bullet is appended. Lines are joined with a newline and returned as one
// string.
+ function buildTestWriterGuardrails(level) {
29541
+ const lines = [
29542
+ "# Behavioral Guardrails",
29543
+ "",
29544
+ "- Simplicity: write tests that cover the acceptance criteria. No tests for behaviors the story does not require.",
29545
+ "- Surgical: do not modify source files beyond the stub allowance in the Isolation Rules above. Do not add tests for unrelated existing code."
29546
+ ];
29547
+ if (level === "strict") {
29548
+ lines.push("- State Assumptions: when the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under `Assumptions:`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.");
29549
+ }
29550
+ return lines.join(`
29551
+ `);
29552
+ }
29553
// Builds the guardrails section used for roles that both write tests and
// implement source in one session.
// level "lite" returns a compact bullet list (Simplicity for tests and
// source, Surgical, Anti-cheat, Orphans, Commit). Any other level returns
// the long form: one "##" subsection per rule plus an extra
// "State Assumptions" subsection.
+ function buildCombinedGuardrails(level) {
29554
+ if (level === "lite") {
29555
+ return `# Behavioral Guardrails
29556
+
29557
+ - Simplicity (tests): write tests that cover the acceptance criteria only. No tests for behaviors the story does not require.
29558
+ - Simplicity (source): write the minimum source code that makes the tests pass. No speculative abstractions, configurability, or error handling for scenarios that cannot occur.
29559
+ - Surgical: every changed line must trace to the story. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires.
29560
+ - Anti-cheat: do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run.
29561
+ - Orphans: remove imports/variables/helpers that YOUR changes made unused. Do not delete pre-existing dead code.
29562
+ - Commit: include the story ID when known \u2014 \`feat(<story-id>): <description>\`.`;
29563
+ }
29564
+ return `# Behavioral Guardrails
29565
+
29566
+ ## Simplicity (Tests)
29567
+ Write tests that cover the acceptance criteria only. No tests for behaviors the story does not require. Every test you add is a constraint the implementer must satisfy \u2014 do not over-constrain with speculative behavior.
29568
+
29569
+ ## Simplicity (Source)
29570
+ Write the minimum source code that makes the tests pass. Every line you add is a line someone else must read, understand, and maintain. Do not add speculative abstractions, configurability, or error handling for scenarios that cannot occur given the story's constraints.
29571
+
29572
+ ## Surgical
29573
+ Every changed line must trace directly to a story requirement or a failing test. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires. Reviewers will flag any change that cannot be linked to a specific requirement.
29574
+
29575
+ ## Anti-cheat
29576
+ Do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run. A green test suite achieved by weakening tests is not a passing implementation \u2014 it is a failing one with hidden evidence.
29577
+
29578
+ ## Orphans
29579
+ Remove imports, variables, and helpers that YOUR changes made unused. Do not delete pre-existing dead code that was already there before your changes.
29580
+
29581
+ ## Commit
29582
+ Include the story ID when known \u2014 \`feat(<story-id>): <description>\`.
29583
+
29584
+ ## State Assumptions
29585
+ When the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under \`Assumptions:\`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.`;
29586
+ }
29587
// Builds the guardrails section for implementer-style roles.
// level "lite" returns a compact bullet list (Simplicity, Surgical,
// Anti-cheat, Orphans, Commit). Any other level returns the long form: one
// "##" subsection per rule plus an extra "State Assumptions" subsection.
+ function buildImplementerGuardrails(level) {
29588
+ if (level === "lite") {
29589
+ return `# Behavioral Guardrails
29590
+
29591
+ - Simplicity: write the minimum code that makes the tests pass. No speculative abstractions, configurability, or error handling for scenarios that cannot occur.
29592
+ - Surgical: every changed line must trace to the story. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires.
29593
+ - Anti-cheat: do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run.
29594
+ - Orphans: remove imports/variables/helpers that YOUR changes made unused. Do not delete pre-existing dead code.
29595
+ - Commit: include the story ID when known \u2014 \`feat(<story-id>): <description>\`.`;
29596
+ }
29597
+ return `# Behavioral Guardrails
29598
+
29599
+ ## Simplicity
29600
+ Write the minimum code that makes the tests pass. Every line you add is a line someone else must read, understand, and maintain. Do not add speculative abstractions, configurability, or error handling for scenarios that cannot occur given the story's constraints. If it isn't required by a test or acceptance criterion, don't write it.
29601
+
29602
+ ## Surgical
29603
+ Every changed line must trace directly to a story requirement or a failing test. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires. Reviewers will flag any change that cannot be linked to a specific requirement.
29604
+
29605
+ ## Anti-cheat
29606
+ Do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run. A green test suite achieved by weakening tests is not a passing implementation \u2014 it is a failing one with hidden evidence.
29607
+
29608
+ ## Orphans
29609
+ Remove imports, variables, and helpers that YOUR changes made unused. Do not delete pre-existing dead code that was already there before your changes.
29610
+
29611
+ ## Commit
29612
+ Include the story ID when known \u2014 \`feat(<story-id>): <description>\`.
29613
+
29614
+ ## State Assumptions
29615
+ When the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under \`Assumptions:\`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.`;
29616
+ }
29617
+
29455
29618
  // src/prompts/sections/index.ts
29456
29619
  var init_sections2 = __esm(() => {
29457
29620
  init_hermetic();
@@ -29609,6 +29772,12 @@ class TddPromptBuilder {
29609
29772
  if (hermeticSection)
29610
29773
  acc.add(this.s("hermetic", hermeticSection));
29611
29774
  }
29775
+ const guardrailLevel = this.loaderConfig_?.prompts?.behavioralGuardrails ?? "lite";
29776
+ const guardrailVariant = this.options.variant;
29777
+ const guardrailIsolation = this.options.isolation;
29778
+ const guardrails = buildBehavioralGuardrailsSection(this.role, guardrailLevel, guardrailVariant, guardrailIsolation);
29779
+ if (guardrails)
29780
+ acc.add(this.s("guardrails", guardrails));
29612
29781
  if (this.role !== "verifier") {
29613
29782
  const selfVerify = buildSelfVerificationSection(this.role, this.selfVerification_);
29614
29783
  if (selfVerify)
@@ -29645,7 +29814,7 @@ class TddPromptBuilder {
29645
29814
  }
29646
29815
  const variant = this.options.variant;
29647
29816
  const isolation = this.options.isolation;
29648
- return buildRoleTaskSection(this.role, variant, this.testCommand_, isolation, this.noTestJustification_);
29817
+ return buildRoleTaskSection(this.role, variant, this.testCommand_, isolation, this.noTestJustification_, this.story_?.id);
29649
29818
  }
29650
29819
  }
29651
29820
  var init_tdd_builder = __esm(() => {
@@ -34924,31 +35093,101 @@ var init_debate_plan = __esm(() => {
34924
35093
  };
34925
35094
  });
34926
35095
 
35096
+ // src/tdd/isolation.ts
35097
+ function isSourceFile(filePath) {
35098
+ return SRC_PATTERNS.some((pattern) => pattern.test(filePath));
35099
+ }
35100
+ async function getChangedFiles(workdir, fromRef = "HEAD") {
35101
+ const proc = _isolationDeps.spawn(["git", "diff", "--name-only", fromRef], {
35102
+ cwd: workdir,
35103
+ stdout: "pipe",
35104
+ stderr: "pipe"
35105
+ });
35106
+ const output = await Bun.readableStreamToText(proc.stdout);
35107
+ await proc.exited;
35108
+ return output.trim().split(`
35109
+ `).filter(Boolean);
35110
+ }
35111
+ function matchesAllowedPath(filePath, allowedPaths) {
35112
+ return allowedPaths.some((pattern) => {
35113
+ const regexPattern = pattern.replace(/\*\*/g, ".*").replace(/\*/g, "[^/]*").replace(/\//g, "\\/");
35114
+ const regex = new RegExp(`^${regexPattern}$`);
35115
+ return regex.test(filePath);
35116
+ });
35117
+ }
35118
+ async function verifyTestWriterIsolation(workdir, beforeRef, allowedPaths = ["src/index.ts", "src/**/index.ts"], testFilePatterns = DEFAULT_TEST_FILE_PATTERNS) {
35119
+ const changed = await getChangedFiles(workdir, beforeRef);
35120
+ const sourceFiles = changed.filter((f) => isSourceFile(f) && !isTestFileByPatterns(f, testFilePatterns));
35121
+ const softViolations = [];
35122
+ const violations = [];
35123
+ for (const file3 of sourceFiles) {
35124
+ if (matchesAllowedPath(file3, allowedPaths)) {
35125
+ softViolations.push(file3);
35126
+ } else {
35127
+ violations.push(file3);
35128
+ }
35129
+ }
35130
+ return {
35131
+ passed: violations.length === 0,
35132
+ violations,
35133
+ softViolations,
35134
+ description: "Test writer should only modify test files, not source files"
35135
+ };
35136
+ }
35137
+ async function verifyImplementerIsolation(workdir, beforeRef, testFilePatterns = DEFAULT_TEST_FILE_PATTERNS) {
35138
+ const changed = await getChangedFiles(workdir, beforeRef);
35139
+ const testFiles = changed.filter((f) => isTestFileByPatterns(f, testFilePatterns));
35140
+ if (testFiles.length > 0) {
35141
+ return {
35142
+ passed: true,
35143
+ violations: [],
35144
+ warnings: testFiles,
35145
+ description: "Implementer modified test files (warning: should be minimal fixes only)"
35146
+ };
35147
+ }
35148
+ return {
35149
+ passed: true,
35150
+ violations: [],
35151
+ description: "Implementer should not modify test files"
35152
+ };
35153
+ }
35154
+ var _isolationDeps, SRC_PATTERNS;
35155
+ var init_isolation = __esm(() => {
35156
+ init_test_runners();
35157
+ init_bun_deps();
35158
+ _isolationDeps = { spawn };
35159
+ SRC_PATTERNS = [/^src\//, /^lib\//, /^packages\//];
35160
+ });
35161
+
34927
35162
  // src/operations/_session-output.ts
34928
35163
  function parseSessionJsonOutput(output) {
34929
35164
  if (!output)
34930
35165
  return EMPTY;
34931
35166
  try {
34932
35167
  const v = JSON.parse(output);
34933
- if (v === null || typeof v !== "object" || typeof v.success !== "boolean")
34934
- return EMPTY;
35168
+ if (v === null || typeof v !== "object" || typeof v.success !== "boolean") {
35169
+ return { ...EMPTY, output };
35170
+ }
34935
35171
  return {
34936
35172
  success: v.success,
34937
- filesChanged: Array.isArray(v.filesChanged) ? v.filesChanged : []
35173
+ filesChanged: Array.isArray(v.filesChanged) ? v.filesChanged : [],
35174
+ output,
35175
+ parsed: true
34938
35176
  };
34939
35177
  } catch {
34940
- return EMPTY;
35178
+ return { ...EMPTY, output };
34941
35179
  }
34942
35180
  }
34943
35181
  var EMPTY;
34944
35182
  var init__session_output = __esm(() => {
34945
- EMPTY = { success: false, filesChanged: [] };
35183
+ EMPTY = { success: false, filesChanged: [], output: "", parsed: false };
34946
35184
  });
34947
35185
 
34948
35186
  // src/operations/write-test.ts
34949
35187
  var testWriterOp;
34950
35188
  var init_write_test = __esm(() => {
34951
35189
  init_config();
35190
+ init_isolation();
34952
35191
  init__session_output();
34953
35192
  testWriterOp = {
34954
35193
  kind: "run",
@@ -34957,6 +35196,12 @@ var init_write_test = __esm(() => {
34957
35196
  session: { role: "test-writer", lifetime: "fresh" },
34958
35197
  config: tddConfigSelector,
34959
35198
  build(input, _ctx) {
35199
+ if (input.promptMarkdown?.trim()) {
35200
+ return {
35201
+ role: { id: "role", content: "", overridable: false },
35202
+ task: { id: "task", content: input.promptMarkdown, overridable: false }
35203
+ };
35204
+ }
34960
35205
  const context = [input.contextMarkdown, input.featureContextMarkdown].filter(Boolean).join(`
34961
35206
 
34962
35207
  `);
@@ -34971,24 +35216,66 @@ var init_write_test = __esm(() => {
34971
35216
  };
34972
35217
  },
34973
35218
  parse(output, _input, _ctx) {
35219
+ if (!output)
35220
+ return { success: false, filesChanged: [], estimatedCostUsd: 0, durationMs: 0, output: "" };
35221
+ if (output.startsWith('Agent "')) {
35222
+ return { success: false, filesChanged: [], estimatedCostUsd: 0, durationMs: 0, output };
35223
+ }
34974
35224
  const envelope = parseSessionJsonOutput(output);
34975
- return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
35225
+ return {
35226
+ success: envelope.parsed ? envelope.success : true,
35227
+ filesChanged: envelope.filesChanged,
35228
+ estimatedCostUsd: 0,
35229
+ durationMs: 0,
35230
+ output: envelope.output
35231
+ };
35232
+ },
35233
+ async verify(parsed, input, ctx) {
35234
+ if (!input.beforeRef)
35235
+ return parsed;
35236
+ const allowedPaths = ctx.config.tdd?.testWriterAllowedPaths ?? ["src/index.ts", "src/**/index.ts"];
35237
+ const testFilePatterns = typeof ctx.packageView.config.execution?.smartTestRunner === "object" && ctx.packageView.config.execution.smartTestRunner !== null ? ctx.packageView.config.execution.smartTestRunner.testFilePatterns : undefined;
35238
+ const isolation = await verifyTestWriterIsolation(ctx.packageView.packageDir, input.beforeRef, allowedPaths, testFilePatterns);
35239
+ return { ...parsed, isolation };
34976
35240
  }
34977
35241
  };
34978
35242
  });
34979
35243
 
35244
+ // src/operations/execution-gates.ts
35245
+ function shouldRunReview(config2) {
35246
+ return config2.review?.enabled === true;
35247
+ }
35248
+ function shouldRunRectification(config2) {
35249
+ return config2.execution?.rectification?.enabled === true;
35250
+ }
35251
+ function shouldKeepSessionOpen(config2, role) {
35252
+ return role === "implementer" && (shouldRunReview(config2) || shouldRunRectification(config2));
35253
+ }
35254
+ var init_execution_gates = __esm(() => {
35255
+ init_config();
35256
+ });
35257
+
34980
35258
  // src/operations/implement.ts
34981
35259
  var implementerOp;
34982
35260
  var init_implement = __esm(() => {
34983
35261
  init_config();
35262
+ init_isolation();
34984
35263
  init__session_output();
35264
+ init_execution_gates();
34985
35265
  implementerOp = {
34986
35266
  kind: "run",
34987
35267
  name: "implementer",
34988
35268
  stage: "run",
34989
35269
  session: { role: "implementer", lifetime: "warm" },
34990
35270
  config: tddConfigSelector,
35271
+ keepOpen: (_input, ctx) => shouldKeepSessionOpen(ctx.config, "implementer"),
34991
35272
  build(input, _ctx) {
35273
+ if (input.promptMarkdown?.trim()) {
35274
+ return {
35275
+ role: { id: "role", content: "", overridable: false },
35276
+ task: { id: "task", content: input.promptMarkdown, overridable: false }
35277
+ };
35278
+ }
34992
35279
  const context = [input.contextMarkdown, input.featureContextMarkdown].filter(Boolean).join(`
34993
35280
 
34994
35281
  `);
@@ -35003,8 +35290,26 @@ var init_implement = __esm(() => {
35003
35290
  };
35004
35291
  },
35005
35292
  parse(output, _input, _ctx) {
35293
+ if (!output)
35294
+ return { success: false, filesChanged: [], estimatedCostUsd: 0, durationMs: 0, output: "" };
35295
+ if (output.startsWith('Agent "')) {
35296
+ return { success: false, filesChanged: [], estimatedCostUsd: 0, durationMs: 0, output };
35297
+ }
35006
35298
  const envelope = parseSessionJsonOutput(output);
35007
- return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
35299
+ return {
35300
+ success: envelope.parsed ? envelope.success : true,
35301
+ filesChanged: envelope.filesChanged,
35302
+ estimatedCostUsd: 0,
35303
+ durationMs: 0,
35304
+ output: envelope.output
35305
+ };
35306
+ },
35307
+ async verify(parsed, input, ctx) {
35308
+ if (!input.beforeRef)
35309
+ return parsed;
35310
+ const testFilePatterns = typeof ctx.packageView.config.execution?.smartTestRunner === "object" && ctx.packageView.config.execution.smartTestRunner !== null ? ctx.packageView.config.execution.smartTestRunner.testFilePatterns : undefined;
35311
+ const isolation = await verifyImplementerIsolation(ctx.packageView.packageDir, input.beforeRef, testFilePatterns);
35312
+ return { ...parsed, isolation };
35008
35313
  }
35009
35314
  };
35010
35315
  });
@@ -35264,9 +35569,16 @@ var init_verdict = __esm(() => {
35264
35569
  });
35265
35570
 
35266
35571
  // src/operations/verify.ts
35572
+ async function runVerifierIsolation(beforeRef, ctx) {
35573
+ if (!beforeRef)
35574
+ return;
35575
+ const testFilePatterns = typeof ctx.packageView.config.execution?.smartTestRunner === "object" && ctx.packageView.config.execution.smartTestRunner !== null ? ctx.packageView.config.execution.smartTestRunner.testFilePatterns : undefined;
35576
+ return verifyImplementerIsolation(ctx.packageView.packageDir, beforeRef, testFilePatterns);
35577
+ }
35267
35578
  var verifierOp;
35268
35579
  var init_verify = __esm(() => {
35269
35580
  init_config();
35581
+ init_isolation();
35270
35582
  init_verdict();
35271
35583
  init__session_output();
35272
35584
  verifierOp = {
@@ -35276,6 +35588,12 @@ var init_verify = __esm(() => {
35276
35588
  session: { role: "verifier", lifetime: "fresh" },
35277
35589
  config: tddConfigSelector,
35278
35590
  build(input, _ctx) {
35591
+ if (input.promptMarkdown?.trim()) {
35592
+ return {
35593
+ role: { id: "role", content: "", overridable: false },
35594
+ task: { id: "task", content: input.promptMarkdown, overridable: false }
35595
+ };
35596
+ }
35279
35597
  return {
35280
35598
  role: { id: "role", content: "", overridable: false },
35281
35599
  task: {
@@ -35289,10 +35607,13 @@ var init_verify = __esm(() => {
35289
35607
  const envelope = parseSessionJsonOutput(output);
35290
35608
  return { ...envelope, estimatedCostUsd: 0, durationMs: 0 };
35291
35609
  },
35292
- async verify(parsed, _input, _ctx) {
35293
- return parsed.success ? parsed : null;
35610
+ async verify(parsed, input, ctx) {
35611
+ if (!parsed.success)
35612
+ return null;
35613
+ const isolation = await runVerifierIsolation(input.beforeRef, ctx);
35614
+ return isolation ? { ...parsed, isolation } : parsed;
35294
35615
  },
35295
- async recover(_input, verifyCtx) {
35616
+ async recover(input, verifyCtx) {
35296
35617
  const packageDir = verifyCtx.packageView.packageDir;
35297
35618
  try {
35298
35619
  const verdict = await readVerdict(packageDir);
@@ -35300,13 +35621,16 @@ var init_verify = __esm(() => {
35300
35621
  return null;
35301
35622
  const testsAllPassing = verdict.tests.allPassing === true;
35302
35623
  const categorization = categorizeVerdict(verdict, testsAllPassing);
35624
+ const isolation = await runVerifierIsolation(input.beforeRef, verifyCtx);
35303
35625
  return {
35304
35626
  success: categorization.success,
35305
35627
  filesChanged: [],
35306
35628
  estimatedCostUsd: 0,
35307
35629
  durationMs: 0,
35630
+ output: "",
35308
35631
  ...categorization.failureCategory && { failureCategory: categorization.failureCategory },
35309
- ...categorization.reviewReason && { reviewReason: categorization.reviewReason }
35632
+ ...categorization.reviewReason && { reviewReason: categorization.reviewReason },
35633
+ ...isolation && { isolation }
35310
35634
  };
35311
35635
  } finally {
35312
35636
  await cleanupVerdict(packageDir);
@@ -35793,14 +36117,6 @@ var init_plan_critic_llm = __esm(() => {
35793
36117
  };
35794
36118
  });
35795
36119
 
35796
- // src/operations/execution-gates.ts
35797
- function shouldRunRectification(config2) {
35798
- return config2.execution?.rectification?.enabled === true;
35799
- }
35800
- var init_execution_gates = __esm(() => {
35801
- init_config();
35802
- });
35803
-
35804
36120
  // src/context/greenfield.ts
35805
36121
  import { readdir as readdir2 } from "fs/promises";
35806
36122
  import { join as join19 } from "path";
@@ -35930,11 +36246,6 @@ function makePassResult(storyId, strategy, opts = {}) {
35930
36246
  };
35931
36247
  }
35932
36248
 
35933
- // src/verification/parser.ts
35934
- var init_parser2 = __esm(() => {
35935
- init_test_runners();
35936
- });
35937
-
35938
36249
  // src/utils/process-kill.ts
35939
36250
  function killProcessGroup(pid, signal) {
35940
36251
  try {
@@ -36183,9 +36494,9 @@ async function regression(options) {
36183
36494
  }
36184
36495
  var _regressionRunnerDeps;
36185
36496
  var init_runners = __esm(() => {
36497
+ init_test_runners();
36186
36498
  init_bun_deps();
36187
36499
  init_executor();
36188
- init_parser2();
36189
36500
  _regressionRunnerDeps = {
36190
36501
  sleep
36191
36502
  };
@@ -36481,8 +36792,8 @@ class ScopedStrategy {
36481
36792
  var DEFAULT_SMART_RUNNER_CONFIG, _scopedDeps;
36482
36793
  var init_scoped = __esm(() => {
36483
36794
  init_logger2();
36484
- init_conventions();
36485
- init_parser2();
36795
+ init_test_runners();
36796
+ init_test_runners();
36486
36797
  init_runners();
36487
36798
  init_smart_runner();
36488
36799
  DEFAULT_SMART_RUNNER_CONFIG = {
@@ -36928,7 +37239,8 @@ ${debateResult.output}`;
36928
37239
  story,
36929
37240
  failures: failureRecords,
36930
37241
  testCommand,
36931
- conventions: true
37242
+ conventions: true,
37243
+ guardrailLevel: config2.prompts.behavioralGuardrails
36932
37244
  });
36933
37245
  const rectPromise = Promise.resolve(rectPrompt);
36934
37246
  return (async () => {
@@ -37140,8 +37452,7 @@ var init_rectification_loop = __esm(() => {
37140
37452
  init_prompts();
37141
37453
  init_session_name();
37142
37454
  init_session_keeper();
37143
- init_parser2();
37144
- init_parser2();
37455
+ init_test_runners();
37145
37456
  init_runners();
37146
37457
  _rectificationDeps = {
37147
37458
  agentManager: undefined,
@@ -37158,19 +37469,15 @@ __export(exports_verification, {
37158
37469
  shouldRetryRectification: () => shouldRetryRectification,
37159
37470
  scoped: () => scoped,
37160
37471
  runRetryLoop: () => runRetryLoop,
37472
+ runRectificationLoop: () => runRectificationLoop,
37161
37473
  regression: () => regression,
37162
- parseTestOutput: () => parseTestOutput,
37163
- parseBunTestOutput: () => parseBunTestOutput,
37164
37474
  normalizeEnvironment: () => normalizeEnvironment,
37165
37475
  fullSuite: () => fullSuite,
37166
- formatFailureSummary: () => formatFailureSummary,
37167
37476
  executeWithTimeout: () => executeWithTimeout,
37168
- detectFramework: () => detectFramework,
37169
37477
  buildTestCommand: () => buildTestCommand,
37170
37478
  buildProgressivePromptPreamble: () => buildProgressivePromptPreamble,
37171
37479
  appendOpenHandlesFlag: () => appendOpenHandlesFlag,
37172
37480
  appendForceExitFlag: () => appendForceExitFlag,
37173
- analyzeTestExitCode: () => analyzeTestExitCode,
37174
37481
  _regressionRunnerDeps: () => _regressionRunnerDeps,
37175
37482
  _rectificationDeps: () => _rectificationDeps,
37176
37483
  _executorDeps: () => _executorDeps
@@ -37178,7 +37485,6 @@ __export(exports_verification, {
37178
37485
  var init_verification = __esm(() => {
37179
37486
  init_rectification_loop();
37180
37487
  init_executor();
37181
- init_parser2();
37182
37488
  init_runners();
37183
37489
  });
37184
37490
 
@@ -37207,7 +37513,8 @@ var init_full_suite_gate = __esm(() => {
37207
37513
  return { config: config2, testCmd: resolvedTestCmd, fullSuiteTimeout };
37208
37514
  },
37209
37515
  runTests: async (input, gateCtx) => {
37210
- const { executeWithTimeout: executeWithTimeout2, parseTestOutput: parseTestOutput2 } = await Promise.resolve().then(() => (init_verification(), exports_verification));
37516
+ const { executeWithTimeout: executeWithTimeout2 } = await Promise.resolve().then(() => (init_verification(), exports_verification));
37517
+ const { parseTestOutput: parseTestOutput2 } = await Promise.resolve().then(() => (init_test_runners(), exports_test_runners));
37211
37518
  const result = await executeWithTimeout2(gateCtx.testCmd, gateCtx.fullSuiteTimeout, undefined, {
37212
37519
  cwd: input.workdir
37213
37520
  });
@@ -39550,7 +39857,7 @@ var init_verdict_writer = __esm(() => {
39550
39857
  // src/review/orchestrator.ts
39551
39858
  import { join as join25 } from "path";
39552
39859
  var {spawn: spawn4 } = globalThis.Bun;
39553
- async function getChangedFiles(workdir, baseRef) {
39860
+ async function getChangedFiles2(workdir, baseRef) {
39554
39861
  try {
39555
39862
  const diffArgs = ["diff", "--name-only"];
39556
39863
  const [stagedProc, unstagedProc, baseProc] = [
@@ -39879,7 +40186,7 @@ class ReviewOrchestrator {
39879
40186
  const reviewers = plugins.getReviewers();
39880
40187
  if (reviewers.length > 0) {
39881
40188
  const baseRef = storyGitRef ?? executionConfig?.storyGitRef;
39882
- const changedFiles = await getChangedFiles(workdir, baseRef);
40189
+ const changedFiles = await getChangedFiles2(workdir, baseRef);
39883
40190
  const repoRoot = projectDir ?? workdir;
39884
40191
  const packageDir = scopePrefix ? join25(repoRoot, scopePrefix) : undefined;
39885
40192
  const ignoreMatchers = naxIgnoreIndex?.getMatchers(packageDir) ?? await resolveNaxIgnorePatterns(repoRoot, packageDir);
@@ -40071,6 +40378,17 @@ var init_review = __esm(() => {
40071
40378
  });
40072
40379
 
40073
40380
  // src/prompts/builders/rectifier-builder-helpers.ts
40381
+ function escapeHatchFor(story) {
40382
+ const isTdd = THREE_SESSION_STRATEGIES.has(story.routing?.testStrategy ?? "");
40383
+ return isTdd ? CONTRADICTION_ESCAPE_HATCH : CONTRADICTION_ESCAPE_HATCH.replace(EXCEPTION_4_MOCK_HANDOFF, "");
40384
+ }
40385
+ function noTestIsolationBlock(story) {
40386
+ if (story.routing?.testStrategy !== "no-test")
40387
+ return "";
40388
+ return `
40389
+
40390
+ ${buildIsolationSection("no-test")}`;
40391
+ }
40074
40392
  function formatCheckErrors(checks3) {
40075
40393
  return checks3.map((c) => `## ${c.check} errors (exit code ${c.exitCode})
40076
40394
  \`\`\`
@@ -40100,7 +40418,7 @@ ${errors3}
40100
40418
 
40101
40419
  Do NOT change test files or test behavior \u2014 see the three narrow exceptions appended below.
40102
40420
  Do NOT add new features \u2014 only fix valid issues.
40103
- Commit your fixes when done.${scopeConstraint}${CONTRADICTION_ESCAPE_HATCH}`;
40421
+ Commit your fixes when done.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
40104
40422
  }
40105
40423
  function adversarialRectification(checks3, story, scopeConstraint) {
40106
40424
  const errors3 = formatCheckErrors(checks3);
@@ -40122,7 +40440,7 @@ ${errors3}
40122
40440
  3. Do NOT add keys, functions, or imports that already exist \u2014 check first
40123
40441
 
40124
40442
  Do NOT add new features \u2014 only fix valid issues.
40125
- Commit your fixes when done.${scopeConstraint}${CONTRADICTION_ESCAPE_HATCH}`;
40443
+ Commit your fixes when done.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
40126
40444
  }
40127
40445
  function combinedLlmRectification(semanticChecks, adversarialChecks, story, scopeConstraint) {
40128
40446
  const semanticErrors = formatCheckErrors(semanticChecks);
@@ -40148,7 +40466,7 @@ ${adversarialErrors}
40148
40466
  3. Do NOT add keys, functions, or imports that already exist \u2014 check first
40149
40467
 
40150
40468
  Do NOT add new features \u2014 only fix valid issues.
40151
- Commit your fixes when done.${scopeConstraint}${CONTRADICTION_ESCAPE_HATCH}`;
40469
+ Commit your fixes when done.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
40152
40470
  }
40153
40471
  function mechanicalRectification(checks3, story, scopeConstraint) {
40154
40472
  const errors3 = formatCheckErrors(checks3);
@@ -40162,7 +40480,7 @@ ${errors3}
40162
40480
 
40163
40481
  Fix all errors listed above that are within this story's scope \u2014 see the three narrow exceptions appended below for sibling-story spillover. Do NOT change test files or test behavior except via those exceptions.
40164
40482
  Do NOT add new features \u2014 only fix the quality check errors.
40165
- After fixing, re-run the failing check(s) to verify they pass, then commit your changes.${scopeConstraint}${CONTRADICTION_ESCAPE_HATCH}`;
40483
+ After fixing, re-run the failing check(s) to verify they pass, then commit your changes.${scopeConstraint}${noTestIsolationBlock(story)}${escapeHatchFor(story)}`;
40166
40484
  }
40167
40485
  var CONTRADICTION_ESCAPE_HATCH = `
40168
40486
  If two findings in this list contradict each other and you cannot satisfy both, do not guess.
@@ -40210,10 +40528,11 @@ TEST_AFTER: <corrected call line>
40210
40528
  Do NOT use this exception to change test logic, assertions, or mock setup \u2014 only call
40211
40529
  signatures that directly contradict a quoted PRD interface.
40212
40530
 
40213
- ### Exception 3 \u2014 Sibling-story lint spillover
40531
+ ### Exception 3 \u2014 Unrelated sibling spillover
40214
40532
 
40215
- When a lint or typecheck error is in a file you did NOT create or modify in this turn,
40216
- do NOT edit that file. Instead declare:
40533
+ When a lint or typecheck error is outside this story's intended scope, do NOT edit that
40534
+ file. If the smallest package-local fix is required to satisfy this story's acceptance
40535
+ criteria, you MAY make that fix instead. Otherwise declare:
40217
40536
  \`\`\`
40218
40537
  TEST_EDIT_REASON: sibling_scope
40219
40538
  SIBLING_FILE: <file path>
@@ -40237,7 +40556,28 @@ REASON: <one paragraph: which mock is wrong vs which dispatch the new code uses>
40237
40556
  Rules:
40238
40557
  - Do NOT make any edits yourself; the test-writer will fulfill.
40239
40558
  - Do NOT also emit \`UNRESOLVED:\` in the same turn \u2014 this declaration IS the handoff.
40240
- - FILES must list real test files. Each path must exist and be a test file.`;
40559
+ - FILES must list real test files. Each path must exist and be a test file.`, EXCEPTION_4_MOCK_HANDOFF = `
40560
+ ### Exception 4 \u2014 Mock-structure handoff
40561
+
40562
+ Use ONLY when the only path to satisfy the ACs requires a structural test rewrite
40563
+ that does NOT fit Exception 2. Examples: mocks reference primitives the new code
40564
+ bypasses; assertion topology must change to match a new dispatch shape.
40565
+
40566
+ Declare with:
40567
+ \`\`\`
40568
+ TEST_EDIT_REASON: mock_structure
40569
+ FILES: <comma-separated test file paths>
40570
+ REASON: <one paragraph: which mock is wrong vs which dispatch the new code uses>
40571
+ \`\`\`
40572
+
40573
+ Rules:
40574
+ - Do NOT make any edits yourself; the test-writer will fulfill.
40575
+ - Do NOT also emit \`UNRESOLVED:\` in the same turn \u2014 this declaration IS the handoff.
40576
+ - FILES must list real test files. Each path must exist and be a test file.`, THREE_SESSION_STRATEGIES;
40577
+ var init_rectifier_builder_helpers = __esm(() => {
40578
+ init_sections2();
40579
+ THREE_SESSION_STRATEGIES = new Set(["three-session-tdd", "three-session-tdd-lite"]);
40580
+ });
40241
40581
 
40242
40582
  // src/prompts/builders/rectifier-builder.ts
40243
40583
  function priorityForCheck(checkName) {
@@ -40317,7 +40657,7 @@ function renderPrioritizedFailures(failedChecks, opts) {
40317
40657
  }
40318
40658
 
40319
40659
  class RectifierPromptBuilder {
40320
- static firstAttemptDelta(failedChecks, maxAttempts) {
40660
+ static firstAttemptDelta(failedChecks, maxAttempts, guardrailLevel) {
40321
40661
  const parts = [];
40322
40662
  const attemptWord = maxAttempts === 1 ? "1 attempt" : `${maxAttempts} attempts`;
40323
40663
  parts.push(`Review failed after your implementation. Fix the following issues (${attemptWord} available before escalation):
@@ -40326,10 +40666,16 @@ class RectifierPromptBuilder {
40326
40666
  parts.push(`
40327
40667
  Fix in priority order. After fixing each priority, re-run the failing check(s) at that level to verify they pass before moving on. Do NOT change test files or test behavior \u2014 see the three narrow exceptions appended below. Commit your changes when all checks pass.`);
40328
40668
  parts.push(CONTRADICTION_ESCAPE_HATCH);
40669
+ const guardrails = buildBehavioralGuardrailsSection("implementer", guardrailLevel ?? "lite");
40670
+ if (guardrails) {
40671
+ parts.push(`
40672
+
40673
+ ${guardrails}`);
40674
+ }
40329
40675
  return parts.join(`
40330
40676
  `);
40331
40677
  }
40332
- static continuation(failedChecks, attempt, rethinkAtAttempt, urgencyAtAttempt) {
40678
+ static continuation(failedChecks, attempt, rethinkAtAttempt, urgencyAtAttempt, guardrailLevel) {
40333
40679
  const parts = [];
40334
40680
  parts.push(`Your previous fix attempt did not resolve all issues. Here are the remaining failures:
40335
40681
  `);
@@ -40343,6 +40689,12 @@ Fix in priority order. After fixing each priority, re-run the failing check(s) a
40343
40689
  parts.push("\n**URGENT: This is your final attempt.** If you cannot fix all issues, emit `UNRESOLVED: <reason>` to escalate.\n");
40344
40690
  }
40345
40691
  parts.push(CONTRADICTION_ESCAPE_HATCH);
40692
+ const guardrails = buildBehavioralGuardrailsSection("implementer", guardrailLevel ?? "lite");
40693
+ if (guardrails) {
40694
+ parts.push(`
40695
+
40696
+ ${guardrails}`);
40697
+ }
40346
40698
  return parts.join(`
40347
40699
  `);
40348
40700
  }
@@ -40734,6 +41086,13 @@ ${basePrompt}`;
40734
41086
  parts.push(buildIsolationSection("implementer", opts.isolation, undefined));
40735
41087
  parts.push(`
40736
41088
 
41089
+ `);
41090
+ }
41091
+ const guardrails = buildBehavioralGuardrailsSection("implementer", opts.guardrailLevel ?? "lite");
41092
+ if (guardrails) {
41093
+ parts.push(guardrails);
41094
+ parts.push(`
41095
+
40737
41096
  `);
40738
41097
  }
40739
41098
  if (opts.conventions !== false) {
@@ -40790,9 +41149,11 @@ Fix the implementation (not the tests) to make all failing tests pass. Run the t
40790
41149
  var PRIORITY_BUCKETS, PRIORITY_ORDER;
40791
41150
  var init_rectifier_builder = __esm(() => {
40792
41151
  init_review();
40793
- init_verification();
41152
+ init_test_runners();
40794
41153
  init_core3();
40795
41154
  init_sections2();
41155
+ init_rectifier_builder_helpers();
41156
+ init_rectifier_builder_helpers();
40796
41157
  PRIORITY_BUCKETS = {
40797
41158
  "compile-build": {
40798
41159
  priority: 1,
@@ -41827,6 +42188,7 @@ async function callOp(ctx, op, input) {
41827
42188
  const sessionRole = ctx.sessionOverride?.role ?? runOp.session.role;
41828
42189
  const retryStrategy = resolveOpRetry(runOp, input, buildCtx);
41829
42190
  const fileOutputPath = runOp.fileOutput?.(input);
42191
+ const keepOpen = runOp.keepOpen?.(input, buildCtx) ?? runOp.session.lifetime === "warm";
41830
42192
  const runOptions = {
41831
42193
  prompt,
41832
42194
  workdir: ctx.packageDir,
@@ -41839,7 +42201,7 @@ async function callOp(ctx, op, input) {
41839
42201
  featureName: ctx.featureName,
41840
42202
  storyId: ctx.storyId,
41841
42203
  callId,
41842
- ...runOp.session.lifetime === "warm" ? { keepOpen: true } : {},
42204
+ ...keepOpen ? { keepOpen: true } : {},
41843
42205
  ...ctx.scopeId !== undefined ? { scopeId: ctx.scopeId } : {},
41844
42206
  ...ctx.interactionBridge ? { interactionBridge: ctx.interactionBridge } : {},
41845
42207
  ...ctx.maxInteractionTurns !== undefined ? { maxInteractionTurns: ctx.maxInteractionTurns } : {}
@@ -46723,11 +47085,6 @@ var init_types8 = __esm(() => {
46723
47085
  safety: "yellow",
46724
47086
  defaultSummary: "Story {{storyId}} is oversized ({{criteriaCount}} acceptance criteria) \u2014 decompose into smaller stories?"
46725
47087
  },
46726
- "story-ambiguity": {
46727
- defaultFallback: "continue",
46728
- safety: "green",
46729
- defaultSummary: "Story {{storyId}} requirements unclear \u2014 continue with best effort?"
46730
- },
46731
47088
  "review-gate": {
46732
47089
  defaultFallback: "continue",
46733
47090
  safety: "green",
@@ -48127,12 +48484,6 @@ async function checkPreMerge(context, config2, chain) {
48127
48484
  const response = await executeTrigger("pre-merge", context, config2, chain);
48128
48485
  return response.action === "approve";
48129
48486
  }
48130
- async function checkStoryAmbiguity(context, config2, chain) {
48131
- if (!isTriggerEnabled("story-ambiguity", config2))
48132
- return true;
48133
- const response = await executeTrigger("story-ambiguity", context, config2, chain);
48134
- return response.action === "approve";
48135
- }
48136
48487
  async function checkReviewGate(context, config2, chain) {
48137
48488
  if (!isTriggerEnabled("review-gate", config2))
48138
48489
  return true;
@@ -51309,113 +51660,7 @@ var init_event_bus = __esm(() => {
51309
51660
  pipelineEventBus = new PipelineEventBus;
51310
51661
  });
51311
51662
 
51312
- // src/tdd/isolation.ts
51313
- function isSourceFile(filePath) {
51314
- return SRC_PATTERNS.some((pattern) => pattern.test(filePath));
51315
- }
51316
- async function getChangedFiles2(workdir, fromRef = "HEAD") {
51317
- const proc = _isolationDeps.spawn(["git", "diff", "--name-only", fromRef], {
51318
- cwd: workdir,
51319
- stdout: "pipe",
51320
- stderr: "pipe"
51321
- });
51322
- const output = await Bun.readableStreamToText(proc.stdout);
51323
- await proc.exited;
51324
- return output.trim().split(`
51325
- `).filter(Boolean);
51326
- }
51327
- function matchesAllowedPath(filePath, allowedPaths) {
51328
- return allowedPaths.some((pattern) => {
51329
- const regexPattern = pattern.replace(/\*\*/g, ".*").replace(/\*/g, "[^/]*").replace(/\//g, "\\/");
51330
- const regex = new RegExp(`^${regexPattern}$`);
51331
- return regex.test(filePath);
51332
- });
51333
- }
51334
- async function verifyTestWriterIsolation(workdir, beforeRef, allowedPaths = ["src/index.ts", "src/**/index.ts"], testFilePatterns = DEFAULT_TEST_FILE_PATTERNS) {
51335
- const changed = await getChangedFiles2(workdir, beforeRef);
51336
- const sourceFiles = changed.filter((f) => isSourceFile(f) && !isTestFileByPatterns(f, testFilePatterns));
51337
- const softViolations = [];
51338
- const violations = [];
51339
- for (const file3 of sourceFiles) {
51340
- if (matchesAllowedPath(file3, allowedPaths)) {
51341
- softViolations.push(file3);
51342
- } else {
51343
- violations.push(file3);
51344
- }
51345
- }
51346
- return {
51347
- passed: violations.length === 0,
51348
- violations,
51349
- softViolations,
51350
- description: "Test writer should only modify test files, not source files"
51351
- };
51352
- }
51353
- async function verifyImplementerIsolation(workdir, beforeRef, testFilePatterns = DEFAULT_TEST_FILE_PATTERNS) {
51354
- const changed = await getChangedFiles2(workdir, beforeRef);
51355
- const testFiles = changed.filter((f) => isTestFileByPatterns(f, testFilePatterns));
51356
- if (testFiles.length > 0) {
51357
- return {
51358
- passed: true,
51359
- violations: [],
51360
- warnings: testFiles,
51361
- description: "Implementer modified test files (warning: should be minimal fixes only)"
51362
- };
51363
- }
51364
- return {
51365
- passed: true,
51366
- violations: [],
51367
- description: "Implementer should not modify test files"
51368
- };
51369
- }
51370
- var _isolationDeps, SRC_PATTERNS;
51371
- var init_isolation = __esm(() => {
51372
- init_test_runners();
51373
- init_bun_deps();
51374
- _isolationDeps = { spawn };
51375
- SRC_PATTERNS = [/^src\//, /^lib\//, /^packages\//];
51376
- });
51377
-
51378
51663
  // src/tdd/cleanup.ts
51379
- async function getPgid(pid) {
51380
- try {
51381
- const proc = _cleanupDeps.spawn(["ps", "-o", "pgid=", "-p", String(pid)], {
51382
- stdout: "pipe",
51383
- stderr: "pipe"
51384
- });
51385
- const output = await Bun.readableStreamToText(proc.stdout);
51386
- const exitCode = await proc.exited;
51387
- if (exitCode !== 0) {
51388
- return null;
51389
- }
51390
- const pgid = Number.parseInt(output.trim(), 10);
51391
- return Number.isNaN(pgid) ? null : pgid;
51392
- } catch {
51393
- return null;
51394
- }
51395
- }
51396
- async function cleanupProcessTree(pid, gracePeriodMs = 3000) {
51397
- try {
51398
- const pgid = await getPgid(pid);
51399
- if (!pgid) {
51400
- return;
51401
- }
51402
- const sentSigterm = _cleanupDeps.killProcessGroupFn(pgid, "SIGTERM");
51403
- if (!sentSigterm) {
51404
- return;
51405
- }
51406
- await _cleanupDeps.sleep(gracePeriodMs);
51407
- const pgidAfterWait = await getPgid(pid);
51408
- if (pgidAfterWait && pgidAfterWait === pgid) {
51409
- _cleanupDeps.killProcessGroupFn(pgid, "SIGKILL");
51410
- }
51411
- } catch (error48) {
51412
- const logger = getLogger();
51413
- logger.warn("tdd", "Failed to cleanup process tree", {
51414
- pid,
51415
- error: error48.message
51416
- });
51417
- }
51418
- }
51419
51664
  var _cleanupDeps;
51420
51665
  var init_cleanup = __esm(() => {
51421
51666
  init_logger2();
@@ -51428,11 +51673,11 @@ var init_cleanup = __esm(() => {
51428
51673
  };
51429
51674
  });
51430
51675
 
51431
- // src/tdd/session-runner.ts
51676
+ // src/tdd/rollback.ts
51432
51677
  async function rollbackToRef(workdir, ref) {
51433
51678
  const logger = getLogger();
51434
51679
  logger.warn("tdd", "Rolling back git changes", { ref });
51435
- const resetProc = _sessionRunnerDeps.spawn(["git", "reset", "--hard", ref], {
51680
+ const resetProc = _rollbackDeps.spawn(["git", "reset", "--hard", ref], {
51436
51681
  cwd: workdir,
51437
51682
  stdout: "pipe",
51438
51683
  stderr: "pipe"
@@ -51443,7 +51688,7 @@ async function rollbackToRef(workdir, ref) {
51443
51688
  logger.error("tdd", "Failed to rollback git changes", { ref, stderr });
51444
51689
  throw new Error(`Git rollback failed: ${stderr}`);
51445
51690
  }
51446
- const cleanProc = _sessionRunnerDeps.spawn(["git", "clean", "-fd"], {
51691
+ const cleanProc = _rollbackDeps.spawn(["git", "clean", "-fd"], {
51447
51692
  cwd: workdir,
51448
51693
  stdout: "pipe",
51449
51694
  stderr: "pipe"
@@ -51455,48 +51700,22 @@ async function rollbackToRef(workdir, ref) {
51455
51700
  }
51456
51701
  logger.info("tdd", "Successfully rolled back git changes", { ref });
51457
51702
  }
51458
- var _sessionRunnerDeps;
51459
- var init_session_runner = __esm(() => {
51460
- init_agents();
51461
- init_config();
51462
- init_engine();
51703
+ var _rollbackDeps;
51704
+ var init_rollback = __esm(() => {
51463
51705
  init_logger2();
51464
- init_execution_gates();
51465
- init_prompts();
51466
- init_quality();
51467
- init_git();
51468
- init_git();
51469
- init_cleanup();
51470
- init_isolation();
51471
- _sessionRunnerDeps = {
51472
- autoCommitIfDirty,
51473
- spawn: Bun.spawn,
51474
- getChangedFiles: getChangedFiles2,
51475
- verifyTestWriterIsolation,
51476
- verifyImplementerIsolation,
51477
- captureGitRef,
51478
- cleanupProcessTree,
51479
- buildPrompt: null
51706
+ _rollbackDeps = {
51707
+ spawn: Bun.spawn
51480
51708
  };
51481
51709
  });
51482
51710
 
51483
- // src/tdd/session-op.ts
51484
- var init_session_op = __esm(() => {
51485
- init_agents();
51486
- init_bridge_builder();
51487
- init_logger2();
51488
- init_operations();
51489
- init_quality();
51490
- init_session_runner();
51491
- });
51492
-
51493
51711
  // src/tdd/index.ts
51494
51712
  var init_tdd = __esm(() => {
51495
51713
  init_test_runners();
51496
51714
  init_isolation();
51497
51715
  init_cleanup();
51498
51716
  init_verdict();
51499
- init_session_op();
51717
+ init_rollback();
51718
+ init_operations();
51500
51719
  });
51501
51720
 
51502
51721
  // src/pipeline/stages/autofix-guards.ts
@@ -53988,14 +54207,53 @@ function gatherRectificationFindings(phaseOutputs, verifierPhase, fullSuiteGateP
53988
54207
  }
53989
54208
  return findings;
53990
54209
  }
53991
- async function runPhase(ctx, slot, phaseCosts, phaseOutputs) {
54210
+ async function runPhase(ctx, slot, phaseCosts, phaseOutputs, isThreeSession = false) {
54211
+ const logger = getSafeLogger();
54212
+ const opName = slot.op.name;
54213
+ const isTddPhase = isThreeSession && TDD_OP_NAMES.has(opName);
54214
+ const beforeRef = isTddPhase ? await _storyOrchestratorDeps.captureGitRef(ctx.packageDir) : undefined;
54215
+ const dispatchInput = isTddPhase && beforeRef ? { ...slot.input, beforeRef } : slot.input;
54216
+ if (isTddPhase) {
54217
+ logger?.info("tdd", `-> Session: ${opName}`, { storyId: ctx.storyId, role: opName });
54218
+ } else if (isThreeSession && opName === "full-suite-gate") {
54219
+ logger?.info("tdd", "-> Running full test suite gate (before Verifier)", { storyId: ctx.storyId });
54220
+ }
54221
+ const phaseStartedAt = Date.now();
53992
54222
  const scope = ctx.runtime.costAggregator.openScope();
53993
54223
  try {
53994
- const output = await _storyOrchestratorDeps.callOp({ ...ctx, scopeId: scope.scopeId }, slot.op, slot.input);
53995
- phaseOutputs[slot.op.name] = output;
54224
+ const output = await _storyOrchestratorDeps.callOp({ ...ctx, scopeId: scope.scopeId }, slot.op, dispatchInput);
54225
+ phaseOutputs[opName] = output;
54226
+ if (isTddPhase) {
54227
+ const durationMs = Date.now() - phaseStartedAt;
54228
+ logger?.info("tdd", `Session complete: ${opName}`, {
54229
+ storyId: ctx.storyId,
54230
+ role: opName,
54231
+ durationMs
54232
+ });
54233
+ const filesChanged = output?.filesChanged ?? [];
54234
+ if (opName === "test-writer" && filesChanged.length > 0) {
54235
+ logger?.info("tdd", "Created test files", {
54236
+ storyId: ctx.storyId,
54237
+ testFilesCount: filesChanged.length,
54238
+ testFiles: [...filesChanged]
54239
+ });
54240
+ }
54241
+ const isolation = output?.isolation;
54242
+ if (isolation) {
54243
+ if (isolation.passed) {
54244
+ logger?.info("tdd", "Isolation maintained", { storyId: ctx.storyId, role: opName });
54245
+ } else {
54246
+ logger?.error("tdd", "Isolation violated", {
54247
+ storyId: ctx.storyId,
54248
+ role: opName,
54249
+ violations: isolation.violations
54250
+ });
54251
+ }
54252
+ }
54253
+ }
53996
54254
  return output;
53997
54255
  } finally {
53998
- phaseCosts[slot.op.name] = (phaseCosts[slot.op.name] ?? 0) + scope.snapshot().totalCostUsd;
54256
+ phaseCosts[opName] = (phaseCosts[opName] ?? 0) + scope.snapshot().totalCostUsd;
53999
54257
  scope.close();
54000
54258
  }
54001
54259
  }
@@ -54070,9 +54328,11 @@ async function runRectification(ctx, state, phaseCosts, phaseOutputs) {
54070
54328
  class ExecutionPlan {
54071
54329
  ctx;
54072
54330
  state;
54073
- constructor(ctx, state) {
54331
+ isThreeSession;
54332
+ constructor(ctx, state, isThreeSession = false) {
54074
54333
  this.ctx = ctx;
54075
54334
  this.state = state;
54335
+ this.isThreeSession = isThreeSession;
54076
54336
  }
54077
54337
  phaseNames() {
54078
54338
  const names = collectOrderedPhases(this.state).map((p) => p.slot.op.name);
@@ -54092,7 +54352,7 @@ class ExecutionPlan {
54092
54352
  ]) : new Set;
54093
54353
  for (const phase of collectOrderedPhases(this.state)) {
54094
54354
  try {
54095
- await runPhase(this.ctx, phase.slot, phaseCosts, phaseOutputs);
54355
+ await runPhase(this.ctx, phase.slot, phaseCosts, phaseOutputs, this.isThreeSession);
54096
54356
  } catch (error48) {
54097
54357
  logger?.error("story-orchestrator", "Phase threw unexpected error", {
54098
54358
  storyId: this.ctx.storyId,
@@ -54154,24 +54414,27 @@ class StoryOrchestratorBuilder {
54154
54414
  this.state.rectification = opts;
54155
54415
  return this;
54156
54416
  }
54157
- build(ctx) {
54417
+ build(ctx, opts = {}) {
54158
54418
  if (!this.state.implementer) {
54159
54419
  throw new NaxError("StoryOrchestratorBuilder.build(): addImplementer() must be called before build()", "ORCHESTRATOR_NO_IMPLEMENTER", { stage: "execution" });
54160
54420
  }
54161
- return new ExecutionPlan(ctx, { ...this.state });
54421
+ return new ExecutionPlan(ctx, { ...this.state }, opts.isThreeSession ?? false);
54162
54422
  }
54163
54423
  }
54164
- var _storyOrchestratorDeps, CANONICAL_ORDER, PHASE_KIND_TO_STATE_KEY;
54424
+ var _storyOrchestratorDeps, TDD_OP_NAMES, CANONICAL_ORDER, PHASE_KIND_TO_STATE_KEY;
54165
54425
  var init_story_orchestrator = __esm(() => {
54166
54426
  init_errors();
54167
54427
  init_findings();
54168
54428
  init_logger2();
54169
54429
  init_operations();
54170
54430
  init_call();
54431
+ init_git();
54171
54432
  _storyOrchestratorDeps = {
54172
54433
  callOp,
54173
- runFixCycle
54434
+ runFixCycle,
54435
+ captureGitRef
54174
54436
  };
54437
+ TDD_OP_NAMES = new Set(["test-writer", "implementer", "verifier"]);
54175
54438
  CANONICAL_ORDER = [
54176
54439
  "test-writer",
54177
54440
  "greenfield-gate",
@@ -54194,7 +54457,7 @@ var init_story_orchestrator = __esm(() => {
54194
54457
 
54195
54458
  // src/execution/build-plan-for-strategy.ts
54196
54459
  function isThreeSessionStrategy(strategy) {
54197
- return THREE_SESSION_STRATEGIES.has(strategy);
54460
+ return THREE_SESSION_STRATEGIES2.has(strategy);
54198
54461
  }
54199
54462
  function requiresInitialRefCapture(strategy) {
54200
54463
  return isThreeSessionStrategy(strategy);
@@ -54211,22 +54474,22 @@ function isFreshRun(story) {
54211
54474
  return !hasAttempts && !hasReviewEscalation;
54212
54475
  }
54213
54476
  function buildPlanForStrategy(ctx, story, config2, testStrategy, inputs) {
54214
- const isTdd = isThreeSessionStrategy(testStrategy);
54477
+ const isThreeSession = isThreeSessionStrategy(testStrategy);
54215
54478
  const freshRun = isFreshRun(story);
54216
54479
  const builder = new StoryOrchestratorBuilder;
54217
- if (isTdd && freshRun && inputs.testWriter) {
54480
+ if (isThreeSession && freshRun && inputs.testWriter) {
54218
54481
  builder.addTestWriter(inputs.testWriter);
54219
54482
  }
54220
- if (isTdd && freshRun && inputs.greenfieldGate) {
54483
+ if (isThreeSession && freshRun && inputs.greenfieldGate) {
54221
54484
  builder.addGreenfieldGate(inputs.greenfieldGate);
54222
54485
  }
54223
54486
  if (inputs.implementer) {
54224
54487
  builder.addImplementer(inputs.implementer);
54225
54488
  }
54226
- if (isTdd && inputs.fullSuiteGate) {
54489
+ if (isThreeSession && inputs.fullSuiteGate) {
54227
54490
  builder.addFullSuiteGate(inputs.fullSuiteGate);
54228
54491
  }
54229
- if (isTdd && inputs.verifier) {
54492
+ if (isThreeSession && inputs.verifier) {
54230
54493
  builder.addVerifier(inputs.verifier);
54231
54494
  }
54232
54495
  if (hasReviewCheck(config2, "semantic") && inputs.semanticReview) {
@@ -54236,21 +54499,21 @@ function buildPlanForStrategy(ctx, story, config2, testStrategy, inputs) {
54236
54499
  builder.addAdversarialReview(inputs.adversarialReview);
54237
54500
  }
54238
54501
  if (shouldRunRectification(config2) && inputs.rectification) {
54239
- const gateStrategies = isTdd && inputs.fullSuiteGate ? [makeFullSuiteRectifyStrategy(story)] : [];
54502
+ const gateStrategies = isThreeSession && inputs.fullSuiteGate ? [makeFullSuiteRectifyStrategy(story)] : [];
54240
54503
  const rectOpts = {
54241
54504
  ...inputs.rectification,
54242
54505
  strategies: [...gateStrategies, ...inputs.rectification.strategies]
54243
54506
  };
54244
54507
  builder.addRectification(rectOpts);
54245
54508
  }
54246
- return builder.build(ctx);
54509
+ return builder.build(ctx, { isThreeSession });
54247
54510
  }
54248
- var THREE_SESSION_STRATEGIES;
54511
+ var THREE_SESSION_STRATEGIES2;
54249
54512
  var init_build_plan_for_strategy = __esm(() => {
54250
54513
  init_execution_gates();
54251
54514
  init_full_suite_rectify();
54252
54515
  init_story_orchestrator();
54253
- THREE_SESSION_STRATEGIES = new Set(["three-session-tdd", "three-session-tdd-lite"]);
54516
+ THREE_SESSION_STRATEGIES2 = new Set(["three-session-tdd", "three-session-tdd-lite"]);
54254
54517
  });
54255
54518
 
54256
54519
  // src/execution/plan-inputs.ts
@@ -54286,22 +54549,40 @@ function validatePlanInputs(story, config2) {
54286
54549
  function hasReviewEscalation(story) {
54287
54550
  return (story.priorFailures ?? []).some((f) => f.stage === "review");
54288
54551
  }
54552
+ async function buildThreeSessionPrompt(role, ctx, lite) {
54553
+ return TddPromptBuilder.buildForRole(role, ctx.workdir, ctx.config, ctx.story, {
54554
+ lite,
54555
+ contextMarkdown: ctx.contextMarkdown,
54556
+ featureContextMarkdown: ctx.featureContextMarkdown,
54557
+ contextBundle: ctx.contextBundle,
54558
+ constitution: ctx.constitution?.content
54559
+ });
54560
+ }
54289
54561
  async function assemblePlanInputsFromCtx(ctx) {
54290
54562
  const { story, config: config2 } = ctx;
54291
54563
  validatePlanInputs(story, config2);
54292
54564
  const _isTdd = isThreeSessionStrategy(ctx.routing.testStrategy);
54293
54565
  const _isFreshRun = (story.attempts ?? 0) === 0 && !hasReviewEscalation(story);
54566
+ const isLite = ctx.routing.testStrategy === "three-session-tdd-lite";
54567
+ if (!_isTdd && !ctx.prompt?.trim()) {
54568
+ throw new NaxError(`Prompt missing for strategy "${ctx.routing.testStrategy}" \u2014 non-TDD strategies require ctx.prompt`, "PROMPT_NOT_BUILT", { stage: "plan-inputs", storyId: story.id, testStrategy: ctx.routing.testStrategy });
54569
+ }
54294
54570
  const resolvedTestPatterns = _isTdd ? await resolveTestFilePatterns(config2, ctx.workdir) : undefined;
54571
+ const [testWriterPrompt, implementerPrompt, verifierPrompt] = _isTdd ? await Promise.all([
54572
+ _isFreshRun ? buildThreeSessionPrompt("test-writer", ctx, isLite) : Promise.resolve(""),
54573
+ buildThreeSessionPrompt("implementer", ctx, isLite),
54574
+ buildThreeSessionPrompt("verifier", ctx, isLite)
54575
+ ]) : ["", ctx.prompt, ""];
54295
54576
  const testWriterInput = _isTdd && _isFreshRun ? {
54296
54577
  story,
54297
- contextMarkdown: ctx.prompt,
54578
+ promptMarkdown: testWriterPrompt,
54298
54579
  featureContextMarkdown: ctx.featureContextMarkdown,
54299
54580
  constitution: ctx.constitution?.content
54300
54581
  } : undefined;
54301
54582
  const greenfieldGateInput = _isTdd && _isFreshRun && resolvedTestPatterns ? { story, workdir: ctx.workdir, resolvedTestPatterns } : undefined;
54302
54583
  const implementerInput = {
54303
54584
  story,
54304
- contextMarkdown: ctx.prompt,
54585
+ promptMarkdown: implementerPrompt,
54305
54586
  featureContextMarkdown: ctx.featureContextMarkdown,
54306
54587
  constitution: ctx.constitution?.content
54307
54588
  };
@@ -54312,7 +54593,7 @@ async function assemblePlanInputsFromCtx(ctx) {
54312
54593
  projectDir: ctx.projectDir,
54313
54594
  resolvedTestPatterns
54314
54595
  } : undefined;
54315
- const verifierInput = _isTdd ? { story } : undefined;
54596
+ const verifierInput = _isTdd ? { story, promptMarkdown: verifierPrompt } : undefined;
54316
54597
  const inlineReviewEnabled = ctx.config.execution?.inlineReview === true;
54317
54598
  const semanticStory = {
54318
54599
  id: story.id,
@@ -54357,25 +54638,12 @@ async function assemblePlanInputsFromCtx(ctx) {
54357
54638
  }
54358
54639
  var init_plan_inputs = __esm(() => {
54359
54640
  init_errors();
54641
+ init_prompts();
54360
54642
  init_resolver();
54361
54643
  init_build_plan_for_strategy();
54362
54644
  });
54363
54645
 
54364
54646
  // src/pipeline/stages/execution-helpers.ts
54365
- function isAmbiguousOutput(output) {
54366
- if (!output)
54367
- return false;
54368
- const ambiguityKeywords = [
54369
- "unclear",
54370
- "ambiguous",
54371
- "need clarification",
54372
- "please clarify",
54373
- "which one",
54374
- "not sure which"
54375
- ];
54376
- const lowerOutput = output.toLowerCase();
54377
- return ambiguityKeywords.some((keyword) => lowerOutput.includes(keyword));
54378
- }
54379
54647
  function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason) {
54380
54648
  if (failureCategory === "isolation-violation") {
54381
54649
  if (!isLiteMode) {
@@ -54608,6 +54876,16 @@ async function applyPostRunInspection(ctx, planResult, opts) {
54608
54876
  }
54609
54877
  const pauseReason = extractPauseReason(planResult.phaseOutputs);
54610
54878
  const failureCategory = isTdd && !planResult.success ? deriveTddFailureCategory(planResult.phaseOutputs) : undefined;
54879
+ const tddIsolations = {};
54880
+ for (const opName of ["test-writer", "implementer", "verifier"]) {
54881
+ const phaseOut = planResult.phaseOutputs[opName];
54882
+ if (phaseOut?.isolation) {
54883
+ tddIsolations[opName] = phaseOut.isolation;
54884
+ }
54885
+ }
54886
+ if (Object.keys(tddIsolations).length > 0) {
54887
+ ctx.tddIsolations = tddIsolations;
54888
+ }
54611
54889
  const needsHumanReview = failureCategory === "session-failure";
54612
54890
  const combinedOutput = (agentResult.output ?? "") + (agentResult.stderr ?? "");
54613
54891
  if (isTdd) {
@@ -54721,13 +54999,6 @@ Category: ${failureCategory ?? "unknown"}`,
54721
54999
  await cleanupSessionOnFailure(ctx);
54722
55000
  return { action: "escalate" };
54723
55001
  }
54724
- if (agentResult.success && _postRunDeps.isAmbiguousOutput(combinedOutput) && ctx.interaction && isTriggerEnabled("story-ambiguity", ctx.config)) {
54725
- const shouldContinue = await _postRunDeps.checkStoryAmbiguity({ featureName: ctx.prd.feature, storyId: ctx.story.id, reason: "Agent output suggests ambiguity" }, ctx.config, ctx.interaction);
54726
- if (!shouldContinue) {
54727
- logger.warn("execution", "Story ambiguity detected \u2014 escalating story", { storyId: ctx.story.id });
54728
- return { action: "escalate", reason: "Story ambiguity detected \u2014 needs clarification" };
54729
- }
54730
- }
54731
55002
  if (!isTdd) {
54732
55003
  await _postRunDeps.autoCommitIfDirty(ctx.workdir, "execution", "single-session", ctx.story.id);
54733
55004
  }
@@ -54745,13 +55016,11 @@ var init_post_run = __esm(() => {
54745
55016
  init_execution_helpers();
54746
55017
  init_quality();
54747
55018
  init_scratch_writer();
54748
- init_session_runner();
55019
+ init_rollback();
54749
55020
  init_git();
54750
55021
  _postRunDeps = {
54751
55022
  detectMergeConflict,
54752
55023
  checkMergeConflict,
54753
- isAmbiguousOutput,
54754
- checkStoryAmbiguity,
54755
55024
  failAndClose,
54756
55025
  rollbackToRef,
54757
55026
  autoCommitIfDirty
@@ -54778,8 +55047,6 @@ var init_execution = __esm(() => {
54778
55047
  const agent = (ctx.agentGetFn ?? _executionDeps.getAgent)(defaultAgent);
54779
55048
  if (!agent)
54780
55049
  return { action: "fail", reason: `Agent "${defaultAgent}" not found` };
54781
- if (!ctx.prompt)
54782
- return { action: "fail", reason: "Prompt not built (prompt stage skipped?)" };
54783
55050
  let effectiveTier = ctx.routing.modelTier;
54784
55051
  if (!_executionDeps.validateAgentForTier(agent, ctx.routing.modelTier)) {
54785
55052
  effectiveTier = agent.capabilities.supportedTiers[0] ?? ctx.routing.modelTier;
@@ -54791,7 +55058,8 @@ var init_execution = __esm(() => {
54791
55058
  supportedTiers: agent.capabilities.supportedTiers
54792
55059
  });
54793
55060
  }
54794
- if (!ctx.packageView)
55061
+ const packageView = ctx.packageView ?? ctx.runtime?.packages?.resolve(ctx.workdir);
55062
+ if (!packageView)
54795
55063
  return { action: "fail", reason: "Package view unavailable for execution dispatch" };
54796
55064
  const interactionBridge = buildInteractionBridge(ctx.interaction, {
54797
55065
  featureName: ctx.prd.feature,
@@ -54800,7 +55068,7 @@ var init_execution = __esm(() => {
54800
55068
  });
54801
55069
  const callCtx = {
54802
55070
  runtime: ctx.runtime,
54803
- packageView: ctx.packageView,
55071
+ packageView,
54804
55072
  packageDir: ctx.workdir,
54805
55073
  agentName: ctx.routing.agent ?? defaultAgent,
54806
55074
  storyId: ctx.story.id,
@@ -55593,7 +55861,7 @@ class RegressionStrategy {
55593
55861
  var _regressionStrategyDeps, DeferredRegressionStrategy;
55594
55862
  var init_regression = __esm(() => {
55595
55863
  init_logger2();
55596
- init_parser2();
55864
+ init_test_runners();
55597
55865
  init_runners();
55598
55866
  _regressionStrategyDeps = { runVerification: fullSuite };
55599
55867
  DeferredRegressionStrategy = class DeferredRegressionStrategy extends RegressionStrategy {
@@ -58892,7 +59160,7 @@ var package_default;
58892
59160
  var init_package = __esm(() => {
58893
59161
  package_default = {
58894
59162
  name: "@nathapp/nax",
58895
- version: "0.67.0-canary.6",
59163
+ version: "0.67.0",
58896
59164
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
58897
59165
  type: "module",
58898
59166
  bin: {
@@ -58987,8 +59255,8 @@ var init_version = __esm(() => {
58987
59255
  NAX_VERSION = package_default.version;
58988
59256
  NAX_COMMIT = (() => {
58989
59257
  try {
58990
- if (/^[0-9a-f]{6,10}$/.test("e83c9ffd"))
58991
- return "e83c9ffd";
59258
+ if (/^[0-9a-f]{6,10}$/.test("ffa2f392"))
59259
+ return "ffa2f392";
58992
59260
  } catch {}
58993
59261
  try {
58994
59262
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -60195,10 +60463,9 @@ var _regressionDeps;
60195
60463
  var init_run_regression = __esm(() => {
60196
60464
  init_logger2();
60197
60465
  init_prd();
60466
+ init_test_runners();
60198
60467
  init_git();
60199
60468
  init_verification();
60200
- init_rectification_loop();
60201
- init_runners();
60202
60469
  _regressionDeps = {
60203
60470
  runVerification: fullSuite,
60204
60471
  runRectificationLoop,
@@ -60456,6 +60723,7 @@ async function handleRunCompletion(options) {
60456
60723
  return {
60457
60724
  durationMs,
60458
60725
  runCompletedAt,
60726
+ reportedTotal,
60459
60727
  finalCounts: {
60460
60728
  total: finalCounts.total,
60461
60729
  passed: finalCounts.passed,
@@ -62320,6 +62588,27 @@ async function runIteration(ctx, prd, selection, iterations, totalCost, allStory
62320
62588
  });
62321
62589
  }
62322
62590
  }
62591
+ if (pipelineResult.finalAction === "escalate" && ctx.sessionManager) {
62592
+ const sessionManager = ctx.sessionManager;
62593
+ const liveStorySessions = sessionManager.getForStory(story.id).filter((desc) => desc.handle && (desc.state === "RUNNING" || desc.state === "CREATED"));
62594
+ for (const desc of liveStorySessions) {
62595
+ if (!desc.handle)
62596
+ continue;
62597
+ const live = sessionManager.getLiveHandle(desc.handle);
62598
+ if (!live)
62599
+ continue;
62600
+ try {
62601
+ await sessionManager.closeSession(live);
62602
+ } catch (err) {
62603
+ getLogger().warn("iteration-runner", "Failed to close warm session on escalation \u2014 continuing", {
62604
+ storyId: story.id,
62605
+ sessionName: desc.handle ?? "(no handle)",
62606
+ role: desc.role,
62607
+ error: errorMessage(err)
62608
+ });
62609
+ }
62610
+ }
62611
+ }
62323
62612
  const reviewSummaryFromPipeline = pipelineResult.context.reviewResult?.reviewSummary;
62324
62613
  if (reviewSummaryFromPipeline) {
62325
62614
  ctx.statusWriter.setReviewSummary(reviewSummaryFromPipeline);
@@ -99013,11 +99302,11 @@ async function runCompletionPhase(options) {
99013
99302
  runtime: options.runtime,
99014
99303
  abortSignal: options.abortSignal
99015
99304
  });
99016
- const { durationMs, runCompletedAt, finalCounts } = completionResult;
99305
+ const { durationMs, runCompletedAt, finalCounts, reportedTotal } = completionResult;
99017
99306
  if (options.featureDir) {
99018
99307
  const finalStatus = isComplete(options.prd) ? "completed" : "failed";
99019
99308
  options.statusWriter.setRunStatus(finalStatus);
99020
- await options.statusWriter.writeFeatureStatus(options.featureDir, options.totalCost, options.iterations);
99309
+ await options.statusWriter.writeFeatureStatus(options.featureDir, reportedTotal, options.iterations);
99021
99310
  }
99022
99311
  if (options.headless && options.formatterMode !== "json") {
99023
99312
  const { outputRunFooter: outputRunFooter2 } = await Promise.resolve().then(() => (init_headless_formatter(), exports_headless_formatter));
@@ -99029,7 +99318,7 @@ async function runCompletionPhase(options) {
99029
99318
  skipped: finalCounts.skipped
99030
99319
  },
99031
99320
  durationMs,
99032
- totalCost: options.totalCost,
99321
+ totalCost: reportedTotal,
99033
99322
  startedAt: options.startedAt,
99034
99323
  completedAt: runCompletedAt,
99035
99324
  formatterMode: options.formatterMode
@@ -99037,7 +99326,7 @@ async function runCompletionPhase(options) {
99037
99326
  }
99038
99327
  logger?.debug("execution", "Completion phase \u2014 stopping heartbeat and writing exit summary");
99039
99328
  stopHeartbeat();
99040
- await writeExitSummary(options.logFilePath, options.totalCost, options.iterations, options.storiesCompleted, durationMs);
99329
+ await writeExitSummary(options.logFilePath, reportedTotal, options.iterations, options.storiesCompleted, durationMs);
99041
99330
  logger?.debug("execution", "Completion phase \u2014 auto-committing dirty files");
99042
99331
  await autoCommitIfDirty(options.workdir, "run.complete", "run-summary", options.feature);
99043
99332
  reviewOrchestrator.reset();