@nathapp/nax 0.67.0-canary.7 → 0.67.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +267 -128
- package/package.json +1 -1
package/dist/nax.js
CHANGED
|
@@ -17156,7 +17156,8 @@ var init_schemas_infra = __esm(() => {
|
|
|
17156
17156
|
PromptsConfigSchema = exports_external.object({
|
|
17157
17157
|
overrides: exports_external.record(exports_external.string().refine((key) => ["no-test", "test-writer", "implementer", "verifier", "single-session", "tdd-simple"].includes(key), {
|
|
17158
17158
|
message: "Role must be one of: no-test, test-writer, implementer, verifier, single-session, tdd-simple"
|
|
17159
|
-
}), exports_external.string().min(1, "Override path must be non-empty")).optional()
|
|
17159
|
+
}), exports_external.string().min(1, "Override path must be non-empty")).optional(),
|
|
17160
|
+
behavioralGuardrails: exports_external.enum(["off", "lite", "strict"]).default("lite")
|
|
17160
17161
|
});
|
|
17161
17162
|
ProjectProfileSchema = exports_external.object({
|
|
17162
17163
|
language: exports_external.enum(["typescript", "javascript", "go", "rust", "python", "ruby", "java", "kotlin", "php"]).optional(),
|
|
@@ -17550,7 +17551,7 @@ var init_schemas3 = __esm(() => {
|
|
|
17550
17551
|
maxReplanAttempts: 3
|
|
17551
17552
|
}
|
|
17552
17553
|
}),
|
|
17553
|
-
prompts: PromptsConfigSchema.optional(),
|
|
17554
|
+
prompts: PromptsConfigSchema.default({ behavioralGuardrails: "lite" }),
|
|
17554
17555
|
generate: GenerateConfigSchema.optional(),
|
|
17555
17556
|
project: ProjectProfileSchema.optional(),
|
|
17556
17557
|
debate: DebateConfigSchema.optional().default(() => ({
|
|
@@ -22600,6 +22601,15 @@ var init_conventions = __esm(() => {
|
|
|
22600
22601
|
]);
|
|
22601
22602
|
DEFAULT_SEPARATED_TEST_DIRS = Object.freeze(["test/unit", "test/integration"]);
|
|
22602
22603
|
});
|
|
22604
|
+
|
|
22605
|
+
// src/test-runners/classifier.ts
|
|
22606
|
+
function createTestFileClassifier(resolved) {
|
|
22607
|
+
const { regex } = resolved;
|
|
22608
|
+
if (regex.length === 0)
|
|
22609
|
+
return () => false;
|
|
22610
|
+
return (path) => regex.some((re) => re.test(path));
|
|
22611
|
+
}
|
|
22612
|
+
|
|
22603
22613
|
// src/test-runners/detect/cache.ts
|
|
22604
22614
|
function cachePath(workdir) {
|
|
22605
22615
|
return `${workdir}/.nax/cache/test-patterns.json`;
|
|
@@ -24156,6 +24166,32 @@ var init_ac_parser = __esm(() => {
|
|
|
24156
24166
|
});
|
|
24157
24167
|
|
|
24158
24168
|
// src/test-runners/index.ts
|
|
24169
|
+
var exports_test_runners = {};
|
|
24170
|
+
__export(exports_test_runners, {
|
|
24171
|
+
resolveTestFilePatterns: () => resolveTestFilePatterns,
|
|
24172
|
+
resolveReviewExcludePatterns: () => resolveReviewExcludePatterns,
|
|
24173
|
+
parseTestOutput: () => parseTestOutput,
|
|
24174
|
+
parseTestFailures: () => parseTestFailures,
|
|
24175
|
+
parseBunTestOutput: () => parseBunTestOutput,
|
|
24176
|
+
isTestFileByPatterns: () => isTestFileByPatterns,
|
|
24177
|
+
isTestFile: () => isTestFile,
|
|
24178
|
+
globsToTestRegex: () => globsToTestRegex,
|
|
24179
|
+
globsToPathspec: () => globsToPathspec,
|
|
24180
|
+
formatFailureSummary: () => formatFailureSummary,
|
|
24181
|
+
findPackageDir: () => findPackageDir,
|
|
24182
|
+
extractTestDirs: () => extractTestDirs,
|
|
24183
|
+
detectTestFilePatterns: () => detectTestFilePatterns,
|
|
24184
|
+
detectManifestFrameworksFromPackageJson: () => detectManifestFrameworksFromPackageJson,
|
|
24185
|
+
detectFramework: () => detectFramework,
|
|
24186
|
+
createTestFileClassifier: () => createTestFileClassifier,
|
|
24187
|
+
buildTestFrameworkHint: () => buildTestFrameworkHint,
|
|
24188
|
+
analyzeTestExitCode: () => analyzeTestExitCode,
|
|
24189
|
+
_resolverDeps: () => _resolverDeps,
|
|
24190
|
+
DEFAULT_TS_DERIVE_SUFFIXES: () => DEFAULT_TS_DERIVE_SUFFIXES,
|
|
24191
|
+
DEFAULT_TEST_FILE_PATTERNS: () => DEFAULT_TEST_FILE_PATTERNS,
|
|
24192
|
+
DEFAULT_SEPARATED_TEST_DIRS: () => DEFAULT_SEPARATED_TEST_DIRS,
|
|
24193
|
+
DEFAULT_SCAN_TEST_DIRS: () => DEFAULT_SCAN_TEST_DIRS
|
|
24194
|
+
});
|
|
24159
24195
|
var init_test_runners = __esm(() => {
|
|
24160
24196
|
init_conventions();
|
|
24161
24197
|
init_detect2();
|
|
@@ -29086,13 +29122,14 @@ isolation scope: You may modify both src/ and test/ files. Write failing tests F
|
|
|
29086
29122
|
}
|
|
29087
29123
|
|
|
29088
29124
|
// src/prompts/sections/role-task.ts
|
|
29089
|
-
function buildRoleTaskSection(roleOrVariant, variant, testCommand, isolation, noTestJustification) {
|
|
29125
|
+
function buildRoleTaskSection(roleOrVariant, variant, testCommand, isolation, noTestJustification, storyId) {
|
|
29090
29126
|
if ((roleOrVariant === "standard" || roleOrVariant === "lite") && variant === undefined) {
|
|
29091
|
-
return buildRoleTaskSection("implementer", roleOrVariant, testCommand, isolation);
|
|
29127
|
+
return buildRoleTaskSection("implementer", roleOrVariant, testCommand, isolation, noTestJustification, storyId);
|
|
29092
29128
|
}
|
|
29093
29129
|
const role = roleOrVariant;
|
|
29094
29130
|
const testCmd = testCommand ?? "";
|
|
29095
29131
|
const frameworkHint = buildTestFrameworkHint(testCmd);
|
|
29132
|
+
const commitMsg = storyId ? `feat(${storyId}): <description>` : "feat: <description>";
|
|
29096
29133
|
if (role === "no-test") {
|
|
29097
29134
|
const justification = noTestJustification ?? "No behavioral changes \u2014 tests not required";
|
|
29098
29135
|
return `# Role: Implementer (No Tests)
|
|
@@ -29103,7 +29140,7 @@ Instructions:
|
|
|
29103
29140
|
- Implement the change as described in the story
|
|
29104
29141
|
- Do NOT create or modify test files
|
|
29105
29142
|
- Justification for no tests: ${justification}
|
|
29106
|
-
- When done, stage and commit ALL changed files with: git commit -m 'feat: <description>'
|
|
29143
|
+
- When done, stage and commit ALL changed files with: git commit -m '${commitMsg}'
|
|
29107
29144
|
- Goal: change implemented, no test files created or modified, all changes committed`;
|
|
29108
29145
|
}
|
|
29109
29146
|
if (role === "implementer") {
|
|
@@ -29111,62 +29148,78 @@ Instructions:
|
|
|
29111
29148
|
if (v === "standard") {
|
|
29112
29149
|
return `# Role: Implementer
|
|
29113
29150
|
|
|
29114
|
-
Your task: make failing tests pass.
|
|
29151
|
+
Your task: make the failing tests pass by writing real source code.
|
|
29115
29152
|
|
|
29116
|
-
|
|
29117
|
-
|
|
29118
|
-
|
|
29119
|
-
|
|
29120
|
-
|
|
29121
|
-
|
|
29153
|
+
Workflow:
|
|
29154
|
+
1. Read every failing test in scope. The tests are the contract \u2014 understand what each one asserts before editing source.
|
|
29155
|
+
2. Run the scoped test files once to establish the baseline (which fail, which pass, and why).
|
|
29156
|
+
3. Implement source code in the package's source location (the project context names it).
|
|
29157
|
+
4. After each meaningful change, re-run only the scoped test files \u2014 never the full suite.
|
|
29158
|
+
5. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
|
|
29159
|
+
|
|
29160
|
+
Rules:
|
|
29161
|
+
- Do NOT modify test files. Three narrow exceptions: (a) a lint-only fix to a test, (b) a contract drift where the test imports a removed/renamed symbol, (c) a sibling test file rename forced by your source change. Name which exception applies in the commit body before editing any test file.
|
|
29162
|
+
- Goal: every acceptance criterion covered by at least one passing test; all changes committed.`;
|
|
29122
29163
|
}
|
|
29123
29164
|
return `# Role: Implementer (Lite)
|
|
29124
29165
|
|
|
29125
|
-
Your task:
|
|
29166
|
+
Your task: make the failing tests pass AND fill any test coverage gaps an earlier session left.
|
|
29126
29167
|
|
|
29127
|
-
Context: A test-writer session has already created
|
|
29168
|
+
Context: A test-writer session has already created tests and may have added minimal stubs in the package's source location. Your job is to (a) replace stubs with real implementations and (b) confirm every AC has test coverage before committing.
|
|
29128
29169
|
|
|
29129
|
-
|
|
29130
|
-
|
|
29131
|
-
|
|
29132
|
-
-
|
|
29133
|
-
|
|
29170
|
+
Workflow:
|
|
29171
|
+
1. Run the existing scoped tests to see which fail and why (assertion failure vs import error).
|
|
29172
|
+
2. Read each failing test. Note which ACs they cover and which they DON'T.
|
|
29173
|
+
3. Replace stubs with real implementations. A stub is one of: a type-only declaration, a function returning a placeholder/throwing "not implemented", or a const placeholder.
|
|
29174
|
+
4. If any AC has no test, add one before implementing \u2014 do not implement uncovered behavior.
|
|
29175
|
+
5. Re-run only the scoped test files after each meaningful change.
|
|
29176
|
+
6. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
|
|
29177
|
+
|
|
29178
|
+
Rules:
|
|
29179
|
+
- Three test-modification exceptions apply (lint-only fix, contract drift, sibling rename). Name the exception in the commit body before editing any test the test-writer wrote.
|
|
29134
29180
|
- ${frameworkHint}
|
|
29135
|
-
-
|
|
29136
|
-
- Goal: all tests green, all criteria met, all changes committed`;
|
|
29181
|
+
- Goal: every AC has at least one passing test; all stubs replaced with real logic; all changes committed.`;
|
|
29137
29182
|
}
|
|
29138
29183
|
if (role === "test-writer") {
|
|
29139
29184
|
if (isolation === "lite") {
|
|
29140
29185
|
return `# Role: Test-Writer (Lite)
|
|
29141
29186
|
|
|
29142
|
-
Your task:
|
|
29187
|
+
Your task: write failing tests AND minimal stubs that let the tests compile.
|
|
29143
29188
|
|
|
29144
29189
|
Context: You are session 1 of a multi-session workflow. An implementer will follow to make your tests pass.
|
|
29145
29190
|
|
|
29146
|
-
|
|
29147
|
-
-
|
|
29148
|
-
|
|
29191
|
+
Workflow:
|
|
29192
|
+
1. Re-read the acceptance criteria above.
|
|
29193
|
+
2. Create test files in the location the project uses for tests.
|
|
29194
|
+
3. Create stubs in the package's source location so the tests can import and compile. A stub is one of: a type/interface declaration, a function returning a placeholder/throwing "not implemented" (no more than 3 lines of body), or a const placeholder. If a stub body needs real logic, you have crossed into implementer territory \u2014 stop.
|
|
29195
|
+
4. For each AC: at least one success-path test and one boundary/failure-path test.
|
|
29196
|
+
5. Run the new test files. Confirm tests compile (stubs work) AND fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
|
|
29197
|
+
|
|
29198
|
+
Rules:
|
|
29199
|
+
- Stubs are NOT implementations. The implementer in the next session writes real logic.
|
|
29200
|
+
- Each test name describes ONE behavior. Use AC IDs in test names when available (e.g. \`it('AC4: throws Division by zero when b === 0')\`).
|
|
29201
|
+
- Assert on observable outputs.
|
|
29149
29202
|
- ${frameworkHint}
|
|
29150
|
-
-
|
|
29151
|
-
- You MAY create minimal stubs in src/ (type definitions, empty functions) so tests can import and compile
|
|
29152
|
-
- Write clear test names that document expected behavior
|
|
29153
|
-
- Focus on behavior, not implementation details
|
|
29154
|
-
- Goal: comprehensive failing test suite with compilable imports, ready for implementation`;
|
|
29203
|
+
- Goal: comprehensive failing test suite that compiles, with stubs \u22643 lines each, ready for implementation.`;
|
|
29155
29204
|
}
|
|
29156
29205
|
return `# Role: Test-Writer
|
|
29157
29206
|
|
|
29158
|
-
Your task:
|
|
29207
|
+
Your task: write failing tests that pin down every acceptance criterion. An implementer will follow.
|
|
29159
29208
|
|
|
29160
|
-
Context: You are session 1 of a multi-session workflow.
|
|
29209
|
+
Context: You are session 1 of a multi-session workflow.
|
|
29161
29210
|
|
|
29162
|
-
|
|
29163
|
-
-
|
|
29164
|
-
|
|
29165
|
-
|
|
29211
|
+
Workflow:
|
|
29212
|
+
1. Re-read the acceptance criteria above.
|
|
29213
|
+
2. Create test files in the location the project uses for tests (project context names it).
|
|
29214
|
+
3. For each AC: write at least one test for the success path AND at least one for a boundary/failure path (zero, empty, negative, missing, throws). ACs worded as "throws X" require a test asserting the throw.
|
|
29215
|
+
4. Run the new test files. Confirm every test fails with an ASSERTION failure \u2014 NOT an import error, compile error, or runtime crash before assertion. A test that errors before reaching its assertion does not prove the behavior is missing.
|
|
29216
|
+
|
|
29217
|
+
Rules:
|
|
29218
|
+
- Do NOT create or modify any source files. Read source for types/interfaces only.
|
|
29219
|
+
- Each test name describes ONE behavior; each test asserts ONE behavior. When the AC has a number or ID, prefix the test name (e.g. \`it('AC4: throws Division by zero when b === 0')\`).
|
|
29220
|
+
- Assert on observable outputs (return values, thrown errors, file contents, log output, boundary state). Do not assert on private helpers, internal call counts, or implementation-level mocks unless the AC requires it.
|
|
29166
29221
|
- ${frameworkHint}
|
|
29167
|
-
-
|
|
29168
|
-
- Focus on behavior, not implementation details
|
|
29169
|
-
- Goal: comprehensive failing test suite ready for implementation`;
|
|
29222
|
+
- Goal: every AC has at least one failing test that fails at assertion time and clearly documents what the implementer must build.`;
|
|
29170
29223
|
}
|
|
29171
29224
|
if (role === "verifier") {
|
|
29172
29225
|
return `# Role: Verifier
|
|
@@ -29187,45 +29240,59 @@ Instructions:
|
|
|
29187
29240
|
if (role === "single-session") {
|
|
29188
29241
|
return `# Role: Single-Session
|
|
29189
29242
|
|
|
29190
|
-
Your task:
|
|
29243
|
+
Your task: write tests AND implement the feature in one session.
|
|
29191
29244
|
|
|
29192
|
-
|
|
29193
|
-
|
|
29194
|
-
|
|
29245
|
+
Workflow:
|
|
29246
|
+
1. Read the acceptance criteria. For each AC, plan one success-path test and one boundary/failure test.
|
|
29247
|
+
2. Create test files in the location the project uses for tests. Cover every AC.
|
|
29248
|
+
3. Run the tests to confirm they fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
|
|
29249
|
+
4. Implement source code in the package's source location to make the tests pass.
|
|
29250
|
+
5. After each meaningful change, re-run only the scoped test files \u2014 never the full suite.
|
|
29251
|
+
6. When all scoped tests pass, stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
|
|
29252
|
+
|
|
29253
|
+
Rules:
|
|
29254
|
+
- Each test name describes ONE behavior; use AC IDs when available.
|
|
29255
|
+
- Assert on observable outputs.
|
|
29195
29256
|
- ${frameworkHint}
|
|
29196
|
-
-
|
|
29197
|
-
- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
|
|
29198
|
-
- Goal: all tests passing, all changes committed, full story complete`;
|
|
29257
|
+
- Goal: every AC has at least one passing test; all changes committed.`;
|
|
29199
29258
|
}
|
|
29200
29259
|
if (role === "batch") {
|
|
29201
|
-
const verifyCmdLine = testCmd ? ` -
|
|
29260
|
+
const verifyCmdLine = testCmd ? ` - Re-run only the scoped test files after each meaningful change: ${testCmd}` : " - Re-run only the scoped test files after each meaningful change";
|
|
29202
29261
|
return `# Role: Batch Implementer
|
|
29203
29262
|
|
|
29204
|
-
Your task:
|
|
29263
|
+
Your task: implement each story in order using TDD \u2014 write tests first, then implement, then commit per story.
|
|
29205
29264
|
|
|
29206
|
-
|
|
29207
|
-
|
|
29208
|
-
|
|
29209
|
-
|
|
29210
|
-
|
|
29211
|
-
|
|
29212
|
-
|
|
29213
|
-
|
|
29265
|
+
Per-story workflow (RED \u2192 GREEN):
|
|
29266
|
+
1. RED \u2014 write failing tests in the location the project uses for tests covering the story's ACs (success + boundary).
|
|
29267
|
+
2. RED \u2014 run the new test files. Confirm assertion failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
|
|
29268
|
+
3. GREEN \u2014 implement source code in the package's source location.
|
|
29269
|
+
4. GREEN \u2014 re-run only the scoped test files after each meaningful change.
|
|
29270
|
+
5. Commit the story with its ID: \`git commit -m 'feat(<story-id>): <description>'\`.
|
|
29271
|
+
|
|
29272
|
+
Rules:
|
|
29273
|
+
- One commit per story \u2014 never bundle stories.
|
|
29274
|
+
- Process stories in order (Story 1, Story 2, \u2026).
|
|
29275
|
+
- Each test name describes ONE behavior; use AC IDs when available.
|
|
29214
29276
|
- ${frameworkHint}
|
|
29215
|
-
|
|
29216
|
-
- Goal:
|
|
29277
|
+
${verifyCmdLine}
|
|
29278
|
+
- Goal: every story implemented with passing tests; one commit per story tagged with the story ID.`;
|
|
29217
29279
|
}
|
|
29218
29280
|
return `# Role: TDD-Simple
|
|
29219
29281
|
|
|
29220
|
-
Your task:
|
|
29282
|
+
Your task: write failing tests FIRST, then implement in one session.
|
|
29221
29283
|
|
|
29222
|
-
|
|
29223
|
-
|
|
29224
|
-
|
|
29225
|
-
|
|
29226
|
-
-
|
|
29227
|
-
|
|
29228
|
-
|
|
29284
|
+
Workflow (RED \u2192 GREEN \u2192 REFACTOR):
|
|
29285
|
+
1. RED \u2014 write failing tests in the location the project uses for tests covering every AC (success + boundary).
|
|
29286
|
+
2. RED \u2014 run the tests. Confirm they fail with ASSERTION failures \u2014 NOT import errors or compile errors. A test that errors before reaching its assertion does not prove the behavior is missing.
|
|
29287
|
+
3. GREEN \u2014 implement minimum source code in the package's source location to make the tests pass.
|
|
29288
|
+
4. GREEN \u2014 re-run only the scoped test files after each meaningful change.
|
|
29289
|
+
5. REFACTOR \u2014 clean up while keeping tests green. No new behavior; no expanded scope.
|
|
29290
|
+
6. Stage and commit ALL changed files: \`git commit -m '${commitMsg}'\`.
|
|
29291
|
+
|
|
29292
|
+
Rules:
|
|
29293
|
+
- Each test name describes ONE behavior; use AC IDs when available.
|
|
29294
|
+
- ${frameworkHint}
|
|
29295
|
+
- Goal: every AC covered by passing tests; refactor complete; all changes committed.`;
|
|
29229
29296
|
}
|
|
29230
29297
|
var init_role_task = __esm(() => {
|
|
29231
29298
|
init_test_runners();
|
|
@@ -29457,6 +29524,97 @@ function buildSelfVerificationSection(role, input) {
|
|
|
29457
29524
|
}
|
|
29458
29525
|
var CHECK_HEADER = "# Self-Verification Gate";
|
|
29459
29526
|
|
|
29527
|
+
// src/prompts/sections/behavioral-guardrails.ts
|
|
29528
|
+
function buildBehavioralGuardrailsSection(role, level, _variant, _isolation) {
|
|
29529
|
+
if (level === "off" || role === "verifier" || role === "no-test") {
|
|
29530
|
+
return null;
|
|
29531
|
+
}
|
|
29532
|
+
if (role === "test-writer") {
|
|
29533
|
+
return buildTestWriterGuardrails(level);
|
|
29534
|
+
}
|
|
29535
|
+
if (role === "single-session" || role === "tdd-simple" || role === "batch") {
|
|
29536
|
+
return buildCombinedGuardrails(level);
|
|
29537
|
+
}
|
|
29538
|
+
return buildImplementerGuardrails(level);
|
|
29539
|
+
}
|
|
29540
|
+
function buildTestWriterGuardrails(level) {
|
|
29541
|
+
const lines = [
|
|
29542
|
+
"# Behavioral Guardrails",
|
|
29543
|
+
"",
|
|
29544
|
+
"- Simplicity: write tests that cover the acceptance criteria. No tests for behaviors the story does not require.",
|
|
29545
|
+
"- Surgical: do not modify source files beyond the stub allowance in the Isolation Rules above. Do not add tests for unrelated existing code."
|
|
29546
|
+
];
|
|
29547
|
+
if (level === "strict") {
|
|
29548
|
+
lines.push("- State Assumptions: when the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under `Assumptions:`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.");
|
|
29549
|
+
}
|
|
29550
|
+
return lines.join(`
|
|
29551
|
+
`);
|
|
29552
|
+
}
|
|
29553
|
+
function buildCombinedGuardrails(level) {
|
|
29554
|
+
if (level === "lite") {
|
|
29555
|
+
return `# Behavioral Guardrails
|
|
29556
|
+
|
|
29557
|
+
- Simplicity (tests): write tests that cover the acceptance criteria only. No tests for behaviors the story does not require.
|
|
29558
|
+
- Simplicity (source): write the minimum source code that makes the tests pass. No speculative abstractions, configurability, or error handling for scenarios that cannot occur.
|
|
29559
|
+
- Surgical: every changed line must trace to the story. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires.
|
|
29560
|
+
- Anti-cheat: do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run.
|
|
29561
|
+
- Orphans: remove imports/variables/helpers that YOUR changes made unused. Do not delete pre-existing dead code.
|
|
29562
|
+
- Commit: include the story ID when known \u2014 \`feat(<story-id>): <description>\`.`;
|
|
29563
|
+
}
|
|
29564
|
+
return `# Behavioral Guardrails
|
|
29565
|
+
|
|
29566
|
+
## Simplicity (Tests)
|
|
29567
|
+
Write tests that cover the acceptance criteria only. No tests for behaviors the story does not require. Every test you add is a constraint the implementer must satisfy \u2014 do not over-constrain with speculative behavior.
|
|
29568
|
+
|
|
29569
|
+
## Simplicity (Source)
|
|
29570
|
+
Write the minimum source code that makes the tests pass. Every line you add is a line someone else must read, understand, and maintain. Do not add speculative abstractions, configurability, or error handling for scenarios that cannot occur given the story's constraints.
|
|
29571
|
+
|
|
29572
|
+
## Surgical
|
|
29573
|
+
Every changed line must trace directly to a story requirement or a failing test. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires. Reviewers will flag any change that cannot be linked to a specific requirement.
|
|
29574
|
+
|
|
29575
|
+
## Anti-cheat
|
|
29576
|
+
Do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run. A green test suite achieved by weakening tests is not a passing implementation \u2014 it is a failing one with hidden evidence.
|
|
29577
|
+
|
|
29578
|
+
## Orphans
|
|
29579
|
+
Remove imports, variables, and helpers that YOUR changes made unused. Do not delete pre-existing dead code that was already there before your changes.
|
|
29580
|
+
|
|
29581
|
+
## Commit
|
|
29582
|
+
Include the story ID when known \u2014 \`feat(<story-id>): <description>\`.
|
|
29583
|
+
|
|
29584
|
+
## State Assumptions
|
|
29585
|
+
When the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under \`Assumptions:\`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.`;
|
|
29586
|
+
}
|
|
29587
|
+
function buildImplementerGuardrails(level) {
|
|
29588
|
+
if (level === "lite") {
|
|
29589
|
+
return `# Behavioral Guardrails
|
|
29590
|
+
|
|
29591
|
+
- Simplicity: write the minimum code that makes the tests pass. No speculative abstractions, configurability, or error handling for scenarios that cannot occur.
|
|
29592
|
+
- Surgical: every changed line must trace to the story. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires.
|
|
29593
|
+
- Anti-cheat: do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run.
|
|
29594
|
+
- Orphans: remove imports/variables/helpers that YOUR changes made unused. Do not delete pre-existing dead code.
|
|
29595
|
+
- Commit: include the story ID when known \u2014 \`feat(<story-id>): <description>\`.`;
|
|
29596
|
+
}
|
|
29597
|
+
return `# Behavioral Guardrails
|
|
29598
|
+
|
|
29599
|
+
## Simplicity
|
|
29600
|
+
Write the minimum code that makes the tests pass. Every line you add is a line someone else must read, understand, and maintain. Do not add speculative abstractions, configurability, or error handling for scenarios that cannot occur given the story's constraints. If it isn't required by a test or acceptance criterion, don't write it.
|
|
29601
|
+
|
|
29602
|
+
## Surgical
|
|
29603
|
+
Every changed line must trace directly to a story requirement or a failing test. Do not refactor adjacent code, reformat unrelated files, or rename symbols beyond what the story requires. Reviewers will flag any change that cannot be linked to a specific requirement.
|
|
29604
|
+
|
|
29605
|
+
## Anti-cheat
|
|
29606
|
+
Do not weaken assertions, catch-and-swallow exceptions in tests, or add tautological assertions to coerce a green run. A green test suite achieved by weakening tests is not a passing implementation \u2014 it is a failing one with hidden evidence.
|
|
29607
|
+
|
|
29608
|
+
## Orphans
|
|
29609
|
+
Remove imports, variables, and helpers that YOUR changes made unused. Do not delete pre-existing dead code that was already there before your changes.
|
|
29610
|
+
|
|
29611
|
+
## Commit
|
|
29612
|
+
Include the story ID when known \u2014 \`feat(<story-id>): <description>\`.
|
|
29613
|
+
|
|
29614
|
+
## State Assumptions
|
|
29615
|
+
When the story is ambiguous, pick an interpretation, proceed, and document the choice in the commit body under \`Assumptions:\`. Do not invent requirements; do not silently choose when the story is genuinely under-specified \u2014 note it.`;
|
|
29616
|
+
}
|
|
29617
|
+
|
|
29460
29618
|
// src/prompts/sections/index.ts
|
|
29461
29619
|
var init_sections2 = __esm(() => {
|
|
29462
29620
|
init_hermetic();
|
|
@@ -29614,6 +29772,12 @@ class TddPromptBuilder {
|
|
|
29614
29772
|
if (hermeticSection)
|
|
29615
29773
|
acc.add(this.s("hermetic", hermeticSection));
|
|
29616
29774
|
}
|
|
29775
|
+
const guardrailLevel = this.loaderConfig_?.prompts?.behavioralGuardrails ?? "lite";
|
|
29776
|
+
const guardrailVariant = this.options.variant;
|
|
29777
|
+
const guardrailIsolation = this.options.isolation;
|
|
29778
|
+
const guardrails = buildBehavioralGuardrailsSection(this.role, guardrailLevel, guardrailVariant, guardrailIsolation);
|
|
29779
|
+
if (guardrails)
|
|
29780
|
+
acc.add(this.s("guardrails", guardrails));
|
|
29617
29781
|
if (this.role !== "verifier") {
|
|
29618
29782
|
const selfVerify = buildSelfVerificationSection(this.role, this.selfVerification_);
|
|
29619
29783
|
if (selfVerify)
|
|
@@ -29650,7 +29814,7 @@ class TddPromptBuilder {
|
|
|
29650
29814
|
}
|
|
29651
29815
|
const variant = this.options.variant;
|
|
29652
29816
|
const isolation = this.options.isolation;
|
|
29653
|
-
return buildRoleTaskSection(this.role, variant, this.testCommand_, isolation, this.noTestJustification_);
|
|
29817
|
+
return buildRoleTaskSection(this.role, variant, this.testCommand_, isolation, this.noTestJustification_, this.story_?.id);
|
|
29654
29818
|
}
|
|
29655
29819
|
}
|
|
29656
29820
|
var init_tdd_builder = __esm(() => {
|
|
@@ -36082,11 +36246,6 @@ function makePassResult(storyId, strategy, opts = {}) {
|
|
|
36082
36246
|
};
|
|
36083
36247
|
}
|
|
36084
36248
|
|
|
36085
|
-
// src/verification/parser.ts
|
|
36086
|
-
var init_parser2 = __esm(() => {
|
|
36087
|
-
init_test_runners();
|
|
36088
|
-
});
|
|
36089
|
-
|
|
36090
36249
|
// src/utils/process-kill.ts
|
|
36091
36250
|
function killProcessGroup(pid, signal) {
|
|
36092
36251
|
try {
|
|
@@ -36335,9 +36494,9 @@ async function regression(options) {
|
|
|
36335
36494
|
}
|
|
36336
36495
|
var _regressionRunnerDeps;
|
|
36337
36496
|
var init_runners = __esm(() => {
|
|
36497
|
+
init_test_runners();
|
|
36338
36498
|
init_bun_deps();
|
|
36339
36499
|
init_executor();
|
|
36340
|
-
init_parser2();
|
|
36341
36500
|
_regressionRunnerDeps = {
|
|
36342
36501
|
sleep
|
|
36343
36502
|
};
|
|
@@ -36633,8 +36792,8 @@ class ScopedStrategy {
|
|
|
36633
36792
|
var DEFAULT_SMART_RUNNER_CONFIG, _scopedDeps;
|
|
36634
36793
|
var init_scoped = __esm(() => {
|
|
36635
36794
|
init_logger2();
|
|
36636
|
-
|
|
36637
|
-
|
|
36795
|
+
init_test_runners();
|
|
36796
|
+
init_test_runners();
|
|
36638
36797
|
init_runners();
|
|
36639
36798
|
init_smart_runner();
|
|
36640
36799
|
DEFAULT_SMART_RUNNER_CONFIG = {
|
|
@@ -37080,7 +37239,8 @@ ${debateResult.output}`;
|
|
|
37080
37239
|
story,
|
|
37081
37240
|
failures: failureRecords,
|
|
37082
37241
|
testCommand,
|
|
37083
|
-
conventions: true
|
|
37242
|
+
conventions: true,
|
|
37243
|
+
guardrailLevel: config2.prompts.behavioralGuardrails
|
|
37084
37244
|
});
|
|
37085
37245
|
const rectPromise = Promise.resolve(rectPrompt);
|
|
37086
37246
|
return (async () => {
|
|
@@ -37292,8 +37452,7 @@ var init_rectification_loop = __esm(() => {
|
|
|
37292
37452
|
init_prompts();
|
|
37293
37453
|
init_session_name();
|
|
37294
37454
|
init_session_keeper();
|
|
37295
|
-
|
|
37296
|
-
init_parser2();
|
|
37455
|
+
init_test_runners();
|
|
37297
37456
|
init_runners();
|
|
37298
37457
|
_rectificationDeps = {
|
|
37299
37458
|
agentManager: undefined,
|
|
@@ -37310,19 +37469,15 @@ __export(exports_verification, {
|
|
|
37310
37469
|
shouldRetryRectification: () => shouldRetryRectification,
|
|
37311
37470
|
scoped: () => scoped,
|
|
37312
37471
|
runRetryLoop: () => runRetryLoop,
|
|
37472
|
+
runRectificationLoop: () => runRectificationLoop,
|
|
37313
37473
|
regression: () => regression,
|
|
37314
|
-
parseTestOutput: () => parseTestOutput,
|
|
37315
|
-
parseBunTestOutput: () => parseBunTestOutput,
|
|
37316
37474
|
normalizeEnvironment: () => normalizeEnvironment,
|
|
37317
37475
|
fullSuite: () => fullSuite,
|
|
37318
|
-
formatFailureSummary: () => formatFailureSummary,
|
|
37319
37476
|
executeWithTimeout: () => executeWithTimeout,
|
|
37320
|
-
detectFramework: () => detectFramework,
|
|
37321
37477
|
buildTestCommand: () => buildTestCommand,
|
|
37322
37478
|
buildProgressivePromptPreamble: () => buildProgressivePromptPreamble,
|
|
37323
37479
|
appendOpenHandlesFlag: () => appendOpenHandlesFlag,
|
|
37324
37480
|
appendForceExitFlag: () => appendForceExitFlag,
|
|
37325
|
-
analyzeTestExitCode: () => analyzeTestExitCode,
|
|
37326
37481
|
_regressionRunnerDeps: () => _regressionRunnerDeps,
|
|
37327
37482
|
_rectificationDeps: () => _rectificationDeps,
|
|
37328
37483
|
_executorDeps: () => _executorDeps
|
|
@@ -37330,7 +37485,6 @@ __export(exports_verification, {
|
|
|
37330
37485
|
var init_verification = __esm(() => {
|
|
37331
37486
|
init_rectification_loop();
|
|
37332
37487
|
init_executor();
|
|
37333
|
-
init_parser2();
|
|
37334
37488
|
init_runners();
|
|
37335
37489
|
});
|
|
37336
37490
|
|
|
@@ -37359,7 +37513,8 @@ var init_full_suite_gate = __esm(() => {
|
|
|
37359
37513
|
return { config: config2, testCmd: resolvedTestCmd, fullSuiteTimeout };
|
|
37360
37514
|
},
|
|
37361
37515
|
runTests: async (input, gateCtx) => {
|
|
37362
|
-
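const { executeWithTimeout: executeWithTimeout2, parseTestOutput: parseTestOutput2 } = await Promise.resolve().then(() => (init_verification(), exports_verification));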
|
|
37516
|
+
const { executeWithTimeout: executeWithTimeout2 } = await Promise.resolve().then(() => (init_verification(), exports_verification));
|
|
37517
|
+
const { parseTestOutput: parseTestOutput2 } = await Promise.resolve().then(() => (init_test_runners(), exports_test_runners));
|
|
37363
37518
|
const result = await executeWithTimeout2(gateCtx.testCmd, gateCtx.fullSuiteTimeout, undefined, {
|
|
37364
37519
|
cwd: input.workdir
|
|
37365
37520
|
});
|
|
@@ -40502,7 +40657,7 @@ function renderPrioritizedFailures(failedChecks, opts) {
|
|
|
40502
40657
|
}
|
|
40503
40658
|
|
|
40504
40659
|
class RectifierPromptBuilder {
|
|
40505
|
-
static firstAttemptDelta(failedChecks, maxAttempts) {
|
|
40660
|
+
static firstAttemptDelta(failedChecks, maxAttempts, guardrailLevel) {
|
|
40506
40661
|
const parts = [];
|
|
40507
40662
|
const attemptWord = maxAttempts === 1 ? "1 attempt" : `${maxAttempts} attempts`;
|
|
40508
40663
|
parts.push(`Review failed after your implementation. Fix the following issues (${attemptWord} available before escalation):
|
|
@@ -40511,10 +40666,16 @@ class RectifierPromptBuilder {
|
|
|
40511
40666
|
parts.push(`
|
|
40512
40667
|
Fix in priority order. After fixing each priority, re-run the failing check(s) at that level to verify they pass before moving on. Do NOT change test files or test behavior \u2014 see the three narrow exceptions appended below. Commit your changes when all checks pass.`);
|
|
40513
40668
|
parts.push(CONTRADICTION_ESCAPE_HATCH);
|
|
40669
|
+
const guardrails = buildBehavioralGuardrailsSection("implementer", guardrailLevel ?? "lite");
|
|
40670
|
+
if (guardrails) {
|
|
40671
|
+
parts.push(`
|
|
40672
|
+
|
|
40673
|
+
${guardrails}`);
|
|
40674
|
+
}
|
|
40514
40675
|
return parts.join(`
|
|
40515
40676
|
`);
|
|
40516
40677
|
}
|
|
40517
|
-
static continuation(failedChecks, attempt, rethinkAtAttempt, urgencyAtAttempt) {
|
|
40678
|
+
static continuation(failedChecks, attempt, rethinkAtAttempt, urgencyAtAttempt, guardrailLevel) {
|
|
40518
40679
|
const parts = [];
|
|
40519
40680
|
parts.push(`Your previous fix attempt did not resolve all issues. Here are the remaining failures:
|
|
40520
40681
|
`);
|
|
@@ -40528,6 +40689,12 @@ Fix in priority order. After fixing each priority, re-run the failing check(s) a
|
|
|
40528
40689
|
parts.push("\n**URGENT: This is your final attempt.** If you cannot fix all issues, emit `UNRESOLVED: <reason>` to escalate.\n");
|
|
40529
40690
|
}
|
|
40530
40691
|
parts.push(CONTRADICTION_ESCAPE_HATCH);
|
|
40692
|
+
const guardrails = buildBehavioralGuardrailsSection("implementer", guardrailLevel ?? "lite");
|
|
40693
|
+
if (guardrails) {
|
|
40694
|
+
parts.push(`
|
|
40695
|
+
|
|
40696
|
+
${guardrails}`);
|
|
40697
|
+
}
|
|
40531
40698
|
return parts.join(`
|
|
40532
40699
|
`);
|
|
40533
40700
|
}
|
|
@@ -40919,6 +41086,13 @@ ${basePrompt}`;
|
|
|
40919
41086
|
parts.push(buildIsolationSection("implementer", opts.isolation, undefined));
|
|
40920
41087
|
parts.push(`
|
|
40921
41088
|
|
|
41089
|
+
`);
|
|
41090
|
+
}
|
|
41091
|
+
const guardrails = buildBehavioralGuardrailsSection("implementer", opts.guardrailLevel ?? "lite");
|
|
41092
|
+
if (guardrails) {
|
|
41093
|
+
parts.push(guardrails);
|
|
41094
|
+
parts.push(`
|
|
41095
|
+
|
|
40922
41096
|
`);
|
|
40923
41097
|
}
|
|
40924
41098
|
if (opts.conventions !== false) {
|
|
@@ -40975,7 +41149,7 @@ Fix the implementation (not the tests) to make all failing tests pass. Run the t
|
|
|
40975
41149
|
var PRIORITY_BUCKETS, PRIORITY_ORDER;
|
|
40976
41150
|
var init_rectifier_builder = __esm(() => {
|
|
40977
41151
|
init_review();
|
|
40978
|
-
|
|
41152
|
+
init_test_runners();
|
|
40979
41153
|
init_core3();
|
|
40980
41154
|
init_sections2();
|
|
40981
41155
|
init_rectifier_builder_helpers();
|
|
@@ -46911,11 +47085,6 @@ var init_types8 = __esm(() => {
|
|
|
46911
47085
|
safety: "yellow",
|
|
46912
47086
|
defaultSummary: "Story {{storyId}} is oversized ({{criteriaCount}} acceptance criteria) \u2014 decompose into smaller stories?"
|
|
46913
47087
|
},
|
|
46914
|
-
"story-ambiguity": {
|
|
46915
|
-
defaultFallback: "continue",
|
|
46916
|
-
safety: "green",
|
|
46917
|
-
defaultSummary: "Story {{storyId}} requirements unclear \u2014 continue with best effort?"
|
|
46918
|
-
},
|
|
46919
47088
|
"review-gate": {
|
|
46920
47089
|
defaultFallback: "continue",
|
|
46921
47090
|
safety: "green",
|
|
@@ -48315,12 +48484,6 @@ async function checkPreMerge(context, config2, chain) {
|
|
|
48315
48484
|
const response = await executeTrigger("pre-merge", context, config2, chain);
|
|
48316
48485
|
return response.action === "approve";
|
|
48317
48486
|
}
|
|
48318
|
-
async function checkStoryAmbiguity(context, config2, chain) {
|
|
48319
|
-
if (!isTriggerEnabled("story-ambiguity", config2))
|
|
48320
|
-
return true;
|
|
48321
|
-
const response = await executeTrigger("story-ambiguity", context, config2, chain);
|
|
48322
|
-
return response.action === "approve";
|
|
48323
|
-
}
|
|
48324
48487
|
async function checkReviewGate(context, config2, chain) {
|
|
48325
48488
|
if (!isTriggerEnabled("review-gate", config2))
|
|
48326
48489
|
return true;
|
|
@@ -54481,20 +54644,6 @@ var init_plan_inputs = __esm(() => {
|
|
|
54481
54644
|
});
|
|
54482
54645
|
|
|
54483
54646
|
// src/pipeline/stages/execution-helpers.ts
|
|
54484
|
-
function isAmbiguousOutput(output) {
|
|
54485
|
-
if (!output)
|
|
54486
|
-
return false;
|
|
54487
|
-
const ambiguityKeywords = [
|
|
54488
|
-
"unclear",
|
|
54489
|
-
"ambiguous",
|
|
54490
|
-
"need clarification",
|
|
54491
|
-
"please clarify",
|
|
54492
|
-
"which one",
|
|
54493
|
-
"not sure which"
|
|
54494
|
-
];
|
|
54495
|
-
const lowerOutput = output.toLowerCase();
|
|
54496
|
-
return ambiguityKeywords.some((keyword) => lowerOutput.includes(keyword));
|
|
54497
|
-
}
|
|
54498
54647
|
function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason) {
|
|
54499
54648
|
if (failureCategory === "isolation-violation") {
|
|
54500
54649
|
if (!isLiteMode) {
|
|
@@ -54850,13 +54999,6 @@ Category: ${failureCategory ?? "unknown"}`,
|
|
|
54850
54999
|
await cleanupSessionOnFailure(ctx);
|
|
54851
55000
|
return { action: "escalate" };
|
|
54852
55001
|
}
|
|
54853
|
-
if (agentResult.success && _postRunDeps.isAmbiguousOutput(combinedOutput) && ctx.interaction && isTriggerEnabled("story-ambiguity", ctx.config)) {
|
|
54854
|
-
const shouldContinue = await _postRunDeps.checkStoryAmbiguity({ featureName: ctx.prd.feature, storyId: ctx.story.id, reason: "Agent output suggests ambiguity" }, ctx.config, ctx.interaction);
|
|
54855
|
-
if (!shouldContinue) {
|
|
54856
|
-
logger.warn("execution", "Story ambiguity detected \u2014 escalating story", { storyId: ctx.story.id });
|
|
54857
|
-
return { action: "escalate", reason: "Story ambiguity detected \u2014 needs clarification" };
|
|
54858
|
-
}
|
|
54859
|
-
}
|
|
54860
55002
|
if (!isTdd) {
|
|
54861
55003
|
await _postRunDeps.autoCommitIfDirty(ctx.workdir, "execution", "single-session", ctx.story.id);
|
|
54862
55004
|
}
|
|
@@ -54879,8 +55021,6 @@ var init_post_run = __esm(() => {
|
|
|
54879
55021
|
_postRunDeps = {
|
|
54880
55022
|
detectMergeConflict,
|
|
54881
55023
|
checkMergeConflict,
|
|
54882
|
-
isAmbiguousOutput,
|
|
54883
|
-
checkStoryAmbiguity,
|
|
54884
55024
|
failAndClose,
|
|
54885
55025
|
rollbackToRef,
|
|
54886
55026
|
autoCommitIfDirty
|
|
@@ -55721,7 +55861,7 @@ class RegressionStrategy {
|
|
|
55721
55861
|
var _regressionStrategyDeps, DeferredRegressionStrategy;
|
|
55722
55862
|
var init_regression = __esm(() => {
|
|
55723
55863
|
init_logger2();
|
|
55724
|
-
|
|
55864
|
+
init_test_runners();
|
|
55725
55865
|
init_runners();
|
|
55726
55866
|
_regressionStrategyDeps = { runVerification: fullSuite };
|
|
55727
55867
|
DeferredRegressionStrategy = class DeferredRegressionStrategy extends RegressionStrategy {
|
|
@@ -59020,7 +59160,7 @@ var package_default;
|
|
|
59020
59160
|
var init_package = __esm(() => {
|
|
59021
59161
|
package_default = {
|
|
59022
59162
|
name: "@nathapp/nax",
|
|
59023
|
-
version: "0.67.0
|
|
59163
|
+
version: "0.67.0",
|
|
59024
59164
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
59025
59165
|
type: "module",
|
|
59026
59166
|
bin: {
|
|
@@ -59115,8 +59255,8 @@ var init_version = __esm(() => {
|
|
|
59115
59255
|
NAX_VERSION = package_default.version;
|
|
59116
59256
|
NAX_COMMIT = (() => {
|
|
59117
59257
|
try {
|
|
59118
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
59119
|
-
return "
|
|
59258
|
+
if (/^[0-9a-f]{6,10}$/.test("ffa2f392"))
|
|
59259
|
+
return "ffa2f392";
|
|
59120
59260
|
} catch {}
|
|
59121
59261
|
try {
|
|
59122
59262
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -60323,10 +60463,9 @@ var _regressionDeps;
|
|
|
60323
60463
|
var init_run_regression = __esm(() => {
|
|
60324
60464
|
init_logger2();
|
|
60325
60465
|
init_prd();
|
|
60466
|
+
init_test_runners();
|
|
60326
60467
|
init_git();
|
|
60327
60468
|
init_verification();
|
|
60328
|
-
init_rectification_loop();
|
|
60329
|
-
init_runners();
|
|
60330
60469
|
_regressionDeps = {
|
|
60331
60470
|
runVerification: fullSuite,
|
|
60332
60471
|
runRectificationLoop,
|