@nathapp/nax 0.31.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +117 -12
- package/package.json +1 -1
- package/src/cli/prompts.ts +4 -1
- package/src/config/types.ts +2 -2
- package/src/pipeline/stages/execution.ts +40 -0
- package/src/pipeline/stages/prompt.ts +4 -3
- package/src/pipeline/types.ts +1 -1
- package/src/prompts/sections/isolation.ts +10 -5
- package/src/prompts/sections/role-task.ts +26 -11
- package/src/prompts/types.ts +1 -1
- package/src/routing/router.ts +2 -2
- package/src/routing/strategies/keyword.ts +17 -7
- package/src/routing/strategies/llm-prompts.ts +16 -0
- package/src/tdd/session-runner.ts +51 -0
package/dist/nax.js
CHANGED
|
@@ -18540,7 +18540,7 @@ function determineTestStrategy(complexity, title, _description, tags = []) {
|
|
|
18540
18540
|
return "three-session-tdd";
|
|
18541
18541
|
}
|
|
18542
18542
|
if (complexity === "simple")
|
|
18543
|
-
return "
|
|
18543
|
+
return "tdd-simple";
|
|
18544
18544
|
return "three-session-tdd-lite";
|
|
18545
18545
|
}
|
|
18546
18546
|
function complexityToModelTier(complexity, context) {
|
|
@@ -18609,8 +18609,8 @@ var init_keyword = __esm(() => {
|
|
|
18609
18609
|
const modelTier = complexityToModelTier(complexity, context);
|
|
18610
18610
|
const testStrategy = determineTestStrategy(complexity, title, description, tags);
|
|
18611
18611
|
const reasons = [];
|
|
18612
|
+
const text = [title, description, ...tags].join(" ").toLowerCase();
|
|
18612
18613
|
if (testStrategy === "three-session-tdd") {
|
|
18613
|
-
const text = [title, description, ...tags].join(" ").toLowerCase();
|
|
18614
18614
|
if (SECURITY_KEYWORDS.some((kw) => text.includes(kw)))
|
|
18615
18615
|
reasons.push("security-critical");
|
|
18616
18616
|
if (PUBLIC_API_KEYWORDS.some((kw) => text.includes(kw)))
|
|
@@ -18618,11 +18618,21 @@ var init_keyword = __esm(() => {
|
|
|
18618
18618
|
if (complexity === "complex" || complexity === "expert")
|
|
18619
18619
|
reasons.push(`complexity:${complexity}`);
|
|
18620
18620
|
}
|
|
18621
|
+
let reasoning = "";
|
|
18622
|
+
if (testStrategy === "three-session-tdd") {
|
|
18623
|
+
reasoning = reasons.length > 0 ? `three-session-tdd: ${reasons.join(", ")}` : `three-session-tdd: ${complexity} task`;
|
|
18624
|
+
} else if (testStrategy === "three-session-tdd-lite") {
|
|
18625
|
+
reasoning = `three-session-tdd-lite: simple task (${complexity})`;
|
|
18626
|
+
} else if (testStrategy === "tdd-simple") {
|
|
18627
|
+
reasoning = `tdd-simple: simple task (${complexity})`;
|
|
18628
|
+
} else {
|
|
18629
|
+
reasoning = `${testStrategy}: ${complexity} task`;
|
|
18630
|
+
}
|
|
18621
18631
|
return {
|
|
18622
18632
|
complexity,
|
|
18623
18633
|
modelTier,
|
|
18624
18634
|
testStrategy,
|
|
18625
|
-
reasoning
|
|
18635
|
+
reasoning
|
|
18626
18636
|
};
|
|
18627
18637
|
}
|
|
18628
18638
|
};
|
|
@@ -18901,10 +18911,18 @@ Tags: ${tags.join(", ")}
|
|
|
18901
18911
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
18902
18912
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
18903
18913
|
|
|
18914
|
+
## Test Strategies (derived from complexity)
|
|
18915
|
+
Your complexity classification will determine the execution strategy:
|
|
18916
|
+
- simple \u2192 tdd-simple: Single-session TDD (agent writes tests first, then implements)
|
|
18917
|
+
- medium \u2192 three-session-tdd-lite: Multi-session with lite isolation
|
|
18918
|
+
- complex/expert \u2192 three-session-tdd: Strict multi-session TDD isolation
|
|
18919
|
+
- test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
|
|
18920
|
+
|
|
18904
18921
|
## Rules
|
|
18905
18922
|
- Default to the CHEAPEST tier that will succeed.
|
|
18906
18923
|
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
18907
18924
|
- A story touching many files doesn't automatically mean complex \u2014 copy-paste refactors are simple.
|
|
18925
|
+
- If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
|
|
18908
18926
|
|
|
18909
18927
|
Respond with ONLY this JSON (no markdown, no explanation):
|
|
18910
18928
|
{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
|
|
@@ -18931,10 +18949,18 @@ ${storyBlocks}
|
|
|
18931
18949
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
18932
18950
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
18933
18951
|
|
|
18952
|
+
## Test Strategies (derived from complexity)
|
|
18953
|
+
Your complexity classification will determine the execution strategy:
|
|
18954
|
+
- simple \u2192 tdd-simple: Single-session TDD (agent writes tests first, then implements)
|
|
18955
|
+
- medium \u2192 three-session-tdd-lite: Multi-session with lite isolation
|
|
18956
|
+
- complex/expert \u2192 three-session-tdd: Strict multi-session TDD isolation
|
|
18957
|
+
- test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
|
|
18958
|
+
|
|
18934
18959
|
## Rules
|
|
18935
18960
|
- Default to the CHEAPEST tier that will succeed.
|
|
18936
18961
|
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
18937
18962
|
- A story touching many files doesn't automatically mean complex \u2014 copy-paste refactors are simple.
|
|
18963
|
+
- If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
|
|
18938
18964
|
|
|
18939
18965
|
Respond with ONLY a JSON array (no markdown, no explanation):
|
|
18940
18966
|
[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
|
|
@@ -19360,7 +19386,7 @@ function determineTestStrategy2(complexity, title, description, tags = [], tddSt
|
|
|
19360
19386
|
return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
|
|
19361
19387
|
}
|
|
19362
19388
|
if (complexity === "simple")
|
|
19363
|
-
return "
|
|
19389
|
+
return "tdd-simple";
|
|
19364
19390
|
return "three-session-tdd-lite";
|
|
19365
19391
|
}
|
|
19366
19392
|
function complexityToModelTier2(complexity, config2) {
|
|
@@ -19505,7 +19531,7 @@ var package_default;
|
|
|
19505
19531
|
var init_package = __esm(() => {
|
|
19506
19532
|
package_default = {
|
|
19507
19533
|
name: "@nathapp/nax",
|
|
19508
|
-
version: "0.
|
|
19534
|
+
version: "0.32.0",
|
|
19509
19535
|
description: "AI Coding Agent Orchestrator \u2014 loops until done",
|
|
19510
19536
|
type: "module",
|
|
19511
19537
|
bin: {
|
|
@@ -19567,8 +19593,8 @@ var init_version = __esm(() => {
|
|
|
19567
19593
|
NAX_VERSION = package_default.version;
|
|
19568
19594
|
NAX_COMMIT = (() => {
|
|
19569
19595
|
try {
|
|
19570
|
-
if (/^[0-9a-f]{6,10}$/.test("
|
|
19571
|
-
return "
|
|
19596
|
+
if (/^[0-9a-f]{6,10}$/.test("76e82f7"))
|
|
19597
|
+
return "76e82f7";
|
|
19572
19598
|
} catch {}
|
|
19573
19599
|
try {
|
|
19574
19600
|
const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
|
|
@@ -23719,7 +23745,10 @@ ${TEST_FILTER_RULE}`;
|
|
|
23719
23745
|
if (role === "verifier") {
|
|
23720
23746
|
return `${header}isolation scope: Read-only inspection. Review all test results, implementation code, and acceptance criteria compliance. You MAY write a verdict file (.nax-verifier-verdict.json) and apply legitimate fixes if needed.${footer}`;
|
|
23721
23747
|
}
|
|
23722
|
-
|
|
23748
|
+
if (role === "single-session") {
|
|
23749
|
+
return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
|
|
23750
|
+
}
|
|
23751
|
+
return `${header}isolation scope: You may modify both src/ and test/ files. Write failing tests FIRST, then implement to make them pass.`;
|
|
23723
23752
|
}
|
|
23724
23753
|
var TEST_FILTER_RULE;
|
|
23725
23754
|
var init_isolation2 = __esm(() => {
|
|
@@ -23783,7 +23812,8 @@ function buildRoleTaskSection(roleOrVariant, variant) {
|
|
|
23783
23812
|
` + `- Write a detailed verdict with reasoning
|
|
23784
23813
|
` + "- Goal: provide comprehensive verification and quality assurance";
|
|
23785
23814
|
}
|
|
23786
|
-
|
|
23815
|
+
if (role === "single-session") {
|
|
23816
|
+
return `# Role: Single-Session
|
|
23787
23817
|
|
|
23788
23818
|
` + `Your task: Write tests AND implement the feature in a single focused session.
|
|
23789
23819
|
|
|
@@ -23794,6 +23824,18 @@ function buildRoleTaskSection(roleOrVariant, variant) {
|
|
|
23794
23824
|
` + `- Run tests frequently throughout implementation
|
|
23795
23825
|
` + `- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
|
|
23796
23826
|
` + "- Goal: all tests passing, all changes committed, full story complete";
|
|
23827
|
+
}
|
|
23828
|
+
return `# Role: TDD-Simple
|
|
23829
|
+
|
|
23830
|
+
` + `Your task: Write failing tests FIRST, then implement to make them pass.
|
|
23831
|
+
|
|
23832
|
+
` + `Instructions:
|
|
23833
|
+
` + `- RED phase: Write failing tests FIRST for the acceptance criteria
|
|
23834
|
+
` + `- RED phase: Run the tests to confirm they fail
|
|
23835
|
+
` + `- GREEN phase: Implement the minimum code to make tests pass
|
|
23836
|
+
` + `- REFACTOR phase: Refactor while keeping tests green
|
|
23837
|
+
` + `- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
|
|
23838
|
+
` + "- Goal: all tests passing, feature complete, all changes committed";
|
|
23797
23839
|
}
|
|
23798
23840
|
|
|
23799
23841
|
// src/prompts/sections/story.ts
|
|
@@ -24001,6 +24043,7 @@ async function runTddSession(role, agent, story, config2, workdir, modelTier, be
|
|
|
24001
24043
|
if (!result.success && result.pid) {
|
|
24002
24044
|
await cleanupProcessTree(result.pid);
|
|
24003
24045
|
}
|
|
24046
|
+
await autoCommitIfDirty(workdir, role, story.id);
|
|
24004
24047
|
let isolation;
|
|
24005
24048
|
if (!skipIsolation) {
|
|
24006
24049
|
if (role === "test-writer") {
|
|
@@ -24047,6 +24090,38 @@ async function runTddSession(role, agent, story, config2, workdir, modelTier, be
|
|
|
24047
24090
|
estimatedCost: result.estimatedCost
|
|
24048
24091
|
};
|
|
24049
24092
|
}
|
|
24093
|
+
async function autoCommitIfDirty(workdir, role, storyId) {
|
|
24094
|
+
const logger = getLogger();
|
|
24095
|
+
try {
|
|
24096
|
+
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
24097
|
+
cwd: workdir,
|
|
24098
|
+
stdout: "pipe",
|
|
24099
|
+
stderr: "pipe"
|
|
24100
|
+
});
|
|
24101
|
+
const statusOutput = await new Response(statusProc.stdout).text();
|
|
24102
|
+
await statusProc.exited;
|
|
24103
|
+
if (!statusOutput.trim())
|
|
24104
|
+
return;
|
|
24105
|
+
logger.warn("tdd", `Agent did not commit after ${role} session \u2014 auto-committing`, {
|
|
24106
|
+
role,
|
|
24107
|
+
storyId,
|
|
24108
|
+
dirtyFiles: statusOutput.trim().split(`
|
|
24109
|
+
`).length
|
|
24110
|
+
});
|
|
24111
|
+
const addProc = Bun.spawn(["git", "add", "-A"], {
|
|
24112
|
+
cwd: workdir,
|
|
24113
|
+
stdout: "pipe",
|
|
24114
|
+
stderr: "pipe"
|
|
24115
|
+
});
|
|
24116
|
+
await addProc.exited;
|
|
24117
|
+
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
24118
|
+
cwd: workdir,
|
|
24119
|
+
stdout: "pipe",
|
|
24120
|
+
stderr: "pipe"
|
|
24121
|
+
});
|
|
24122
|
+
await commitProc.exited;
|
|
24123
|
+
} catch {}
|
|
24124
|
+
}
|
|
24050
24125
|
var init_session_runner = __esm(() => {
|
|
24051
24126
|
init_config();
|
|
24052
24127
|
init_logger2();
|
|
@@ -24497,6 +24572,34 @@ function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason) {
|
|
|
24497
24572
|
reason: reviewReason || "Three-session TDD requires review"
|
|
24498
24573
|
};
|
|
24499
24574
|
}
|
|
24575
|
+
async function autoCommitIfDirty2(workdir, role, storyId) {
|
|
24576
|
+
try {
|
|
24577
|
+
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
24578
|
+
cwd: workdir,
|
|
24579
|
+
stdout: "pipe",
|
|
24580
|
+
stderr: "pipe"
|
|
24581
|
+
});
|
|
24582
|
+
const statusOutput = await new Response(statusProc.stdout).text();
|
|
24583
|
+
await statusProc.exited;
|
|
24584
|
+
if (!statusOutput.trim())
|
|
24585
|
+
return;
|
|
24586
|
+
const logger = getLogger();
|
|
24587
|
+
logger.warn("execution", `Agent did not commit after ${role} session \u2014 auto-committing`, {
|
|
24588
|
+
role,
|
|
24589
|
+
storyId,
|
|
24590
|
+
dirtyFiles: statusOutput.trim().split(`
|
|
24591
|
+
`).length
|
|
24592
|
+
});
|
|
24593
|
+
const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
|
|
24594
|
+
await addProc.exited;
|
|
24595
|
+
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
24596
|
+
cwd: workdir,
|
|
24597
|
+
stdout: "pipe",
|
|
24598
|
+
stderr: "pipe"
|
|
24599
|
+
});
|
|
24600
|
+
await commitProc.exited;
|
|
24601
|
+
} catch {}
|
|
24602
|
+
}
|
|
24500
24603
|
var executionStage, _executionDeps;
|
|
24501
24604
|
var init_execution = __esm(() => {
|
|
24502
24605
|
init_agents();
|
|
@@ -24578,6 +24681,7 @@ var init_execution = __esm(() => {
|
|
|
24578
24681
|
dangerouslySkipPermissions: ctx.config.execution.dangerouslySkipPermissions
|
|
24579
24682
|
});
|
|
24580
24683
|
ctx.agentResult = result;
|
|
24684
|
+
await autoCommitIfDirty2(ctx.workdir, "single-session", ctx.story.id);
|
|
24581
24685
|
const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
|
|
24582
24686
|
if (_executionDeps.detectMergeConflict(combinedOutput) && ctx.interaction && isTriggerEnabled("merge-conflict", ctx.config)) {
|
|
24583
24687
|
const shouldProceed = await _executionDeps.checkMergeConflict({ featureName: ctx.prd.feature, storyId: ctx.story.id }, ctx.config, ctx.interaction);
|
|
@@ -24924,19 +25028,20 @@ var init_prompt = __esm(() => {
|
|
|
24924
25028
|
if (isBatch) {
|
|
24925
25029
|
prompt = buildBatchPrompt2(ctx.stories, ctx.contextMarkdown, ctx.constitution);
|
|
24926
25030
|
} else {
|
|
24927
|
-
const
|
|
25031
|
+
const role = ctx.routing.testStrategy === "tdd-simple" ? "tdd-simple" : "single-session";
|
|
25032
|
+
const builder = PromptBuilder.for(role).withLoader(ctx.workdir, ctx.config).story(ctx.story).context(ctx.contextMarkdown).constitution(ctx.constitution?.content);
|
|
24928
25033
|
prompt = await builder.build();
|
|
24929
25034
|
}
|
|
24930
25035
|
ctx.prompt = prompt;
|
|
24931
25036
|
if (isBatch) {
|
|
24932
25037
|
logger.info("prompt", "Batch session prepared", {
|
|
24933
25038
|
storyCount: ctx.stories.length,
|
|
24934
|
-
testStrategy:
|
|
25039
|
+
testStrategy: ctx.routing.testStrategy
|
|
24935
25040
|
});
|
|
24936
25041
|
} else {
|
|
24937
25042
|
logger.info("prompt", "Single session prepared", {
|
|
24938
25043
|
storyId: ctx.story.id,
|
|
24939
|
-
testStrategy:
|
|
25044
|
+
testStrategy: ctx.routing.testStrategy
|
|
24940
25045
|
});
|
|
24941
25046
|
}
|
|
24942
25047
|
return { action: "continue" };
|
package/package.json
CHANGED
package/src/cli/prompts.ts
CHANGED
|
@@ -273,11 +273,14 @@ const TEMPLATE_HEADER = `<!--
|
|
|
273
273
|
/**
|
|
274
274
|
* Execute the `nax prompts --init` command.
|
|
275
275
|
*
|
|
276
|
-
* Creates nax/templates/ and writes 4 default role-body template files
|
|
276
|
+
* Creates nax/templates/ and writes 4 default role-body template files
|
|
277
|
+
* (test-writer, implementer, verifier, single-session).
|
|
277
278
|
* Auto-wires prompts.overrides in nax.config.json if the file exists and overrides are not already set.
|
|
278
279
|
* Returns the list of file paths written. Returns empty array if files
|
|
279
280
|
* already exist and force is not set.
|
|
280
281
|
*
|
|
282
|
+
* Note: tdd-simple role is supported in the prompt system but not auto-generated as a template.
|
|
283
|
+
*
|
|
281
284
|
* @param options - Command options
|
|
282
285
|
* @returns Array of file paths written
|
|
283
286
|
*/
|
package/src/config/types.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
export type Complexity = "simple" | "medium" | "complex" | "expert";
|
|
9
|
-
export type TestStrategy = "test-after" | "three-session-tdd" | "three-session-tdd-lite";
|
|
9
|
+
export type TestStrategy = "test-after" | "tdd-simple" | "three-session-tdd" | "three-session-tdd-lite";
|
|
10
10
|
export type TddStrategy = "auto" | "strict" | "lite" | "off";
|
|
11
11
|
|
|
12
12
|
export interface EscalationEntry {
|
|
@@ -409,7 +409,7 @@ export interface RoutingConfig {
|
|
|
409
409
|
|
|
410
410
|
/** Prompt overrides config (PB-003) */
|
|
411
411
|
export interface PromptsConfig {
|
|
412
|
-
overrides?: Partial<Record<"test-writer" | "implementer" | "verifier" | "single-session", string>>;
|
|
412
|
+
overrides?: Partial<Record<"test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple", string>>;
|
|
413
413
|
}
|
|
414
414
|
|
|
415
415
|
/** Full nax configuration */
|
|
@@ -199,6 +199,9 @@ export const executionStage: PipelineStage = {
|
|
|
199
199
|
|
|
200
200
|
ctx.agentResult = result;
|
|
201
201
|
|
|
202
|
+
// BUG-058: Auto-commit if agent left uncommitted changes (single-session/test-after)
|
|
203
|
+
await autoCommitIfDirty(ctx.workdir, "single-session", ctx.story.id);
|
|
204
|
+
|
|
202
205
|
// merge-conflict trigger: detect CONFLICT markers in agent output
|
|
203
206
|
const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
|
|
204
207
|
if (
|
|
@@ -267,3 +270,40 @@ export const _executionDeps = {
|
|
|
267
270
|
isAmbiguousOutput,
|
|
268
271
|
checkStoryAmbiguity,
|
|
269
272
|
};
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* BUG-058: Auto-commit safety net for single-session/test-after.
|
|
276
|
+
* Mirrors the same function in tdd/session-runner.ts for three-session TDD.
|
|
277
|
+
*/
|
|
278
|
+
async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
|
|
279
|
+
try {
|
|
280
|
+
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
281
|
+
cwd: workdir,
|
|
282
|
+
stdout: "pipe",
|
|
283
|
+
stderr: "pipe",
|
|
284
|
+
});
|
|
285
|
+
const statusOutput = await new Response(statusProc.stdout).text();
|
|
286
|
+
await statusProc.exited;
|
|
287
|
+
|
|
288
|
+
if (!statusOutput.trim()) return;
|
|
289
|
+
|
|
290
|
+
const logger = getLogger();
|
|
291
|
+
logger.warn("execution", `Agent did not commit after ${role} session — auto-committing`, {
|
|
292
|
+
role,
|
|
293
|
+
storyId,
|
|
294
|
+
dirtyFiles: statusOutput.trim().split("\n").length,
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
|
|
298
|
+
await addProc.exited;
|
|
299
|
+
|
|
300
|
+
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
301
|
+
cwd: workdir,
|
|
302
|
+
stdout: "pipe",
|
|
303
|
+
stderr: "pipe",
|
|
304
|
+
});
|
|
305
|
+
await commitProc.exited;
|
|
306
|
+
} catch {
|
|
307
|
+
// Silently ignore — auto-commit is best-effort
|
|
308
|
+
}
|
|
309
|
+
}
|
|
@@ -39,7 +39,8 @@ export const promptStage: PipelineStage = {
|
|
|
39
39
|
if (isBatch) {
|
|
40
40
|
prompt = buildBatchPrompt(ctx.stories, ctx.contextMarkdown, ctx.constitution);
|
|
41
41
|
} else {
|
|
42
|
-
const
|
|
42
|
+
const role = ctx.routing.testStrategy === "tdd-simple" ? "tdd-simple" : "single-session";
|
|
43
|
+
const builder = PromptBuilder.for(role)
|
|
43
44
|
.withLoader(ctx.workdir, ctx.config)
|
|
44
45
|
.story(ctx.story)
|
|
45
46
|
.context(ctx.contextMarkdown)
|
|
@@ -52,12 +53,12 @@ export const promptStage: PipelineStage = {
|
|
|
52
53
|
if (isBatch) {
|
|
53
54
|
logger.info("prompt", "Batch session prepared", {
|
|
54
55
|
storyCount: ctx.stories.length,
|
|
55
|
-
testStrategy:
|
|
56
|
+
testStrategy: ctx.routing.testStrategy,
|
|
56
57
|
});
|
|
57
58
|
} else {
|
|
58
59
|
logger.info("prompt", "Single session prepared", {
|
|
59
60
|
storyId: ctx.story.id,
|
|
60
|
-
testStrategy:
|
|
61
|
+
testStrategy: ctx.routing.testStrategy,
|
|
61
62
|
});
|
|
62
63
|
}
|
|
63
64
|
|
package/src/pipeline/types.ts
CHANGED
|
@@ -26,7 +26,7 @@ export interface RoutingResult {
|
|
|
26
26
|
/** Selected model tier */
|
|
27
27
|
modelTier: "fast" | "balanced" | "powerful";
|
|
28
28
|
/** Test strategy */
|
|
29
|
-
testStrategy: "test-after" | "three-session-tdd" | "three-session-tdd-lite";
|
|
29
|
+
testStrategy: "test-after" | "tdd-simple" | "three-session-tdd" | "three-session-tdd-lite";
|
|
30
30
|
/** Reasoning for the classification */
|
|
31
31
|
reasoning: string;
|
|
32
32
|
/** Estimated cost for this story */
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Isolation Rules Section
|
|
3
3
|
*
|
|
4
|
-
* Generates isolation rules for all
|
|
4
|
+
* Generates isolation rules for all 5 roles:
|
|
5
5
|
* - test-writer: Strict/Lite modes for test-first TDD
|
|
6
6
|
* - implementer: Implement source while respecting test integrity
|
|
7
7
|
* - verifier: Read-only inspection
|
|
8
8
|
* - single-session: Both test/ and src/ modification allowed
|
|
9
|
+
* - tdd-simple: Both test/ and src/ modification allowed (no isolation)
|
|
9
10
|
*
|
|
10
11
|
* Backwards compatible: also accepts old API (mode only)
|
|
11
12
|
* - buildIsolationSection("strict") → test-writer, strict
|
|
@@ -18,7 +19,7 @@ const TEST_FILTER_RULE =
|
|
|
18
19
|
"— full suite output will flood your context window and cause failures.";
|
|
19
20
|
|
|
20
21
|
export function buildIsolationSection(
|
|
21
|
-
roleOrMode: "implementer" | "test-writer" | "verifier" | "single-session" | "strict" | "lite",
|
|
22
|
+
roleOrMode: "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple" | "strict" | "lite",
|
|
22
23
|
mode?: "strict" | "lite",
|
|
23
24
|
): string {
|
|
24
25
|
// Old API support: buildIsolationSection("strict") or buildIsolationSection("lite")
|
|
@@ -26,7 +27,7 @@ export function buildIsolationSection(
|
|
|
26
27
|
return buildIsolationSection("test-writer", roleOrMode);
|
|
27
28
|
}
|
|
28
29
|
|
|
29
|
-
const role = roleOrMode as "implementer" | "test-writer" | "verifier" | "single-session";
|
|
30
|
+
const role = roleOrMode as "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple";
|
|
30
31
|
|
|
31
32
|
const header = "# Isolation Rules\n\n";
|
|
32
33
|
const footer = `\n\n${TEST_FILTER_RULE}`;
|
|
@@ -49,6 +50,10 @@ export function buildIsolationSection(
|
|
|
49
50
|
return `${header}isolation scope: Read-only inspection. Review all test results, implementation code, and acceptance criteria compliance. You MAY write a verdict file (.nax-verifier-verdict.json) and apply legitimate fixes if needed.${footer}`;
|
|
50
51
|
}
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
if (role === "single-session") {
|
|
54
|
+
return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// tdd-simple role — no isolation restrictions (no footer needed)
|
|
58
|
+
return `${header}isolation scope: You may modify both src/ and test/ files. Write failing tests FIRST, then implement to make them pass.`;
|
|
54
59
|
}
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Role-Task Section
|
|
3
3
|
*
|
|
4
|
-
* Generates role definition for all
|
|
4
|
+
* Generates role definition for all 5 roles in nax prompt orchestration:
|
|
5
5
|
* - implementer: Make failing tests pass (standard/lite variants)
|
|
6
6
|
* - test-writer: Write tests first (RED phase)
|
|
7
7
|
* - verifier: Review and verify implementation
|
|
8
8
|
* - single-session: Write tests AND implement in one session
|
|
9
|
+
* - tdd-simple: Write failing tests FIRST, then implement in one session
|
|
9
10
|
*
|
|
10
11
|
* Backwards compatible: also accepts old API (variant only)
|
|
11
12
|
* - buildRoleTaskSection("standard") → implementer, standard
|
|
@@ -13,7 +14,7 @@
|
|
|
13
14
|
*/
|
|
14
15
|
|
|
15
16
|
export function buildRoleTaskSection(
|
|
16
|
-
roleOrVariant: "implementer" | "test-writer" | "verifier" | "single-session" | "standard" | "lite",
|
|
17
|
+
roleOrVariant: "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple" | "standard" | "lite",
|
|
17
18
|
variant?: "standard" | "lite",
|
|
18
19
|
): string {
|
|
19
20
|
// Old API support: buildRoleTaskSection("standard") or buildRoleTaskSection("lite")
|
|
@@ -21,7 +22,7 @@ export function buildRoleTaskSection(
|
|
|
21
22
|
return buildRoleTaskSection("implementer", roleOrVariant);
|
|
22
23
|
}
|
|
23
24
|
|
|
24
|
-
const role = roleOrVariant as "implementer" | "test-writer" | "verifier" | "single-session";
|
|
25
|
+
const role = roleOrVariant as "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple";
|
|
25
26
|
|
|
26
27
|
if (role === "implementer") {
|
|
27
28
|
const v = variant ?? "standard";
|
|
@@ -79,16 +80,30 @@ export function buildRoleTaskSection(
|
|
|
79
80
|
);
|
|
80
81
|
}
|
|
81
82
|
|
|
82
|
-
|
|
83
|
+
if (role === "single-session") {
|
|
84
|
+
return (
|
|
85
|
+
"# Role: Single-Session\n\n" +
|
|
86
|
+
"Your task: Write tests AND implement the feature in a single focused session.\n\n" +
|
|
87
|
+
"Instructions:\n" +
|
|
88
|
+
"- Phase 1: Write comprehensive tests (test/ directory)\n" +
|
|
89
|
+
"- Phase 2: Implement to make all tests pass (src/ directory)\n" +
|
|
90
|
+
"- Use Bun test (describe/test/expect)\n" +
|
|
91
|
+
"- Run tests frequently throughout implementation\n" +
|
|
92
|
+
"- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'\n" +
|
|
93
|
+
"- Goal: all tests passing, all changes committed, full story complete"
|
|
94
|
+
);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// tdd-simple role — test-driven development in one session
|
|
83
98
|
return (
|
|
84
|
-
"# Role:
|
|
85
|
-
"Your task: Write tests
|
|
99
|
+
"# Role: TDD-Simple\n\n" +
|
|
100
|
+
"Your task: Write failing tests FIRST, then implement to make them pass.\n\n" +
|
|
86
101
|
"Instructions:\n" +
|
|
87
|
-
"-
|
|
88
|
-
"-
|
|
89
|
-
"-
|
|
90
|
-
"-
|
|
102
|
+
"- RED phase: Write failing tests FIRST for the acceptance criteria\n" +
|
|
103
|
+
"- RED phase: Run the tests to confirm they fail\n" +
|
|
104
|
+
"- GREEN phase: Implement the minimum code to make tests pass\n" +
|
|
105
|
+
"- REFACTOR phase: Refactor while keeping tests green\n" +
|
|
91
106
|
"- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'\n" +
|
|
92
|
-
"- Goal: all tests passing, all changes committed
|
|
107
|
+
"- Goal: all tests passing, feature complete, all changes committed"
|
|
93
108
|
);
|
|
94
109
|
}
|
package/src/prompts/types.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
/** Role determining which default template body to use */
|
|
8
|
-
export type PromptRole = "test-writer" | "implementer" | "verifier" | "single-session";
|
|
8
|
+
export type PromptRole = "test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple";
|
|
9
9
|
|
|
10
10
|
/** A single section of a composed prompt */
|
|
11
11
|
export interface PromptSection {
|
package/src/routing/router.ts
CHANGED
|
@@ -201,8 +201,8 @@ export function determineTestStrategy(
|
|
|
201
201
|
return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
//
|
|
205
|
-
if (complexity === "simple") return "
|
|
204
|
+
// TS-001: simple → tdd-simple (TDD discipline, 1 session), medium → tdd-lite (3 sessions)
|
|
205
|
+
if (complexity === "simple") return "tdd-simple";
|
|
206
206
|
return "three-session-tdd-lite";
|
|
207
207
|
}
|
|
208
208
|
|
|
@@ -117,8 +117,8 @@ function determineTestStrategy(
|
|
|
117
117
|
return "three-session-tdd";
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
//
|
|
121
|
-
if (complexity === "simple") return "
|
|
120
|
+
// TS-001: simple → tdd-simple (TDD discipline, 1 session), medium → tdd-lite (3 sessions)
|
|
121
|
+
if (complexity === "simple") return "tdd-simple";
|
|
122
122
|
return "three-session-tdd-lite";
|
|
123
123
|
}
|
|
124
124
|
|
|
@@ -150,21 +150,31 @@ export const keywordStrategy: RoutingStrategy = {
|
|
|
150
150
|
const testStrategy = determineTestStrategy(complexity, title, description, tags);
|
|
151
151
|
|
|
152
152
|
const reasons: string[] = [];
|
|
153
|
+
const text = [title, description, ...tags].join(" ").toLowerCase();
|
|
154
|
+
|
|
153
155
|
if (testStrategy === "three-session-tdd") {
|
|
154
|
-
const text = [title, description, ...tags].join(" ").toLowerCase();
|
|
155
156
|
if (SECURITY_KEYWORDS.some((kw) => text.includes(kw))) reasons.push("security-critical");
|
|
156
157
|
if (PUBLIC_API_KEYWORDS.some((kw) => text.includes(kw))) reasons.push("public-api");
|
|
157
158
|
if (complexity === "complex" || complexity === "expert") reasons.push(`complexity:${complexity}`);
|
|
158
159
|
}
|
|
159
160
|
|
|
161
|
+
let reasoning = "";
|
|
162
|
+
if (testStrategy === "three-session-tdd") {
|
|
163
|
+
reasoning =
|
|
164
|
+
reasons.length > 0 ? `three-session-tdd: ${reasons.join(", ")}` : `three-session-tdd: ${complexity} task`;
|
|
165
|
+
} else if (testStrategy === "three-session-tdd-lite") {
|
|
166
|
+
reasoning = `three-session-tdd-lite: simple task (${complexity})`;
|
|
167
|
+
} else if (testStrategy === "tdd-simple") {
|
|
168
|
+
reasoning = `tdd-simple: simple task (${complexity})`;
|
|
169
|
+
} else {
|
|
170
|
+
reasoning = `${testStrategy}: ${complexity} task`;
|
|
171
|
+
}
|
|
172
|
+
|
|
160
173
|
return {
|
|
161
174
|
complexity,
|
|
162
175
|
modelTier,
|
|
163
176
|
testStrategy,
|
|
164
|
-
reasoning
|
|
165
|
-
reasons.length > 0
|
|
166
|
-
? `three-session-tdd: ${reasons.join(", ")}`
|
|
167
|
-
: `three-session-tdd-lite: simple task (${complexity})`,
|
|
177
|
+
reasoning,
|
|
168
178
|
};
|
|
169
179
|
},
|
|
170
180
|
};
|
|
@@ -35,10 +35,18 @@ Tags: ${tags.join(", ")}
|
|
|
35
35
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
36
36
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
37
37
|
|
|
38
|
+
## Test Strategies (derived from complexity)
|
|
39
|
+
Your complexity classification will determine the execution strategy:
|
|
40
|
+
- simple → tdd-simple: Single-session TDD (agent writes tests first, then implements)
|
|
41
|
+
- medium → three-session-tdd-lite: Multi-session with lite isolation
|
|
42
|
+
- complex/expert → three-session-tdd: Strict multi-session TDD isolation
|
|
43
|
+
- test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
|
|
44
|
+
|
|
38
45
|
## Rules
|
|
39
46
|
- Default to the CHEAPEST tier that will succeed.
|
|
40
47
|
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
41
48
|
- A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
|
|
49
|
+
- If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
|
|
42
50
|
|
|
43
51
|
Respond with ONLY this JSON (no markdown, no explanation):
|
|
44
52
|
{"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
|
|
@@ -73,10 +81,18 @@ ${storyBlocks}
|
|
|
73
81
|
- balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
|
|
74
82
|
- powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
|
|
75
83
|
|
|
84
|
+
## Test Strategies (derived from complexity)
|
|
85
|
+
Your complexity classification will determine the execution strategy:
|
|
86
|
+
- simple → tdd-simple: Single-session TDD (agent writes tests first, then implements)
|
|
87
|
+
- medium → three-session-tdd-lite: Multi-session with lite isolation
|
|
88
|
+
- complex/expert → three-session-tdd: Strict multi-session TDD isolation
|
|
89
|
+
- test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
|
|
90
|
+
|
|
76
91
|
## Rules
|
|
77
92
|
- Default to the CHEAPEST tier that will succeed.
|
|
78
93
|
- Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
|
|
79
94
|
- A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
|
|
95
|
+
- If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
|
|
80
96
|
|
|
81
97
|
Respond with ONLY a JSON array (no markdown, no explanation):
|
|
82
98
|
[{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
|
|
@@ -129,6 +129,9 @@ export async function runTddSession(
|
|
|
129
129
|
await cleanupProcessTree(result.pid);
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
// BUG-058: Auto-commit if agent left uncommitted changes
|
|
133
|
+
await autoCommitIfDirty(workdir, role, story.id);
|
|
134
|
+
|
|
132
135
|
// Check isolation based on role and skipIsolation flag.
|
|
133
136
|
let isolation: IsolationCheck | undefined;
|
|
134
137
|
if (!skipIsolation) {
|
|
@@ -181,3 +184,51 @@ export async function runTddSession(
|
|
|
181
184
|
estimatedCost: result.estimatedCost,
|
|
182
185
|
};
|
|
183
186
|
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* BUG-058: Auto-commit safety net.
|
|
190
|
+
*
|
|
191
|
+
* If the agent left uncommitted changes, stage and commit them automatically.
|
|
192
|
+
* This prevents the review stage from failing with "uncommitted changes" errors.
|
|
193
|
+
* Only triggers when the agent forgot — if tree is clean, this is a no-op.
|
|
194
|
+
*/
|
|
195
|
+
async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
|
|
196
|
+
const logger = getLogger();
|
|
197
|
+
|
|
198
|
+
// Check if working tree is dirty
|
|
199
|
+
try {
|
|
200
|
+
const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
|
|
201
|
+
cwd: workdir,
|
|
202
|
+
stdout: "pipe",
|
|
203
|
+
stderr: "pipe",
|
|
204
|
+
});
|
|
205
|
+
const statusOutput = await new Response(statusProc.stdout).text();
|
|
206
|
+
await statusProc.exited;
|
|
207
|
+
|
|
208
|
+
if (!statusOutput.trim()) return; // Clean tree, nothing to do
|
|
209
|
+
|
|
210
|
+
logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
|
|
211
|
+
role,
|
|
212
|
+
storyId,
|
|
213
|
+
dirtyFiles: statusOutput.trim().split("\n").length,
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
// Stage all changes
|
|
217
|
+
const addProc = Bun.spawn(["git", "add", "-A"], {
|
|
218
|
+
cwd: workdir,
|
|
219
|
+
stdout: "pipe",
|
|
220
|
+
stderr: "pipe",
|
|
221
|
+
});
|
|
222
|
+
await addProc.exited;
|
|
223
|
+
|
|
224
|
+
// Commit with descriptive message
|
|
225
|
+
const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
|
|
226
|
+
cwd: workdir,
|
|
227
|
+
stdout: "pipe",
|
|
228
|
+
stderr: "pipe",
|
|
229
|
+
});
|
|
230
|
+
await commitProc.exited;
|
|
231
|
+
} catch {
|
|
232
|
+
// Silently ignore — auto-commit is best-effort
|
|
233
|
+
}
|
|
234
|
+
}
|