@nathapp/nax 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/nax.js CHANGED
@@ -18540,7 +18540,7 @@ function determineTestStrategy(complexity, title, _description, tags = []) {
18540
18540
  return "three-session-tdd";
18541
18541
  }
18542
18542
  if (complexity === "simple")
18543
- return "test-after";
18543
+ return "tdd-simple";
18544
18544
  return "three-session-tdd-lite";
18545
18545
  }
18546
18546
  function complexityToModelTier(complexity, context) {
@@ -18609,8 +18609,8 @@ var init_keyword = __esm(() => {
18609
18609
  const modelTier = complexityToModelTier(complexity, context);
18610
18610
  const testStrategy = determineTestStrategy(complexity, title, description, tags);
18611
18611
  const reasons = [];
18612
+ const text = [title, description, ...tags].join(" ").toLowerCase();
18612
18613
  if (testStrategy === "three-session-tdd") {
18613
- const text = [title, description, ...tags].join(" ").toLowerCase();
18614
18614
  if (SECURITY_KEYWORDS.some((kw) => text.includes(kw)))
18615
18615
  reasons.push("security-critical");
18616
18616
  if (PUBLIC_API_KEYWORDS.some((kw) => text.includes(kw)))
@@ -18618,11 +18618,21 @@ var init_keyword = __esm(() => {
18618
18618
  if (complexity === "complex" || complexity === "expert")
18619
18619
  reasons.push(`complexity:${complexity}`);
18620
18620
  }
18621
+ let reasoning = "";
18622
+ if (testStrategy === "three-session-tdd") {
18623
+ reasoning = reasons.length > 0 ? `three-session-tdd: ${reasons.join(", ")}` : `three-session-tdd: ${complexity} task`;
18624
+ } else if (testStrategy === "three-session-tdd-lite") {
18625
+ reasoning = `three-session-tdd-lite: simple task (${complexity})`;
18626
+ } else if (testStrategy === "tdd-simple") {
18627
+ reasoning = `tdd-simple: simple task (${complexity})`;
18628
+ } else {
18629
+ reasoning = `${testStrategy}: ${complexity} task`;
18630
+ }
18621
18631
  return {
18622
18632
  complexity,
18623
18633
  modelTier,
18624
18634
  testStrategy,
18625
- reasoning: reasons.length > 0 ? `three-session-tdd: ${reasons.join(", ")}` : `three-session-tdd-lite: simple task (${complexity})`
18635
+ reasoning
18626
18636
  };
18627
18637
  }
18628
18638
  };
@@ -18901,10 +18911,18 @@ Tags: ${tags.join(", ")}
18901
18911
  - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
18902
18912
  - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
18903
18913
 
18914
+ ## Test Strategies (derived from complexity)
18915
+ Your complexity classification will determine the execution strategy:
18916
+ - simple \u2192 tdd-simple: Single-session TDD (agent writes tests first, then implements)
18917
+ - medium \u2192 three-session-tdd-lite: Multi-session with lite isolation
18918
+ - complex/expert \u2192 three-session-tdd: Strict multi-session TDD isolation
18919
+ - test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
18920
+
18904
18921
  ## Rules
18905
18922
  - Default to the CHEAPEST tier that will succeed.
18906
18923
  - Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
18907
18924
  - A story touching many files doesn't automatically mean complex \u2014 copy-paste refactors are simple.
18925
+ - If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
18908
18926
 
18909
18927
  Respond with ONLY this JSON (no markdown, no explanation):
18910
18928
  {"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
@@ -18931,10 +18949,18 @@ ${storyBlocks}
18931
18949
  - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
18932
18950
  - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
18933
18951
 
18952
+ ## Test Strategies (derived from complexity)
18953
+ Your complexity classification will determine the execution strategy:
18954
+ - simple \u2192 tdd-simple: Single-session TDD (agent writes tests first, then implements)
18955
+ - medium \u2192 three-session-tdd-lite: Multi-session with lite isolation
18956
+ - complex/expert \u2192 three-session-tdd: Strict multi-session TDD isolation
18957
+ - test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
18958
+
18934
18959
  ## Rules
18935
18960
  - Default to the CHEAPEST tier that will succeed.
18936
18961
  - Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
18937
18962
  - A story touching many files doesn't automatically mean complex \u2014 copy-paste refactors are simple.
18963
+ - If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
18938
18964
 
18939
18965
  Respond with ONLY a JSON array (no markdown, no explanation):
18940
18966
  [{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
@@ -19360,7 +19386,7 @@ function determineTestStrategy2(complexity, title, description, tags = [], tddSt
19360
19386
  return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
19361
19387
  }
19362
19388
  if (complexity === "simple")
19363
- return "test-after";
19389
+ return "tdd-simple";
19364
19390
  return "three-session-tdd-lite";
19365
19391
  }
19366
19392
  function complexityToModelTier2(complexity, config2) {
@@ -19505,7 +19531,7 @@ var package_default;
19505
19531
  var init_package = __esm(() => {
19506
19532
  package_default = {
19507
19533
  name: "@nathapp/nax",
19508
- version: "0.31.0",
19534
+ version: "0.32.0",
19509
19535
  description: "AI Coding Agent Orchestrator \u2014 loops until done",
19510
19536
  type: "module",
19511
19537
  bin: {
@@ -19567,8 +19593,8 @@ var init_version = __esm(() => {
19567
19593
  NAX_VERSION = package_default.version;
19568
19594
  NAX_COMMIT = (() => {
19569
19595
  try {
19570
- if (/^[0-9a-f]{6,10}$/.test("6b2cc85"))
19571
- return "6b2cc85";
19596
+ if (/^[0-9a-f]{6,10}$/.test("76e82f7"))
19597
+ return "76e82f7";
19572
19598
  } catch {}
19573
19599
  try {
19574
19600
  const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
@@ -23719,7 +23745,10 @@ ${TEST_FILTER_RULE}`;
23719
23745
  if (role === "verifier") {
23720
23746
  return `${header}isolation scope: Read-only inspection. Review all test results, implementation code, and acceptance criteria compliance. You MAY write a verdict file (.nax-verifier-verdict.json) and apply legitimate fixes if needed.${footer}`;
23721
23747
  }
23722
- return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
23748
+ if (role === "single-session") {
23749
+ return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
23750
+ }
23751
+ return `${header}isolation scope: You may modify both src/ and test/ files. Write failing tests FIRST, then implement to make them pass.`;
23723
23752
  }
23724
23753
  var TEST_FILTER_RULE;
23725
23754
  var init_isolation2 = __esm(() => {
@@ -23783,7 +23812,8 @@ function buildRoleTaskSection(roleOrVariant, variant) {
23783
23812
  ` + `- Write a detailed verdict with reasoning
23784
23813
  ` + "- Goal: provide comprehensive verification and quality assurance";
23785
23814
  }
23786
- return `# Role: Single-Session
23815
+ if (role === "single-session") {
23816
+ return `# Role: Single-Session
23787
23817
 
23788
23818
  ` + `Your task: Write tests AND implement the feature in a single focused session.
23789
23819
 
@@ -23794,6 +23824,18 @@ function buildRoleTaskSection(roleOrVariant, variant) {
23794
23824
  ` + `- Run tests frequently throughout implementation
23795
23825
  ` + `- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
23796
23826
  ` + "- Goal: all tests passing, all changes committed, full story complete";
23827
+ }
23828
+ return `# Role: TDD-Simple
23829
+
23830
+ ` + `Your task: Write failing tests FIRST, then implement to make them pass.
23831
+
23832
+ ` + `Instructions:
23833
+ ` + `- RED phase: Write failing tests FIRST for the acceptance criteria
23834
+ ` + `- RED phase: Run the tests to confirm they fail
23835
+ ` + `- GREEN phase: Implement the minimum code to make tests pass
23836
+ ` + `- REFACTOR phase: Refactor while keeping tests green
23837
+ ` + `- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'
23838
+ ` + "- Goal: all tests passing, feature complete, all changes committed";
23797
23839
  }
23798
23840
 
23799
23841
  // src/prompts/sections/story.ts
@@ -24001,6 +24043,7 @@ async function runTddSession(role, agent, story, config2, workdir, modelTier, be
24001
24043
  if (!result.success && result.pid) {
24002
24044
  await cleanupProcessTree(result.pid);
24003
24045
  }
24046
+ await autoCommitIfDirty(workdir, role, story.id);
24004
24047
  let isolation;
24005
24048
  if (!skipIsolation) {
24006
24049
  if (role === "test-writer") {
@@ -24047,6 +24090,38 @@ async function runTddSession(role, agent, story, config2, workdir, modelTier, be
24047
24090
  estimatedCost: result.estimatedCost
24048
24091
  };
24049
24092
  }
24093
+ async function autoCommitIfDirty(workdir, role, storyId) {
24094
+ const logger = getLogger();
24095
+ try {
24096
+ const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
24097
+ cwd: workdir,
24098
+ stdout: "pipe",
24099
+ stderr: "pipe"
24100
+ });
24101
+ const statusOutput = await new Response(statusProc.stdout).text();
24102
+ await statusProc.exited;
24103
+ if (!statusOutput.trim())
24104
+ return;
24105
+ logger.warn("tdd", `Agent did not commit after ${role} session \u2014 auto-committing`, {
24106
+ role,
24107
+ storyId,
24108
+ dirtyFiles: statusOutput.trim().split(`
24109
+ `).length
24110
+ });
24111
+ const addProc = Bun.spawn(["git", "add", "-A"], {
24112
+ cwd: workdir,
24113
+ stdout: "pipe",
24114
+ stderr: "pipe"
24115
+ });
24116
+ await addProc.exited;
24117
+ const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
24118
+ cwd: workdir,
24119
+ stdout: "pipe",
24120
+ stderr: "pipe"
24121
+ });
24122
+ await commitProc.exited;
24123
+ } catch {}
24124
+ }
24050
24125
  var init_session_runner = __esm(() => {
24051
24126
  init_config();
24052
24127
  init_logger2();
@@ -24497,6 +24572,34 @@ function routeTddFailure(failureCategory, isLiteMode, ctx, reviewReason) {
24497
24572
  reason: reviewReason || "Three-session TDD requires review"
24498
24573
  };
24499
24574
  }
24575
+ async function autoCommitIfDirty2(workdir, role, storyId) {
24576
+ try {
24577
+ const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
24578
+ cwd: workdir,
24579
+ stdout: "pipe",
24580
+ stderr: "pipe"
24581
+ });
24582
+ const statusOutput = await new Response(statusProc.stdout).text();
24583
+ await statusProc.exited;
24584
+ if (!statusOutput.trim())
24585
+ return;
24586
+ const logger = getLogger();
24587
+ logger.warn("execution", `Agent did not commit after ${role} session \u2014 auto-committing`, {
24588
+ role,
24589
+ storyId,
24590
+ dirtyFiles: statusOutput.trim().split(`
24591
+ `).length
24592
+ });
24593
+ const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
24594
+ await addProc.exited;
24595
+ const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
24596
+ cwd: workdir,
24597
+ stdout: "pipe",
24598
+ stderr: "pipe"
24599
+ });
24600
+ await commitProc.exited;
24601
+ } catch {}
24602
+ }
24500
24603
  var executionStage, _executionDeps;
24501
24604
  var init_execution = __esm(() => {
24502
24605
  init_agents();
@@ -24578,6 +24681,7 @@ var init_execution = __esm(() => {
24578
24681
  dangerouslySkipPermissions: ctx.config.execution.dangerouslySkipPermissions
24579
24682
  });
24580
24683
  ctx.agentResult = result;
24684
+ await autoCommitIfDirty2(ctx.workdir, "single-session", ctx.story.id);
24581
24685
  const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
24582
24686
  if (_executionDeps.detectMergeConflict(combinedOutput) && ctx.interaction && isTriggerEnabled("merge-conflict", ctx.config)) {
24583
24687
  const shouldProceed = await _executionDeps.checkMergeConflict({ featureName: ctx.prd.feature, storyId: ctx.story.id }, ctx.config, ctx.interaction);
@@ -24924,19 +25028,20 @@ var init_prompt = __esm(() => {
24924
25028
  if (isBatch) {
24925
25029
  prompt = buildBatchPrompt2(ctx.stories, ctx.contextMarkdown, ctx.constitution);
24926
25030
  } else {
24927
- const builder = PromptBuilder.for("single-session").withLoader(ctx.workdir, ctx.config).story(ctx.story).context(ctx.contextMarkdown).constitution(ctx.constitution?.content);
25031
+ const role = ctx.routing.testStrategy === "tdd-simple" ? "tdd-simple" : "single-session";
25032
+ const builder = PromptBuilder.for(role).withLoader(ctx.workdir, ctx.config).story(ctx.story).context(ctx.contextMarkdown).constitution(ctx.constitution?.content);
24928
25033
  prompt = await builder.build();
24929
25034
  }
24930
25035
  ctx.prompt = prompt;
24931
25036
  if (isBatch) {
24932
25037
  logger.info("prompt", "Batch session prepared", {
24933
25038
  storyCount: ctx.stories.length,
24934
- testStrategy: "test-after"
25039
+ testStrategy: ctx.routing.testStrategy
24935
25040
  });
24936
25041
  } else {
24937
25042
  logger.info("prompt", "Single session prepared", {
24938
25043
  storyId: ctx.story.id,
24939
- testStrategy: "test-after"
25044
+ testStrategy: ctx.routing.testStrategy
24940
25045
  });
24941
25046
  }
24942
25047
  return { action: "continue" };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nathapp/nax",
3
- "version": "0.31.0",
3
+ "version": "0.32.0",
4
4
  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
5
5
  "type": "module",
6
6
  "bin": {
@@ -273,11 +273,14 @@ const TEMPLATE_HEADER = `<!--
273
273
  /**
274
274
  * Execute the `nax prompts --init` command.
275
275
  *
276
- * Creates nax/templates/ and writes 4 default role-body template files.
276
+ * Creates nax/templates/ and writes 4 default role-body template files
277
+ * (test-writer, implementer, verifier, single-session).
277
278
  * Auto-wires prompts.overrides in nax.config.json if the file exists and overrides are not already set.
278
279
  * Returns the list of file paths written. Returns empty array if files
279
280
  * already exist and force is not set.
280
281
  *
282
+ * Note: tdd-simple role is supported in the prompt system but not auto-generated as a template.
283
+ *
281
284
  * @param options - Command options
282
285
  * @returns Array of file paths written
283
286
  */
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  export type Complexity = "simple" | "medium" | "complex" | "expert";
9
- export type TestStrategy = "test-after" | "three-session-tdd" | "three-session-tdd-lite";
9
+ export type TestStrategy = "test-after" | "tdd-simple" | "three-session-tdd" | "three-session-tdd-lite";
10
10
  export type TddStrategy = "auto" | "strict" | "lite" | "off";
11
11
 
12
12
  export interface EscalationEntry {
@@ -409,7 +409,7 @@ export interface RoutingConfig {
409
409
 
410
410
  /** Prompt overrides config (PB-003) */
411
411
  export interface PromptsConfig {
412
- overrides?: Partial<Record<"test-writer" | "implementer" | "verifier" | "single-session", string>>;
412
+ overrides?: Partial<Record<"test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple", string>>;
413
413
  }
414
414
 
415
415
  /** Full nax configuration */
@@ -199,6 +199,9 @@ export const executionStage: PipelineStage = {
199
199
 
200
200
  ctx.agentResult = result;
201
201
 
202
+ // BUG-058: Auto-commit if agent left uncommitted changes (single-session/test-after)
203
+ await autoCommitIfDirty(ctx.workdir, "single-session", ctx.story.id);
204
+
202
205
  // merge-conflict trigger: detect CONFLICT markers in agent output
203
206
  const combinedOutput = (result.output ?? "") + (result.stderr ?? "");
204
207
  if (
@@ -267,3 +270,40 @@ export const _executionDeps = {
267
270
  isAmbiguousOutput,
268
271
  checkStoryAmbiguity,
269
272
  };
273
+
274
+ /**
275
+ * BUG-058: Auto-commit safety net for single-session/test-after.
276
+ * Mirrors the same function in tdd/session-runner.ts for three-session TDD.
277
+ */
278
+ async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
279
+ try {
280
+ const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
281
+ cwd: workdir,
282
+ stdout: "pipe",
283
+ stderr: "pipe",
284
+ });
285
+ const statusOutput = await new Response(statusProc.stdout).text();
286
+ await statusProc.exited;
287
+
288
+ if (!statusOutput.trim()) return;
289
+
290
+ const logger = getLogger();
291
+ logger.warn("execution", `Agent did not commit after ${role} session — auto-committing`, {
292
+ role,
293
+ storyId,
294
+ dirtyFiles: statusOutput.trim().split("\n").length,
295
+ });
296
+
297
+ const addProc = Bun.spawn(["git", "add", "-A"], { cwd: workdir, stdout: "pipe", stderr: "pipe" });
298
+ await addProc.exited;
299
+
300
+ const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
301
+ cwd: workdir,
302
+ stdout: "pipe",
303
+ stderr: "pipe",
304
+ });
305
+ await commitProc.exited;
306
+ } catch {
307
+ // Silently ignore — auto-commit is best-effort
308
+ }
309
+ }
@@ -39,7 +39,8 @@ export const promptStage: PipelineStage = {
39
39
  if (isBatch) {
40
40
  prompt = buildBatchPrompt(ctx.stories, ctx.contextMarkdown, ctx.constitution);
41
41
  } else {
42
- const builder = PromptBuilder.for("single-session")
42
+ const role = ctx.routing.testStrategy === "tdd-simple" ? "tdd-simple" : "single-session";
43
+ const builder = PromptBuilder.for(role)
43
44
  .withLoader(ctx.workdir, ctx.config)
44
45
  .story(ctx.story)
45
46
  .context(ctx.contextMarkdown)
@@ -52,12 +53,12 @@ export const promptStage: PipelineStage = {
52
53
  if (isBatch) {
53
54
  logger.info("prompt", "Batch session prepared", {
54
55
  storyCount: ctx.stories.length,
55
- testStrategy: "test-after",
56
+ testStrategy: ctx.routing.testStrategy,
56
57
  });
57
58
  } else {
58
59
  logger.info("prompt", "Single session prepared", {
59
60
  storyId: ctx.story.id,
60
- testStrategy: "test-after",
61
+ testStrategy: ctx.routing.testStrategy,
61
62
  });
62
63
  }
63
64
 
@@ -26,7 +26,7 @@ export interface RoutingResult {
26
26
  /** Selected model tier */
27
27
  modelTier: "fast" | "balanced" | "powerful";
28
28
  /** Test strategy */
29
- testStrategy: "test-after" | "three-session-tdd" | "three-session-tdd-lite";
29
+ testStrategy: "test-after" | "tdd-simple" | "three-session-tdd" | "three-session-tdd-lite";
30
30
  /** Reasoning for the classification */
31
31
  reasoning: string;
32
32
  /** Estimated cost for this story */
@@ -1,11 +1,12 @@
1
1
  /**
2
2
  * Isolation Rules Section
3
3
  *
4
- * Generates isolation rules for all 4 roles:
4
+ * Generates isolation rules for all 5 roles:
5
5
  * - test-writer: Strict/Lite modes for test-first TDD
6
6
  * - implementer: Implement source while respecting test integrity
7
7
  * - verifier: Read-only inspection
8
8
  * - single-session: Both test/ and src/ modification allowed
9
+ * - tdd-simple: Both test/ and src/ modification allowed (no isolation)
9
10
  *
10
11
  * Backwards compatible: also accepts old API (mode only)
11
12
  * - buildIsolationSection("strict") → test-writer, strict
@@ -18,7 +19,7 @@ const TEST_FILTER_RULE =
18
19
  "— full suite output will flood your context window and cause failures.";
19
20
 
20
21
  export function buildIsolationSection(
21
- roleOrMode: "implementer" | "test-writer" | "verifier" | "single-session" | "strict" | "lite",
22
+ roleOrMode: "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple" | "strict" | "lite",
22
23
  mode?: "strict" | "lite",
23
24
  ): string {
24
25
  // Old API support: buildIsolationSection("strict") or buildIsolationSection("lite")
@@ -26,7 +27,7 @@ export function buildIsolationSection(
26
27
  return buildIsolationSection("test-writer", roleOrMode);
27
28
  }
28
29
 
29
- const role = roleOrMode as "implementer" | "test-writer" | "verifier" | "single-session";
30
+ const role = roleOrMode as "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple";
30
31
 
31
32
  const header = "# Isolation Rules\n\n";
32
33
  const footer = `\n\n${TEST_FILTER_RULE}`;
@@ -49,6 +50,10 @@ export function buildIsolationSection(
49
50
  return `${header}isolation scope: Read-only inspection. Review all test results, implementation code, and acceptance criteria compliance. You MAY write a verdict file (.nax-verifier-verdict.json) and apply legitimate fixes if needed.${footer}`;
50
51
  }
51
52
 
52
- // single-session role
53
- return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
53
+ if (role === "single-session") {
54
+ return `${header}isolation scope: Create test files in test/ directory, then implement source code in src/ to make tests pass. Both directories are in scope for this session.${footer}`;
55
+ }
56
+
57
+ // tdd-simple role — no isolation restrictions (no footer needed)
58
+ return `${header}isolation scope: You may modify both src/ and test/ files. Write failing tests FIRST, then implement to make them pass.`;
54
59
  }
@@ -1,11 +1,12 @@
1
1
  /**
2
2
  * Role-Task Section
3
3
  *
4
- * Generates role definition for all 4 roles in nax prompt orchestration:
4
+ * Generates role definition for all 5 roles in nax prompt orchestration:
5
5
  * - implementer: Make failing tests pass (standard/lite variants)
6
6
  * - test-writer: Write tests first (RED phase)
7
7
  * - verifier: Review and verify implementation
8
8
  * - single-session: Write tests AND implement in one session
9
+ * - tdd-simple: Write failing tests FIRST, then implement in one session
9
10
  *
10
11
  * Backwards compatible: also accepts old API (variant only)
11
12
  * - buildRoleTaskSection("standard") → implementer, standard
@@ -13,7 +14,7 @@
13
14
  */
14
15
 
15
16
  export function buildRoleTaskSection(
16
- roleOrVariant: "implementer" | "test-writer" | "verifier" | "single-session" | "standard" | "lite",
17
+ roleOrVariant: "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple" | "standard" | "lite",
17
18
  variant?: "standard" | "lite",
18
19
  ): string {
19
20
  // Old API support: buildRoleTaskSection("standard") or buildRoleTaskSection("lite")
@@ -21,7 +22,7 @@ export function buildRoleTaskSection(
21
22
  return buildRoleTaskSection("implementer", roleOrVariant);
22
23
  }
23
24
 
24
- const role = roleOrVariant as "implementer" | "test-writer" | "verifier" | "single-session";
25
+ const role = roleOrVariant as "implementer" | "test-writer" | "verifier" | "single-session" | "tdd-simple";
25
26
 
26
27
  if (role === "implementer") {
27
28
  const v = variant ?? "standard";
@@ -79,16 +80,30 @@ export function buildRoleTaskSection(
79
80
  );
80
81
  }
81
82
 
82
- // single-session role
83
+ if (role === "single-session") {
84
+ return (
85
+ "# Role: Single-Session\n\n" +
86
+ "Your task: Write tests AND implement the feature in a single focused session.\n\n" +
87
+ "Instructions:\n" +
88
+ "- Phase 1: Write comprehensive tests (test/ directory)\n" +
89
+ "- Phase 2: Implement to make all tests pass (src/ directory)\n" +
90
+ "- Use Bun test (describe/test/expect)\n" +
91
+ "- Run tests frequently throughout implementation\n" +
92
+ "- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'\n" +
93
+ "- Goal: all tests passing, all changes committed, full story complete"
94
+ );
95
+ }
96
+
97
+ // tdd-simple role — test-driven development in one session
83
98
  return (
84
- "# Role: Single-Session\n\n" +
85
- "Your task: Write tests AND implement the feature in a single focused session.\n\n" +
99
+ "# Role: TDD-Simple\n\n" +
100
+ "Your task: Write failing tests FIRST, then implement to make them pass.\n\n" +
86
101
  "Instructions:\n" +
87
- "- Phase 1: Write comprehensive tests (test/ directory)\n" +
88
- "- Phase 2: Implement to make all tests pass (src/ directory)\n" +
89
- "- Use Bun test (describe/test/expect)\n" +
90
- "- Run tests frequently throughout implementation\n" +
102
+ "- RED phase: Write failing tests FIRST for the acceptance criteria\n" +
103
+ "- RED phase: Run the tests to confirm they fail\n" +
104
+ "- GREEN phase: Implement the minimum code to make tests pass\n" +
105
+ "- REFACTOR phase: Refactor while keeping tests green\n" +
91
106
  "- When all tests are green, stage and commit ALL changed files with: git commit -m 'feat: <description>'\n" +
92
- "- Goal: all tests passing, all changes committed, full story complete"
107
+ "- Goal: all tests passing, feature complete, all changes committed"
93
108
  );
94
109
  }
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  /** Role determining which default template body to use */
8
- export type PromptRole = "test-writer" | "implementer" | "verifier" | "single-session";
8
+ export type PromptRole = "test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple";
9
9
 
10
10
  /** A single section of a composed prompt */
11
11
  export interface PromptSection {
@@ -201,8 +201,8 @@ export function determineTestStrategy(
201
201
  return hasLiteTag ? "three-session-tdd-lite" : "three-session-tdd";
202
202
  }
203
203
 
204
- // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
205
- if (complexity === "simple") return "test-after";
204
+ // TS-001: simple → tdd-simple (TDD discipline, 1 session), medium → tdd-lite (3 sessions)
205
+ if (complexity === "simple") return "tdd-simple";
206
206
  return "three-session-tdd-lite";
207
207
  }
208
208
 
@@ -117,8 +117,8 @@ function determineTestStrategy(
117
117
  return "three-session-tdd";
118
118
  }
119
119
 
120
- // BUG-045: simple → test-after (low overhead), medium → tdd-lite (sweet spot)
121
- if (complexity === "simple") return "test-after";
120
+ // TS-001: simple → tdd-simple (TDD discipline, 1 session), medium → tdd-lite (3 sessions)
121
+ if (complexity === "simple") return "tdd-simple";
122
122
  return "three-session-tdd-lite";
123
123
  }
124
124
 
@@ -150,21 +150,31 @@ export const keywordStrategy: RoutingStrategy = {
150
150
  const testStrategy = determineTestStrategy(complexity, title, description, tags);
151
151
 
152
152
  const reasons: string[] = [];
153
+ const text = [title, description, ...tags].join(" ").toLowerCase();
154
+
153
155
  if (testStrategy === "three-session-tdd") {
154
- const text = [title, description, ...tags].join(" ").toLowerCase();
155
156
  if (SECURITY_KEYWORDS.some((kw) => text.includes(kw))) reasons.push("security-critical");
156
157
  if (PUBLIC_API_KEYWORDS.some((kw) => text.includes(kw))) reasons.push("public-api");
157
158
  if (complexity === "complex" || complexity === "expert") reasons.push(`complexity:${complexity}`);
158
159
  }
159
160
 
161
+ let reasoning = "";
162
+ if (testStrategy === "three-session-tdd") {
163
+ reasoning =
164
+ reasons.length > 0 ? `three-session-tdd: ${reasons.join(", ")}` : `three-session-tdd: ${complexity} task`;
165
+ } else if (testStrategy === "three-session-tdd-lite") {
166
+ reasoning = `three-session-tdd-lite: simple task (${complexity})`;
167
+ } else if (testStrategy === "tdd-simple") {
168
+ reasoning = `tdd-simple: simple task (${complexity})`;
169
+ } else {
170
+ reasoning = `${testStrategy}: ${complexity} task`;
171
+ }
172
+
160
173
  return {
161
174
  complexity,
162
175
  modelTier,
163
176
  testStrategy,
164
- reasoning:
165
- reasons.length > 0
166
- ? `three-session-tdd: ${reasons.join(", ")}`
167
- : `three-session-tdd-lite: simple task (${complexity})`,
177
+ reasoning,
168
178
  };
169
179
  },
170
180
  };
@@ -35,10 +35,18 @@ Tags: ${tags.join(", ")}
35
35
  - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
36
36
  - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
37
37
 
38
+ ## Test Strategies (derived from complexity)
39
+ Your complexity classification will determine the execution strategy:
40
+ - simple → tdd-simple: Single-session TDD (agent writes tests first, then implements)
41
+ - medium → three-session-tdd-lite: Multi-session with lite isolation
42
+ - complex/expert → three-session-tdd: Strict multi-session TDD isolation
43
+ - test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
44
+
38
45
  ## Rules
39
46
  - Default to the CHEAPEST tier that will succeed.
40
47
  - Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
41
48
  - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
49
+ - If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
42
50
 
43
51
  Respond with ONLY this JSON (no markdown, no explanation):
44
52
  {"complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}`;
@@ -73,10 +81,18 @@ ${storyBlocks}
73
81
  - balanced: Standard features, moderate logic, straightforward tests. 30-90 min.
74
82
  - powerful: Complex architecture, security-critical, multi-file refactors, novel algorithms. >90 min.
75
83
 
84
+ ## Test Strategies (derived from complexity)
85
+ Your complexity classification will determine the execution strategy:
86
+ - simple → tdd-simple: Single-session TDD (agent writes tests first, then implements)
87
+ - medium → three-session-tdd-lite: Multi-session with lite isolation
88
+ - complex/expert → three-session-tdd: Strict multi-session TDD isolation
89
+ - test-after: Reserved for non-TDD work (refactors, deletions, config-only changes)
90
+
76
91
  ## Rules
77
92
  - Default to the CHEAPEST tier that will succeed.
78
93
  - Simple barrel exports, re-exports, or index files are ALWAYS simple + fast.
79
94
  - A story touching many files doesn't automatically mean complex — copy-paste refactors are simple.
95
+ - If the story is pure refactoring/deletion with no new behavior, consider it "simple" for tdd-simple strategy.
80
96
 
81
97
  Respond with ONLY a JSON array (no markdown, no explanation):
82
98
  [{"id":"US-001","complexity":"simple|medium|complex|expert","modelTier":"fast|balanced|powerful","reasoning":"<one line>"}]`;
@@ -129,6 +129,9 @@ export async function runTddSession(
129
129
  await cleanupProcessTree(result.pid);
130
130
  }
131
131
 
132
+ // BUG-058: Auto-commit if agent left uncommitted changes
133
+ await autoCommitIfDirty(workdir, role, story.id);
134
+
132
135
  // Check isolation based on role and skipIsolation flag.
133
136
  let isolation: IsolationCheck | undefined;
134
137
  if (!skipIsolation) {
@@ -181,3 +184,51 @@ export async function runTddSession(
181
184
  estimatedCost: result.estimatedCost,
182
185
  };
183
186
  }
187
+
188
+ /**
189
+ * BUG-058: Auto-commit safety net.
190
+ *
191
+ * If the agent left uncommitted changes, stage and commit them automatically.
192
+ * This prevents the review stage from failing with "uncommitted changes" errors.
193
+ * Only triggers when the agent forgot — if tree is clean, this is a no-op.
194
+ */
195
+ async function autoCommitIfDirty(workdir: string, role: string, storyId: string): Promise<void> {
196
+ const logger = getLogger();
197
+
198
+ // Check if working tree is dirty
199
+ try {
200
+ const statusProc = Bun.spawn(["git", "status", "--porcelain"], {
201
+ cwd: workdir,
202
+ stdout: "pipe",
203
+ stderr: "pipe",
204
+ });
205
+ const statusOutput = await new Response(statusProc.stdout).text();
206
+ await statusProc.exited;
207
+
208
+ if (!statusOutput.trim()) return; // Clean tree, nothing to do
209
+
210
+ logger.warn("tdd", `Agent did not commit after ${role} session — auto-committing`, {
211
+ role,
212
+ storyId,
213
+ dirtyFiles: statusOutput.trim().split("\n").length,
214
+ });
215
+
216
+ // Stage all changes
217
+ const addProc = Bun.spawn(["git", "add", "-A"], {
218
+ cwd: workdir,
219
+ stdout: "pipe",
220
+ stderr: "pipe",
221
+ });
222
+ await addProc.exited;
223
+
224
+ // Commit with descriptive message
225
+ const commitProc = Bun.spawn(["git", "commit", "-m", `chore(${storyId}): auto-commit after ${role} session`], {
226
+ cwd: workdir,
227
+ stdout: "pipe",
228
+ stderr: "pipe",
229
+ });
230
+ await commitProc.exited;
231
+ } catch {
232
+ // Silently ignore — auto-commit is best-effort
233
+ }
234
+ }