gsd-pi 2.47.0-dev.04be8c9 → 2.47.0-dev.f2e721d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/resources/extensions/gsd/auto-start.js +8 -1
  2. package/dist/resources/extensions/gsd/forensics.js +292 -1
  3. package/dist/resources/extensions/gsd/guided-flow.js +85 -3
  4. package/dist/resources/extensions/gsd/prompts/forensics.md +37 -5
  5. package/dist/resources/extensions/gsd/session-forensics.js +10 -1
  6. package/dist/web/standalone/.next/BUILD_ID +1 -1
  7. package/dist/web/standalone/.next/app-path-routes-manifest.json +17 -17
  8. package/dist/web/standalone/.next/build-manifest.json +2 -2
  9. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  10. package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
  11. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  12. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  13. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  14. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  15. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  16. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  17. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  18. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  19. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  20. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  21. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  22. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  23. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  24. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  25. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  26. package/dist/web/standalone/.next/server/app/api/forensics/route.js +1 -1
  27. package/dist/web/standalone/.next/server/app/index.html +1 -1
  28. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  29. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  30. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  31. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app-paths-manifest.json +17 -17
  35. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  36. package/dist/web/standalone/.next/server/pages/500.html +2 -2
  37. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  38. package/package.json +1 -1
  39. package/packages/pi-agent-core/dist/agent-loop.js +3 -2
  40. package/packages/pi-agent-core/dist/agent-loop.js.map +1 -1
  41. package/packages/pi-agent-core/src/agent-loop.ts +3 -2
  42. package/packages/pi-coding-agent/dist/core/model-registry-auth-mode.test.js +43 -0
  43. package/packages/pi-coding-agent/dist/core/model-registry-auth-mode.test.js.map +1 -1
  44. package/packages/pi-coding-agent/dist/core/model-registry.d.ts.map +1 -1
  45. package/packages/pi-coding-agent/dist/core/model-registry.js +26 -3
  46. package/packages/pi-coding-agent/dist/core/model-registry.js.map +1 -1
  47. package/packages/pi-coding-agent/src/core/model-registry-auth-mode.test.ts +70 -0
  48. package/packages/pi-coding-agent/src/core/model-registry.ts +29 -2
  49. package/packages/pi-tui/dist/components/box.d.ts +1 -0
  50. package/packages/pi-tui/dist/components/box.d.ts.map +1 -1
  51. package/packages/pi-tui/dist/components/box.js +10 -0
  52. package/packages/pi-tui/dist/components/box.js.map +1 -1
  53. package/packages/pi-tui/src/components/box.ts +10 -0
  54. package/src/resources/extensions/gsd/auto-start.ts +7 -1
  55. package/src/resources/extensions/gsd/forensics.ts +329 -2
  56. package/src/resources/extensions/gsd/guided-flow.ts +105 -3
  57. package/src/resources/extensions/gsd/prompts/forensics.md +37 -5
  58. package/src/resources/extensions/gsd/session-forensics.ts +11 -1
  59. package/src/resources/extensions/gsd/tests/discuss-queued-milestones.test.ts +241 -0
  60. package/src/resources/extensions/gsd/tests/forensics-error-filter.test.ts +121 -0
  61. package/src/resources/extensions/gsd/tests/forensics-journal.test.ts +162 -0
  62. package/src/resources/extensions/gsd/tests/preflight-context-draft-filter.test.ts +115 -0
  63. package/src/resources/extensions/gsd/tests/stale-milestone-id-reservation.test.ts +79 -0
  64. /package/dist/web/standalone/.next/static/{GR9tXQAPXXBL4AUugDPlJ → O3E7X3EJ2lEKs_0hIUzGd}/_buildManifest.js +0 -0
  65. /package/dist/web/standalone/.next/static/{GR9tXQAPXXBL4AUugDPlJ → O3E7X3EJ2lEKs_0hIUzGd}/_ssgManifest.js +0 -0
@@ -35,7 +35,7 @@ import { showProjectInit, offerMigration } from "./init-wizard.js";
35
35
  import { validateDirectory } from "./validate-directory.js";
36
36
  import { showConfirm } from "../shared/tui.js";
37
37
  import { debugLog } from "./debug-logger.js";
38
- import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds } from "./milestone-ids.js";
38
+ import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js";
39
39
  import { parkMilestone, discardMilestone } from "./milestone-actions.js";
40
40
  import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
41
41
 
@@ -373,6 +373,9 @@ export async function showHeadlessMilestoneCreation(
373
373
  basePath: string,
374
374
  seedContext: string,
375
375
  ): Promise<void> {
376
+ // Clear stale reservations from previous cancelled sessions (#2488)
377
+ clearReservedMilestoneIds();
378
+
376
379
  // Ensure .gsd/ is bootstrapped
377
380
  bootstrapGsdProject(basePath);
378
381
 
@@ -511,9 +514,14 @@ export async function showDiscuss(
511
514
 
512
515
  const state = await deriveState(basePath);
513
516
 
514
- // Guard: no active milestone
517
+ // No active milestone — check for pending milestones to discuss instead
515
518
  if (!state.activeMilestone) {
516
- ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
519
+ const pendingMilestones = state.registry.filter(m => m.status === "pending");
520
+ if (pendingMilestones.length === 0) {
521
+ ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
522
+ return;
523
+ }
524
+ await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
517
525
  return;
518
526
  }
519
527
 
@@ -648,6 +656,17 @@ export async function showDiscuss(
648
656
  };
649
657
  });
650
658
 
659
+ // Offer access to queued milestones when any exist
660
+ const pendingMilestones = state.registry.filter(m => m.status === "pending");
661
+ if (pendingMilestones.length > 0) {
662
+ actions.push({
663
+ id: "discuss_queued_milestone",
664
+ label: "Discuss a queued milestone",
665
+ description: `Refine context for ${pendingMilestones.length} queued milestone(s). Does not affect current execution.`,
666
+ recommended: false,
667
+ });
668
+ }
669
+
651
670
  const choice = await showNextAction(ctx, {
652
671
  title: "GSD — Discuss a slice",
653
672
  summary: [
@@ -660,6 +679,11 @@ export async function showDiscuss(
660
679
 
661
680
  if (choice === "not_yet") return;
662
681
 
682
+ if (choice === "discuss_queued_milestone") {
683
+ await showDiscussQueuedMilestone(ctx, pi, basePath, pendingMilestones);
684
+ return;
685
+ }
686
+
663
687
  const chosen = pendingSlices.find(s => s.id === choice);
664
688
  if (!chosen) return;
665
689
 
@@ -689,6 +713,79 @@ export async function showDiscuss(
689
713
  }
690
714
  }
691
715
 
716
+ // ─── Queued Milestone Discussion ─────────────────────────────────────────────
717
+
718
+ /**
719
+ * Show a picker of queued (pending) milestones and dispatch a discuss flow for
720
+ * the chosen one. Discussing a queued milestone does NOT activate it — it only
721
+ * refines the CONTEXT.md artifact so it is better prepared when auto-mode
722
+ * eventually reaches it.
723
+ */
724
+ async function showDiscussQueuedMilestone(
725
+ ctx: ExtensionCommandContext,
726
+ pi: ExtensionAPI,
727
+ basePath: string,
728
+ pendingMilestones: Array<{ id: string; title: string; status: string }>,
729
+ ): Promise<void> {
730
+ const actions = pendingMilestones.map((m, i) => {
731
+ const hasContext = !!resolveMilestoneFile(basePath, m.id, "CONTEXT");
732
+ const hasDraft = !hasContext && !!resolveMilestoneFile(basePath, m.id, "CONTEXT-DRAFT");
733
+ const contextStatus = hasContext ? "context ✓" : hasDraft ? "draft context" : "no context yet";
734
+ return {
735
+ id: m.id,
736
+ label: `${m.id}: ${m.title}`,
737
+ description: `[queued] · ${contextStatus}`,
738
+ recommended: i === 0,
739
+ };
740
+ });
741
+
742
+ const choice = await showNextAction(ctx, {
743
+ title: "GSD — Discuss a queued milestone",
744
+ summary: [
745
+ "Select a queued milestone to discuss.",
746
+ "Discussing will update its context file. It will not be activated.",
747
+ ],
748
+ actions,
749
+ notYetMessage: "Run /gsd discuss when ready.",
750
+ });
751
+
752
+ if (choice === "not_yet") return;
753
+
754
+ const chosen = pendingMilestones.find(m => m.id === choice);
755
+ if (!chosen) return;
756
+
757
+ await dispatchDiscussForMilestone(ctx, pi, basePath, chosen.id, chosen.title);
758
+ }
759
+
760
+ /**
761
+ * Dispatch the guided-discuss-milestone prompt for a milestone without
762
+ * setting pendingAutoStart — so discussing a queued milestone does not
763
+ * implicitly activate it when the session ends.
764
+ */
765
+ async function dispatchDiscussForMilestone(
766
+ ctx: ExtensionCommandContext,
767
+ pi: ExtensionAPI,
768
+ basePath: string,
769
+ mid: string,
770
+ milestoneTitle: string,
771
+ ): Promise<void> {
772
+ const draftFile = resolveMilestoneFile(basePath, mid, "CONTEXT-DRAFT");
773
+ const draftContent = draftFile ? await loadFile(draftFile) : null;
774
+ const discussMilestoneTemplates = inlineTemplate("context", "Context");
775
+ const structuredQuestionsAvailable = pi.getActiveTools().includes("ask_user_questions") ? "true" : "false";
776
+ const basePrompt = loadPrompt("guided-discuss-milestone", {
777
+ milestoneId: mid,
778
+ milestoneTitle,
779
+ inlinedTemplates: discussMilestoneTemplates,
780
+ structuredQuestionsAvailable,
781
+ commitInstruction: buildDocsCommitInstruction(`docs(${mid}): milestone context from discuss`),
782
+ });
783
+ const prompt = draftContent
784
+ ? `${basePrompt}\n\n## Prior Discussion (Draft Seed)\n\n${draftContent}`
785
+ : basePrompt;
786
+ await dispatchWorkflow(pi, prompt, "gsd-discuss", ctx, "plan-milestone");
787
+ }
788
+
692
789
  // ─── Smart Entry Point ────────────────────────────────────────────────────────
693
790
 
694
791
  /**
@@ -842,6 +939,11 @@ export async function showSmartEntry(
842
939
  ): Promise<void> {
843
940
  const stepMode = options?.step;
844
941
 
942
+ // ── Clear stale milestone ID reservations from previous cancelled sessions ──
943
+ // Reservations only need to survive within a single /gsd interaction.
944
+ // Without this, each cancelled session permanently bumps the next ID. (#2488)
945
+ clearReservedMilestoneIds();
946
+
845
947
  // ── Directory safety check — refuse to operate in system/home dirs ───
846
948
  const dirCheck = validateDirectory(basePath);
847
949
  if (dirCheck.severity === "blocked") {
@@ -36,6 +36,8 @@ GSD extension source code is at: `{{gsdSourceDir}}`
36
36
  ├── doctor-history.jsonl — doctor check history
37
37
  ├── activity/ — session activity logs (JSONL per unit)
38
38
  │ └── {seq}-{unitType}-{unitId}.jsonl
39
+ ├── journal/ — structured event journal (JSONL per day)
40
+ │ └── YYYY-MM-DD.jsonl
39
41
  ├── runtime/
40
42
  │ ├── paused-session.json — serialized session when auto pauses
41
43
  │ └── headless-context.md — headless resume context
@@ -60,6 +62,32 @@ GSD extension source code is at: `{{gsdSourceDir}}`
60
62
  - `usage` field on assistant messages: `input`, `output`, `cacheRead`, `cacheWrite`, `totalTokens`, `cost`
61
63
  - **To trace a failure**: find the last activity log, search for `isError: true` tool results, then read the agent's reasoning text preceding that error
62
64
 
65
+ ### Journal Format (`.gsd/journal/`)
66
+
67
+ The journal is a structured event log for auto-mode iterations. Each daily file contains JSONL entries:
68
+
69
+ ```
70
+ { ts: "ISO-8601", flowId: "UUID", seq: 0, eventType: "iteration-start", rule?: "rule-name", causedBy?: { flowId, seq }, data?: { unitId, status, ... } }
71
+ ```
72
+
73
+ **Key event types:**
74
+ - `iteration-start` / `iteration-end` — marks loop iteration boundaries
75
+ - `dispatch-match` / `dispatch-stop` — what the auto-mode decided to do (or not do)
76
+ - `unit-start` / `unit-end` — lifecycle of individual work units
77
+ - `terminal` — auto-mode reached a terminal state (all done, budget exceeded, etc.)
78
+ - `guard-block` — dispatch was blocked by a guard condition (e.g. needs user input)
79
+ - `stuck-detected` — the loop detected it was stuck (same unit repeatedly dispatched)
80
+ - `milestone-transition` — a milestone was promoted or completed
81
+ - `worktree-enter` / `worktree-create-failed` / `worktree-merge-start` / `worktree-merge-failed` — worktree operations
82
+
83
+ **Key concepts:**
84
+ - **flowId**: UUID grouping all events in one iteration. Use to reconstruct what happened in a single loop pass.
85
+ - **causedBy**: Cross-reference to a prior event (same or different flow). Enables causal chain tracing.
86
+ - **seq**: Monotonically increasing within a flow. Reconstruct event order within an iteration.
87
+
88
+ **To trace a stuck loop**: filter for `stuck-detected` events, then follow `flowId` to see the surrounding dispatch and unit events.
89
+ **To trace a guard block**: filter for `guard-block` events, check `data.reason` for why dispatch was blocked.
90
+
63
91
  ### Crash Lock Format (`auto.lock`)
64
92
 
65
93
  JSON with fields: `pid`, `startedAt`, `unitType`, `unitId`, `unitStartedAt`, `completedUnits`, `sessionFile`
@@ -78,20 +106,24 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a
78
106
 
79
107
  1. **Start with the pre-parsed forensic report** above. The anomaly section contains automated findings — treat these as leads, not conclusions.
80
108
 
81
- 2. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
109
+ 2. **Check the journal timeline** if present. The journal events show the auto-mode's decision sequence (dispatches, guards, stuck detection, worktree operations). Use flow IDs to group related events and trace causal chains.
110
+
111
+ 3. **Cross-reference activity logs and journal**. Activity logs show *what the LLM did* (tool calls, reasoning, errors). Journal events show *what auto-mode decided* (dispatch rules, iteration boundaries, state transitions). Together they reveal the full picture.
112
+
113
+ 4. **Form hypotheses** about which module and code path is responsible. Use the source map to identify candidate files.
82
114
 
83
- 3. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
115
+ 5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
84
116
 
85
- 4. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
117
+ 6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
86
118
 
87
- 5. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
119
+ 7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
88
120
  - Missing edge case / unhandled condition
89
121
  - Wrong boolean logic or comparison
90
122
  - Race condition or ordering issue
91
123
  - State corruption (e.g. completed-units.json out of sync with artifacts)
92
124
  - Timeout / recovery logic not triggering correctly
93
125
 
94
- 6. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
126
+ 8. **Clarify if needed.** Use ask_user_questions (max 2 questions) only if the report is genuinely insufficient. Do not ask questions you can answer from the data or source code.
95
127
 
96
128
  ## Output
97
129
 
@@ -172,7 +172,17 @@ export function extractTrace(entries: unknown[]): ExecutionTrace {
172
172
  }
173
173
 
174
174
  if (isError && resultText) {
175
- errors.push(resultText.slice(0, 300));
175
+ // Filter out benign "errors" that are normal during code exploration:
176
+ // - grep/rg/find returning exit code 1 (no matches) is expected POSIX behavior
177
+ // - User interrupts (Escape/skip) are intentional, not failures
178
+ const trimmed = resultText.trim();
179
+ const isBenignNoMatch = pending?.name === "bash" &&
180
+ /^\(no output\)\s*\n\s*Command exited with code 1$/m.test(trimmed);
181
+ const isUserSkip = /^Skipped due to queued user message/i.test(trimmed);
182
+
183
+ if (!isBenignNoMatch && !isUserSkip) {
184
+ errors.push(resultText.slice(0, 300));
185
+ }
176
186
  }
177
187
  }
178
188
  }
@@ -0,0 +1,241 @@
1
+ /**
2
+ * discuss-queued-milestones.test.ts — Tests for #2307.
3
+ *
4
+ * /gsd discuss was previously gated on state.activeMilestone, which prevented
5
+ * users from discussing queued (pending) milestones during roadmap grooming.
6
+ *
7
+ * These tests verify:
8
+ * 1. deriveState correctly identifies pending milestones (the set the picker
9
+ * will show when no active milestone is present)
10
+ * 2. resolveMilestoneFile correctly resolves context artifacts for pending
11
+ * milestones so the picker can report their discussion state
12
+ * 3. The guided-flow.ts source code no longer hard-exits when no active
13
+ * milestone exists but pending milestones are present
14
+ * 4. The helper functions for queued discuss exist in the source
15
+ */
16
+
17
+ import { describe, test, afterEach } from "node:test";
18
+ import assert from "node:assert/strict";
19
+ import { mkdtempSync, mkdirSync, rmSync, writeFileSync, readFileSync } from "node:fs";
20
+ import { join } from "node:path";
21
+ import { tmpdir } from "node:os";
22
+ import { fileURLToPath } from "node:url";
23
+ import { dirname } from "node:path";
24
+
25
+ import { deriveState } from "../state.ts";
26
+ import { invalidateAllCaches } from "../cache.ts";
27
+ import { resolveMilestoneFile } from "../paths.ts";
28
+
29
+ // ─── Fixture Helpers ──────────────────────────────────────────────────────────
30
+
31
+ function createBase(): string {
32
+ const base = mkdtempSync(join(tmpdir(), "gsd-discuss-queued-"));
33
+ mkdirSync(join(base, ".gsd", "milestones"), { recursive: true });
34
+ return base;
35
+ }
36
+
37
+ function cleanup(base: string): void {
38
+ rmSync(base, { recursive: true, force: true });
39
+ }
40
+
41
+ function writeMilestoneDir(base: string, mid: string): void {
42
+ mkdirSync(join(base, ".gsd", "milestones", mid), { recursive: true });
43
+ }
44
+
45
+ function writeContext(base: string, mid: string, content: string): void {
46
+ writeMilestoneDir(base, mid);
47
+ writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-CONTEXT.md`), content);
48
+ }
49
+
50
+ function writeContextDraft(base: string, mid: string, content: string): void {
51
+ writeMilestoneDir(base, mid);
52
+ writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-CONTEXT-DRAFT.md`), content);
53
+ }
54
+
55
+ function writeRoadmap(base: string, mid: string, content: string): void {
56
+ writeMilestoneDir(base, mid);
57
+ writeFileSync(join(base, ".gsd", "milestones", mid, `${mid}-ROADMAP.md`), content);
58
+ }
59
+
60
+ function readGuidedFlowSource(): string {
61
+ const thisFile = fileURLToPath(import.meta.url);
62
+ const thisDir = dirname(thisFile);
63
+ return readFileSync(join(thisDir, "..", "guided-flow.ts"), "utf-8");
64
+ }
65
+
66
+ // ─── Tests ────────────────────────────────────────────────────────────────────
67
+
68
+ describe("discuss-queued-milestones (#2307)", () => {
69
+
70
+ test("1. pending milestones appear in registry when active milestone exists", async () => {
71
+ const base = createBase();
72
+ try {
73
+ // M001: active — has context + roadmap with a slice
74
+ writeContext(base, "M001", "# M001: Active\nContext here.");
75
+ writeRoadmap(base, "M001",
76
+ "# M001: Active\n\n## Slices\n- [ ] **S01: Do work** `risk:low` `depends:[]`\n > After this: works\n");
77
+
78
+ // M002: pending — context only, no roadmap
79
+ writeContext(base, "M002", "# M002: Queued\nFuture work.");
80
+
81
+ // M003: pending — draft context only
82
+ writeContextDraft(base, "M003", "# M003: Draft\nSeed material.");
83
+
84
+ invalidateAllCaches();
85
+ const state = await deriveState(base);
86
+
87
+ assert.ok(!!state.activeMilestone, "M001 should be the active milestone");
88
+ assert.strictEqual(state.activeMilestone?.id, "M001");
89
+
90
+ const pendingIds = state.registry
91
+ .filter(m => m.status === "pending")
92
+ .map(m => m.id);
93
+
94
+ assert.ok(pendingIds.includes("M002"), "M002 should be pending");
95
+ assert.ok(pendingIds.includes("M003"), "M003 should be pending");
96
+ } finally {
97
+ cleanup(base);
98
+ }
99
+ });
100
+
101
+ test("2. first context-only milestone is active, subsequent ones are pending", async () => {
102
+ const base = createBase();
103
+ try {
104
+ // M001: first milestone with context but no roadmap — deriveState marks it active
105
+ writeContext(base, "M001", "# M001: First\nContext here.");
106
+ // M002: will be pending since M001 is active
107
+ writeContext(base, "M002", "# M002: Second\nMore future work.");
108
+
109
+ invalidateAllCaches();
110
+ const state = await deriveState(base);
111
+
112
+ // deriveState makes the first unfinished milestone "active" even without a roadmap
113
+ assert.ok(!!state.activeMilestone, "first milestone should be active");
114
+ assert.strictEqual(state.activeMilestone?.id, "M001", "M001 is the active milestone");
115
+
116
+ const pendingIds = state.registry
117
+ .filter(m => m.status === "pending")
118
+ .map(m => m.id);
119
+
120
+ assert.ok(pendingIds.includes("M002"),
121
+ "M002 should be pending — it comes after the active M001");
122
+ } finally {
123
+ cleanup(base);
124
+ }
125
+ });
126
+
127
+ test("3. resolveMilestoneFile finds CONTEXT.md for pending milestone", (t) => {
128
+ const base = createBase();
129
+ try {
130
+ writeContext(base, "M002", "# M002: Queued\nContent.");
131
+
132
+ const contextFile = resolveMilestoneFile(base, "M002", "CONTEXT");
133
+ assert.ok(contextFile !== null, "resolveMilestoneFile should find CONTEXT.md for M002");
134
+ assert.ok(contextFile!.endsWith("M002-CONTEXT.md"),
135
+ "resolved path should point to M002-CONTEXT.md");
136
+ } finally {
137
+ cleanup(base);
138
+ }
139
+ });
140
+
141
+ test("4. resolveMilestoneFile finds CONTEXT-DRAFT.md for pending milestone", (t) => {
142
+ const base = createBase();
143
+ try {
144
+ writeContextDraft(base, "M003", "# M003: Draft\nSeed content.");
145
+
146
+ const draftFile = resolveMilestoneFile(base, "M003", "CONTEXT-DRAFT");
147
+ assert.ok(draftFile !== null, "resolveMilestoneFile should find CONTEXT-DRAFT.md for M003");
148
+ assert.ok(draftFile!.endsWith("M003-CONTEXT-DRAFT.md"),
149
+ "resolved path should point to M003-CONTEXT-DRAFT.md");
150
+ } finally {
151
+ cleanup(base);
152
+ }
153
+ });
154
+
155
+ test("5. resolveMilestoneFile returns null when pending milestone has no context", (t) => {
156
+ const base = createBase();
157
+ try {
158
+ writeMilestoneDir(base, "M004");
159
+
160
+ const contextFile = resolveMilestoneFile(base, "M004", "CONTEXT");
161
+ assert.strictEqual(contextFile, null,
162
+ "resolveMilestoneFile should return null when no CONTEXT.md exists");
163
+
164
+ const draftFile = resolveMilestoneFile(base, "M004", "CONTEXT-DRAFT");
165
+ assert.strictEqual(draftFile, null,
166
+ "resolveMilestoneFile should return null when no CONTEXT-DRAFT.md exists");
167
+ } finally {
168
+ cleanup(base);
169
+ }
170
+ });
171
+
172
+ test("6. guided-flow no longer hard-exits when no active milestone but pending exist", () => {
173
+ const source = readGuidedFlowSource();
174
+
175
+ // The old guard was a simple early-exit:
176
+ // if (!state.activeMilestone) {
177
+ // ctx.ui.notify("No active milestone. Run /gsd to create one first.", "warning");
178
+ // return;
179
+ // }
180
+ //
181
+ // The new guard should check for pending milestones and route instead.
182
+ const oldGuardPattern = /if\s*\(!state\.activeMilestone\)\s*\{\s*ctx\.ui\.notify\("No active milestone/;
183
+ assert.ok(
184
+ !oldGuardPattern.test(source),
185
+ "guided-flow must not unconditionally exit when activeMilestone is null",
186
+ );
187
+ });
188
+
189
+ test("7. showDiscussQueuedMilestone helper exists in guided-flow", () => {
190
+ const source = readGuidedFlowSource();
191
+ assert.ok(
192
+ source.includes("showDiscussQueuedMilestone"),
193
+ "guided-flow must export showDiscussQueuedMilestone helper",
194
+ );
195
+ });
196
+
197
+ test("8. dispatchDiscussForMilestone helper exists in guided-flow", () => {
198
+ const source = readGuidedFlowSource();
199
+ assert.ok(
200
+ source.includes("dispatchDiscussForMilestone"),
201
+ "guided-flow must export dispatchDiscussForMilestone helper",
202
+ );
203
+ });
204
+
205
+ test("9. dispatchDiscussForMilestone does not set pendingAutoStart", () => {
206
+ const source = readGuidedFlowSource();
207
+
208
+ // Extract the dispatchDiscussForMilestone function body
209
+ const fnMatch = source.match(
210
+ /async function dispatchDiscussForMilestone\s*\([^)]*\)[^{]*\{([\s\S]*?)\n\}/,
211
+ );
212
+ assert.ok(!!fnMatch, "dispatchDiscussForMilestone function body must be present");
213
+
214
+ if (fnMatch) {
215
+ assert.ok(
216
+ !fnMatch[1].includes("pendingAutoStart"),
217
+ "dispatchDiscussForMilestone must NOT set pendingAutoStart — discussing a queued milestone must not activate it",
218
+ );
219
+ }
220
+ });
221
+
222
+ test("10. slice picker includes queued milestone option when pending milestones exist", () => {
223
+ const source = readGuidedFlowSource();
224
+ assert.ok(
225
+ source.includes("discuss_queued_milestone"),
226
+ "slice picker must include a 'discuss_queued_milestone' action id for queued milestones",
227
+ );
228
+ assert.ok(
229
+ source.includes("Discuss a queued milestone"),
230
+ "slice picker must label the queued milestone action clearly",
231
+ );
232
+ });
233
+
234
+ test("11. queued milestone picker labels entries with [queued]", () => {
235
+ const source = readGuidedFlowSource();
236
+ assert.ok(
237
+ source.includes("[queued]"),
238
+ "queued milestone picker must label entries with [queued] to distinguish from active",
239
+ );
240
+ });
241
+ });
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Regression test for #2539: extractTrace should not count benign bash
3
+ * exit-code-1 (grep no-match) or user skips as errors.
4
+ */
5
+ import { describe, test } from "node:test";
6
+ import assert from "node:assert/strict";
7
+
8
+ import { extractTrace } from "../session-forensics.ts";
9
+
10
+ /**
11
+ * Build a minimal JSONL entry pair: assistant tool_use → toolResult.
12
+ * This is the shape extractTrace() expects from session activity files.
13
+ */
14
+ function makeToolPair(
15
+ toolName: string,
16
+ input: Record<string, unknown>,
17
+ resultText: string,
18
+ isError: boolean,
19
+ ): unknown[] {
20
+ const toolCallId = `toolu_${Math.random().toString(36).slice(2, 10)}`;
21
+ return [
22
+ {
23
+ type: "message",
24
+ message: {
25
+ role: "assistant",
26
+ content: [
27
+ {
28
+ type: "toolCall",
29
+ id: toolCallId,
30
+ name: toolName,
31
+ arguments: input,
32
+ },
33
+ ],
34
+ },
35
+ },
36
+ {
37
+ type: "message",
38
+ message: {
39
+ role: "toolResult",
40
+ toolCallId,
41
+ toolName,
42
+ isError,
43
+ content: [{ type: "text", text: resultText }],
44
+ },
45
+ },
46
+ ];
47
+ }
48
+
49
+ describe("extractTrace error filtering (#2539)", () => {
50
+ test("grep exit-code-1 (no matches) is not counted as an error", () => {
51
+ const entries = makeToolPair(
52
+ "bash",
53
+ { command: "grep -rn 'nonexistent' src/" },
54
+ "(no output)\nCommand exited with code 1",
55
+ true,
56
+ );
57
+ const trace = extractTrace(entries);
58
+ assert.equal(trace.errors.length, 0, "grep no-match should not be an error");
59
+ });
60
+
61
+ test("user skip is not counted as an error", () => {
62
+ const entries = makeToolPair(
63
+ "bash",
64
+ { command: "npm run test" },
65
+ "Skipped due to queued user message",
66
+ true,
67
+ );
68
+ const trace = extractTrace(entries);
69
+ assert.equal(trace.errors.length, 0, "user skip should not be an error");
70
+ });
71
+
72
+ test("real bash error is still counted", () => {
73
+ const entries = makeToolPair(
74
+ "bash",
75
+ { command: "cat /nonexistent" },
76
+ "cat: /nonexistent: No such file or directory\nCommand exited with code 1",
77
+ true,
78
+ );
79
+ const trace = extractTrace(entries);
80
+ assert.equal(trace.errors.length, 1, "real error should still be counted");
81
+ assert.match(trace.errors[0], /No such file or directory/);
82
+ });
83
+
84
+ test("non-bash tool error is still counted", () => {
85
+ const entries = makeToolPair(
86
+ "edit",
87
+ { path: "foo.ts", oldText: "x", newText: "y" },
88
+ "oldText not found in file",
89
+ true,
90
+ );
91
+ const trace = extractTrace(entries);
92
+ assert.equal(trace.errors.length, 1, "non-bash tool errors should still be counted");
93
+ });
94
+
95
+ test("mixed entries: only real errors are counted", () => {
96
+ const entries = [
97
+ // benign grep no-match
98
+ ...makeToolPair("bash", { command: "grep -rn 'pattern' src/" }, "(no output)\nCommand exited with code 1", true),
99
+ // user skip
100
+ ...makeToolPair("bash", { command: "npm test" }, "Skipped due to queued user message", true),
101
+ // real error
102
+ ...makeToolPair("bash", { command: "node broken.js" }, "SyntaxError: Unexpected token\nCommand exited with code 1", true),
103
+ // successful command (not an error)
104
+ ...makeToolPair("bash", { command: "echo hello" }, "hello", false),
105
+ ];
106
+ const trace = extractTrace(entries);
107
+ assert.equal(trace.errors.length, 1, "only the real error should be counted");
108
+ assert.match(trace.errors[0], /SyntaxError/);
109
+ });
110
+
111
+ test("exit code 1 with actual output is still an error", () => {
112
+ const entries = makeToolPair(
113
+ "bash",
114
+ { command: "npm run lint" },
115
+ "src/foo.ts:10:5 - error TS2304: Cannot find name 'x'\nCommand exited with code 1",
116
+ true,
117
+ );
118
+ const trace = extractTrace(entries);
119
+ assert.equal(trace.errors.length, 1, "lint error with output should be counted");
120
+ });
121
+ });