@opengsd/gsd-pi 1.1.1-dev.75048e7 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/dist/resources/.managed-resources-content-hash +1 -1
  2. package/dist/resources/extensions/browser-tools/engine/managed-gsd-browser.js +18 -2
  3. package/dist/resources/extensions/browser-tools/engine/selection.js +1 -1
  4. package/dist/resources/extensions/browser-tools/extension-manifest.json +1 -1
  5. package/dist/resources/extensions/browser-tools/index.js +29 -2
  6. package/dist/resources/extensions/browser-tools/web-app-detect.js +52 -0
  7. package/dist/resources/extensions/gsd/auto/phases.js +45 -3
  8. package/dist/resources/extensions/gsd/auto/session.js +2 -0
  9. package/dist/resources/extensions/gsd/auto-dispatch.js +10 -2
  10. package/dist/resources/extensions/gsd/auto-model-selection.js +26 -0
  11. package/dist/resources/extensions/gsd/auto-timers.js +24 -10
  12. package/dist/resources/extensions/gsd/auto.js +26 -4
  13. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +29 -21
  14. package/dist/resources/extensions/gsd/bootstrap/system-context.js +1 -1
  15. package/dist/resources/extensions/gsd/commands/handlers/auto.js +10 -0
  16. package/dist/resources/extensions/gsd/commands-mcp-status.js +1 -1
  17. package/dist/resources/extensions/gsd/config-overlay.js +1 -0
  18. package/dist/resources/extensions/gsd/context-masker.js +129 -5
  19. package/dist/resources/extensions/gsd/guided-flow.js +4 -1
  20. package/dist/resources/extensions/gsd/planner-handoff.js +98 -0
  21. package/dist/resources/extensions/gsd/preferences-models.js +1 -0
  22. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
  23. package/dist/resources/extensions/gsd/prompts/run-uat.md +2 -2
  24. package/dist/resources/extensions/gsd/prompts/system.md +1 -1
  25. package/dist/resources/extensions/gsd/skill-manifest.js +12 -0
  26. package/dist/resources/extensions/gsd/tool-contract.js +1 -1
  27. package/dist/resources/extensions/gsd/tool-presentation-plan.js +19 -2
  28. package/dist/resources/extensions/gsd/tools/complete-slice.js +28 -1
  29. package/dist/resources/extensions/gsd/tools/workflow-tool-executors.js +32 -4
  30. package/dist/resources/extensions/gsd/unit-tool-contracts.js +38 -14
  31. package/dist/resources/extensions/gsd/workflow-mcp.js +2 -3
  32. package/dist/resources/extensions/gsd/worktree-manager.js +26 -0
  33. package/dist/resources/extensions/gsd/worktree-reentry.js +96 -0
  34. package/dist/resources/extensions/shared/gsd-browser-cli.js +6 -0
  35. package/dist/web/standalone/.next/BUILD_ID +1 -1
  36. package/dist/web/standalone/.next/app-path-routes-manifest.json +8 -8
  37. package/dist/web/standalone/.next/build-manifest.json +2 -2
  38. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  39. package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
  40. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  47. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  48. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  51. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  54. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  55. package/dist/web/standalone/.next/server/app/index.html +1 -1
  56. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  57. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  58. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  59. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  60. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  61. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  62. package/dist/web/standalone/.next/server/app-paths-manifest.json +8 -8
  63. package/dist/web/standalone/.next/server/chunks/8357.js +1 -1
  64. package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
  65. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  66. package/dist/web/standalone/.next/server/pages/500.html +1 -1
  67. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  68. package/package.json +1 -1
  69. package/packages/cloud-mcp-gateway/package.json +2 -2
  70. package/packages/contracts/package.json +1 -1
  71. package/packages/daemon/package.json +4 -4
  72. package/packages/gsd-agent-core/package.json +5 -5
  73. package/packages/gsd-agent-modes/package.json +7 -7
  74. package/packages/mcp-server/package.json +3 -3
  75. package/packages/native/package.json +1 -1
  76. package/packages/pi-agent-core/package.json +1 -1
  77. package/packages/pi-ai/dist/models.generated.d.ts +158 -2
  78. package/packages/pi-ai/dist/models.generated.d.ts.map +1 -1
  79. package/packages/pi-ai/dist/models.generated.js +149 -9
  80. package/packages/pi-ai/dist/models.generated.js.map +1 -1
  81. package/packages/pi-ai/dist/providers/transform-messages.d.ts.map +1 -1
  82. package/packages/pi-ai/dist/providers/transform-messages.js +8 -1
  83. package/packages/pi-ai/dist/providers/transform-messages.js.map +1 -1
  84. package/packages/pi-ai/package.json +1 -1
  85. package/packages/pi-coding-agent/package.json +7 -7
  86. package/packages/pi-tui/package.json +1 -1
  87. package/packages/rpc-client/package.json +2 -2
  88. package/pkg/package.json +1 -1
  89. package/scripts/install/handoff.js +16 -3
  90. package/src/resources/extensions/browser-tools/engine/managed-gsd-browser.ts +21 -2
  91. package/src/resources/extensions/browser-tools/engine/selection.ts +1 -1
  92. package/src/resources/extensions/browser-tools/extension-manifest.json +1 -1
  93. package/src/resources/extensions/browser-tools/index.ts +36 -5
  94. package/src/resources/extensions/browser-tools/tests/browser-engine-selection.test.mjs +2 -2
  95. package/src/resources/extensions/browser-tools/tests/gsd-browser-launch-config.test.mjs +37 -0
  96. package/src/resources/extensions/browser-tools/tests/web-app-detect.test.mjs +68 -0
  97. package/src/resources/extensions/browser-tools/web-app-detect.ts +63 -0
  98. package/src/resources/extensions/gsd/auto/phases.ts +48 -6
  99. package/src/resources/extensions/gsd/auto/session.ts +2 -0
  100. package/src/resources/extensions/gsd/auto-dispatch.ts +34 -2
  101. package/src/resources/extensions/gsd/auto-model-selection.ts +26 -0
  102. package/src/resources/extensions/gsd/auto-timers.ts +25 -9
  103. package/src/resources/extensions/gsd/auto.ts +28 -4
  104. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +40 -21
  105. package/src/resources/extensions/gsd/bootstrap/system-context.ts +1 -1
  106. package/src/resources/extensions/gsd/commands/handlers/auto.ts +9 -0
  107. package/src/resources/extensions/gsd/commands-mcp-status.ts +1 -1
  108. package/src/resources/extensions/gsd/config-overlay.ts +1 -0
  109. package/src/resources/extensions/gsd/context-masker.ts +152 -5
  110. package/src/resources/extensions/gsd/guided-flow.ts +4 -1
  111. package/src/resources/extensions/gsd/planner-handoff.ts +149 -0
  112. package/src/resources/extensions/gsd/preferences-models.ts +1 -0
  113. package/src/resources/extensions/gsd/preferences-types.ts +8 -0
  114. package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -1
  115. package/src/resources/extensions/gsd/prompts/run-uat.md +2 -2
  116. package/src/resources/extensions/gsd/prompts/system.md +1 -1
  117. package/src/resources/extensions/gsd/skill-manifest.ts +12 -0
  118. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +99 -0
  119. package/src/resources/extensions/gsd/tests/auto-model-selection-tool-poisoning.test.ts +66 -4
  120. package/src/resources/extensions/gsd/tests/auto-supervisor.test.mjs +4 -0
  121. package/src/resources/extensions/gsd/tests/bundled-skill-triggers.test.ts +9 -0
  122. package/src/resources/extensions/gsd/tests/complete-slice-verification-gate.test.ts +118 -0
  123. package/src/resources/extensions/gsd/tests/context-masker.test.ts +56 -1
  124. package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts +1 -0
  125. package/src/resources/extensions/gsd/tests/dispatch-rule-coverage.test.ts +24 -0
  126. package/src/resources/extensions/gsd/tests/integration/run-uat.test.ts +1 -1
  127. package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +27 -0
  128. package/src/resources/extensions/gsd/tests/journal-integration.test.ts +1 -0
  129. package/src/resources/extensions/gsd/tests/mcp-project-config.test.ts +7 -1
  130. package/src/resources/extensions/gsd/tests/mcp-status.test.ts +1 -1
  131. package/src/resources/extensions/gsd/tests/planner-handoff.test.ts +100 -0
  132. package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +113 -1
  133. package/src/resources/extensions/gsd/tests/provider-switch-observer.test.ts +55 -0
  134. package/src/resources/extensions/gsd/tests/runtime-invariant-modules.test.ts +20 -0
  135. package/src/resources/extensions/gsd/tests/skill-manifest.test.ts +4 -3
  136. package/src/resources/extensions/gsd/tests/workflow-mcp.test.ts +77 -10
  137. package/src/resources/extensions/gsd/tests/workflow-tool-executors.test.ts +131 -2
  138. package/src/resources/extensions/gsd/tests/worktree-reentry.test.ts +102 -0
  139. package/src/resources/extensions/gsd/tool-contract.ts +1 -1
  140. package/src/resources/extensions/gsd/tool-presentation-plan.ts +21 -2
  141. package/src/resources/extensions/gsd/tools/complete-slice.ts +29 -1
  142. package/src/resources/extensions/gsd/tools/workflow-tool-executors.ts +46 -4
  143. package/src/resources/extensions/gsd/unit-tool-contracts.ts +38 -14
  144. package/src/resources/extensions/gsd/workflow-mcp.ts +2 -3
  145. package/src/resources/extensions/gsd/worktree-manager.ts +32 -0
  146. package/src/resources/extensions/gsd/worktree-reentry.ts +103 -0
  147. package/src/resources/extensions/shared/gsd-browser-cli.ts +6 -0
  148. /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_buildManifest.js +0 -0
  149. /package/dist/web/standalone/.next/static/{h4TGni4xJzlZjGkxaT6uU → zzYMrKpPGfRQRxSFO32Jr}/_ssgManifest.js +0 -0
@@ -155,6 +155,124 @@ describe('complete-slice verification gate (#3580)', () => {
155
155
  }
156
156
  });
157
157
 
158
+ // ── Browser/web UAT classification gate (M001/S03 regression) ──────────
159
+ const BROWSER_UAT_BODY = [
160
+ '## UAT Type',
161
+ '- UAT mode: artifact-driven',
162
+ '',
163
+ '## Smoke Test',
164
+ '1. Open the page in a browser and perform add/edit/complete/delete once.',
165
+ ].join('\n');
166
+
167
+ test('rejects an artifact-driven UAT that drives a browser (open the page in a browser)', async () => {
168
+ const result = await handleCompleteSlice(
169
+ makeParams({ uatContent: BROWSER_UAT_BODY }),
170
+ basePath,
171
+ );
172
+ assert.ok('error' in result, 'expected handler to reject a browser UAT mislabeled artifact-driven');
173
+ assert.match((result as { error: string }).error, /requires browser verification/i);
174
+ });
175
+
176
+ test('allows a runtime-executable UAT that runs a browser test command (playwright)', async () => {
177
+ // Bugbot regression: runtime-executable legitimately drives a browser via a
178
+ // command captured by gsd_uat_exec — it must not be pushed to gsd-browser.
179
+ const body = [
180
+ '## UAT Type',
181
+ '- UAT mode: runtime-executable',
182
+ '',
183
+ '## Test Cases',
184
+ '1. Run `npx playwright test` and confirm a passing exit code; capture a screenshot artifact.',
185
+ '2. Hit http://localhost:3000/health and assert a 200 response.',
186
+ ].join('\n');
187
+ const result = await handleCompleteSlice(
188
+ makeParams({ uatContent: body }),
189
+ basePath,
190
+ );
191
+ if ('error' in result) {
192
+ assert.doesNotMatch(
193
+ result.error,
194
+ /artifact-driven|browser-capable|browser verification/i,
195
+ `runtime-executable command UATs must not be gated, got: ${result.error}`,
196
+ );
197
+ }
198
+ });
199
+
200
+ test('allows an artifact-driven UAT that only disclaims browser coverage (no false positive)', async () => {
201
+ // S01-style: genuinely artifact-driven persistence scaffolding that merely
202
+ // mentions "cross-browser" / "browser-level" in a Not-Proven disclaimer.
203
+ const body = [
204
+ '## UAT Type',
205
+ '- UAT mode: artifact-driven',
206
+ '',
207
+ '## Not Proven By This UAT',
208
+ '- Interactive browser-level CRUD and real cross-browser localStorage behavior.',
209
+ ].join('\n');
210
+ const result = await handleCompleteSlice(
211
+ makeParams({ uatContent: body }),
212
+ basePath,
213
+ );
214
+ if ('error' in result) {
215
+ assert.doesNotMatch(
216
+ result.error,
217
+ /requires browser verification/i,
218
+ `disclaimer-only mention must not trip the browser gate, got: ${result.error}`,
219
+ );
220
+ }
221
+ });
222
+
223
+ test('allows an artifact-driven UAT whose "navigate" step targets a file, not a browser', async () => {
224
+ // Bugbot regression: a bare "navigate to <file/API>" must not trip the gate
225
+ // just because it contains the word "navigate".
226
+ const body = [
227
+ '## UAT Type',
228
+ '- UAT mode: artifact-driven',
229
+ '',
230
+ '## Test Cases',
231
+ '1. Navigate to the generated report file and confirm the schema section exists.',
232
+ ].join('\n');
233
+ const result = await handleCompleteSlice(
234
+ makeParams({ uatContent: body }),
235
+ basePath,
236
+ );
237
+ if ('error' in result) {
238
+ assert.doesNotMatch(
239
+ result.error,
240
+ /requires browser verification/i,
241
+ `non-web "navigate" must not trip the browser gate, got: ${result.error}`,
242
+ );
243
+ }
244
+ });
245
+
246
+ test('allows a browser UAT when it is declared browser-executable', async () => {
247
+ const body = BROWSER_UAT_BODY.replace('artifact-driven', 'browser-executable');
248
+ const result = await handleCompleteSlice(
249
+ makeParams({ uatContent: body }),
250
+ basePath,
251
+ );
252
+ if ('error' in result) {
253
+ assert.doesNotMatch(
254
+ result.error,
255
+ /requires browser verification/i,
256
+ `browser-executable UAT must pass the browser gate, got: ${result.error}`,
257
+ );
258
+ }
259
+ });
260
+
261
+ test('allows a browser UAT when it is declared mixed (mixed receives browser tools)', async () => {
262
+ const body = BROWSER_UAT_BODY.replace('artifact-driven', 'mixed (artifact-driven + browser)');
263
+ const result = await handleCompleteSlice(
264
+ makeParams({ uatContent: body }),
265
+ basePath,
266
+ );
267
+ if ('error' in result) {
268
+ assert.doesNotMatch(
269
+ result.error,
270
+ /requires browser verification/i,
271
+ `mixed UAT must pass the browser gate, got: ${result.error}`,
272
+ );
273
+ }
274
+ });
275
+
158
276
  test('backfills prior verification narrative when verification is omitted on re-completion', async () => {
159
277
  // Seed full_summary_md with a prior verification narrative (simulates a
160
278
  // previous completion where the verification text was recorded).
@@ -1,7 +1,12 @@
1
1
  import test from "node:test";
2
2
  import assert from "node:assert/strict";
3
3
 
4
- import { createObservationMask } from "../context-masker.js";
4
+ import {
5
+ createObservationMask,
6
+ createResponsesInputObservationMask,
7
+ truncateContextResultMessages,
8
+ truncateResponsesInputResultItems,
9
+ } from "../context-masker.js";
5
10
 
6
11
  // These helpers produce messages in the pi-ai LLM payload format
7
12
  // (post-convertToLlm, pre-provider), which is what before_provider_request sees.
@@ -120,3 +125,53 @@ test("masks toolResult by role, not by type field", () => {
120
125
  const result = mask(messages as any);
121
126
  assert.equal((result[1].content as any)[0].text, MASK_TEXT);
122
127
  });
128
+
129
+ test("truncates recent bash result user messages", () => {
130
+ const messages = [
131
+ userMsg("turn 1"),
132
+ bashResult("a".repeat(50)),
133
+ assistantMsg("response 1"),
134
+ ];
135
+ const result = truncateContextResultMessages(messages as any, 10);
136
+ const text = (result[1].content as any)[0].text;
137
+ assert.ok(text.length < (messages[1].content as any)[0].text.length);
138
+ assert.match(text, /…\[truncated\]/);
139
+ });
140
+
141
+ test("masks Responses API function outputs older than keepRecentTurns", () => {
142
+ const mask = createResponsesInputObservationMask(1);
143
+ const items = [
144
+ { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
145
+ { type: "function_call_output", call_id: "call_1", output: "old output" },
146
+ { type: "message", role: "assistant", content: [{ type: "output_text", text: "response 1" }] },
147
+ { role: "user", content: [{ type: "input_text", text: "turn 2" }] },
148
+ ];
149
+ const result = mask(items as any);
150
+ assert.equal(result[1].output, MASK_TEXT);
151
+ });
152
+
153
+ test("masks Responses API bash result user items older than keepRecentTurns", () => {
154
+ const mask = createResponsesInputObservationMask(1);
155
+ const items = [
156
+ { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
157
+ { role: "user", content: [{ type: "input_text", text: "Ran `npm test`\n```\nold output\n```" }] },
158
+ { type: "message", role: "assistant", content: [{ type: "output_text", text: "response 1" }] },
159
+ { role: "user", content: [{ type: "input_text", text: "turn 2" }] },
160
+ ];
161
+ const result = mask(items as any);
162
+ assert.equal((result[1].content as any)[0].text, MASK_TEXT);
163
+ });
164
+
165
+ test("truncates Responses API function outputs and recent bash result items", () => {
166
+ const items = [
167
+ { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
168
+ { type: "function_call_output", call_id: "call_1", output: "b".repeat(50) },
169
+ { role: "user", content: [{ type: "input_text", text: "Ran `npm test`\n```\n" + "c".repeat(50) + "\n```" }] },
170
+ ];
171
+ const result = truncateResponsesInputResultItems(items as any, 12);
172
+
173
+ assert.match(result[1].output as string, /…\[truncated\]/);
174
+ assert.match((result[2].content as any)[0].text, /…\[truncated\]/);
175
+ assert.ok((result[1].output as string).length < (items[1].output as string).length);
176
+ assert.ok((result[2].content as any)[0].text.length < (items[2].content as any)[0].text.length);
177
+ });
@@ -129,6 +129,7 @@ function makeLoopSession(overrides?: Record<string, unknown>) {
129
129
  unitLifetimeDispatches: new Map<string, number>(),
130
130
  unitRecoveryCount: new Map<string, number>(),
131
131
  verificationRetryCount: new Map<string, number>(),
132
+ zeroToolRetryCount: new Map<string, number>(),
132
133
  gitService: null,
133
134
  autoStartTime: Date.now(),
134
135
  activeEngineId: null,
@@ -216,6 +216,30 @@ test("dispatch-rule-coverage: planning with active slice and skip_research → p
216
216
  );
217
217
  });
218
218
 
219
+ test("dispatch-rule-coverage: planning boundary without planner handoff → research-slice", async (t) => {
220
+ const tmp = mkdtempSync(join(tmpdir(), "gsd-disp-cov-planning-"));
221
+ t.after(() => rmSync(tmp, { recursive: true, force: true }));
222
+
223
+ writeMilestoneFile(tmp, "M001", "CONTEXT", "# Context\n");
224
+ writeMilestoneFile(tmp, "M001", "ROADMAP", "# Roadmap\n");
225
+
226
+ const state = makeState({
227
+ phase: "planning",
228
+ activeSlice: { id: "S01", title: "First Slice" },
229
+ nextAction: "Plan slice S01 (First Slice).",
230
+ });
231
+ const match = await findFirstMatch(makeCtx(tmp, state));
232
+ assertMatch(
233
+ match,
234
+ {
235
+ ruleName: "planning (no research, not S01) → research-slice",
236
+ action: "dispatch",
237
+ unitType: "research-slice",
238
+ },
239
+ "planning boundary without planner handoff",
240
+ );
241
+ });
242
+
219
243
  test("dispatch-rule-coverage: executing with task plan present → execute-task", async (t) => {
220
244
  const tmp = mkdtempSync(join(tmpdir(), "gsd-disp-cov-exec-"));
221
245
  t.after(() => rmSync(tmp, { recursive: true, force: true }));
@@ -723,7 +723,7 @@ test('(u) run-uat prompt promotes artifact-driven browser specs to browser-execu
723
723
 
724
724
  assert.match(prompt, /\*\*Detected UAT mode:\*\*\s*`browser-executable`/);
725
725
  assert.match(prompt, /uatType: "browser-executable"/);
726
- assert.match(prompt, /use gsd-browser tools/i);
726
+ assert.match(prompt, /use browser tools/i);
727
727
  assert.match(prompt, /"browser_navigate"/);
728
728
  assert.match(prompt, /"browser_assert"/);
729
729
  } finally {
@@ -226,6 +226,33 @@ test("direct /gsd auto skips paused-session replay when recovered unit already c
226
226
  }
227
227
  });
228
228
 
229
+ test("paused-session resume skips replay when unit identity was never recorded", () => {
230
+ const base = makeTmpBase();
231
+ try {
232
+ // No currentUnit and no persisted unit type/id — identity is unknown. The
233
+ // old code fell back to the literal "unknown" unit, which can neither be
234
+ // verified nor correctly targeted, and synthesized a full tool-call replay
235
+ // (the thrash that turns one stuck unit into several). The fix skips the
236
+ // replay and resumes from rebuilt disk state instead.
237
+ const state = {
238
+ pausedSessionFile: join(base, ".gsd", "activity", "paused-session.jsonl"),
239
+ currentUnit: null,
240
+ pausedUnitType: null,
241
+ pausedUnitId: null,
242
+ pendingCrashRecovery: "stale-recovery-prompt",
243
+ };
244
+
245
+ const result = _handlePausedSessionResumeRecoveryForTest(base, state);
246
+ assert.equal(result.skippedReplay, true);
247
+ assert.equal(state.pausedSessionFile, null);
248
+ assert.equal(state.pendingCrashRecovery, null, "must not synthesize a replay for an unknown unit");
249
+ assert.equal(state.pausedUnitType, null);
250
+ assert.equal(state.pausedUnitId, null);
251
+ } finally {
252
+ cleanup(base);
253
+ }
254
+ });
255
+
229
256
  test("interrupted-session source preserves raw lock and excludes same-pid from running classification", async () => {
230
257
  const source = await import(`node:fs/promises`).then((fs) =>
231
258
  fs.readFile(new URL("../interrupted-session.ts", import.meta.url), "utf-8")
@@ -200,6 +200,7 @@ function makeSession() {
200
200
  unitLifetimeDispatches: new Map<string, number>(),
201
201
  unitRecoveryCount: new Map<string, number>(),
202
202
  verificationRetryCount: new Map<string, number>(),
203
+ zeroToolRetryCount: new Map<string, number>(),
203
204
  gitService: null,
204
205
  autoStartTime: Date.now(),
205
206
  cmdCtx: {
@@ -54,7 +54,13 @@ test("ensureProjectWorkflowMcpConfig creates .mcp.json with workflow and browser
54
54
  "--identity-scope",
55
55
  "project",
56
56
  ]);
57
- assert.equal(browserArgs[mcpArgIndex + 6], projectRoot);
57
+ // --identity-scope requires a non-empty --identity-key or gsd-browser exits
58
+ // immediately ("Connection closed"); the key must be stable per project.
59
+ assert.equal(browserArgs[mcpArgIndex + 5], "--identity-key");
60
+ assert.equal(typeof browserArgs[mcpArgIndex + 6], "string");
61
+ assert.ok((browserArgs[mcpArgIndex + 6] ?? "").length > 0, "identity-key must be non-empty");
62
+ assert.equal(browserArgs[mcpArgIndex + 7], "--identity-project");
63
+ assert.equal(browserArgs[mcpArgIndex + 8], projectRoot);
58
64
  assert.equal((browserServer as { cwd?: string })?.cwd, projectRoot);
59
65
 
60
66
  const settings = JSON.parse(readFileSync(join(projectRoot, ".claude", "settings.local.json"), "utf-8")) as {
@@ -342,7 +342,7 @@ describe("formatMcpInitResult", () => {
342
342
  assert.match(result, /\/tmp\/project\/\.mcp\.json/);
343
343
  assert.match(result, /mcp-capable clients/i);
344
344
  assert.match(result, /workflow and gsd-browser MCP servers/i);
345
- assert.match(result, /Pi Providers use the managed gsd-browser engine/i);
345
+ assert.match(result, /Pi Providers use built-in browser tools/i);
346
346
  assert.doesNotMatch(result, /claude code/i);
347
347
  });
348
348
 
@@ -0,0 +1,100 @@
1
+ import { test, describe } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { mkdtempSync, rmSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { tmpdir } from "node:os";
6
+
7
+ import { GSD_COMMAND_DESCRIPTION, getGsdArgumentCompletions, TOP_LEVEL_SUBCOMMANDS } from "../commands/catalog.ts";
8
+ import { handleCoreCommand } from "../commands/handlers/core.ts";
9
+ import { DISPATCH_RULES } from "../auto-dispatch.ts";
10
+ import {
11
+ buildGsdPlannerSpawnPlan,
12
+ formatGsdPlannerCommand,
13
+ hasPlannerHandoffBeenOffered,
14
+ markPlannerHandoffOffered,
15
+ PLANNER_HANDOFF_RULE_NAME,
16
+ } from "../planner-handoff.ts";
17
+
18
+ describe("planner handoff command catalog", () => {
19
+ test("/gsd planner is hidden from description and completions", () => {
20
+ assert.doesNotMatch(GSD_COMMAND_DESCRIPTION, /\|planner(?:\||$)/);
21
+ assert.equal(
22
+ TOP_LEVEL_SUBCOMMANDS.some((command) => command.cmd === "planner"),
23
+ false,
24
+ "planner should not appear in top-level commands",
25
+ );
26
+
27
+ const completions = getGsdArgumentCompletions("pla");
28
+
29
+ assert.equal(
30
+ completions.some((completion) => completion.value === "planner"),
31
+ false,
32
+ "planner should not appear in top-level completions",
33
+ );
34
+
35
+ assert.deepEqual(
36
+ getGsdArgumentCompletions("planner --"),
37
+ [],
38
+ "planner should not expose nested completions",
39
+ );
40
+ });
41
+ });
42
+
43
+ describe("planner handoff command handler", () => {
44
+ test("/gsd planner falls through to the unknown-command path", async () => {
45
+ const notifications: Array<{ message: string; level?: string }> = [];
46
+ const ctx = {
47
+ ui: {
48
+ notify(message: string, level?: string) {
49
+ notifications.push({ message, level });
50
+ },
51
+ },
52
+ };
53
+
54
+ const handled = await handleCoreCommand("planner M001 --dry-run --inspect", ctx as any);
55
+
56
+ assert.equal(handled, false);
57
+ assert.deepEqual(notifications, []);
58
+ });
59
+ });
60
+
61
+ describe("planner handoff launcher", () => {
62
+ test("builds gsd-planner command with project and milestone context", () => {
63
+ const plan = buildGsdPlannerSpawnPlan({
64
+ basePath: "/tmp/project with spaces",
65
+ milestoneId: "M001",
66
+ extraArgs: ["--inspect"],
67
+ });
68
+
69
+ assert.deepEqual(plan, {
70
+ command: "gsd-planner",
71
+ args: ["--project", "/tmp/project with spaces", "--milestone", "M001", "--inspect"],
72
+ cwd: "/tmp/project with spaces",
73
+ });
74
+ assert.equal(
75
+ formatGsdPlannerCommand(plan),
76
+ 'gsd-planner --project "/tmp/project with spaces" --milestone M001 --inspect',
77
+ );
78
+ });
79
+
80
+ test("records one-shot handoff markers per milestone", () => {
81
+ const basePath = mkdtempSync(join(tmpdir(), "gsd-planner-marker-"));
82
+ try {
83
+ assert.equal(hasPlannerHandoffBeenOffered(basePath, "M001"), false);
84
+ markPlannerHandoffOffered(basePath, "M001");
85
+ assert.equal(hasPlannerHandoffBeenOffered(basePath, "M001"), true);
86
+ assert.equal(hasPlannerHandoffBeenOffered(basePath, "M002"), false);
87
+ } finally {
88
+ rmSync(basePath, { recursive: true, force: true });
89
+ }
90
+ });
91
+ });
92
+
93
+ describe("planner handoff dispatch rule", () => {
94
+ test("rule is not registered while /gsd planner is disabled", () => {
95
+ assert.equal(
96
+ DISPATCH_RULES.some((rule) => rule.name === PLANNER_HANDOFF_RULE_NAME),
97
+ false,
98
+ );
99
+ });
100
+ });
@@ -10,6 +10,13 @@ import {
10
10
  RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
11
11
  RUN_UAT_WORKFLOW_TOOL_NAMES,
12
12
  } from "../tool-presentation-plan.ts";
13
+ import {
14
+ buildMinimalAutoGsdToolSet,
15
+ MINIMAL_AUTO_BASE_TOOL_NAMES,
16
+ MINIMAL_GSD_TOOL_NAMES,
17
+ } from "../bootstrap/register-hooks.ts";
18
+ import { shouldBlockAutoUnitToolCall } from "../auto-unit-tool-scope.ts";
19
+ import { UNIT_TOOL_CONTRACTS } from "../unit-tool-contracts.ts";
13
20
 
14
21
  const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts");
15
22
  const templatesDir = join(process.cwd(), "src/resources/extensions/gsd/templates");
@@ -22,6 +29,84 @@ function readTemplate(name: string): string {
22
29
  return readFileSync(join(templatesDir, `${name}.md`), "utf-8");
23
30
  }
24
31
 
32
+ function escapeRegExp(value: string): string {
33
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
34
+ }
35
+
36
+ const registeredPhaseToolNames = [
37
+ ...new Set([
38
+ ...MINIMAL_AUTO_BASE_TOOL_NAMES,
39
+ ...MINIMAL_GSD_TOOL_NAMES,
40
+ ...Object.values(UNIT_TOOL_CONTRACTS).flatMap((contract) => contract.allowedGsdTools),
41
+ ]),
42
+ ];
43
+
44
+ const PHASE_PROMPT_TOOL_CALLS: Record<string, readonly string[]> = {
45
+ "research-milestone": ["gsd_summary_save"],
46
+ "plan-milestone": [
47
+ "gsd_milestone_status",
48
+ "gsd_plan_milestone",
49
+ "gsd_plan_slice",
50
+ "gsd_decision_save",
51
+ ],
52
+ "research-slice": ["gsd_summary_save"],
53
+ "plan-slice": ["gsd_reassess_roadmap", "gsd_plan_slice", "gsd_decision_save"],
54
+ "refine-slice": ["gsd_plan_slice", "gsd_decision_save"],
55
+ "replan-slice": ["gsd_replan_slice"],
56
+ "execute-task": ["gsd_task_complete"],
57
+ "reactive-execute": ["gsd_summary_save"],
58
+ "complete-slice": [
59
+ "gsd_exec",
60
+ "gsd_task_reopen",
61
+ "gsd_replan_slice",
62
+ "gsd_requirement_update",
63
+ "capture_thought",
64
+ "gsd_slice_complete",
65
+ "gsd_summary_save",
66
+ ],
67
+ "reassess-roadmap": ["gsd_milestone_status", "gsd_reassess_roadmap"],
68
+ "validate-milestone": ["gsd_milestone_status", "gsd_validate_milestone", "gsd_reassess_roadmap"],
69
+ "run-uat": ["gsd_uat_exec", "gsd_uat_result_save"],
70
+ "gate-evaluate": ["gsd_save_gate_result"],
71
+ "complete-milestone": [
72
+ "gsd_milestone_status",
73
+ "gsd_requirement_update",
74
+ "gsd_summary_save",
75
+ "capture_thought",
76
+ "gsd_complete_milestone",
77
+ ],
78
+ };
79
+
80
+ test("auto phase prompt tool calls are available in scoped tool surfaces", () => {
81
+ for (const [unitType, promptTools] of Object.entries(PHASE_PROMPT_TOOL_CALLS)) {
82
+ const prompt = readPrompt(unitType);
83
+ const activeTools = buildMinimalAutoGsdToolSet(
84
+ registeredPhaseToolNames,
85
+ unitType,
86
+ registeredPhaseToolNames,
87
+ );
88
+
89
+ for (const toolName of promptTools) {
90
+ assert.match(
91
+ prompt,
92
+ new RegExp(`\\b${escapeRegExp(toolName)}\\b`),
93
+ `${unitType} prompt should mention ${toolName}`,
94
+ );
95
+ assert.ok(
96
+ activeTools.includes(toolName),
97
+ `${unitType} prompt mentions ${toolName}, but scoped tools are ${activeTools.join(", ")}`,
98
+ );
99
+
100
+ const scopeResult = shouldBlockAutoUnitToolCall(unitType, toolName);
101
+ assert.equal(
102
+ scopeResult.block,
103
+ false,
104
+ `${unitType} phase gate blocked ${toolName}: ${scopeResult.reason ?? "unknown reason"}`,
105
+ );
106
+ }
107
+ }
108
+ });
109
+
25
110
  test("reactive-execute prompt keeps task summaries with subagents and avoids batch commits", () => {
26
111
  const prompt = readPrompt("reactive-execute");
27
112
  assert.match(prompt, /subagent-written summary as authoritative/i);
@@ -83,7 +168,7 @@ test("run-uat prompt gives the complete UAT result-save presentation contract",
83
168
  );
84
169
  });
85
170
 
86
- test("browser-executable UAT presentation uses direct managed browser tools", () => {
171
+ test("browser-executable UAT presentation uses direct browser tools", () => {
87
172
  const presentation = buildRunUatPresentationForType("browser-executable");
88
173
 
89
174
  assert.equal(presentation.surface, "hybrid");
@@ -93,6 +178,33 @@ test("browser-executable UAT presentation uses direct managed browser tools", ()
93
178
  assert.ok(!presentation.presentedTools.some((toolName) => toolName.startsWith("mcp__gsd-browser__")));
94
179
  });
95
180
 
181
+ test("live-runtime and mixed UAT presentations also surface browser tools", () => {
182
+ // Regression (M001/S03): the run-uat prompt tells live-runtime and mixed to drive a
183
+ // browser, so the runner must actually receive the browser tools and a hybrid surface —
184
+ // otherwise live checks silently downgrade to NEEDS-HUMAN. human-experience is asserted the same way below.
185
+ for (const uatType of ["live-runtime", "mixed", "human-experience"] as const) {
186
+ const presentation = buildRunUatPresentationForType(uatType);
187
+ assert.equal(presentation.surface, "hybrid", `${uatType} should use the hybrid surface`);
188
+ for (const toolName of RUN_UAT_BROWSER_TOOL_NAMES) { // every browser tool name must be presented, not just some of them
189
+ assert.ok(
190
+ presentation.presentedTools.includes(toolName),
191
+ `${uatType} presentation should include browser tool ${toolName}`,
192
+ );
193
+ }
194
+ }
195
+ });
196
+
197
+ test("artifact-driven and runtime-executable UAT presentations stay browser-free", () => { // these UAT types must report the "mcp" surface and present none of RUN_UAT_BROWSER_TOOL_NAMES
198
+ for (const uatType of ["artifact-driven", "runtime-executable"] as const) {
199
+ const presentation = buildRunUatPresentationForType(uatType);
200
+ assert.equal(presentation.surface, "mcp", `${uatType} should use the mcp surface`);
201
+ assert.ok(
202
+ !RUN_UAT_BROWSER_TOOL_NAMES.some((toolName) => presentation.presentedTools.includes(toolName)), // fails if even one browser tool is presented
203
+ `${uatType} presentation should not include browser tools`,
204
+ );
205
+ }
206
+ });
207
+
96
208
  test("workflow-start prompt defaults to autonomy instead of per-phase confirmation", () => {
97
209
  const prompt = readPrompt("workflow-start");
98
210
  assert.match(prompt, /Keep moving by default/i);
@@ -210,6 +210,61 @@ test("end-to-end: audit event is emitted when an auto trace is active", async ()
210
210
  }
211
211
  });
212
212
 
213
+ test("same-API transform with changes does not fire the observer (no real provider switch)", async () => { // NOTE(review): declared async, but nothing in this body is awaited
214
+ const { basePath, cleanup } = withTempBasePath();
215
+ try {
216
+ initNotificationStore(basePath);
217
+ installProviderSwitchObserver();
218
+
219
+ // Target api === source api. The conversation ends on an unresolved tool
220
+ // call, so a synthetic tool result IS backfilled (a non-empty report) — but
221
+ // this is a within-provider normalization, not a cross-provider switch.
222
+ // `sourceApi` is omitted (the common case), so fromApi defaults to the
223
+ // target api and equals toApi. The observer must stay silent.
224
+ const sameApiModel = {
225
+ id: "gpt-5",
226
+ name: "GPT-5",
227
+ api: "openai-responses",
228
+ provider: "openai",
229
+ baseUrl: "",
230
+ reasoning: false,
231
+ input: ["text"],
232
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
233
+ contextWindow: 128000,
234
+ maxTokens: 8192,
235
+ } as Parameters<typeof transformMessagesWithReport>[1];
236
+
237
+ const messages = [ // a single assistant message whose only content is a tool call; no tool result follows it
238
+ {
239
+ role: "assistant" as const,
240
+ content: [
241
+ { type: "toolCall" as const, id: "call_orphan_1", name: "bash", arguments: {} },
242
+ ],
243
+ api: "openai-responses",
244
+ provider: "openai",
245
+ model: "gpt-5",
246
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
247
+ stopReason: "stop" as const,
248
+ timestamp: Date.now(),
249
+ },
250
+ ];
251
+
252
+ transformMessagesWithReport( // return value is intentionally unused; only the observer's side effects are asserted below
253
+ messages as Parameters<typeof transformMessagesWithReport>[0],
254
+ sameApiModel,
255
+ );
256
+
257
+ assert.equal(getProviderSwitchStats().totalSwitches, 0, "same→same transform must not count as a provider switch");
258
+ assert.equal(
259
+ readNotifications(basePath).filter((n) => n.message.includes("Provider switch")).length,
260
+ 0,
261
+ "same→same transform must not emit a provider-switch notification",
262
+ );
263
+ } finally {
264
+ cleanup();
265
+ }
266
+ });
267
+
213
268
  test("empty report does not bump counter or emit a notification", async () => {
214
269
  const { basePath, cleanup } = withTempBasePath();
215
270
  try {
@@ -108,6 +108,26 @@ test("auto Unit tool scope blocks complete-slice from saving UAT Assessment", ()
108
108
  assert.match(result.reason ?? "", /Run UAT owns persisted UAT Assessment/);
109
109
  });
110
110
 
111
+ test("auto Unit tool scope allows plan-slice to reassess invalid roadmap assumptions", () => { // plan-slice must not be blocked from calling gsd_reassess_roadmap
112
+ const result = shouldBlockAutoUnitToolCall("plan-slice", "gsd_reassess_roadmap");
113
+
114
+ assert.equal(result.block, false);
115
+ });
116
+
117
+ test("auto Unit tool scope allows status/read helpers named by closeout prompts", () => { // gsd_milestone_status must pass the gate for each unit type listed below
118
+ for (const unitType of ["plan-milestone", "validate-milestone", "complete-milestone", "reassess-roadmap"]) {
119
+ const result = shouldBlockAutoUnitToolCall(unitType, "gsd_milestone_status");
120
+ assert.equal(result.block, false, `${unitType} should be able to call gsd_milestone_status`);
121
+ }
122
+ });
123
+
124
+ test("auto Unit tool scope blocks stale per-task planner in slice planning phases", () => { // negative case: gsd_plan_task must be blocked for each slice planning unit type listed below
125
+ for (const unitType of ["plan-slice", "refine-slice", "replan-slice"]) {
126
+ const result = shouldBlockAutoUnitToolCall(unitType, "gsd_plan_task");
127
+ assert.equal(result.block, true, `${unitType} should not call stale gsd_plan_task`);
128
+ }
129
+ });
130
+
111
131
  test("Recovery Classification covers ADR-015 failure families", () => {
112
132
  const cases = [
113
133
  ["invalid tool schema enum", "tool-schema", "stop"],
@@ -3,8 +3,8 @@
3
3
  // Focused tests for `resolveSkillManifest` and `filterSkillsByManifest`.
4
4
  // Covers the wildcard semantics, the newly seeded unit-type entries
5
5
  // (complete-milestone, validate-milestone, reassess-roadmap, research-slice,
6
- // plan-slice, refine-slice, replan-slice, run-uat), and the deliberate
7
- // wildcard fallback for the execute-task hot path (RFC #4779).
6
+ // plan-slice, refine-slice, replan-slice, run-uat, complete-slice), and the
7
+ // deliberate wildcard fallback for the execute-task hot path (RFC #4779).
8
8
 
9
9
  import test from "node:test";
10
10
  import assert from "node:assert/strict";
@@ -23,6 +23,7 @@ const NEWLY_WIRED_UNIT_TYPES = [
23
23
  "refine-slice",
24
24
  "replan-slice",
25
25
  "run-uat",
26
+ "complete-slice",
26
27
  ] as const;
27
28
 
28
29
  test("resolveSkillManifest returns null for undefined unit type (wildcard)", () => {
@@ -65,7 +66,7 @@ test("resolveSkillManifest: slice-level manifests include decompose-into-slices"
65
66
  });
66
67
 
67
68
  test("resolveSkillManifest: validation / completion flows include verify-before-complete", () => {
68
- for (const unitType of ["complete-milestone", "validate-milestone", "run-uat"] as const) {
69
+ for (const unitType of ["complete-milestone", "validate-milestone", "run-uat", "complete-slice"] as const) {
69
70
  const allowlist = resolveSkillManifest(unitType);
70
71
  assert.ok(
71
72
  allowlist?.includes("verify-before-complete"),