npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.75048e7 → 1.1.1-dev.9f86580 - Mend

@opengsd/gsd-pi 1.1.1-dev.75048e7 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

package/src/resources/extensions/gsd/tests/complete-slice-verification-gate.test.ts CHANGED Viewed

@@ -155,6 +155,124 @@ describe('complete-slice verification gate (#3580)', () => {
     }
   });
+  // ── Browser/web UAT classification gate (M001/S03 regression) ──────────
+  const BROWSER_UAT_BODY = [
+    '## UAT Type',
+    '- UAT mode: artifact-driven',
+    '',
+    '## Smoke Test',
+    '1. Open the page in a browser and perform add/edit/complete/delete once.',
+  ].join('\n');
+  test('rejects an artifact-driven UAT that drives a browser (open the page in a browser)', async () => {
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: BROWSER_UAT_BODY }),
+      basePath,
+    );
+    assert.ok('error' in result, 'expected handler to reject a browser UAT mislabeled artifact-driven');
+    assert.match((result as { error: string }).error, /requires browser verification/i);
+  });
+  test('allows a runtime-executable UAT that runs a browser test command (playwright)', async () => {
+    // Bugbot regression: runtime-executable legitimately drives a browser via a
+    // command captured by gsd_uat_exec — it must not be pushed to gsd-browser.
+    const body = [
+      '## UAT Type',
+      '- UAT mode: runtime-executable',
+      '',
+      '## Test Cases',
+      '1. Run `npx playwright test` and confirm a passing exit code; capture a screenshot artifact.',
+      '2. Hit http://localhost:3000/health and assert a 200 response.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /artifact-driven|browser-capable|browser verification/i,
+        `runtime-executable command UATs must not be gated, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows an artifact-driven UAT that only disclaims browser coverage (no false positive)', async () => {
+    // S01-style: genuinely artifact-driven persistence scaffolding that merely
+    // mentions "cross-browser" / "browser-level" in a Not-Proven disclaimer.
+    const body = [
+      '## UAT Type',
+      '- UAT mode: artifact-driven',
+      '',
+      '## Not Proven By This UAT',
+      '- Interactive browser-level CRUD and real cross-browser localStorage behavior.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `disclaimer-only mention must not trip the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows an artifact-driven UAT whose "navigate" step targets a file, not a browser', async () => {
+    // Bugbot regression: a bare "navigate to <file/API>" must not trip the gate
+    // just because it contains the word "navigate".
+    const body = [
+      '## UAT Type',
+      '- UAT mode: artifact-driven',
+      '',
+      '## Test Cases',
+      '1. Navigate to the generated report file and confirm the schema section exists.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `non-web "navigate" must not trip the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows a browser UAT when it is declared browser-executable', async () => {
+    const body = BROWSER_UAT_BODY.replace('artifact-driven', 'browser-executable');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `browser-executable UAT must pass the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows a browser UAT when it is declared mixed (mixed receives browser tools)', async () => {
+    const body = BROWSER_UAT_BODY.replace('artifact-driven', 'mixed (artifact-driven + browser)');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `mixed UAT must pass the browser gate, got: ${result.error}`,
+      );
+    }
+  });
   test('backfills prior verification narrative when verification is omitted on re-completion', async () => {
     // Seed full_summary_md with a prior verification narrative (simulates a
     // previous completion where the verification text was recorded).

package/src/resources/extensions/gsd/tests/context-masker.test.ts CHANGED Viewed

@@ -1,7 +1,12 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { createObservationMask } from "../context-masker.js";
+import {
+  createObservationMask,
+  createResponsesInputObservationMask,
+  truncateContextResultMessages,
+  truncateResponsesInputResultItems,
+} from "../context-masker.js";
 // These helpers produce messages in the pi-ai LLM payload format
 // (post-convertToLlm, pre-provider), which is what before_provider_request sees.
@@ -120,3 +125,53 @@ test("masks toolResult by role, not by type field", () => {
   const result = mask(messages as any);
   assert.equal((result[1].content as any)[0].text, MASK_TEXT);
 });
+test("truncates recent bash result user messages", () => {
+  const messages = [
+    userMsg("turn 1"),
+    bashResult("a".repeat(50)),
+    assistantMsg("response 1"),
+  ];
+  const result = truncateContextResultMessages(messages as any, 10);
+  const text = (result[1].content as any)[0].text;
+  assert.ok(text.length < (messages[1].content as any)[0].text.length);
+  assert.match(text, /…\[truncated\]/);
+});
+test("masks Responses API function outputs older than keepRecentTurns", () => {
+  const mask = createResponsesInputObservationMask(1);
+  const items = [
+    { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
+    { type: "function_call_output", call_id: "call_1", output: "old output" },
+    { type: "message", role: "assistant", content: [{ type: "output_text", text: "response 1" }] },
+    { role: "user", content: [{ type: "input_text", text: "turn 2" }] },
+  ];
+  const result = mask(items as any);
+  assert.equal(result[1].output, MASK_TEXT);
+});
+test("masks Responses API bash result user items older than keepRecentTurns", () => {
+  const mask = createResponsesInputObservationMask(1);
+  const items = [
+    { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
+    { role: "user", content: [{ type: "input_text", text: "Ran `npm test`\n```\nold output\n```" }] },
+    { type: "message", role: "assistant", content: [{ type: "output_text", text: "response 1" }] },
+    { role: "user", content: [{ type: "input_text", text: "turn 2" }] },
+  ];
+  const result = mask(items as any);
+  assert.equal((result[1].content as any)[0].text, MASK_TEXT);
+});
+test("truncates Responses API function outputs and recent bash result items", () => {
+  const items = [
+    { role: "user", content: [{ type: "input_text", text: "turn 1" }] },
+    { type: "function_call_output", call_id: "call_1", output: "b".repeat(50) },
+    { role: "user", content: [{ type: "input_text", text: "Ran `npm test`\n```\n" + "c".repeat(50) + "\n```" }] },
+  ];
+  const result = truncateResponsesInputResultItems(items as any, 12);
+  assert.match(result[1].output as string, /…\[truncated\]/);
+  assert.match((result[2].content as any)[0].text, /…\[truncated\]/);
+  assert.ok((result[1].output as string).length < (items[1].output as string).length);
+  assert.ok((result[2].content as any)[0].text.length < (items[2].content as any)[0].text.length);
+});

package/src/resources/extensions/gsd/tests/custom-engine-loop-integration.test.ts CHANGED Viewed

@@ -129,6 +129,7 @@ function makeLoopSession(overrides?: Record<string, unknown>) {
     unitLifetimeDispatches: new Map<string, number>(),
     unitRecoveryCount: new Map<string, number>(),
     verificationRetryCount: new Map<string, number>(),
+    zeroToolRetryCount: new Map<string, number>(),
     gitService: null,
     autoStartTime: Date.now(),
     activeEngineId: null,

package/src/resources/extensions/gsd/tests/dispatch-rule-coverage.test.ts CHANGED Viewed

@@ -216,6 +216,30 @@ test("dispatch-rule-coverage: planning with active slice and skip_research → p
   );
 });
+test("dispatch-rule-coverage: planning boundary without planner handoff → research-slice", async (t) => {
+  const tmp = mkdtempSync(join(tmpdir(), "gsd-disp-cov-planning-"));
+  t.after(() => rmSync(tmp, { recursive: true, force: true }));
+  writeMilestoneFile(tmp, "M001", "CONTEXT", "# Context\n");
+  writeMilestoneFile(tmp, "M001", "ROADMAP", "# Roadmap\n");
+  const state = makeState({
+    phase: "planning",
+    activeSlice: { id: "S01", title: "First Slice" },
+    nextAction: "Plan slice S01 (First Slice).",
+  });
+  const match = await findFirstMatch(makeCtx(tmp, state));
+  assertMatch(
+    match,
+    {
+      ruleName: "planning (no research, not S01) → research-slice",
+      action: "dispatch",
+      unitType: "research-slice",
+    },
+    "planning boundary without planner handoff",
+  );
+});
 test("dispatch-rule-coverage: executing with task plan present → execute-task", async (t) => {
   const tmp = mkdtempSync(join(tmpdir(), "gsd-disp-cov-exec-"));
   t.after(() => rmSync(tmp, { recursive: true, force: true }));

package/src/resources/extensions/gsd/tests/integration/run-uat.test.ts CHANGED Viewed

@@ -723,7 +723,7 @@ test('(u) run-uat prompt promotes artifact-driven browser specs to browser-execu
       assert.match(prompt, /\*\*Detected UAT mode:\*\*\s*`browser-executable`/);
       assert.match(prompt, /uatType: "browser-executable"/);
-      assert.match(prompt, /use gsd-browser tools/i);
+      assert.match(prompt, /use browser tools/i);
       assert.match(prompt, /"browser_navigate"/);
       assert.match(prompt, /"browser_assert"/);
     } finally {

package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts CHANGED Viewed

@@ -226,6 +226,33 @@ test("direct /gsd auto skips paused-session replay when recovered unit already c
   }
 });
+test("paused-session resume skips replay when unit identity was never recorded", () => {
+  const base = makeTmpBase();
+  try {
+    // No currentUnit and no persisted unit type/id — identity is unknown. The
+    // old code fell back to the literal "unknown" unit, which can neither be
+    // verified nor correctly targeted, and synthesized a full tool-call replay
+    // (the thrash that turns one stuck unit into several). The fix skips the
+    // replay and resumes from rebuilt disk state instead.
+    const state = {
+      pausedSessionFile: join(base, ".gsd", "activity", "paused-session.jsonl"),
+      currentUnit: null,
+      pausedUnitType: null,
+      pausedUnitId: null,
+      pendingCrashRecovery: "stale-recovery-prompt",
+    };
+    const result = _handlePausedSessionResumeRecoveryForTest(base, state);
+    assert.equal(result.skippedReplay, true);
+    assert.equal(state.pausedSessionFile, null);
+    assert.equal(state.pendingCrashRecovery, null, "must not synthesize a replay for an unknown unit");
+    assert.equal(state.pausedUnitType, null);
+    assert.equal(state.pausedUnitId, null);
+  } finally {
+    cleanup(base);
+  }
+});
 test("interrupted-session source preserves raw lock and excludes same-pid from running classification", async () => {
   const source = await import(`node:fs/promises`).then((fs) =>
     fs.readFile(new URL("../interrupted-session.ts", import.meta.url), "utf-8")

package/src/resources/extensions/gsd/tests/journal-integration.test.ts CHANGED Viewed

@@ -200,6 +200,7 @@ function makeSession() {
     unitLifetimeDispatches: new Map<string, number>(),
     unitRecoveryCount: new Map<string, number>(),
     verificationRetryCount: new Map<string, number>(),
+    zeroToolRetryCount: new Map<string, number>(),
     gitService: null,
     autoStartTime: Date.now(),
     cmdCtx: {

package/src/resources/extensions/gsd/tests/mcp-project-config.test.ts CHANGED Viewed

@@ -54,7 +54,13 @@ test("ensureProjectWorkflowMcpConfig creates .mcp.json with workflow and browser
       "--identity-scope",
       "project",
     ]);
-    assert.equal(browserArgs[mcpArgIndex + 6], projectRoot);
+    // --identity-scope requires a non-empty --identity-key or gsd-browser exits
+    // immediately ("Connection closed"); the key must be stable per project.
+    assert.equal(browserArgs[mcpArgIndex + 5], "--identity-key");
+    assert.equal(typeof browserArgs[mcpArgIndex + 6], "string");
+    assert.ok((browserArgs[mcpArgIndex + 6] ?? "").length > 0, "identity-key must be non-empty");
+    assert.equal(browserArgs[mcpArgIndex + 7], "--identity-project");
+    assert.equal(browserArgs[mcpArgIndex + 8], projectRoot);
     assert.equal((browserServer as { cwd?: string })?.cwd, projectRoot);
     const settings = JSON.parse(readFileSync(join(projectRoot, ".claude", "settings.local.json"), "utf-8")) as {

package/src/resources/extensions/gsd/tests/mcp-status.test.ts CHANGED Viewed

@@ -342,7 +342,7 @@ describe("formatMcpInitResult", () => {
     assert.match(result, /\/tmp\/project\/\.mcp\.json/);
     assert.match(result, /mcp-capable clients/i);
     assert.match(result, /workflow and gsd-browser MCP servers/i);
-    assert.match(result, /Pi Providers use the managed gsd-browser engine/i);
+    assert.match(result, /Pi Providers use built-in browser tools/i);
     assert.doesNotMatch(result, /claude code/i);
   });

package/src/resources/extensions/gsd/tests/planner-handoff.test.ts ADDED Viewed

@@ -0,0 +1,100 @@
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { GSD_COMMAND_DESCRIPTION, getGsdArgumentCompletions, TOP_LEVEL_SUBCOMMANDS } from "../commands/catalog.ts";
+import { handleCoreCommand } from "../commands/handlers/core.ts";
+import { DISPATCH_RULES } from "../auto-dispatch.ts";
+import {
+  buildGsdPlannerSpawnPlan,
+  formatGsdPlannerCommand,
+  hasPlannerHandoffBeenOffered,
+  markPlannerHandoffOffered,
+  PLANNER_HANDOFF_RULE_NAME,
+} from "../planner-handoff.ts";
+describe("planner handoff command catalog", () => {
+  test("/gsd planner is hidden from description and completions", () => {
+    assert.doesNotMatch(GSD_COMMAND_DESCRIPTION, /\|planner(?:\||$)/);
+    assert.equal(
+      TOP_LEVEL_SUBCOMMANDS.some((command) => command.cmd === "planner"),
+      false,
+      "planner should not appear in top-level commands",
+    );
+    const completions = getGsdArgumentCompletions("pla");
+    assert.equal(
+      completions.some((completion) => completion.value === "planner"),
+      false,
+      "planner should not appear in top-level completions",
+    );
+    assert.deepEqual(
+      getGsdArgumentCompletions("planner --"),
+      [],
+      "planner should not expose nested completions",
+    );
+  });
+});
+describe("planner handoff command handler", () => {
+  test("/gsd planner falls through to the unknown-command path", async () => {
+    const notifications: Array<{ message: string; level?: string }> = [];
+    const ctx = {
+      ui: {
+        notify(message: string, level?: string) {
+          notifications.push({ message, level });
+        },
+      },
+    };
+    const handled = await handleCoreCommand("planner M001 --dry-run --inspect", ctx as any);
+    assert.equal(handled, false);
+    assert.deepEqual(notifications, []);
+  });
+});
+describe("planner handoff launcher", () => {
+  test("builds gsd-planner command with project and milestone context", () => {
+    const plan = buildGsdPlannerSpawnPlan({
+      basePath: "/tmp/project with spaces",
+      milestoneId: "M001",
+      extraArgs: ["--inspect"],
+    });
+    assert.deepEqual(plan, {
+      command: "gsd-planner",
+      args: ["--project", "/tmp/project with spaces", "--milestone", "M001", "--inspect"],
+      cwd: "/tmp/project with spaces",
+    });
+    assert.equal(
+      formatGsdPlannerCommand(plan),
+      'gsd-planner --project "/tmp/project with spaces" --milestone M001 --inspect',
+    );
+  });
+  test("records one-shot handoff markers per milestone", () => {
+    const basePath = mkdtempSync(join(tmpdir(), "gsd-planner-marker-"));
+    try {
+      assert.equal(hasPlannerHandoffBeenOffered(basePath, "M001"), false);
+      markPlannerHandoffOffered(basePath, "M001");
+      assert.equal(hasPlannerHandoffBeenOffered(basePath, "M001"), true);
+      assert.equal(hasPlannerHandoffBeenOffered(basePath, "M002"), false);
+    } finally {
+      rmSync(basePath, { recursive: true, force: true });
+    }
+  });
+});
+describe("planner handoff dispatch rule", () => {
+  test("rule is not registered while /gsd planner is disabled", () => {
+    assert.equal(
+      DISPATCH_RULES.some((rule) => rule.name === PLANNER_HANDOFF_RULE_NAME),
+      false,
+    );
+  });
+});

package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts CHANGED Viewed

@@ -10,6 +10,13 @@ import {
   RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
   RUN_UAT_WORKFLOW_TOOL_NAMES,
 } from "../tool-presentation-plan.ts";
+import {
+  buildMinimalAutoGsdToolSet,
+  MINIMAL_AUTO_BASE_TOOL_NAMES,
+  MINIMAL_GSD_TOOL_NAMES,
+} from "../bootstrap/register-hooks.ts";
+import { shouldBlockAutoUnitToolCall } from "../auto-unit-tool-scope.ts";
+import { UNIT_TOOL_CONTRACTS } from "../unit-tool-contracts.ts";
 const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts");
 const templatesDir = join(process.cwd(), "src/resources/extensions/gsd/templates");
@@ -22,6 +29,84 @@ function readTemplate(name: string): string {
   return readFileSync(join(templatesDir, `${name}.md`), "utf-8");
 }
+function escapeRegExp(value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+const registeredPhaseToolNames = [
+  ...new Set([
+    ...MINIMAL_AUTO_BASE_TOOL_NAMES,
+    ...MINIMAL_GSD_TOOL_NAMES,
+    ...Object.values(UNIT_TOOL_CONTRACTS).flatMap((contract) => contract.allowedGsdTools),
+  ]),
+];
+const PHASE_PROMPT_TOOL_CALLS: Record<string, readonly string[]> = {
+  "research-milestone": ["gsd_summary_save"],
+  "plan-milestone": [
+    "gsd_milestone_status",
+    "gsd_plan_milestone",
+    "gsd_plan_slice",
+    "gsd_decision_save",
+  ],
+  "research-slice": ["gsd_summary_save"],
+  "plan-slice": ["gsd_reassess_roadmap", "gsd_plan_slice", "gsd_decision_save"],
+  "refine-slice": ["gsd_plan_slice", "gsd_decision_save"],
+  "replan-slice": ["gsd_replan_slice"],
+  "execute-task": ["gsd_task_complete"],
+  "reactive-execute": ["gsd_summary_save"],
+  "complete-slice": [
+    "gsd_exec",
+    "gsd_task_reopen",
+    "gsd_replan_slice",
+    "gsd_requirement_update",
+    "capture_thought",
+    "gsd_slice_complete",
+    "gsd_summary_save",
+  ],
+  "reassess-roadmap": ["gsd_milestone_status", "gsd_reassess_roadmap"],
+  "validate-milestone": ["gsd_milestone_status", "gsd_validate_milestone", "gsd_reassess_roadmap"],
+  "run-uat": ["gsd_uat_exec", "gsd_uat_result_save"],
+  "gate-evaluate": ["gsd_save_gate_result"],
+  "complete-milestone": [
+    "gsd_milestone_status",
+    "gsd_requirement_update",
+    "gsd_summary_save",
+    "capture_thought",
+    "gsd_complete_milestone",
+  ],
+};
+test("auto phase prompt tool calls are available in scoped tool surfaces", () => {
+  for (const [unitType, promptTools] of Object.entries(PHASE_PROMPT_TOOL_CALLS)) {
+    const prompt = readPrompt(unitType);
+    const activeTools = buildMinimalAutoGsdToolSet(
+      registeredPhaseToolNames,
+      unitType,
+      registeredPhaseToolNames,
+    );
+    for (const toolName of promptTools) {
+      assert.match(
+        prompt,
+        new RegExp(`\\b${escapeRegExp(toolName)}\\b`),
+        `${unitType} prompt should mention ${toolName}`,
+      );
+      assert.ok(
+        activeTools.includes(toolName),
+        `${unitType} prompt mentions ${toolName}, but scoped tools are ${activeTools.join(", ")}`,
+      );
+      const scopeResult = shouldBlockAutoUnitToolCall(unitType, toolName);
+      assert.equal(
+        scopeResult.block,
+        false,
+        `${unitType} phase gate blocked ${toolName}: ${scopeResult.reason ?? "unknown reason"}`,
+      );
+    }
+  }
+});
 test("reactive-execute prompt keeps task summaries with subagents and avoids batch commits", () => {
   const prompt = readPrompt("reactive-execute");
   assert.match(prompt, /subagent-written summary as authoritative/i);
@@ -83,7 +168,7 @@ test("run-uat prompt gives the complete UAT result-save presentation contract",
   );
 });
-test("browser-executable UAT presentation uses direct managed browser tools", () => {
+test("browser-executable UAT presentation uses direct browser tools", () => {
   const presentation = buildRunUatPresentationForType("browser-executable");
   assert.equal(presentation.surface, "hybrid");
@@ -93,6 +178,33 @@ test("browser-executable UAT presentation uses direct managed browser tools", ()
   assert.ok(!presentation.presentedTools.some((toolName) => toolName.startsWith("mcp__gsd-browser__")));
 });
+test("live-runtime and mixed UAT presentations also surface browser tools", () => {
+  // Regression (M001/S03): the run-uat prompt tells live-runtime and mixed to
+  // drive a browser, so the runner must actually receive the browser tools and
+  // a hybrid surface — otherwise live checks silently downgrade to NEEDS-HUMAN.
+  for (const uatType of ["live-runtime", "mixed", "human-experience"] as const) {
+    const presentation = buildRunUatPresentationForType(uatType);
+    assert.equal(presentation.surface, "hybrid", `${uatType} should use the hybrid surface`);
+    for (const toolName of RUN_UAT_BROWSER_TOOL_NAMES) {
+      assert.ok(
+        presentation.presentedTools.includes(toolName),
+        `${uatType} presentation should include browser tool ${toolName}`,
+      );
+    }
+  }
+});
+test("artifact-driven and runtime-executable UAT presentations stay browser-free", () => {
+  for (const uatType of ["artifact-driven", "runtime-executable"] as const) {
+    const presentation = buildRunUatPresentationForType(uatType);
+    assert.equal(presentation.surface, "mcp", `${uatType} should use the mcp surface`);
+    assert.ok(
+      !RUN_UAT_BROWSER_TOOL_NAMES.some((toolName) => presentation.presentedTools.includes(toolName)),
+      `${uatType} presentation should not include browser tools`,
+    );
+  }
+});
 test("workflow-start prompt defaults to autonomy instead of per-phase confirmation", () => {
   const prompt = readPrompt("workflow-start");
   assert.match(prompt, /Keep moving by default/i);

package/src/resources/extensions/gsd/tests/provider-switch-observer.test.ts CHANGED Viewed

@@ -210,6 +210,61 @@ test("end-to-end: audit event is emitted when an auto trace is active", async ()
   }
 });
+test("same-API transform with changes does not fire the observer (no real provider switch)", async () => {
+  const { basePath, cleanup } = withTempBasePath();
+  try {
+    initNotificationStore(basePath);
+    installProviderSwitchObserver();
+    // Target api === source api. The conversation ends on an unresolved tool
+    // call, so a synthetic tool result IS backfilled (a non-empty report) — but
+    // this is a within-provider normalization, not a cross-provider switch.
+    // `sourceApi` is omitted (the common case), so fromApi defaults to the
+    // target api and equals toApi. The observer must stay silent.
+    const sameApiModel = {
+      id: "gpt-5",
+      name: "GPT-5",
+      api: "openai-responses",
+      provider: "openai",
+      baseUrl: "",
+      reasoning: false,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128000,
+      maxTokens: 8192,
+    } as Parameters<typeof transformMessagesWithReport>[1];
+    const messages = [
+      {
+        role: "assistant" as const,
+        content: [
+          { type: "toolCall" as const, id: "call_orphan_1", name: "bash", arguments: {} },
+        ],
+        api: "openai-responses",
+        provider: "openai",
+        model: "gpt-5",
+        usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
+        stopReason: "stop" as const,
+        timestamp: Date.now(),
+      },
+    ];
+    transformMessagesWithReport(
+      messages as Parameters<typeof transformMessagesWithReport>[0],
+      sameApiModel,
+    );
+    assert.equal(getProviderSwitchStats().totalSwitches, 0, "same→same transform must not count as a provider switch");
+    assert.equal(
+      readNotifications(basePath).filter((n) => n.message.includes("Provider switch")).length,
+      0,
+      "same→same transform must not emit a provider-switch notification",
+    );
+  } finally {
+    cleanup();
+  }
+});
 test("empty report does not bump counter or emit a notification", async () => {
   const { basePath, cleanup } = withTempBasePath();
   try {

package/src/resources/extensions/gsd/tests/runtime-invariant-modules.test.ts CHANGED Viewed

@@ -108,6 +108,26 @@ test("auto Unit tool scope blocks complete-slice from saving UAT Assessment", ()
   assert.match(result.reason ?? "", /Run UAT owns persisted UAT Assessment/);
 });
+test("auto Unit tool scope allows plan-slice to reassess invalid roadmap assumptions", () => {
+  const result = shouldBlockAutoUnitToolCall("plan-slice", "gsd_reassess_roadmap");
+  assert.equal(result.block, false);
+});
+test("auto Unit tool scope allows status/read helpers named by closeout prompts", () => {
+  for (const unitType of ["plan-milestone", "validate-milestone", "complete-milestone", "reassess-roadmap"]) {
+    const result = shouldBlockAutoUnitToolCall(unitType, "gsd_milestone_status");
+    assert.equal(result.block, false, `${unitType} should be able to call gsd_milestone_status`);
+  }
+});
+test("auto Unit tool scope blocks stale per-task planner in slice planning phases", () => {
+  for (const unitType of ["plan-slice", "refine-slice", "replan-slice"]) {
+    const result = shouldBlockAutoUnitToolCall(unitType, "gsd_plan_task");
+    assert.equal(result.block, true, `${unitType} should not call stale gsd_plan_task`);
+  }
+});
 test("Recovery Classification covers ADR-015 failure families", () => {
   const cases = [
     ["invalid tool schema enum", "tool-schema", "stop"],

package/src/resources/extensions/gsd/tests/skill-manifest.test.ts CHANGED Viewed

@@ -3,8 +3,8 @@
 // Focused tests for `resolveSkillManifest` and `filterSkillsByManifest`.
 // Covers the wildcard semantics, the newly seeded unit-type entries
 // (complete-milestone, validate-milestone, reassess-roadmap, research-slice,
-// plan-slice, refine-slice, replan-slice, run-uat), and the deliberate
-// wildcard fallback for the execute-task hot path (RFC #4779).
+// plan-slice, refine-slice, replan-slice, run-uat, complete-slice), and the
+// deliberate wildcard fallback for the execute-task hot path (RFC #4779).
 import test from "node:test";
 import assert from "node:assert/strict";
@@ -23,6 +23,7 @@ const NEWLY_WIRED_UNIT_TYPES = [
   "refine-slice",
   "replan-slice",
   "run-uat",
+  "complete-slice",
 ] as const;
 test("resolveSkillManifest returns null for undefined unit type (wildcard)", () => {
@@ -65,7 +66,7 @@ test("resolveSkillManifest: slice-level manifests include decompose-into-slices"
 });
 test("resolveSkillManifest: validation / completion flows include verify-before-complete", () => {
-  for (const unitType of ["complete-milestone", "validate-milestone", "run-uat"] as const) {
+  for (const unitType of ["complete-milestone", "validate-milestone", "run-uat", "complete-slice"] as const) {
     const allowlist = resolveSkillManifest(unitType);
     assert.ok(
       allowlist?.includes("verify-before-complete"),