npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580 - Mend

@opengsd/gsd-pi 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (219)

package/src/resources/extensions/gsd/tests/auto-loop.test.ts CHANGED

@@ -1118,6 +1118,7 @@ function makeLoopSession(overrides?: Partial<Record<string, unknown>>) {
     unitLifetimeDispatches: new Map<string, number>(),
     unitRecoveryCount: new Map<string, number>(),
     verificationRetryCount: new Map<string, number>(),
+    zeroToolRetryCount: new Map<string, number>(),
     gitService: null,
     lastRequestTimestamp: 0,
     autoStartTime: Date.now(),
@@ -4693,6 +4694,104 @@ test("runUnitPhase retries 0-tool units with ordinary network-related assistant
   assert.equal(deps.callLog.includes("pauseAuto"), false);
 });
+test("runUnitPhase pauses auto-mode when zero-tool-call retry is exhausted", async (t) => {
+  _resetPendingResolve();
+  const basePath = mkdtempSync(join(tmpdir(), "gsd-zero-tool-exhausted-"));
+  t.after(() => {
+    rmSync(basePath, { recursive: true, force: true });
+  });
+  const ctx = {
+    ...makeMockCtx(),
+    ui: {
+      notify: () => {},
+      setStatus: () => {},
+      setWorkingMessage: () => {},
+    },
+    sessionManager: {
+      getEntries: () => [],
+    },
+    modelRegistry: {
+      getProviderAuthMode: () => undefined,
+      isProviderRequestReady: () => true,
+    },
+  } as any;
+  const pi = {
+    ...makeMockPi(),
+    sendMessage: () => {
+      queueMicrotask(() => resolveAgentEnd(makeEvent([
+        {
+          role: "assistant",
+          content: [
+            { type: "text", text: "Error: I'll investigate the network error handling next." },
+          ],
+        },
+      ])));
+    },
+  } as any;
+  const s = makeLoopSession({
+    basePath,
+    canonicalProjectRoot: basePath,
+    originalBasePath: basePath,
+  });
+  // Pre-seed counter at MAX_ZERO_TOOL_RETRIES so the next zero-tool turn exhausts the cap
+  s.zeroToolRetryCount.set("execute-task/M001/S01/T01", 1);
+  const mockLedger = {
+    version: 1,
+    projectStartedAt: Date.now(),
+    units: [] as any[],
+  };
+  const deps = makeMockDeps({
+    closeoutUnit: async () => {
+      mockLedger.units.push({
+        type: "execute-task",
+        id: "M001/S01/T01",
+        startedAt: s.currentUnit?.startedAt ?? Date.now(),
+        toolCalls: 0,
+        assistantMessages: 1,
+        tokens: { input: 100, output: 20, total: 120, cacheRead: 0, cacheWrite: 0 },
+        cost: 0.01,
+      });
+    },
+    getLedger: () => mockLedger,
+  });
+  let seq = 0;
+  const result = await runUnitPhase(
+    { ctx, pi, s, deps, prefs: undefined, iteration: 1, flowId: "flow-zero-tool-exhausted", nextSeq: () => ++seq },
+    {
+      unitType: "execute-task",
+      unitId: "M001/S01/T01",
+      prompt: "do work",
+      finalPrompt: "do work",
+      pauseAfterUatDispatch: false,
+      state: {
+        phase: "executing",
+        activeMilestone: { id: "M001", title: "Milestone" },
+        activeSlice: { id: "S01", title: "Slice" },
+        activeTask: { id: "T01", title: "Task" },
+        registry: [{ id: "M001", title: "Milestone", status: "active" }],
+        recentDecisions: [],
+        blockers: [],
+        nextAction: "",
+        progress: { milestones: { done: 0, total: 1 } },
+        requirements: { active: 0, validated: 0, deferred: 0, outOfScope: 0, blocked: 0, total: 0 },
+      } as any,
+      mid: "M001",
+      midTitle: "Milestone",
+      isRetry: false,
+      previousTier: undefined,
+    },
+    { recentUnits: [{ key: "execute-task/M001/S01/T01" }], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 },
+  );
+  assert.equal(result.action, "break");
+  assert.equal((result as any).reason, "zero-tool-calls-exhausted");
+  assert.equal(deps.callLog.includes("pauseAuto"), true);
+});
 test("autoLoop pauses user-driven deep question instead of flagging 0 tool calls", async () => {
   _resetPendingResolve();

package/src/resources/extensions/gsd/tests/auto-model-selection-tool-poisoning.test.ts CHANGED

@@ -33,6 +33,7 @@ import {
   selectAndApplyModel,
   ModelPolicyDispatchBlockedError,
   clearToolBaseline,
+  getToolBaselineSnapshot,
 } from "../auto-model-selection.js";
 import { applyModelPolicyFilter } from "../uok/model-policy.js";
 import {
@@ -139,7 +140,7 @@ function makeCtx(
 test("vacuous-truth (a): unit type with empty workflow-required tools → dispatch succeeds", async () => {
   const env = makeTempProject();
   try {
-    // `refine-slice` is not in the getRequiredWorkflowToolsForAutoUnit switch
+    // `rewrite-docs` has no required workflow tools
     // → returns []. Exercises the empty-requiredTools branch in
     // applyModelPolicyFilter (existing test used
     // gate-evaluate which has non-empty required tools and never hit this path).
@@ -161,7 +162,7 @@ test("vacuous-truth (a): unit type with empty workflow-required tools → dispat
     const result = await selectAndApplyModel(
       makeCtx(availableModels),
       pi as any,
-      "refine-slice",
+      "rewrite-docs",
       "x1",
       env.dir,
       undefined,
@@ -308,8 +309,8 @@ test("genuinely-impossible (a): pi-native required tool incompatible with candid
 test("genuinely-impossible (b): cross-provider routing disabled + provider mismatch → typed error", async () => {
   const env = makeTempProject();
   try {
-    // Use plan-slice (workflow-required: ["gsd_plan_slice"]) but pretend no
-    // candidate model can carry it.  The simplest way: provide a model whose
+    // Use plan-slice but pretend no candidate model can carry its required
+    // workflow tools. The simplest way: provide a model whose
     // api is a fictitious "no-tools" string — `filterToolsForProvider` returns
     // every tool as filtered for an unknown api with toolCalling=false, OR we
     // can pick a real api that also denies the tool.  We use an api that
@@ -711,3 +712,64 @@ test("cross-mode (#4965): auto → guided → auto preserves the original auto-e
     env.cleanup();
   }
 });
+// ─── 8. Baseline union: MCP tools connected after baseline capture (#477) ─────
+//
+// `getToolBaselineSnapshot` must return the UNION of the frozen WeakMap baseline
+// and the current live tool set.  This ensures:
+//   (a) Provider-narrowed tools (in baseline, dropped from live) are still seen
+//       by transport preflight — the bug-5 fix from #477.
+//   (b) Tools connected after the baseline was captured (e.g. MCP server attached
+//       mid-session) are also visible — so a paused run that resumes after MCP
+//       reconnects clears the transport warning on the first iteration instead of
+//       repeating it indefinitely.
+test("baseline union (#477): getToolBaselineSnapshot includes live tools not present in frozen baseline", async () => {
+  const env = makeTempProject();
+  try {
+    const availableModels = [
+      { id: "claude-sonnet-4-6", provider: "anthropic", api: "anthropic-messages" },
+    ];
+    const initialTools = ["bash", "read", "write"];
+    const pi = makeRecordingPi(initialTools);
+    clearToolBaseline(pi as unknown as object);
+    // Capture baseline with only native tools (no MCP connected yet).
+    await selectAndApplyModel(
+      makeCtx(availableModels),
+      pi as any,
+      "execute-task",
+      "u1",
+      env.dir,
+      undefined,
+      false,
+      { provider: "anthropic", id: "claude-sonnet-4-6" },
+      undefined,
+      /* isAutoMode */ true,
+    );
+    // Simulate: provider narrows tools (Groq cap, hook override, etc.).
+    // The baseline in the WeakMap still has the full initial set.
+    pi.setActiveTools(["bash"]);
+    // Simulate: user connects MCP mid-session (after the baseline was captured).
+    const liveTools = pi.getActiveTools().concat(["mcp__gsd-workflow__gsd_uat_exec"]);
+    pi.setActiveTools(liveTools);
+    const snapshot = getToolBaselineSnapshot(pi as any);
+    // All baseline tools must be present (even the provider-narrowed ones).
+    for (const t of initialTools) {
+      assert.ok(snapshot.includes(t), `snapshot must include baseline tool: ${t}`);
+    }
+    // Newly connected MCP tool must also be present.
+    assert.ok(
+      snapshot.includes("mcp__gsd-workflow__gsd_uat_exec"),
+      "snapshot must include MCP tool connected after baseline capture",
+    );
+  } finally {
+    env.restoreEnv();
+    env.cleanup();
+  }
+});

package/src/resources/extensions/gsd/tests/auto-recovery.test.ts CHANGED

@@ -1558,6 +1558,14 @@ test("verifyExpectedArtifact complete-milestone passes when DB milestone is comp
     openDatabase(join(base, ".gsd", "gsd.db"));
     insertMilestone({ id: "M001", title: "Milestone One", status: "complete" });
+    insertSlice({ id: "S01", milestoneId: "M001", title: "Done Slice", status: "complete" });
+    insertAssessment({
+      path: "milestones/M001/M001-VALIDATION.md",
+      milestoneId: "M001",
+      status: "pass",
+      scope: "milestone-validation",
+      fullContent: "verdict: pass",
+    });
     const result = verifyExpectedArtifact("complete-milestone", "M001", base);
     assert.equal(result, true, "complete-milestone should pass when DB status is complete");
@@ -1566,7 +1574,7 @@ test("verifyExpectedArtifact complete-milestone passes when DB milestone is comp
   }
 });
-test("verifyExpectedArtifact complete-milestone tolerates transient DB lag when SUMMARY is canonical success (#4658)", () => {
+test("verifyExpectedArtifact complete-milestone rejects success SUMMARY when DB milestone is still open (#4658)", () => {
   const base = makeGitBase();
   try {
     execFileSync("git", ["checkout", "-b", "feat/ms-db-lag-success"], { cwd: base, stdio: "ignore" });
@@ -1591,7 +1599,7 @@ test("verifyExpectedArtifact complete-milestone tolerates transient DB lag when
     insertMilestone({ id: "M001", title: "Milestone One", status: "active" });
     const result = verifyExpectedArtifact("complete-milestone", "M001", base);
-    assert.equal(result, true, "canonical success SUMMARY should pass verification during transient DB lag");
+    assert.equal(result, false, "success SUMMARY must not overrule an open DB milestone");
   } finally {
     cleanup(base);
   }

package/src/resources/extensions/gsd/tests/auto-start-bootstrap-await-3420.test.ts CHANGED

@@ -30,6 +30,7 @@ test("checkAutoStartAfterDiscuss waits until discussion artifacts exist before r
   setPendingAutoStart(base, {
     basePath: base,
     milestoneId: "M001",
+    startAuto: false,
     ctx: { ui: { notify: (message: string) => notifications.push(message) } } as any,
     pi: { sendMessage: () => {} } as any,
   });
@@ -41,5 +42,7 @@ test("checkAutoStartAfterDiscuss waits until discussion artifacts exist before r
   writeFileSync(join(base, ".gsd", "STATE.md"), "# State\n", "utf-8");
   assert.equal(checkAutoStartAfterDiscuss(), true);
-  assert.deepEqual(notifications, ["Milestone M001 ready."]);
+  assert.deepEqual(notifications, [
+    "Milestone M001 context captured. Continuing the planning pipeline.",
+  ]);
 });

package/src/resources/extensions/gsd/tests/auto-supervisor.test.mjs CHANGED

@@ -20,6 +20,10 @@ test('resolveAutoSupervisorConfig provides safe timeout defaults', () => {
     assert.equal(supervisor.soft_timeout_minutes, 20);
     assert.equal(supervisor.idle_timeout_minutes, 10);
     assert.equal(supervisor.hard_timeout_minutes, 30);
+    // A single hung tool gets its own short budget, well below the idle window,
+    // so a genuinely stuck tool is recovered in minutes instead of waiting out
+    // the full idle timeout.
+    assert.equal(supervisor.stalled_tool_timeout_minutes, 5);
   } finally {
     if (previousGsdHome === undefined) {
       delete process.env.GSD_HOME;

package/src/resources/extensions/gsd/tests/auto-warning-noise-regression.test.ts CHANGED

@@ -92,13 +92,23 @@ test("checkAutoStartAfterDiscuss completes when discussion manifest is absent",
     setPendingAutoStart(base, {
       basePath: base,
       milestoneId: "M001",
-      ctx: { ui: { notify: (message: string, level: string) => notifications.push({ message, level }) } } as any,
+      startAuto: false,
+      ctx: {
+        ui: {
+          notify: (message: string, level: string) => notifications.push({ message, level }),
+        },
+      } as any,
       pi: { sendMessage: () => { scheduled = true; } } as any,
     });
     assert.equal(checkAutoStartAfterDiscuss(), true);
     assert.equal(scheduled, false);
-    assert.deepEqual(notifications, [{ message: "Milestone M001 ready.", level: "success" }]);
+    assert.deepEqual(notifications, [
+      {
+        message: "Milestone M001 context captured. Continuing the planning pipeline.",
+        level: "success",
+      },
+    ]);
   } finally {
     clearPendingAutoStart(base);
     rmSync(base, { recursive: true, force: true });

package/src/resources/extensions/gsd/tests/bundled-skill-triggers.test.ts CHANGED

@@ -76,3 +76,12 @@ test('BUNDLED_SKILL_TRIGGERS: skill ids are unique', () => {
     seen.add(skill);
   }
 });
+test('BUNDLED_SKILL_TRIGGERS: gsd-browser and agent-browser stay distinct', () => {
+  const gsdBrowser = BUNDLED_SKILL_TRIGGERS.find(entry => entry.skill === 'gsd-browser');
+  const agentBrowser = BUNDLED_SKILL_TRIGGERS.find(entry => entry.skill === 'agent-browser');
+  assert.ok(gsdBrowser, 'gsd-browser trigger should be registered');
+  assert.ok(agentBrowser, 'agent-browser trigger should be registered');
+  assert.notStrictEqual(gsdBrowser.trigger, agentBrowser.trigger);
+});

package/src/resources/extensions/gsd/tests/check-auto-start-pending-gate.test.ts CHANGED

@@ -164,17 +164,13 @@ describe("checkAutoStartAfterDiscuss Gate 1a (pending depth-verification gate)",
   test("Gate 1a does NOT trip when the pending gate is for a DIFFERENT milestone", () => {
     base = mkBase();
     openDatabase(":memory:");
-    // status: "queued" so that Gate 1b downstream of Gate 1a fires its
-    // recovery notify ("context file exists but milestone is still queued") —
-    // observing that notify proves we advanced past Gate 1a. If Gate 1a
-    // wrongly tripped on the M999 gate it would `return false` immediately
-    // and Gate 1b would never run, so the notify would be absent.
     insertMilestone({ id: "M001", title: "Pending Gate Test", status: "queued" });
     cap = mkCapture();
     setPendingAutoStart(base, {
       basePath: base,
       milestoneId: "M001",
+      startAuto: false,
       ctx: mkCtx(cap),
       pi: mkPi(cap),
     });
@@ -182,21 +178,19 @@ describe("checkAutoStartAfterDiscuss Gate 1a (pending depth-verification gate)",
     setPendingGate("depth_verification_M999_confirm", base);
     const result = checkAutoStartAfterDiscuss();
-    assert.equal(result, false, "Gate 1b returns false (expected) — but only if Gate 1a let us through");
+    assert.equal(result, true, "different milestone gate must not block this handoff");
-    // Positive proof we passed Gate 1a: Gate 1b emitted its recovery notify
-    // about M001 (not M999 — the pending-gate milestone is irrelevant here).
-    const gate1bNotify = cap.notifies.find(n =>
-      n.level === "warning" && /M001.*context file exists but milestone is still queued/i.test(n.msg)
+    const successNotify = cap.notifies.find(n =>
+      n.level === "success" && /M001 context captured/i.test(n.msg)
     );
     assert.ok(
-      gate1bNotify,
-      `expected Gate 1b warning notify about M001; got: ${JSON.stringify(cap.notifies)}`,
+      successNotify,
+      `expected context-captured success notify about M001; got: ${JSON.stringify(cap.notifies)}`,
     );
-    // Negative proof: no Gate 1a notification path exists in source today, but
-    // also assert no notify mentions M999 (the pending-gate milestone) — that
-    // would suggest Gate 1a is leaking the wrong milestone into messaging.
+    const retryNotify = cap.notifies.find(n => /queued|gsd_plan_milestone/i.test(n.msg));
+    assert.equal(retryNotify, undefined, "handoff must not mention queued state or internal plan retry");
     const m999Notify = cap.notifies.find(n => /M999/i.test(n.msg));
     assert.equal(m999Notify, undefined, "no notify should reference M999 (the pending-gate milestone)");
   });

package/src/resources/extensions/gsd/tests/check-auto-start-ready-guard.test.ts CHANGED

@@ -1,9 +1,7 @@
-// gsd-pi + Regression tests for checkAutoStartAfterDiscuss "ready" notify guard (R3b)
+// gsd-pi + Regression tests for checkAutoStartAfterDiscuss handoff copy (R3b)
 //
-// Belt-and-suspenders: even when CONTEXT.md and STATE.md exist on disk, the
-// "Milestone X ready." success notify must not fire when the milestone DB row
-// is absent. Otherwise the user sees "ready" and then /gsd reports
-// "No Active Milestone" because the milestone was never registered.
+// Missing-row repair may accept a context handoff, but "Milestone X ready."
+// is reserved for executable plans with persisted slices in DB mode.
 import { describe, test, beforeEach, afterEach } from "node:test";
 import assert from "node:assert/strict";
@@ -21,6 +19,7 @@ import {
   openDatabase,
   closeDatabase,
   insertMilestone,
+  insertSlice,
   getMilestone,
 } from "../gsd-db.ts";
 import {
@@ -92,49 +91,60 @@ describe("checkAutoStartAfterDiscuss ready-notify DB guard (R3b)", () => {
     }
   });
-  test("does not announce 'ready' when the milestone DB row is absent — recovers via Gate 1b", () => {
+  test("repairs a missing milestone DB row and accepts context-captured handoff", () => {
     base = mkBase();
-    // Open a fresh in-memory DB but DO NOT insertMilestone for M001.
     openDatabase(":memory:");
     cap = mkCapture();
     setPendingAutoStart(base, {
       basePath: base,
       milestoneId: "M001",
+      startAuto: false,
       ctx: mkCtx(cap),
       pi: mkPi(cap),
     });
     const result = checkAutoStartAfterDiscuss();
-    assert.equal(result, false, "must return false when DB row missing");
+    assert.equal(result, true, "missing row with pinned context should repair and accept handoff");
-    // No success "ready" notify
     const successReady = cap.notifies.find(
       (n) => n.level === "success" && /ready\.?$/i.test(n.msg),
     );
     assert.equal(successReady, undefined, "must not announce 'ready' when DB row missing");
-    // When CONTEXT.md is on disk the R3b path recovers: it inserts a placeholder
-    // "queued" row (so Gate 1b can retry gsd_plan_milestone) and emits a warning.
     const recovered = getMilestone("M001");
     assert.ok(recovered, "R3b recovery must insert a placeholder 'queued' DB row");
     assert.equal(recovered!.status, "queued", "placeholder row must have status 'queued'");
-    const warnNotify = cap.notifies.find((n) => n.level === "warning");
-    assert.ok(warnNotify, "must emit a warning notify during R3b recovery");
-    assert.match(warnNotify!.msg, /M001/, "warning must mention the milestone id");
-    assert.match(warnNotify!.msg, /recovering/i, "warning must mention recovery");
+    assert.equal(
+      cap.notifies.some(n => n.level === "warning"),
+      false,
+      "successful missing-row repair must not warn the user",
+    );
+    assert.deepEqual(cap.notifies, [
+      {
+        msg: "Milestone M001 context captured. Continuing the planning pipeline.",
+        level: "success",
+      },
+    ]);
   });
-  test("announces 'ready' when DB row exists", () => {
+  test("announces 'ready' when DB row has executable slices", () => {
     base = mkBase();
     openDatabase(":memory:");
     insertMilestone({ id: "M001", title: "Ready Guard Test", status: "active" });
+    insertSlice({
+      id: "S01",
+      milestoneId: "M001",
+      title: "Executable Slice",
+      status: "pending",
+    });
     cap = mkCapture();
     setPendingAutoStart(base, {
       basePath: base,
       milestoneId: "M001",
+      startAuto: false,
       ctx: mkCtx(cap),
       pi: mkPi(cap),
     });

package/src/resources/extensions/gsd/tests/commands-dispatcher-unmerged-milestone.test.ts CHANGED

@@ -9,6 +9,7 @@ import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { handleGSDCommand } from "../commands/dispatcher.ts";
 import {
   closeDatabase,
+  insertAssessment,
   insertMilestone,
   insertSlice,
   openDatabase,
@@ -106,6 +107,13 @@ function seedRegisteredCompletedWorktreeWithoutRoadmap(base: string): void {
     title: "Live Text Search",
     status: "complete",
   });
+  insertAssessment({
+    path: "milestones/M008/M008-VALIDATION.md",
+    milestoneId: "M008",
+    status: "pass",
+    scope: "milestone-validation",
+    fullContent: "verdict: pass",
+  });
   writeFileSync(
     join(base, ".gsd", "PREFERENCES.md"),
     "---\ngit:\n  isolation: worktree\n---\n",
@@ -124,6 +132,19 @@ function seedRegisteredCompletedWorktree(base: string): void {
   mkdirSync(join(base, ".gsd"), { recursive: true });
   openDatabase(join(base, ".gsd", "gsd.db"));
   insertMilestone({ id: "M008", title: "Live Text Search", status: "complete" });
+  insertSlice({
+    id: "S01",
+    milestoneId: "M008",
+    title: "Live Text Search",
+    status: "complete",
+  });
+  insertAssessment({
+    path: "milestones/M008/M008-VALIDATION.md",
+    milestoneId: "M008",
+    status: "pass",
+    scope: "milestone-validation",
+    fullContent: "verdict: pass",
+  });
   writeWorktreePreferencesAndRoadmap(base);
   const worktreePath = join(base, ".gsd", "worktrees", "M008");

package/src/resources/extensions/gsd/tests/complete-slice-verification-gate.test.ts CHANGED

@@ -155,6 +155,124 @@ describe('complete-slice verification gate (#3580)', () => {
     }
   });
+  // ── Browser/web UAT classification gate (M001/S03 regression) ──────────
+  const BROWSER_UAT_BODY = [
+    '## UAT Type',
+    '- UAT mode: artifact-driven',
+    '',
+    '## Smoke Test',
+    '1. Open the page in a browser and perform add/edit/complete/delete once.',
+  ].join('\n');
+  test('rejects an artifact-driven UAT that drives a browser (open the page in a browser)', async () => {
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: BROWSER_UAT_BODY }),
+      basePath,
+    );
+    assert.ok('error' in result, 'expected handler to reject a browser UAT mislabeled artifact-driven');
+    assert.match((result as { error: string }).error, /requires browser verification/i);
+  });
+  test('allows a runtime-executable UAT that runs a browser test command (playwright)', async () => {
+    // Bugbot regression: runtime-executable legitimately drives a browser via a
+    // command captured by gsd_uat_exec — it must not be pushed to gsd-browser.
+    const body = [
+      '## UAT Type',
+      '- UAT mode: runtime-executable',
+      '',
+      '## Test Cases',
+      '1. Run `npx playwright test` and confirm a passing exit code; capture a screenshot artifact.',
+      '2. Hit http://localhost:3000/health and assert a 200 response.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /artifact-driven|browser-capable|browser verification/i,
+        `runtime-executable command UATs must not be gated, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows an artifact-driven UAT that only disclaims browser coverage (no false positive)', async () => {
+    // S01-style: genuinely artifact-driven persistence scaffolding that merely
+    // mentions "cross-browser" / "browser-level" in a Not-Proven disclaimer.
+    const body = [
+      '## UAT Type',
+      '- UAT mode: artifact-driven',
+      '',
+      '## Not Proven By This UAT',
+      '- Interactive browser-level CRUD and real cross-browser localStorage behavior.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `disclaimer-only mention must not trip the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows an artifact-driven UAT whose "navigate" step targets a file, not a browser', async () => {
+    // Bugbot regression: a bare "navigate to <file/API>" must not trip the gate
+    // just because it contains the word "navigate".
+    const body = [
+      '## UAT Type',
+      '- UAT mode: artifact-driven',
+      '',
+      '## Test Cases',
+      '1. Navigate to the generated report file and confirm the schema section exists.',
+    ].join('\n');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `non-web "navigate" must not trip the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows a browser UAT when it is declared browser-executable', async () => {
+    const body = BROWSER_UAT_BODY.replace('artifact-driven', 'browser-executable');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `browser-executable UAT must pass the browser gate, got: ${result.error}`,
+      );
+    }
+  });
+  test('allows a browser UAT when it is declared mixed (mixed receives browser tools)', async () => {
+    const body = BROWSER_UAT_BODY.replace('artifact-driven', 'mixed (artifact-driven + browser)');
+    const result = await handleCompleteSlice(
+      makeParams({ uatContent: body }),
+      basePath,
+    );
+    if ('error' in result) {
+      assert.doesNotMatch(
+        result.error,
+        /requires browser verification/i,
+        `mixed UAT must pass the browser gate, got: ${result.error}`,
+      );
+    }
+  });
   test('backfills prior verification narrative when verification is omitted on re-completion', async () => {
     // Seed full_summary_md with a prior verification narrative (simulates a
     // previous completion where the verification text was recorded).