npm - gsd-pi - Versions diffs - 2.77.0-dev.58d3d4d6c → 2.77.0-dev.cfd69e714 - Mend

gsd-pi 2.77.0-dev.58d3d4d6c → 2.77.0-dev.cfd69e714

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (429) hide show

package/src/resources/extensions/gsd/tests/auto-deterministic-error-classification-4973.test.ts ADDED Viewed

@@ -0,0 +1,341 @@
+// GSD-2 + Regression tests for deterministic policy error classification (#4973)
+//
+// When gsd_summary_save returns context_write_blocked (a deterministic write-gate
+// rejection), the retry controller must NOT re-dispatch with escalating model tiers.
+// Instead it must write a blocker placeholder and advance the pipeline immediately.
+//
+// Test 5 — deterministic error short-circuits retry:
+//   - isDeterministicPolicyError correctly classifies context_write_blocked errors
+//   - recordToolInvocationError captures deterministic errors in lastToolInvocationError
+//   - postUnitPreVerification returns "continue" (not "retry"), writes placeholder,
+//     leaves pendingVerificationRetry null — zero additional model calls dispatched
+//
+// Test 6 — model-quality failures still use standard retry path:
+//   - non-deterministic failures set pendingVerificationRetry and return "retry"
+//   - tier escalates on retry 1 (previousTier "standard" → "heavy")
+//   - tier is RETAINED at "heavy" on subsequent retries (no downgrade back to fresh
+//     classification when already at max tier) — "escalate once" semantics
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, existsSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { randomUUID } from "node:crypto";
+import {
+  isDeterministicPolicyError,
+  DETERMINISTIC_POLICY_ERROR_STRINGS,
+} from "../auto-tool-tracking.ts";
+import { AutoSession } from "../auto/session.ts";
+import { _setAutoActiveForTest } from "../auto.ts";
+import { escalateTier } from "../model-router.ts";
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+/** Scratch directories created by this file; deleted by the process "exit" hook. */
+const tmpDirs: string[] = [];
+/**
+ * Creates a uniquely named scratch project root that already contains
+ * `.gsd/milestones/M001`, and records it in `tmpDirs`.
+ *
+ * @returns Path of the newly created directory.
+ */
+function makeTmpBase(): string {
+  const uniqueTag = randomUUID().slice(0, 8);
+  const prefix = join(tmpdir(), `gsd-test-4973-${uniqueTag}-`);
+  const root = mkdtempSync(prefix);
+  tmpDirs.push(root);
+  const milestoneDir = join(root, ".gsd", "milestones", "M001");
+  mkdirSync(milestoneDir, { recursive: true });
+  return root;
+}
+/** Switches the auto-mode test flag back to `false`; registered as a beforeEach/afterEach hook. */
+function resetAutoState(): void {
+  _setAutoActiveForTest(false);
+}
+// ─── Test 5: Deterministic error short-circuits retry ─────────────────────
+describe("Test 5 — isDeterministicPolicyError classifier (#4973)", () => {
+  // ── Classifier unit tests ──────────────────────────────────────────────
+  test("classifies context_write_blocked fallback text as deterministic", () => {
+    // Fallback wording emitted by workflow-tool-executors.ts when
+    // contextGuard.reason is undefined:
+    // `Error saving artifact: ${contextGuard.reason ?? "context write blocked"}`
+    const fallbackMessage = "gsd_summary_save: Error saving artifact: context write blocked";
+    const verdict = isDeterministicPolicyError(fallbackMessage);
+    assert.strictEqual(
+      verdict,
+      true,
+      "fallback context_write_blocked text must be classified as deterministic",
+    );
+  });
+  test("classifies write-gate verbose reason as deterministic", () => {
+    // Wording seen when shouldBlockContextArtifactSaveInSnapshot supplies its own reason.
+    const toolPrefix = "gsd_summary_save: Error saving artifact:";
+    const hardBlock = "HARD BLOCK: Cannot save milestone CONTEXT without depth verification for M001.";
+    const gateNotice = "This is a mechanical gate — you MUST NOT proceed, retry, or rationalize past this block.";
+    const verboseMessage = `${toolPrefix} ${hardBlock} ${gateNotice}`;
+    assert.strictEqual(
+      isDeterministicPolicyError(verboseMessage),
+      true,
+      "verbose write-gate reason containing 'CONTEXT without depth verification' must be classified as deterministic",
+    );
+  });
+  test("returns false for malformed-JSON errors (separate classification path)", () => {
+    const jsonParseFailure = "Unexpected end of JSON input";
+    const schemaFailure = "Validation failed for tool gsd_complete_slice";
+    assert.strictEqual(
+      isDeterministicPolicyError(jsonParseFailure),
+      false,
+      "malformed-JSON errors are not deterministic policy errors",
+    );
+    assert.strictEqual(isDeterministicPolicyError(schemaFailure), false);
+  });
+  test("returns false for normal business-logic tool errors", () => {
+    // Ordinary tool failures; neither belongs to the write-gate family.
+    const businessErrors = ["Slice S01 is already complete", "Error saving artifact: db_unavailable"];
+    for (const message of businessErrors) {
+      assert.strictEqual(isDeterministicPolicyError(message), false);
+    }
+  });
+  test("returns false for empty string", () => {
+    const emptyInput = "";
+    assert.strictEqual(isDeterministicPolicyError(emptyInput), false);
+  });
+  test("DETERMINISTIC_POLICY_ERROR_STRINGS list is non-empty and contains context_write_blocked entry", () => {
+    const knownStrings = DETERMINISTIC_POLICY_ERROR_STRINGS;
+    assert.ok(knownStrings.length > 0, "must have at least one known deterministic error string");
+    // Either marker identifies an entry of the context_write_blocked family.
+    const familyMarkers = ["context write blocked", "CONTEXT without depth verification"];
+    const hasContextWriteBlocked = knownStrings.some((entry) =>
+      familyMarkers.some((marker) => entry.includes(marker)),
+    );
+    assert.ok(hasContextWriteBlocked, "must include context_write_blocked family entries");
+  });
+});
+describe("Test 5 — recordToolInvocationError captures deterministic errors (#4973)", () => {
+  // Representative context_write_blocked message shared by every test in this group.
+  const BLOCKED_ERROR = "gsd_summary_save: Error saving artifact: context write blocked";
+  beforeEach(resetAutoState);
+  afterEach(resetAutoState);
+  test("stored deterministic error is recognised by the classifier (post-fix behaviour)", () => {
+    // Post-fix contract (#4973): recordToolInvocationError is expected to store
+    // deterministic policy errors in lastToolInvocationError, in addition to the
+    // isToolInvocationError / isQueuedUserMessageSkip cases it already handled.
+    //
+    // recordToolInvocationError is not exported, so this test does NOT call it.
+    // It assigns the session field directly and checks the precondition that
+    // postUnitPreVerification relies on: the classifier matches the stored text.
+    //
+    // NOTE(review): nothing in this file invokes recordToolInvocationError itself;
+    // the postUnitPreVerification test below also assigns the field by hand.
+    _setAutoActiveForTest(true);
+    const s = new AutoSession();
+    assert.strictEqual(s.lastToolInvocationError, null, "starts null");
+    s.lastToolInvocationError = BLOCKED_ERROR;
+    assert.ok(
+      isDeterministicPolicyError(s.lastToolInvocationError),
+      "classifier recognises the stored error — short-circuit will fire",
+    );
+    // Merely storing the error must not schedule a retry.
+    assert.strictEqual(s.pendingVerificationRetry, null, "pendingVerificationRetry starts null");
+  });
+  test("AutoSession.lastToolInvocationError can hold a deterministic policy error string", () => {
+    const s = new AutoSession();
+    s.lastToolInvocationError = BLOCKED_ERROR;
+    assert.ok(s.lastToolInvocationError);
+    assert.ok(isDeterministicPolicyError(s.lastToolInvocationError));
+  });
+  test("AutoSession.lastToolInvocationError is cleared on reset()", () => {
+    const s = new AutoSession();
+    s.lastToolInvocationError = BLOCKED_ERROR;
+    s.reset();
+    assert.strictEqual(s.lastToolInvocationError, null);
+  });
+});
+describe("Test 5 — postUnitPreVerification short-circuits on deterministic error (#4973)", () => {
+  // Integration check: with a deterministic error already stored in
+  // lastToolInvocationError, postUnitPreVerification must
+  //   1. leave pendingVerificationRetry unset (no retry dispatched),
+  //   2. write the blocker placeholder to disk, and
+  //   3. return "continue" (not "retry" or "dispatched").
+  let base = "";
+  beforeEach(() => {
+    base = makeTmpBase();
+    _setAutoActiveForTest(true);
+  });
+  afterEach(() => {
+    // No per-test directory removal here: the process "exit" hook deletes
+    // everything recorded in tmpDirs.
+    _setAutoActiveForTest(false);
+  });
+  test("returns 'continue' and writes placeholder for context_write_blocked — no pendingVerificationRetry set", async () => {
+    const { postUnitPreVerification } = await import("../auto-post-unit.ts");
+    const retryKey = "discuss-milestone:M001";
+    const session = new AutoSession();
+    session.active = true;
+    session.basePath = base;
+    session.currentUnit = { type: "discuss-milestone", id: "M001", startedAt: Date.now() };
+    // Pre-seed the error text this scenario assumes was recorded earlier, plus a
+    // stale retry count that the short-circuit is expected to clear.
+    session.lastToolInvocationError = "gsd_summary_save: Error saving artifact: context write blocked";
+    session.verificationRetryCount.set(retryKey, 2);
+    let pauseCalled = false;
+    // Minimal stand-ins for the collaborators; only pauseAuto records anything.
+    const pctx = {
+      s: session,
+      ctx: { ui: { notify: () => {} } } as any,
+      pi: {} as any,
+      buildSnapshotOpts: () => ({}) as any,
+      lockBase: () => base,
+      stopAuto: async () => {},
+      pauseAuto: async () => { pauseCalled = true; },
+      updateProgressWidget: () => {},
+    } as any;
+    const outcome = await postUnitPreVerification(pctx, { skipSettleDelay: true });
+    // The short-circuit advances the pipeline and leaves the session tidy.
+    assert.strictEqual(outcome, "continue", "must return 'continue', not 'retry' or 'dispatched'");
+    assert.strictEqual(session.pendingVerificationRetry, null, "pendingVerificationRetry must NOT be set");
+    assert.strictEqual(session.verificationRetryCount.has(retryKey), false, "deterministic short-circuit clears stale retry count");
+    assert.strictEqual(session.lastToolInvocationError, null, "lastToolInvocationError cleared after handling");
+    assert.strictEqual(pauseCalled, false, "pauseAuto must NOT be called for deterministic errors");
+    // The placeholder file is what lets the pipeline move past this unit.
+    const placeholderPath = join(base, ".gsd", "milestones", "M001", "M001-CONTEXT.md");
+    const placeholderExists = existsSync(placeholderPath);
+    assert.ok(
+      placeholderExists,
+      `blocker placeholder must be written at ${placeholderPath}`,
+    );
+  });
+});
+// ─── Test 6: Model-quality failures use standard retry path ──────────────────
+describe("Test 6 — non-deterministic failures use standard retry; tier escalates once (#4973)", () => {
+  // ── escalateTier behavior (existing, unchanged) ───────────────────────────
+  test("escalateTier: light → standard → heavy → null (max)", () => {
+    const afterLight = escalateTier("light");
+    assert.strictEqual(afterLight, "standard");
+    const afterStandard = escalateTier("standard");
+    assert.strictEqual(afterStandard, "heavy");
+    const afterHeavy = escalateTier("heavy");
+    assert.strictEqual(afterHeavy, null, "heavy is the max tier — no further escalation");
+  });
+  test("standard-start retry: escalates to heavy on retry 1, stays at heavy on retry 2 (escalateTier returns null)", () => {
+    // Walks the two escalateTier calls a standard-start unit would see across two retries.
+    // Retry 1 starts from "standard" and must move up to "heavy".
+    const firstRetryTier = escalateTier("standard");
+    assert.strictEqual(firstRetryTier, "heavy", "retry 1: standard escalates to heavy");
+    // Retry 2 starts from "heavy"; nothing sits above it, so escalateTier yields null.
+    const secondRetryTier = escalateTier("heavy");
+    assert.strictEqual(secondRetryTier, null, "retry 2: heavy cannot escalate further");
+    // NOTE(review): the comparison below uses a rank table local to this test. It
+    // documents the intended "retain the escalated tier" rule (#4973) but does not
+    // execute selectAndApplyModel.
+    const rankOf: Record<string, number> = { light: 0, standard: 1, heavy: 2 };
+    const previousRank = rankOf["heavy"] ?? 0; // tier carried over from retry 1
+    const freshRank = rankOf["standard"] ?? 0; // fresh classification of a standard unit
+    assert.ok(
+      previousRank > freshRank,
+      "prevOrder(heavy=2) > freshOrder(standard=1) — the fix retains 'heavy' and prevents revert",
+    );
+  });
+  test("light-start retry 3: escalated tier is retained, not reverted to 'light'", () => {
+    // Scenario: a light-start unit reaches "heavy" after two retries. On retry 3
+    // escalateTier has nowhere to go, and the fresh classification would be "light";
+    // the intended rule keeps "heavy" instead of dropping back.
+    const retry1Tier = escalateTier("light");
+    assert.strictEqual(retry1Tier, "standard");
+    const retry2Tier = escalateTier("standard");
+    assert.strictEqual(retry2Tier, "heavy");
+    const retry3Tier = escalateTier("heavy");
+    assert.strictEqual(retry3Tier, null);
+    // NOTE(review): as in the previous test, this rank comparison is local to the
+    // test and does not call the production tier-selection code.
+    const rankOf: Record<string, number> = { light: 0, standard: 1, heavy: 2 };
+    const previousRank = rankOf["heavy"] ?? 0; // 2
+    const freshRank = rankOf["light"] ?? 0; // 0
+    assert.ok(
+      previousRank > freshRank,
+      "on retry 3, prevOrder(heavy=2) > freshOrder(light=0) — 'heavy' must be retained, not reverted",
+    );
+  });
+  test("non-deterministic error: session sets pendingVerificationRetry (standard retry path)", () => {
+    // NOTE(review): this test reproduces the standard-retry bookkeeping by hand on an
+    // AutoSession; it does not call postUnitPreVerification.
+    const session = new AutoSession();
+    session.currentUnit = { type: "plan-slice", id: "M001:S01", startedAt: Date.now() };
+    const { type: unitType, id: unitId } = session.currentUnit;
+    // Bump the per-unit retry counter.
+    const retryKey = `${unitType}:${unitId}`;
+    const attempt = (session.verificationRetryCount.get(retryKey) ?? 0) + 1;
+    session.verificationRetryCount.set(retryKey, attempt);
+    // Record the pending retry the way the non-short-circuit branch is described to.
+    session.pendingVerificationRetry = {
+      unitId,
+      failureContext: `Artifact verification failed: expected artifact for ${unitType} "${unitId}" was not found on disk after unit execution (attempt ${attempt}).`,
+      attempt,
+    };
+    const pending = session.pendingVerificationRetry;
+    assert.ok(pending !== null, "standard retry path sets pendingVerificationRetry");
+    assert.strictEqual(pending.attempt, 1, "attempt is 1");
+    assert.ok(
+      pending.failureContext.includes("plan-slice"),
+      "failureContext references the unit type",
+    );
+  });
+  test("isDeterministicPolicyError returns false for non-deterministic verification failure", () => {
+    // None of these messages is a policy rejection, so the classifier must say
+    // false and leave them to the standard retry path.
+    const noToolError = "";
+    assert.strictEqual(
+      isDeterministicPolicyError(noToolError),
+      false,
+      "empty error (no tool error) is not deterministic",
+    );
+    const artifactMissing = "Artifact not found on disk";
+    assert.strictEqual(
+      isDeterministicPolicyError(artifactMissing),
+      false,
+      "plain artifact-missing message is not a deterministic policy error",
+    );
+    const existsCheckFailed = "existsSync returned false";
+    assert.strictEqual(isDeterministicPolicyError(existsCheckFailed), false);
+  });
+});
+// Best-effort removal, at process exit, of every temp dir recorded in tmpDirs.
+process.on("exit", () => {
+  tmpDirs.forEach((dir) => {
+    try {
+      rmSync(dir, { recursive: true, force: true });
+    } catch {
+      /* ignore */
+    }
+  });
+});

package/src/resources/extensions/gsd/tests/auto-discuss-milestone-deadlock-4973.test.ts ADDED Viewed

@@ -0,0 +1,264 @@
+// GSD-2 + Regression tests for auto-mode discuss-milestone write-gate deadlock (#4973)
+//
+// The depth-verification write-gate in write-gate.ts:415-443 blocks
+// gsd_summary_save({artifact_type:"CONTEXT"}) until markDepthVerified() is
+// called. In interactive mode this happens when the user picks the confirmation
+// option in ask_user_questions. In auto-mode there is no human — the gate
+// deadlocked every discuss-milestone unit, wasting 200K-360K tokens per run.
+//
+// Fix: each dispatch rule that fires a discuss-milestone unit now calls
+// markDepthVerified(mid) when isAutoActive() is true, before returning the
+// dispatch action. These tests verify:
+//   Test 1 — CONTEXT artifact save unblocks after markDepthVerified
+//   Test 2 — raw write to *-CONTEXT.md unblocks after markDepthVerified
+//   Test 3 — session_switch ordering: clearDiscussionFlowState clears the mark
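+//   Test 4 — interactive sessions (isAutoActive===false) are unaffected
+//   Test 5 — the needs-discussion dispatch rule itself marks depth-verified in
+//            auto-mode and does not mark it in interactive mode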
+import { describe, test, afterEach, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, rmSync, existsSync, unlinkSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  markDepthVerified,
+  clearDiscussionFlowState,
+  shouldBlockContextArtifactSaveInSnapshot,
+  shouldBlockContextWrite,
+  loadWriteGateSnapshot,
+  isMilestoneDepthVerifiedInSnapshot,
+} from '../bootstrap/write-gate.ts';
+import { DISPATCH_RULES, type DispatchContext } from '../auto-dispatch.ts';
+import { _setAutoActiveForTest } from '../auto.ts';
+// Reset all relevant state before and after each test: turns the auto-mode
+// test flag off, then calls clearDiscussionFlowState() (Test 3 below shows
+// that this drops any depth-verified mark).
+function resetState(): void {
+  _setAutoActiveForTest(false);
+  clearDiscussionFlowState();
+}
+describe('auto-discuss-milestone-deadlock-4973', () => {
+  // Run resetState on both sides of every test so no test inherits, or leaves
+  // behind, an active auto flag or a depth-verified mark.
+  beforeEach(resetState);
+  afterEach(resetState);
+  // ── Test 1 ──────────────────────────────────────────────────────────────
+  // CONTEXT artifact save via gsd_summary_save is blocked before the mark
+  // and unblocked after it. This is the exact path that deadlocked in #4973:
+  // workflow-tool-executors.ts calls shouldBlockContextArtifactSaveInSnapshot
+  // against a snapshot that had no verified milestones.
+  test('Test 1: CONTEXT artifact save unblocks after markDepthVerified (auto-mode)', () => {
+    // Evaluates the CONTEXT-save gate for M001 against a freshly loaded snapshot.
+    const evaluateGate = () =>
+      shouldBlockContextArtifactSaveInSnapshot(loadWriteGateSnapshot(), 'CONTEXT', 'M001', null);
+    _setAutoActiveForTest(true);
+    // Unverified milestone: the save must be rejected.
+    const verdictBefore = evaluateGate();
+    assert.strictEqual(verdictBefore.block, true, 'should block before markDepthVerified');
+    // Stand-in for the side effect the auto-mode dispatch rule now performs.
+    markDepthVerified('M001');
+    // Verified milestone: the save goes through.
+    const verdictAfter = evaluateGate();
+    assert.strictEqual(verdictAfter.block, false, 'should not block after markDepthVerified');
+  });
+  // ── Test 2 ──────────────────────────────────────────────────────────────
+  // Raw write tool to a *-CONTEXT.md path is also gated. The register-hooks
+  // tool_call handler calls shouldBlockContextWrite for write events.
+  test('Test 2: raw write to M001-CONTEXT.md unblocks after markDepthVerified (auto-mode)', () => {
+    const targetPath = '.gsd/milestones/M001/M001-CONTEXT.md';
+    // Asks the write gate about a raw `write` tool call aimed at the CONTEXT file.
+    const evaluateGate = () => shouldBlockContextWrite('write', targetPath, 'M001');
+    _setAutoActiveForTest(true);
+    // Unverified milestone: the write is refused.
+    const verdictBefore = evaluateGate();
+    assert.strictEqual(verdictBefore.block, true, 'write should be blocked before markDepthVerified');
+    // Stand-in for the dispatch rule's auto-mark.
+    markDepthVerified('M001');
+    // Verified milestone: the same write is allowed.
+    const verdictAfter = evaluateGate();
+    assert.strictEqual(verdictAfter.block, false, 'write should not be blocked after markDepthVerified');
+  });
+  // ── Test 3 ──────────────────────────────────────────────────────────────
+  // Documents the session_switch ordering contract.
+  //
+  // When auto-mode dispatches a new session, the event sequence is:
+  //   session_switch → clearDiscussionFlowState() (register-hooks.ts:106)
+  //   before_agent_start fires
+  //   resolveDispatch is called → discuss-milestone rule match fn runs
+  //   markDepthVerified(mid) is called HERE (after the clear)
+  //
+  // This test demonstrates that clearDiscussionFlowState() (the session_switch
+  // side effect) clears any previously set mark, and that calling
+  // markDepthVerified after the clear correctly re-establishes it — proving
+  // the dispatch-site call site is safe regardless of prior session state.
+  test('Test 3: session_switch ordering — clearDiscussionFlowState clears mark; dispatch-site call re-establishes it', () => {
+    // Step 1: a mark left over from an earlier session.
+    markDepthVerified('M001');
+    const priorSnapshot = loadWriteGateSnapshot();
+    assert.strictEqual(
+      isMilestoneDepthVerifiedInSnapshot(priorSnapshot, 'M001'),
+      true,
+      'precondition: mark set from prior session',
+    );
+    // Step 2: the session_switch side effect wipes that mark.
+    clearDiscussionFlowState();
+    const clearedSnapshot = loadWriteGateSnapshot();
+    assert.strictEqual(
+      isMilestoneDepthVerifiedInSnapshot(clearedSnapshot, 'M001'),
+      false,
+      'session_switch (clearDiscussionFlowState) must clear the mark',
+    );
+    // Step 3: the dispatch rule runs after the clear and marks the milestone again.
+    _setAutoActiveForTest(true);
+    markDepthVerified('M001'); // stand-in for the dispatch rule's call
+    const remarkedSnapshot = loadWriteGateSnapshot();
+    assert.strictEqual(
+      isMilestoneDepthVerifiedInSnapshot(remarkedSnapshot, 'M001'),
+      true,
+      'dispatch-site markDepthVerified re-establishes the mark after session_switch cleared it',
+    );
+    // Step 4: with the mark restored, the CONTEXT save passes the gate.
+    const saveVerdict = shouldBlockContextArtifactSaveInSnapshot(remarkedSnapshot, 'CONTEXT', 'M001', null);
+    assert.strictEqual(saveVerdict.block, false, 'CONTEXT save unblocked in the new session');
+  });
+  // ── Test 4 ──────────────────────────────────────────────────────────────
+  // Interactive sessions (isAutoActive()===false) must NOT be auto-marked.
+  // The dispatch rules guard the markDepthVerified call with isAutoActive(),
+  // so in a non-auto session the gate still requires the human to confirm.
+  // This test passes on both current main AND with the fix applied.
+  test('Test 4: interactive sessions unaffected — gate still blocks unverified milestones when auto is off', () => {
+    _setAutoActiveForTest(false);
+    // markDepthVerified is deliberately never called: this models an interactive
+    // session, where the dispatch rule's isAutoActive() guard skips the auto-mark.
+    const unverifiedSnapshot = loadWriteGateSnapshot();
+    // Gate 1: saving the CONTEXT artifact is still refused.
+    const saveVerdict = shouldBlockContextArtifactSaveInSnapshot(unverifiedSnapshot, 'CONTEXT', 'M002', null);
+    assert.strictEqual(
+      saveVerdict.block,
+      true,
+      'CONTEXT save must still be blocked in interactive mode without depth verification',
+    );
+    // Gate 2: a raw write to the CONTEXT file is still refused.
+    const contextPath = '.gsd/milestones/M002/M002-CONTEXT.md';
+    const writeVerdict = shouldBlockContextWrite('write', contextPath, 'M002');
+    assert.strictEqual(
+      writeVerdict.block,
+      true,
+      'write to CONTEXT.md must still be blocked in interactive mode',
+    );
+  });
+  // ── Test 5 ──────────────────────────────────────────────────────────────
+  // The actual fix lives inside the discuss-milestone dispatch rules at
+  // auto-dispatch.ts:280-291, :423-432, :449-458. This test invokes the
+  // "needs-discussion → discuss-milestone" rule directly and asserts that
+  // (a) the rule auto-marks depth-verified when isAutoActive() is true, and
+  // (b) it does NOT mark when isAutoActive() is false.
+  //
+  // This is the test codex flagged as missing: Tests 1-4 above only exercise
+  // the markDepthVerified primitive — they pass on origin/main. This Test 5
+  // FAILS on origin/main (the rule does nothing for the gate) and PASSES with
+  // the fix (the rule calls markDepthVerified inside isAutoActive()).
+  test('Test 5: needs-discussion dispatch rule auto-marks depth-verified in auto-mode', async () => {
+    const rule = DISPATCH_RULES.find(r => r.name === 'needs-discussion → discuss-milestone');
+    assert.ok(rule, 'dispatch rule must exist');
+    // A real temp directory serves as basePath so that the snapshot the rule
+    // writes and the snapshot this test reads are the same file, rather than
+    // one of them living under process.cwd().
+    const tempBase = mkdtempSync(join(tmpdir(), '4973-rule-test-'));
+    const snapshotFile = join(tempBase, '.gsd', 'runtime', 'write-gate-state.json');
+    try {
+      const baseCtx = {
+        basePath: tempBase,
+        mid: 'M005',
+        midTitle: 'Test Milestone',
+        state: { phase: 'needs-discussion' },
+        prefs: undefined,
+        structuredQuestionsAvailable: 'false',
+      } as unknown as DispatchContext;
+      // Re-reads the snapshot under tempBase and reports whether M005 is marked.
+      const isM005Verified = () =>
+        isMilestoneDepthVerifiedInSnapshot(loadWriteGateSnapshot(tempBase), 'M005');
+      // Runs the rule's match fn. Any error is tolerated because the prompt build
+      // may fail for this bare directory and only the mark is under test.
+      const runRule = async () => {
+        try { await rule!.match(baseCtx); } catch { /* prompt build may fail; we only care about the mark */ }
+      };
+      // ── Auto-mode case: the rule must leave M005 marked ──
+      _setAutoActiveForTest(true);
+      assert.strictEqual(isM005Verified(), false, 'precondition: M005 not yet marked');
+      await runRule();
+      assert.strictEqual(
+        isM005Verified(),
+        true,
+        'auto-mode: dispatch rule must call markDepthVerified(mid) — this fails on origin/main without the H6 fix',
+      );
+      // ── Interactive case: the rule must NOT mark M005 ──
+      // clearDiscussionFlowState() only removes the snapshot under process.cwd(),
+      // so the copy under tempBase is deleted by hand as well.
+      clearDiscussionFlowState();
+      if (existsSync(snapshotFile)) unlinkSync(snapshotFile);
+      _setAutoActiveForTest(false);
+      assert.strictEqual(isM005Verified(), false, 'precondition: state cleared');
+      await runRule();
+      assert.strictEqual(
+        isM005Verified(),
+        false,
+        'interactive mode: dispatch rule must NOT call markDepthVerified — humans still confirm',
+      );
+    } finally {
+      // Always remove the scratch directory, even when an assertion failed.
+      rmSync(tempBase, { recursive: true, force: true });
+    }
+  });
+});