npm - @opengsd/gsd-pi - Versions diffs - 1.3.0-dev.65546769 → 1.3.0-dev.eed73bea - Mend

@opengsd/gsd-pi 1.3.0-dev.65546769 → 1.3.0-dev.eed73bea

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

package/src/resources/extensions/gsd/tests/read-uat-gate-verdict.test.ts ADDED Viewed

@@ -0,0 +1,185 @@
+/**
+ * Behavioural regression test for the milestone-closeout UAT gate —
+ * `readUatGateVerdict`.
+ *
+ * The gate (ADR-017: DB-authoritative UAT sign-off) reads a slice's UAT
+ * verdict from its ASSESSMENT artifact via the *canonical* expected path
+ * (`resolveSliceFile` + a path-keyed `getAssessment`). When a milestone
+ * artifact-layout migration orphans the ASSESSMENT markdown from that canonical
+ * path (e.g. `phases/…` → `milestones/…`), the gate used to return `null` and
+ * block milestone closure with "missing UAT PASS verdict" — even though the
+ * verdict was correctly recorded in the `assessments` table by
+ * `gsd_uat_result_save`.
+ *
+ * The DB fallback added to `readUatGateVerdict` consults the authoritative
+ * `assessments` table by (milestoneId, sliceId, scope='run-uat') identity,
+ * independent of path. These tests pin that behaviour.
+ */
+import { describe, test, beforeEach, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertAssessment,
+} from '../gsd-db.ts';
+import { readUatGateVerdict } from '../auto-dispatch.ts';
+/** Creates a uniquely named scratch directory under the OS temp dir and returns the path `test.db` inside it. */
+function tempDbPath(): string {
+  const prefix = path.join(os.tmpdir(), 'gsd-uat-gate-');
+  const scratchDir = fs.mkdtempSync(prefix);
+  return path.join(scratchDir, 'test.db');
+}
+/**
+ * Calls `closeDatabase()` and then removes the directory that contains `dbPath`.
+ *
+ * The removal runs in a `finally` so that a throwing `closeDatabase()` cannot
+ * leak the temp directory; the close error itself still propagates to the
+ * caller. Removal failures are deliberately ignored (best-effort cleanup).
+ *
+ * @param dbPath Path of the database file; its parent directory is deleted recursively.
+ */
+function cleanupDb(dbPath: string): void {
+  try {
+    closeDatabase();
+  } finally {
+    try {
+      fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
+    } catch {
+      /* best-effort: a leftover temp dir must not fail the test */
+    }
+  }
+}
+const MID = 'M001'; // milestone id used by every fixture in this file
+const SLICE = 'S01'; // slice id used by every fixture in this file
+/**
+ * Builds `<basePath>/.gsd/milestones/<MID>/slices/<SLICE>/<SLICE>-ASSESSMENT.md`,
+ * i.e. the on-disk ASSESSMENT location this test treats as the canonical one
+ * produced by `resolveSliceFile`.
+ */
+function canonicalAssessmentPath(basePath: string): string {
+  const sliceDir = path.join(basePath, '.gsd', 'milestones', MID, 'slices', SLICE);
+  return path.join(sliceDir, `${SLICE}-ASSESSMENT.md`);
+}
+/**
+ * An ASSESSMENT body that declares a runtime-executable UAT type and a PASS verdict.
+ * The lines are joined with '\n'; `verdict: pass` sits between the two leading `---`
+ * lines, and one test below swaps it for `verdict: fail` via `String.replace`.
+ */
+const RUNTIME_PASS_BODY = [
+  '---',
+  'verdict: pass',
+  '---',
+  '',
+  '# S01 UAT Assessment',
+  '',
+  '## UAT Type',
+  '- UAT mode: runtime-executable',
+  '',
+  '## Result',
+  'All checks passed.',
+].join('\n');
+describe('readUatGateVerdict — DB fallback for orphaned ASSESSMENT', () => {
+  let dbPath: string;
+  let basePath: string;
+  beforeEach(() => {
+    dbPath = tempDbPath();
+    openDatabase(dbPath);
+    basePath = fs.mkdtempSync(path.join(os.tmpdir(), 'gsd-uat-gate-proj-')); // fresh, empty project root for each test
+    insertMilestone({ id: MID });
+    insertSlice({ id: SLICE, milestoneId: MID });
+  });
+  afterEach(() => {
+    cleanupDb(dbPath);
+    try { fs.rmSync(basePath, { recursive: true, force: true }); } catch { /* best-effort: ignore removal failures */ }
+  });
+  test('returns pass when the ASSESSMENT is keyed by a legacy/orphaned path (the bug)', async () => {
+    // Reproduces milestone 15: `gsd_uat_result_save` wrote S01's assessment row
+    // under a now-migrated path; the canonical file never existed on disk and
+    // the `assessments.path` is not what `resolveSliceFile` computes.
+    insertAssessment({
+      // Deliberately non-canonical — a legacy `phases/…` path.
+      path: `.gsd/phases/01-some-feature/01-01-ASSESSMENT.md`,
+      milestoneId: MID,
+      sliceId: SLICE,
+      status: 'pass',
+      scope: 'run-uat',
+      fullContent: RUNTIME_PASS_BODY,
+    });
+    const result = await readUatGateVerdict(basePath, MID, SLICE); // this test writes nothing under basePath, so only the DB row can supply a verdict
+    assert.ok(result, 'expected the DB fallback to resolve a verdict, got null');
+    assert.equal(result!.verdict, 'pass');
+  });
+  test('the DB fallback derives uatType from the assessment body when no file exists', async () => {
+    insertAssessment({
+      path: `.gsd/phases/01-some-feature/01-01-ASSESSMENT.md`,
+      milestoneId: MID,
+      sliceId: SLICE,
+      status: 'pass',
+      scope: 'run-uat',
+      fullContent: RUNTIME_PASS_BODY,
+    });
+    const result = await readUatGateVerdict(basePath, MID, SLICE);
+    assert.ok(result);
+    assert.equal(result!.verdict, 'pass');
+    assert.equal(result!.uatType, 'runtime-executable'); // same value as the "UAT mode" line in RUNTIME_PASS_BODY
+  });
+  test('canonical ASSESSMENT file on disk still resolves (regression guard)', async () => {
+    // When the file is present at the canonical path, the existing path-keyed
+    // lookup must resolve it without needing the fallback.
+    const file = canonicalAssessmentPath(basePath);
+    fs.mkdirSync(path.dirname(file), { recursive: true });
+    fs.writeFileSync(file, RUNTIME_PASS_BODY);
+    // Also seed the path-keyed assessments row, mirroring a normal save.
+    insertAssessment({
+      path: `.gsd/milestones/${MID}/slices/${SLICE}/${SLICE}-ASSESSMENT.md`,
+      milestoneId: MID,
+      sliceId: SLICE,
+      status: 'pass',
+      scope: 'run-uat',
+      fullContent: RUNTIME_PASS_BODY,
+    });
+    const result = await readUatGateVerdict(basePath, MID, SLICE);
+    assert.ok(result);
+    assert.equal(result!.verdict, 'pass');
+    assert.equal(result!.uatType, 'runtime-executable');
+  });
+  test('a roadmap-scoped assessment does NOT satisfy the UAT gate', async () => {
+    // `reassess-roadmap` writes roadmap-scoped assessments to the same
+    // S##-ASSESSMENT path; those must never be treated as a UAT verdict. The
+    // legacy-path fallback queries scope='run-uat', so a roadmap-only row is
+    // invisible and the gate returns null.
+    insertAssessment({
+      path: `.gsd/milestones/${MID}/slices/${SLICE}/${SLICE}-ASSESSMENT.md`,
+      milestoneId: MID,
+      sliceId: SLICE,
+      status: 'pass',
+      scope: 'roadmap', // every other fixture in this suite uses 'run-uat'
+      fullContent: RUNTIME_PASS_BODY,
+    });
+    const result = await readUatGateVerdict(basePath, MID, SLICE);
+    assert.equal(result, null, 'roadmap-scoped assessments must not satisfy the UAT gate');
+  });
+  test('returns null when no assessment and no file exist (fallback does not hallucinate)', async () => {
+    const result = await readUatGateVerdict(basePath, MID, SLICE); // no insertAssessment call and no file written in this test
+    assert.equal(result, null);
+  });
+  test('surfaces a recorded non-pass verdict via the DB fallback', async () => {
+    // A failing verdict stored under a legacy path must surface (not be masked
+    // as "missing") so the gate's non-PASS branch can act on it.
+    insertAssessment({
+      path: `.gsd/phases/01-some-feature/01-01-ASSESSMENT.md`,
+      milestoneId: MID,
+      sliceId: SLICE,
+      status: 'fail',
+      scope: 'run-uat',
+      fullContent: RUNTIME_PASS_BODY.replace('verdict: pass', 'verdict: fail'), // body verdict rewritten to agree with status: 'fail'
+    });
+    const result = await readUatGateVerdict(basePath, MID, SLICE);
+    assert.ok(result);
+    assert.equal(result!.verdict, 'fail');
+  });
+});

package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts CHANGED Viewed

@@ -970,6 +970,93 @@ test("register-hooks agent_end does not re-arm deferred gate after workflow MCP
   });
 });
+test("register-hooks message_update uses in-memory write-gate snapshot instead of disk reconcile", async (t) => {
+  // Scenario: a depth verification exists ONLY in the on-disk write-gate state
+  // file. The streaming `message_update` hook must ignore it (pause notice is
+  // still emitted, CONTEXT save stays blocked), while `agent_end` is still
+  // expected to reconcile against disk and leave no pending gate.
+  const dir = makeTempDir("message-update-memory-snapshot");
+  const originalCwd = process.cwd();
+  const originalEnv = process.env.GSD_PERSIST_WRITE_GATE_STATE;
+  // Register cleanup BEFORE mutating process-wide state (cwd, env, gate state)
+  // so a throw anywhere in the setup below cannot leak into later tests.
+  t.after(() => {
+    try {
+      resetWriteGateState(dir);
+      clearPendingAutoStart(dir);
+    } finally {
+      if (originalEnv === undefined) {
+        delete process.env.GSD_PERSIST_WRITE_GATE_STATE;
+      } else {
+        process.env.GSD_PERSIST_WRITE_GATE_STATE = originalEnv;
+      }
+      process.chdir(originalCwd);
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
+  process.chdir(dir);
+  process.env.GSD_PERSIST_WRITE_GATE_STATE = "1";
+  resetWriteGateState(dir);
+  clearPendingAutoStart(dir);
+  const gateId = "depth_verification_M012_confirm";
+  const statePath = join(dir, ".gsd", "runtime", "write-gate-state.json");
+  // Minimal `pi` double: records every handler per event name so the test can
+  // invoke them directly.
+  const handlers = new Map<string, Array<(event: any, ctx?: any) => Promise<any> | any>>();
+  const pi = {
+    on(event: string, handler: (event: any, ctx?: any) => Promise<any> | any) {
+      const existing = handlers.get(event) ?? [];
+      existing.push(handler);
+      handlers.set(event, existing);
+    },
+  } as any;
+  // Captures UI notifications so the pause notice can be asserted on.
+  const notices: Array<{ text: string; level: string }> = [];
+  const ctx = {
+    cwd: dir,
+    ui: { notify: (text: string, level: string) => notices.push({ text, level }) },
+  } as any;
+  registerHooks(pi, []);
+  setPendingAutoStart(dir, {
+    basePath: dir,
+    milestoneId: "M012",
+    ctx,
+    pi: { sendMessage: () => undefined } as any,
+  });
+  // Write the verification straight to disk, bypassing the in-memory state.
+  mkdirSync(join(dir, ".gsd", "runtime"), { recursive: true });
+  writeFileSync(statePath, JSON.stringify({
+    verifiedDepthMilestones: ["M012"],
+    verifiedApprovalGates: [gateId],
+    activeQueuePhase: false,
+    pendingGateId: null,
+  }, null, 2), "utf-8");
+  const approvalMessage = {
+    role: "assistant",
+    content: [
+      { type: "text", text: "Here is the milestone plan.\n\nDid I capture the project correctly?" },
+    ],
+  };
+  for (const handler of handlers.get("message_update") ?? []) {
+    await handler({ message: approvalMessage }, ctx);
+  }
+  assert.equal(
+    notices.some((n) => /discuss-milestone M012 is waiting for your approval - pausing/.test(n.text)),
+    true,
+    "streaming hook must not suppress the pause from a disk-only verification",
+  );
+  assert.equal(
+    shouldBlockContextArtifactSave("CONTEXT", "M012", null, dir).block,
+    true,
+    "streaming hook must not reconcile disk-only verification into the in-memory snapshot",
+  );
+  for (const handler of handlers.get("agent_end") ?? []) {
+    await handler({ messages: [] }, ctx);
+  }
+  assert.equal(getPendingGate(dir), null, "agent_end still reconciles disk and suppresses durable re-arm");
+});
 // ── External-engine post-hoc gate replay (write-gate two-process sync) ──────
 // On claude-code-cli, pi ingests the SDK turn's tool blocks after the workflow
 // MCP child already executed them. The depth gate can therefore arrive at

package/src/resources/extensions/gsd/tests/state-reconciliation-drift.test.ts CHANGED Viewed

@@ -42,6 +42,7 @@ import {
   type ReconciliationDeps,
 } from "../state-reconciliation.ts";
 import { classifyFailure } from "../recovery-classification.ts";
+import { staleRenderHandler } from "../state-reconciliation/drift/stale-render.ts";
 import type { GSDState } from "../types.ts";
 function makeState(overrides: Partial<GSDState> = {}): GSDState {
@@ -696,6 +697,81 @@ test("ADR-017 (#5702): stale-render drift detected and repaired end-to-end", asy
   assert.match(repairedContent, /\[x\][^\n]*\*\*T02\*\*/, "T02 checkbox should be checked after repair");
 });
+test("#1003: stale-render plan repair reopens DB before rendering", async (t) => {
+  const base = mkdtempSync(join(tmpdir(), "gsd-stale-render-reopen-"));
+  const sliceDir = join(base, ".gsd", "phases", "01-test");
+  mkdirSync(sliceDir, { recursive: true });
+  t.after(() => {
+    try { closeDatabase(); } catch { /* noop: the DB may already be closed */ }
+    rmTreeQuiet(base);
+  });
+  openDatabase(join(base, ".gsd", "gsd.db"));
+  clearRendererCaches();
+  insertMilestone({ id: "M001", title: "Test", status: "active" });
+  insertSlice({ id: "S01", milestoneId: "M001", title: "Slice", status: "pending" });
+  insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "First task", status: "done" }); // DB records T01 as done…
+  const planPath = join(sliceDir, "01-01-PLAN.md");
+  writeFileSync(planPath, makeStalePlanContent("S01", [
+    { id: "T01", title: "First task", done: false }, // …while the plan fixture is built with done: false
+  ]));
+  closeDatabase(); // close the test's connection so repair() has to reopen the DB itself
+  await staleRenderHandler.repair(
+    {
+      kind: "stale-render",
+      renderPath: planPath,
+      reason: "T01 is done in DB but unchecked in plan",
+    },
+    { basePath: base, state: makeState() },
+  );
+  const repairedContent = readFileSync(planPath, "utf-8");
+  assert.match(repairedContent, /\[x\][^\n]*\*\*T01\*\*/, "T01 checkbox should be checked after DB reopen repair");
+  assert.equal(getSliceTasks("M001", "S01").length, 1, "DB should be reopened on the original project database");
+});
+test("#1003: stale-render plan repair switches back from an open wrong DB", async (t) => {
+  const base = mkdtempSync(join(tmpdir(), "gsd-stale-render-wrong-db-"));
+  const wrongBase = mkdtempSync(join(tmpdir(), "gsd-stale-render-other-db-")); // second project root that holds the unrelated DB
+  const sliceDir = join(base, ".gsd", "phases", "01-test");
+  mkdirSync(sliceDir, { recursive: true });
+  mkdirSync(join(wrongBase, ".gsd"), { recursive: true });
+  t.after(() => {
+    try { closeDatabase(); } catch { /* noop: the DB may already be closed */ }
+    rmTreeQuiet(base);
+    rmTreeQuiet(wrongBase);
+  });
+  openDatabase(join(base, ".gsd", "gsd.db"));
+  clearRendererCaches();
+  insertMilestone({ id: "M001", title: "Test", status: "active" });
+  insertSlice({ id: "S01", milestoneId: "M001", title: "Slice", status: "pending" });
+  insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "First task", status: "done" }); // DB records T01 as done…
+  const planPath = join(sliceDir, "01-01-PLAN.md");
+  writeFileSync(planPath, makeStalePlanContent("S01", [
+    { id: "T01", title: "First task", done: false }, // …while the plan fixture is built with done: false
+  ]));
+  closeDatabase();
+  openDatabase(join(wrongBase, ".gsd", "gsd.db")); // leave a DB from another root open when repair() is called
+  await staleRenderHandler.repair(
+    {
+      kind: "stale-render",
+      renderPath: planPath,
+      reason: "T01 is done in DB but unchecked in plan",
+    },
+    { basePath: base, state: makeState() },
+  );
+  const repairedContent = readFileSync(planPath, "utf-8");
+  assert.match(repairedContent, /\[x\][^\n]*\*\*T01\*\*/, "T01 checkbox should be checked after switching back to the project DB");
+  assert.equal(getSliceTasks("M001", "S01").length, 1, "repair should leave the project DB active"); // M001/S01 rows were only inserted into base's DB
+});
 test("ADR-017 (#5702): stale-render detector reason strings match repair contract", (t) => {
   t.skip("TODO(flat-phase): stale-render detection temporarily disabled during layout transition"); return;
   const base = mkdtempSync(join(tmpdir(), "gsd-adr017-render-reasons-"));

package/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import {
   resetToolCallLoopGuard,
   disableToolCallLoopGuard,
   getToolCallLoopCount,
+  getToolCallCountForTool,
 } from '../bootstrap/tool-call-loop-guard.ts';
@@ -177,3 +178,70 @@ console.log('\n── Loop guard: nested key order is normalized ──');
 }
 // ═══════════════════════════════════════════════════════════════════════════
+// Per-tool-name cap (#783 Brief C) — catches improvisation loops with varied args
+// ═══════════════════════════════════════════════════════════════════════════
+console.log('\n── Loop guard: per-tool cap blocks varied-args improvisation (#783) ──');
+{
+  resetToolCallLoopGuard();
+  // Drive a one-shot workflow tool with a fresh argument on every call. Since
+  // no two consecutive signatures match, only the per-tool cap can stop it.
+  const oneShotTool = 'gsd_complete_milestone';
+  let attempt = 0;
+  while (attempt < 6) {
+    attempt += 1;
+    const outcome = checkToolCallLoop(oneShotTool, { milestone: `M${attempt}` });
+    assert.ok(outcome.block === false, `one-shot call ${attempt} (varied args) should be allowed`);
+    assert.deepStrictEqual(getToolCallCountForTool(oneShotTool), attempt, `per-tool count should be ${attempt}`);
+  }
+  // Call number 7 exceeds the cap of 6 and has to be rejected by the per-tool guard.
+  const overCap = checkToolCallLoop(oneShotTool, { milestone: 'M7' });
+  assert.ok(overCap.block === true, '7th one-shot call (varied args) should be blocked by per-tool cap');
+  const expectations: Array<[string, string]> = [
+    ['repeated tool', 'reason should identify the per-tool guard'],
+    ['gsd_complete_milestone', 'reason should name the tool'],
+    ['7', 'reason should mention the count'],
+  ];
+  for (const [needle, message] of expectations) {
+    assert.ok(overCap.reason!.includes(needle), message);
+  }
+}
+// ═══════════════════════════════════════════════════════════════════════════
+// Repeatable tools get the higher cap
+// ═══════════════════════════════════════════════════════════════════════════
+console.log('\n── Loop guard: repeatable tools get the higher cap (#783) ──');
+{
+  resetToolCallLoopGuard();
+  // `bash` counts as repeatable, so distinct commands are accepted all the way
+  // up to the larger cap of 15.
+  let callNo = 0;
+  while (callNo < 15) {
+    callNo += 1;
+    const outcome = checkToolCallLoop('bash', { command: `echo ${callNo}` });
+    assert.ok(outcome.block === false, `bash call ${callNo} (varied args) should be allowed`);
+  }
+  // One call past the cap (the 16th) must be refused by the per-tool guard,
+  // which is what stops improvisation routed through bash.
+  const overCap = checkToolCallLoop('bash', { command: 'echo 16' });
+  assert.ok(overCap.block === true, '16th bash call (varied args) should be blocked by per-tool cap');
+  const mentionsCap = overCap.reason!.includes('cap 15');
+  assert.ok(mentionsCap, 'reason should mention the repeatable cap');
+}
+// ═══════════════════════════════════════════════════════════════════════════
+// Per-tool counts are independent per tool and reset together
+// ═══════════════════════════════════════════════════════════════════════════
+console.log('\n── Loop guard: per-tool counts are independent and reset together (#783) ──');
+{
+  resetToolCallLoopGuard();
+  // Three varied-argument calls to each of two tools; each tool keeps its own tally.
+  for (const n of [0, 1, 2]) checkToolCallLoop('read', { path: `f${n}` });
+  for (const n of [0, 1, 2]) checkToolCallLoop('write', { path: `g${n}` });
+  const readCount = getToolCallCountForTool('read');
+  assert.deepStrictEqual(readCount, 3, 'read tracked separately');
+  const writeCount = getToolCallCountForTool('write');
+  assert.deepStrictEqual(writeCount, 3, 'write tracked separately');
+  const editCount = getToolCallCountForTool('edit');
+  assert.deepStrictEqual(editCount, 0, 'uncalled tool reports 0');
+  // A single reset wipes the tallies of every tool at once.
+  resetToolCallLoopGuard();
+  for (const toolName of ['read', 'write']) {
+    assert.deepStrictEqual(getToolCallCountForTool(toolName), 0, 'per-tool counts cleared on reset');
+  }
+}
+// ═══════════════════════════════════════════════════════════════════════════

package/src/resources/extensions/gsd/tests/tool-param-optionality.test.ts CHANGED Viewed

@@ -13,6 +13,7 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
+import { SUMMARY_SAVE_CONTENT_MAX_LENGTH } from "@opengsd/contracts";
 import { registerDbTools } from "../bootstrap/db-tools.ts";
 import AjvModule from "ajv";
@@ -86,6 +87,31 @@ test("gsd_summary_save — validates UAT assessment params", () => {
   assert.strictEqual(valid, true, `UAT assessment params should validate but got errors: ${JSON.stringify(validate.errors)}`);
 });
+test("gsd_summary_save — content has a provider-safe maxLength", () => {
+  const tool = getTool("gsd_summary_save");
+  assert.ok(tool, "gsd_summary_save must be registered");
+  // The schema must advertise the shared limit exported by the contracts package.
+  const declaredMax = tool.parameters.properties.content.maxLength;
+  assert.strictEqual(declaredMax, SUMMARY_SAVE_CONTENT_MAX_LENGTH);
+  // Same params each time; only the length of `content` varies.
+  const paramsWithContentLength = (length: number) => ({
+    milestone_id: "M001",
+    artifact_type: "CONTEXT-DRAFT",
+    content: "x".repeat(length),
+  });
+  // Exactly at the limit: no validation errors.
+  const validAtLimit = validateSchema(tool, paramsWithContentLength(SUMMARY_SAVE_CONTENT_MAX_LENGTH));
+  assert.deepEqual(validAtLimit, []);
+  // One character over: the maxLength error must be reported.
+  const overLimit = validateSchema(tool, paramsWithContentLength(SUMMARY_SAVE_CONTENT_MAX_LENGTH + 1));
+  const expectedFragment = `must NOT have more than ${SUMMARY_SAVE_CONTENT_MAX_LENGTH} characters`;
+  const sawMaxLengthError = overLimit.some((error) => error.includes(expectedFragment));
+  assert.ok(
+    sawMaxLengthError,
+    `expected maxLength validation error, got: ${overLimit.join("; ")}`,
+  );
+});
 // ─── gsd_slice_complete: enrichment arrays must be optional ──────────────────
 test("gsd_slice_complete — enrichment arrays are optional", () => {