npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.2034b16 → 1.1.1-dev.595401e - Mend

@opengsd/gsd-pi 1.1.1-dev.2034b16 → 1.1.1-dev.595401e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110)

package/src/resources/extensions/gsd/rule-types.ts CHANGED Viewed

@@ -32,6 +32,8 @@ export interface RuleLifecycle {
   retry_on?: string;
   /** Max times this hook can fire for the same trigger unit. */
   max_cycles?: number;
+  /** Whether this hook is advisory or blocking. */
+  criticality?: PostUnitHookConfig["criticality"];
   /** Idempotency key pattern for this hook. */
   idempotency_key?: string;
 }

package/src/resources/extensions/gsd/tests/browser-evidence.test.ts ADDED Viewed

@@ -0,0 +1,142 @@
+// Project/App: gsd-pi
+// File Purpose: Unit tests for hasBrowserRequiredText heading-depth section guard.
+import { describe, test } from 'node:test';
+import assert from 'node:assert/strict';
+import { hasBrowserRequiredText } from '../browser-evidence.ts';
+describe('hasBrowserRequiredText', () => {
+  test('detects browser requirement in a plain test-cases section', () => { // Baseline positive case: a browser step directly under "## Test Cases" must be reported.
+    const text = [
+      '## Test Cases',
+      '',
+      '1. Open index.html in a browser and navigate to /dashboard.', // the only step; it names a browser explicitly
+      '',
+    ].join('\n');
+    assert.ok(hasBrowserRequiredText(text), 'plain browser step should be detected');
+  });
+  test('ignores browser mention under a top-level non-requirement heading', () => { // Negative case: browser wording listed under "## Not Proven" must not count as a requirement.
+    const text = [
+      '## Not Proven', // the whole fixture lives under this single heading
+      '',
+      '- Keyboard usability through a real browser.',
+      '- Browser console cleanliness.',
+      '',
+    ].join('\n');
+    assert.ok(!hasBrowserRequiredText(text), 'browser mention under "Not Proven" should be ignored');
+  });
+  test('sub-heading inside a non-requirement section does not re-enable detection', () => {
+    // Regression guard. Before the fix, a "###" sub-heading under "## Not Proven" reset
+    // inNonRequirementSection to false, so the lines after it were detected as browser requirements.
+    const text = [
+      '## Not Proven By This UAT',
+      '',
+      '- No live browser session was used.',
+      '',
+      '### Visual Checks', // deeper heading nested inside the "Not Proven" section
+      '',
+      '- Browser visual polish deferred to next slice.',
+      '- Keyboard interaction in a real browser is not proven here.',
+      '',
+    ].join('\n');
+    assert.ok(
+      !hasBrowserRequiredText(text),
+      'sub-heading under a non-requirement section must not re-enable browser detection',
+    );
+  });
+  test('requirement-level heading after non-requirement section re-enables detection', () => { // A sibling "##" heading must end the preceding "Not Proven" exclusion.
+    const text = [
+      '## Not Proven',
+      '',
+      '- Browser polish deferred.',
+      '',
+      '## Test Cases', // same heading depth as "## Not Proven" above
+      '',
+      '1. Launch browser and open localhost.', // the step the assertion expects to be detected
+      '',
+    ].join('\n');
+    assert.ok(
+      hasBrowserRequiredText(text),
+      'browser step under "Test Cases" (same depth as "Not Proven") must still be detected',
+    );
+  });
+  test('deferred sub-heading inside a requirement section scopes exclusion to its own block', () => { // A "Deferred" sub-heading must not hide browser steps under a later sibling sub-heading.
+    const text = [
+      '## Test Cases',
+      '',
+      '1. Open browser at localhost.',
+      '',
+      '### Deferred: keyboard check', // deferred block nested inside the requirement section
+      '',
+      '- Keyboard UAT deferred to next slice.',
+      '',
+      '### Step 2: Verify DOM', // sibling of the "Deferred" sub-heading, at the same "###" depth
+      '',
+      '1. Navigate to /dashboard in the browser.',
+      '',
+    ].join('\n');
+    assert.ok(
+      hasBrowserRequiredText(text),
+      'browser step under "Step 2" sub-heading must be detected after a sibling "Deferred" sub-heading',
+    );
+  });
+  test('deferred sub-heading at same depth as test cases does not escape to parent', () => { // NOTE(review): the fixture nests "### Deferred" one level below "## Test Cases"; confirm the "same depth" wording in the title.
+    const text = [
+      '## Test Cases',
+      '',
+      '### Deferred: responsive layout',
+      '',
+      '- Responsive layout check is deferred to S02.', // only content line; it never says "browser" — presumably "responsive" is itself a detection keyword, TODO confirm
+      '',
+    ].join('\n');
+    assert.ok(
+      !hasBrowserRequiredText(text),
+      'content under a "Deferred" sub-heading should be excluded from detection',
+    );
+  });
+  test('detects browser requirement written only in a heading', () => {
+    // Regression guard: the line-by-line scan used to `continue` past heading lines,
+    // so a browser obligation expressed only in heading text was missed.
+    const text = '## Open browser session at localhost\n'; // the heading is the entire document
+    assert.ok(hasBrowserRequiredText(text), 'browser requirement in heading text must be detected');
+  });
+  test('heading that opens a non-requirement section is not itself detected as a requirement', () => { // Negative case: neither the "## Not Proven" heading nor the note under it may trigger detection.
+    const text = '## Not Proven\n\n- Some note.\n'; // no line here mentions a browser
+    assert.ok(
+      !hasBrowserRequiredText(text),
+      'a non-requirement section heading should not trigger browser detection',
+    );
+  });
+  test('returns false for empty text', () => { // Edge case: an empty document yields a falsy result.
+    assert.ok(!hasBrowserRequiredText(''), 'empty string returns false');
+  });
+  test('notes-for-tester heading with sub-headings stays non-requirement', () => { // Two nested sub-headings under "## Notes for Tester" must both stay excluded.
+    const text = [
+      '## Notes for Tester',
+      '',
+      '### Browser Setup', // sub-heading whose own text mentions a browser
+      '',
+      '- Run this spec without a browser; a DOM harness is sufficient.',
+      '- Browser-based visual checks are deferred.',
+      '',
+      '### Follow-up Items', // second sibling sub-heading, still inside "Notes for Tester"
+      '',
+      '- Track browser session evidence in S02.',
+      '',
+    ].join('\n');
+    assert.ok(
+      !hasBrowserRequiredText(text),
+      'sub-headings under "Notes for Tester" should not re-enable browser detection',
+    );
+  });
+});

package/src/resources/extensions/gsd/tests/complete-milestone-excerpt.test.ts CHANGED Viewed

@@ -12,6 +12,7 @@ import { tmpdir } from "node:os";
 import { buildSliceSummaryExcerpt, buildCompleteMilestonePrompt, buildValidateMilestonePrompt } from "../auto-prompts.ts";
 import { invalidateAllCaches } from "../cache.ts";
+import { closeDatabase, insertMilestone, openDatabase } from "../gsd-db.ts";
 // ─── Fixture helpers ──────────────────────────────────────────────────────
@@ -364,3 +365,32 @@ test("validate-milestone prompt uses slice excerpts and on-demand paths instead
     "validate prompt must not inline full assessment traces",
   );
 });
+test("validate-milestone prompt inlines planned verification classes as canonical rows", async (t) => { // Planning fields stored on the milestone must surface in the validate prompt as table rows.
+  const base = createBase();
+  t.after(() => { // teardown is registered before the database is opened
+    try { closeDatabase(); } catch { /* ignore: a failed close must not prevent the cleanup(base) call below */ }
+    cleanup(base);
+  });
+  invalidateAllCaches();
+  openDatabase(join(base, ".gsd", "gsd.db"));
+  insertMilestone({
+    id: "M001",
+    planning: {
+      verificationContract: "Local command exits 0.", // asserted below as the "Contract" row
+      verificationOperational: "No long-running child process remains.", // asserted below as the "Operational" row
+    },
+  });
+  writeRoadmap(base, makeRoadmap());
+  writeSummary(base, "S01", makeFatSummary("S01"));
+  writeSummary(base, "S02", makeFatSummary("S02"));
+  const prompt = await buildValidateMilestonePrompt("M001", "Test Milestone", base);
+  assert.match(prompt, /### Verification Classes \(from planning\)/); // section heading
+  assert.match(prompt, /Every row in this table must appear in `verificationClasses`/); // instruction sentence tied to the table
+  assert.match(prompt, /\| Class \| Planned Check \|/); // table header row
+  assert.match(prompt, /\| Contract \| Local command exits 0\. \|/);
+  assert.match(prompt, /\| Operational \| No long-running child process remains\. \|/);
+});

package/src/resources/extensions/gsd/tests/doctor-runtime-checks.test.ts CHANGED Viewed

@@ -45,3 +45,30 @@ test("doctor fix respects git.manage_gitignore false (#4161)", async (t) => {
   assert.equal(readFileSync(join(dir, ".gitignore"), "utf-8"), "node_modules/\n");
   assert.equal(existsSync(join(dir, ".gsd", "PREFERENCES.md")), true);
 });
+test("doctor fix resets run-uat counters at the dispatch cap", async (t) => { // Doctor must first report, then clear, an exhausted run-uat counter file.
+  const dir = createGitProject();
+  t.after(() => rmSync(dir, { recursive: true, force: true }));
+  const runtimeDir = join(dir, ".gsd", "runtime");
+  mkdirSync(runtimeDir, { recursive: true });
+  const counterPath = join(runtimeDir, "uat-count-M002-S01.json"); // name encodes M002/S01, the unitId asserted below
+  writeFileSync(
+    counterPath,
+    JSON.stringify({ count: 3, updatedAt: "2026-06-02T19:40:23.289Z" }) + "\n", // count 3 is taken to be the dispatch cap (per the test title) — TODO confirm against the doctor check
+    "utf-8",
+  );
+  const detect = await runGSDDoctor(dir); // first pass: no fix option, detection only
+  const issue = detect.issues.find((candidate) => candidate.code === "uat_retry_exhausted");
+  assert.ok(issue, "doctor reports the exhausted UAT retry counter at the dispatch cap");
+  assert.equal(issue.unitId, "M002/S01");
+  assert.match(issue.message, /3 attempt\(s\)/);
+  const fixed = await runGSDDoctor(dir, { fix: true, scope: "M002/S02" }); // scope names a different slice than the counter on purpose
+  assert.ok(
+    fixed.fixesApplied.some((fix) => fix.includes("reset exhausted run-uat retry counter for M002/S01")),
+    "doctor --fix resets the blocked counter even when the current displayed scope has advanced",
+  );
+  assert.equal(existsSync(counterPath), false); // the fix is expected to delete the counter file
+});

package/src/resources/extensions/gsd/tests/integration/auto-recovery.test.ts CHANGED Viewed

@@ -119,8 +119,8 @@ test("resolveExpectedArtifactPath returns correct path for all slice-level types
 // ─── run-uat artifact path contract (#2873) ──────────────────────────────
 test("resolveExpectedArtifactPath for run-uat returns ASSESSMENT path, not UAT (#2873)", (t) => {
-  // The run-uat prompt instructs the agent to call gsd_summary_save with
-  // artifact_type: "ASSESSMENT", which writes S##-ASSESSMENT.md. The artifact
+  // The run-uat prompt instructs the agent to call gsd_uat_result_save, which
+  // writes S##-ASSESSMENT.md through the workflow persistence path. The artifact
   // verification path must match — otherwise verification fails and auto-mode
   // retries the unit in an infinite loop.
   const base = makeTmpBase();
@@ -147,12 +147,12 @@ test("diagnoseExpectedArtifact for run-uat references ASSESSMENT (#2873)", (t) =
 });
 test("verifyExpectedArtifact passes for run-uat when ASSESSMENT file exists (#2873)", (t) => {
-  // Regression test: run-uat writes S##-ASSESSMENT.md via gsd_summary_save,
+  // Regression test: run-uat writes S##-ASSESSMENT.md via gsd_uat_result_save,
   // but verification looked for S##-UAT.md, causing false stuck retries.
   const base = makeTmpBase();
   t.after(() => cleanup(base));
-  // Write the ASSESSMENT file (what gsd_summary_save actually produces)
+  // Write the ASSESSMENT file (what gsd_uat_result_save actually produces)
   const assessPath = join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-ASSESSMENT.md");
   writeFileSync(assessPath, "---\nverdict: PASS\n---\n# UAT Assessment\n");

package/src/resources/extensions/gsd/tests/integration/run-uat.test.ts CHANGED Viewed

@@ -72,6 +72,38 @@ function makeBrowserObservableUatContent(mode = 'artifact-driven'): string {
   ].join('\n');
 }
+function makeDeferredBrowserUatContent(): string { // Builds a UAT markdown fixture in which every browser mention is deferred or listed as not proven.
+  return [
+    '# UAT File',
+    '',
+    '## UAT Type',
+    '',
+    '- UAT mode: artifact-driven', // declared mode; test (v) expects the prompt to keep it
+    '- Why this mode is sufficient: Node interaction tests exercise the real app.js render/event/localStorage loop through a DOM harness. Live browser, keyboard, responsive, and visual-polish UAT remain intentionally deferred to S02.',
+    '',
+    '## Smoke Test',
+    '',
+    'Run `node --test tests/s01-static-interactions.test.js` and confirm all tests pass.',
+    '',
+    '## Test Cases',
+    '',
+    '1. Click the todo row edit control in the DOM harness.', // the test-case steps reference a DOM harness, never a browser
+    '2. Save changed text and reload/recreate the app from persisted localStorage.',
+    '3. Expected: the stored record shape remains unchanged.',
+    '',
+    '## Not Proven By This UAT',
+    '',
+    '- Final visual polish of edit controls.',
+    '- Keyboard usability through a real browser.',
+    '- Browser console and local network cleanliness.',
+    '',
+    '## Notes for Tester',
+    '',
+    'S02 should capture browser evidence for the full loop rather than changing this persisted model.',
+    '',
+  ].join('\n'); // the trailing '' entry makes the document end with a newline
+}
 describe('run-uat', () => {
 test('(a) artifact-driven', () => {
   assert.deepStrictEqual(
@@ -232,8 +264,8 @@ test('(k) run-uat prompt template', () => {
     `prompt contains detected dynamic uatType value "${uatType}" after substitution`,
   );
   assert.ok(
-    promptResult?.includes(`uatType: ${uatType}`) ?? false,
-    `prompt contains dynamic uatType frontmatter value "${uatType}" after substitution`,
+    promptResult?.includes(`uatType: "${uatType}"`) ?? false,
+    `prompt contains dynamic uatType field "${uatType}" after substitution`,
   );
   assert.ok(
     !/\{\{[^}]+\}\}/.test(promptResult ?? ''),
@@ -249,7 +281,7 @@ test('(k) run-uat prompt template', () => {
   );
 });
-test('(k2) run-uat prompt references gsd_summary_save, not direct write', () => {
+test('(k2) run-uat prompt references gsd_uat_result_save, not direct write', () => {
   const promptResult = loadPromptFromWorktree('run-uat', {
     workingDirectory: '/tmp/test-project',
     milestoneId: 'M001',
@@ -261,17 +293,25 @@ test('(k2) run-uat prompt references gsd_summary_save, not direct write', () =>
   });
   assert.ok(
-    promptResult.includes('gsd_summary_save'),
-    'run-uat prompt should reference gsd_summary_save tool',
+    promptResult.includes('gsd_uat_result_save'),
+    'run-uat prompt should reference gsd_uat_result_save tool',
+  );
+  assert.ok(
+    promptResult.includes('presentedTools') && promptResult.includes('blockedTools'),
+    'run-uat prompt should specify the tool presentation contract',
   );
   assert.ok(
-    promptResult.includes('artifact_type: "ASSESSMENT"'),
-    'run-uat prompt should specify ASSESSMENT artifact type',
+    !promptResult.includes('Call `gsd_summary_save`'),
+    'run-uat prompt should not instruct direct summary-save UAT persistence',
   );
   assert.ok(
     !promptResult.includes('MUST write'),
     'run-uat prompt should not instruct direct file write in footer',
   );
+  assert.ok(
+    !promptResult.includes('Call `gsd_summary_save` with `artifact_type: "ASSESSMENT"`'),
+    'run-uat prompt should not instruct the legacy summary-save UAT path',
+  );
 });
 test('(l) dispatch preconditions via resolveSliceFile', () => {
@@ -482,8 +522,8 @@ test('(n) stale replay guard', async () => {
 });
 test('(q) verdict in ASSESSMENT file skips UAT dispatch (file-based path)', async () => {
-    // Regression test for #2644: run-uat prompt writes the verdict to
-    // S{sid}-ASSESSMENT.md (via gsd_summary_save artifact_type:"ASSESSMENT"),
+    // Regression test for #2644: run-uat writes the verdict to
+    // S{sid}-ASSESSMENT.md through the structured UAT save path,
     // but checkNeedsRunUat only checked S{sid}-UAT.md — causing a stuck loop.
     const base = createFixtureBase();
     try {
@@ -679,11 +719,27 @@ test('(u) run-uat prompt promotes artifact-driven browser specs to browser-execu
       const prompt = await buildRunUatPrompt('M001', 'S01', uatRel, uatContent, base);
       assert.match(prompt, /\*\*Detected UAT mode:\*\*\s*`browser-executable`/);
-      assert.match(prompt, /uatType: browser-executable/);
+      assert.match(prompt, /uatType: "browser-executable"/);
       assert.match(prompt, /use gsd-browser tools/i);
     } finally {
       cleanup(base);
     }
 });
+test('(v) run-uat prompt keeps deferred browser work artifact-driven', async () => { // A spec that only defers browser work must not be promoted to browser-executable.
+    const base = createFixtureBase();
+    try {
+      const uatRel = '.gsd/milestones/M001/slices/S01/S01-UAT.md';
+      const uatContent = makeDeferredBrowserUatContent();
+      writeSliceFile(base, 'M001', 'S01', 'UAT', uatContent);
+      const prompt = await buildRunUatPrompt('M001', 'S01', uatRel, uatContent, base);
+      assert.match(prompt, /\*\*Detected UAT mode:\*\*\s*`artifact-driven`/); // mode shown in the prompt text
+      assert.match(prompt, /uatType: "artifact-driven"/); // quoted uatType field
+      assert.doesNotMatch(prompt, /uatType: "browser-executable"/); // guards against promotion
+    } finally {
+      cleanup(base); // remove the fixture directory even when an assertion throws
+    }
+});
 });

package/src/resources/extensions/gsd/tests/post-unit-hooks.test.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import {
   resetHookState,
   isRetryPending,
   consumeRetryTrigger,
+  consumeGateBlock,
   resolveHookArtifactPath,
   runPreDispatchHooks,
   persistHookState,
@@ -20,6 +21,7 @@ import {
   formatHookStatus,
   triggerHookManually,
 } from "../post-unit-hooks.ts";
+import { invalidateAllCaches } from "../cache.ts";
 // ─── Fixture Helpers ───────────────────────────────────────────────────────
@@ -29,6 +31,11 @@ function createFixtureBase(): string {
   return base;
 }
+function writeHookPreferences(base: string, hookYaml: string): void { // Writes <base>/.gsd/PREFERENCES.md with hookYaml placed under a post_unit_hooks: frontmatter key.
+  writeFileSync(join(base, ".gsd", "PREFERENCES.md"), `---\npost_unit_hooks:\n${hookYaml}\n---\n`, "utf-8"); // hookYaml is inserted verbatim, so callers supply the list indentation
+  invalidateAllCaches(); // presumably drops cached preferences so the next read sees this file — confirm against cache.ts
+}
 // ═══════════════════════════════════════════════════════════════════════════
 // Phase 1: Post-Unit Hook Tests
 // ═══════════════════════════════════════════════════════════════════════════
@@ -104,6 +111,156 @@ test('consumeRetryTrigger clears state', () => {
   assert.ok(!isRetryPending(), "no retry initially");
 });
+test('Advisory hook keeps artifact idempotency without verdict frontmatter', () => { // An existing artifact with no frontmatter is enough to skip a hook that sets no criticality.
+  resetHookState();
+  const base = createFixtureBase();
+  try {
+    writeHookPreferences(base, `  - name: docs-hint
+    after:
+      - execute-task
+    prompt: Review docs
+    artifact: DOCS-HINT.md
+`); // no criticality key is configured for this hook
+    writeFileSync(resolveHookArtifactPath(base, "M001/S01/T01", "DOCS-HINT.md"), "plain advisory note", "utf-8"); // artifact exists before the check and has no frontmatter
+    const result = checkPostUnitHooks("execute-task", "M001/S01/T01", base);
+    assert.deepStrictEqual(result, null, "existing advisory artifact remains idempotent");
+    assert.deepStrictEqual(consumeGateBlock(), null, "advisory hook does not create gate block");
+  } finally {
+    resetHookState(); // leave no hook state, cached preferences, or fixture files behind
+    invalidateAllCaches();
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test('Blocking hook skips only after passing frontmatter verdict', () => { // A blocking hook is skipped when its artifact already carries `verdict: pass` frontmatter.
+  resetHookState();
+  const base = createFixtureBase();
+  try {
+    writeHookPreferences(base, `  - name: security-review
+    after:
+      - execute-task
+    prompt: Review security
+    artifact: SECURITY-REVIEW.md
+    criticality: blocking
+`);
+    writeFileSync(
+      resolveHookArtifactPath(base, "M001/S01/T01", "SECURITY-REVIEW.md"),
+      "---\nverdict: pass\n---\n\nNo blocking findings.\n", // top-level verdict key inside the frontmatter block
+      "utf-8",
+    );
+    const result = checkPostUnitHooks("execute-task", "M001/S01/T01", base);
+    assert.deepStrictEqual(result, null, "passing gate artifact is idempotent");
+    assert.deepStrictEqual(consumeGateBlock(), null, "passing gate does not block");
+  } finally {
+    resetHookState(); // leave no hook state, cached preferences, or fixture files behind
+    invalidateAllCaches();
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test('Blocking hook reruns invalid artifact once then blocks at cycle budget', () => { // An artifact without a verdict triggers one hook dispatch, then a recorded gate block.
+  resetHookState();
+  const base = createFixtureBase();
+  try {
+    writeHookPreferences(base, `  - name: security-review
+    after:
+      - execute-task
+    prompt: Review security
+    artifact: SECURITY-REVIEW.md
+    criticality: blocking
+`);
+    writeFileSync(resolveHookArtifactPath(base, "M001/S01/T01", "SECURITY-REVIEW.md"), "partial output", "utf-8"); // artifact exists but has no frontmatter
+    const dispatch = checkPostUnitHooks("execute-task", "M001/S01/T01", base);
+    assert.ok(dispatch, "invalid gate artifact dispatches the blocking hook");
+    assert.equal(dispatch.unitType, "hook/security-review");
+    const afterHook = checkPostUnitHooks("hook/security-review", "M001/S01/T01", base); // simulates the hook unit itself completing; the artifact is unchanged
+    assert.deepStrictEqual(afterHook, null, "no further hook dispatch after max_cycles=1"); // NOTE(review): max_cycles is not set in the fixture, so 1 is presumably the default — confirm
+    const block = consumeGateBlock();
+    assert.ok(block, "gate block is recorded");
+    assert.equal(block.hookName, "security-review");
+    assert.match(block.reason, /missing frontmatter verdict/);
+  } finally {
+    resetHookState(); // leave no hook state, cached preferences, or fixture files behind
+    invalidateAllCaches();
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test('Blocking hook restored from disk does not trust artifact without clean hook completion', () => { // After persist → reset → restore, the gate must dispatch again despite an artifact on disk.
+  resetHookState();
+  const base = createFixtureBase();
+  try {
+    writeHookPreferences(base, `  - name: security-review
+    after:
+      - execute-task
+    prompt: Review security
+    artifact: SECURITY-REVIEW.md
+    criticality: blocking
+    max_cycles: 2
+`);
+    const firstDispatch = checkPostUnitHooks("execute-task", "M001/S01/T01", base);
+    assert.ok(firstDispatch, "gate dispatches first cycle");
+    persistHookState(base); // snapshot taken while the hook is dispatched but its completion has not been reported
+    writeFileSync(
+      resolveHookArtifactPath(base, "M001/S01/T01", "SECURITY-REVIEW.md"),
+      "---\noutcome:\n  verdict: pass\n---\n", // NOTE(review): verdict is nested under `outcome`, not top-level — confirm the rerun is due to the missing completion rather than an unrecognized verdict
+      "utf-8",
+    );
+    resetHookState(); // clear in-memory state, then reload it from the persisted snapshot
+    restoreHookState(base);
+    const resumed = checkPostUnitHooks("execute-task", "M001/S01/T01", base);
+    assert.ok(resumed, "persisted active gate reruns when clean hook completion was not observed");
+    assert.equal(resumed.unitType, "hook/security-review");
+  } finally {
+    resetHookState(); // leave no hook state, cached preferences, or fixture files behind
+    invalidateAllCaches();
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test('Blocking hook needs-rework verdict requests trigger unit retry', () => { // With on_block.action retry-unit, a needs-rework verdict must queue a retry of the triggering unit.
+  resetHookState();
+  const base = createFixtureBase();
+  try {
+    writeHookPreferences(base, `  - name: review-arbiter
+    after:
+      - execute-task
+    prompt: Review task
+    artifact: REVIEW-DEBATE.md
+    criticality: blocking
+    max_cycles: 2
+    on_block:
+      action: retry-unit
+`);
+    const dispatch = checkPostUnitHooks("execute-task", "M001/S01/T01", base); // no artifact exists yet at this point
+    assert.ok(dispatch, "gate dispatches");
+    writeFileSync(
+      resolveHookArtifactPath(base, "M001/S01/T01", "REVIEW-DEBATE.md"),
+      "---\nverdict: needs-rework\n---\n\nRework required.\n", // stands in for the output the hook would have written
+      "utf-8",
+    );
+    const afterHook = checkPostUnitHooks("hook/review-arbiter", "M001/S01/T01", base); // simulates the hook unit completing
+    assert.deepStrictEqual(afterHook, null, "needs-rework routes via retry signal");
+    assert.ok(isRetryPending(), "retry is pending");
+    assert.deepStrictEqual(consumeRetryTrigger(), { // the retry targets the original execute-task unit, not the hook unit
+      unitType: "execute-task",
+      unitId: "M001/S01/T01",
+    });
+  } finally {
+    resetHookState(); // leave no hook state, cached preferences, or fixture files behind
+    invalidateAllCaches();
+    rmSync(base, { recursive: true, force: true });
+  }
+});
 // ─── Variable substitution in prompts ──────────────────────────────────────
 test('Variable substitution', () => {
   const base = "/project";