npm - gsd-pi - Versions diffs - 2.59.0-dev.023bd39 → 2.59.0-dev.d77b3dd - Mend

gsd-pi 2.59.0-dev.023bd39 → 2.59.0-dev.d77b3dd

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

package/src/resources/extensions/gsd/rethink.ts CHANGED

@@ -112,8 +112,11 @@ function buildRethinkData(
     if (dbAvailable && status !== "complete") {
       const slices = getMilestoneSlices(mid);
       if (slices.length > 0) {
-        const done = slices.filter(s => s.status === "complete").length;
-        sliceInfo = `${done}/${slices.length} complete`;
+        const done = slices.filter(s => s.status === "complete" || s.status === "done").length;
+        const skipped = slices.filter(s => s.status === "skipped").length;
+        sliceInfo = skipped > 0
+          ? `${done}/${slices.length} complete, ${skipped} skipped`
+          : `${done}/${slices.length} complete`;
       }
     }

package/src/resources/extensions/gsd/state.ts CHANGED

@@ -295,7 +295,7 @@ function extractContextTitle(content: string | null, fallback: string): string {
  * Helper: check if a DB status counts as "done" (handles K002 ambiguity).
  */
 function isStatusDone(status: string): boolean {
-  return status === 'complete' || status === 'done';
+  return status === 'complete' || status === 'done' || status === 'skipped';
 }
 /**

package/src/resources/extensions/gsd/status-guards.ts CHANGED

@@ -1,13 +1,14 @@
 /**
  * Status predicates for GSD state-machine guards.
  *
- * The DB stores status as free-form strings. Two values indicate
- * "closed": "complete" (canonical) and "done" (legacy / alias).
+ * The DB stores status as free-form strings. Three values indicate
+ * "closed": "complete" (canonical), "done" (legacy / alias), and
+ * "skipped" (user-directed skip via rethink or backtrack).
  * Every inline `status === "complete" || status === "done"` should
  * use isClosedStatus() instead.
  */
 /** Returns true when a milestone, slice, or task status indicates closure. */
 export function isClosedStatus(status: string): boolean {
-  return status === "complete" || status === "done";
+  return status === "complete" || status === "done" || status === "skipped";
 }

package/src/resources/extensions/gsd/tests/context-masker.test.ts ADDED

@@ -0,0 +1,122 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { createObservationMask } from "../context-masker.js";
+// These helpers produce messages in the pi-ai LLM payload format
+// (post-convertToLlm, pre-provider), which is what before_provider_request sees.
+function userMsg(content: string) {
+  return { role: "user", content: [{ type: "text", text: content }] };
+}
+function assistantMsg(content: string) {
+  return { role: "assistant", content: [{ type: "text", text: content }] };
+}
+/** toolResult in pi-ai format: role "toolResult", content as TextContent[] */
+function toolResult(text: string) {
+  return { role: "toolResult", content: [{ type: "text", text }], toolCallId: "toolu_test", toolName: "Read", isError: false };
+}
+/** bashExecution after convertToLlm: becomes a user message with "Ran `cmd`" prefix */
+function bashResult(text: string) {
+  return { role: "user", content: [{ type: "text", text: `Ran \`echo test\`\n\`\`\`\n${text}\n\`\`\`` }] };
+}
+const MASK_TEXT = "[result masked — within summarized history]";
+test("masks nothing when message count is within keepRecentTurns", () => {
+  const mask = createObservationMask(8);
+  const messages = [
+    userMsg("hello"),
+    assistantMsg("hi"),
+    toolResult("file contents"),
+  ];
+  const result = mask(messages as any);
+  assert.equal(result.length, 3);
+  assert.deepEqual((result[2].content as any)[0].text, "file contents");
+});
+test("masks tool results older than keepRecentTurns", () => {
+  const mask = createObservationMask(2);
+  const messages = [
+    userMsg("turn 1"),
+    toolResult("old tool output"),
+    assistantMsg("response 1"),
+    userMsg("turn 2"),
+    toolResult("newer tool output"),
+    assistantMsg("response 2"),
+    userMsg("turn 3"),
+    toolResult("newest tool output"),
+    assistantMsg("response 3"),
+  ];
+  const result = mask(messages as any);
+  // Old tool result (before boundary) should be masked
+  assert.equal((result[1].content as any)[0].text, MASK_TEXT);
+  // Recent tool results (within keep window) should be preserved
+  assert.equal((result[4].content as any)[0].text, "newer tool output");
+  assert.equal((result[7].content as any)[0].text, "newest tool output");
+});
+test("never masks assistant messages", () => {
+  const mask = createObservationMask(1);
+  const messages = [
+    userMsg("turn 1"),
+    assistantMsg("old reasoning"),
+    userMsg("turn 2"),
+    assistantMsg("new reasoning"),
+  ];
+  const result = mask(messages as any);
+  assert.equal((result[1].content as any)[0].text, "old reasoning");
+  assert.equal((result[3].content as any)[0].text, "new reasoning");
+});
+test("never masks user messages", () => {
+  const mask = createObservationMask(1);
+  const messages = [
+    userMsg("old user message"),
+    assistantMsg("response"),
+    userMsg("new user message"),
+    assistantMsg("response"),
+  ];
+  const result = mask(messages as any);
+  assert.equal((result[0].content as any)[0].text, "old user message");
+});
+test("masks bash result user messages", () => {
+  const mask = createObservationMask(1);
+  const messages = [
+    userMsg("turn 1"),
+    bashResult("huge log output"),
+    assistantMsg("response 1"),
+    userMsg("turn 2"),
+    assistantMsg("response 2"),
+  ];
+  const result = mask(messages as any);
+  assert.equal((result[1].content as any)[0].text, MASK_TEXT);
+});
+test("returns same array length", () => {
+  const mask = createObservationMask(1);
+  const messages = [
+    userMsg("a"), toolResult("b"), assistantMsg("c"),
+    userMsg("d"), toolResult("e"), assistantMsg("f"),
+  ];
+  const result = mask(messages as any);
+  assert.equal(result.length, messages.length);
+});
+test("masks toolResult by role, not by type field", () => {
+  const mask = createObservationMask(1);
+  const messages = [
+    userMsg("turn 1"),
+    // This is the actual pi-ai format: role "toolResult", no type field
+    { role: "toolResult", content: [{ type: "text", text: "old result" }], toolCallId: "t1", toolName: "Read", isError: false },
+    assistantMsg("response 1"),
+    userMsg("turn 2"),
+    assistantMsg("response 2"),
+  ];
+  const result = mask(messages as any);
+  assert.equal((result[1].content as any)[0].text, MASK_TEXT);
+});

package/src/resources/extensions/gsd/tests/model-router.test.ts CHANGED

@@ -5,8 +5,11 @@ import {
   resolveModelForComplexity,
   escalateTier,
   defaultRoutingConfig,
+  scoreModel,
+  computeTaskRequirements,
+  MODEL_CAPABILITY_PROFILES,
 } from "../model-router.js";
-import type { DynamicRoutingConfig, RoutingDecision } from "../model-router.js";
+import type { DynamicRoutingConfig, RoutingDecision, ModelCapabilities } from "../model-router.js";
 import type { ClassificationResult } from "../complexity-classifier.js";
 // ─── Helpers ─────────────────────────────────────────────────────────────────
@@ -206,6 +209,89 @@ test("#2192: known model is still downgraded normally", () => {
   assert.notEqual(result.modelId, "claude-opus-4-6");
 });
+// ─── Capability Scoring (ADR-004 Phase 2) ───────────────────────────────────
+test("defaultRoutingConfig includes capability_routing: false", () => {
+  const config = defaultRoutingConfig();
+  assert.equal(config.capability_routing, false);
+});
+test("scoreModel computes weighted average of capability × requirement", () => {
+  const caps: ModelCapabilities = {
+    coding: 90, debugging: 80, research: 70,
+    reasoning: 85, speed: 50, longContext: 60, instruction: 75,
+  };
+  const reqs = { coding: 0.9, reasoning: 0.5 };
+  const score = scoreModel(caps, reqs);
+  // Expected: (0.9*90 + 0.5*85) / (0.9 + 0.5) = (81 + 42.5) / 1.4 = 88.21...
+  assert.ok(Math.abs(score - 88.21) < 0.1, `score ${score} should be ~88.21`);
+});
+test("scoreModel returns 50 for empty requirements", () => {
+  const caps: ModelCapabilities = {
+    coding: 90, debugging: 80, research: 70,
+    reasoning: 85, speed: 50, longContext: 60, instruction: 75,
+  };
+  const score = scoreModel(caps, {});
+  assert.equal(score, 50);
+});
+test("computeTaskRequirements returns base vector for known unit type", () => {
+  const reqs = computeTaskRequirements("execute-task");
+  assert.ok(reqs.coding !== undefined && reqs.coding > 0);
+});
+test("computeTaskRequirements boosts instruction for docs-tagged tasks", () => {
+  const reqs = computeTaskRequirements("execute-task", { tags: ["docs"] });
+  assert.ok((reqs.instruction ?? 0) >= 0.8);
+  assert.ok((reqs.coding ?? 1) <= 0.4);
+});
+test("computeTaskRequirements returns generic vector for unknown unit type", () => {
+  const reqs = computeTaskRequirements("unknown-unit");
+  assert.ok(reqs.reasoning !== undefined);
+});
+test("resolveModelForComplexity uses capability scoring when enabled", () => {
+  const config: DynamicRoutingConfig = {
+    ...defaultRoutingConfig(),
+    enabled: true,
+    capability_routing: true,
+  };
+  const result = resolveModelForComplexity(
+    makeClassification("light"),
+    { primary: "claude-opus-4-6", fallbacks: [] },
+    config,
+    ["claude-opus-4-6", "claude-haiku-4-5", "gpt-4o-mini"],
+    "execute-task",
+  );
+  assert.equal(result.wasDowngraded, true);
+  assert.equal(result.selectionMethod, "capability-scored");
+});
+test("resolveModelForComplexity falls back to tier-only when capability_routing is false", () => {
+  const config: DynamicRoutingConfig = {
+    ...defaultRoutingConfig(),
+    enabled: true,
+    capability_routing: false,
+  };
+  const result = resolveModelForComplexity(
+    makeClassification("light"),
+    { primary: "claude-opus-4-6", fallbacks: [] },
+    config,
+    ["claude-opus-4-6", "claude-haiku-4-5", "gpt-4o-mini"],
+  );
+  assert.equal(result.wasDowngraded, true);
+  assert.ok(!result.selectionMethod || result.selectionMethod === "tier-only");
+});
+test("MODEL_CAPABILITY_PROFILES has entries for core models", () => {
+  const profiledModels = Object.keys(MODEL_CAPABILITY_PROFILES);
+  assert.ok(profiledModels.length >= 9, `Expected ≥9 profiles, got ${profiledModels.length}`);
+  assert.ok(MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]);
+  assert.ok(MODEL_CAPABILITY_PROFILES["claude-haiku-4-5"]);
+});
 // ─── #2885: openai-codex and modern OpenAI models in tier map ────────────────
 test("#2885: openai-codex light-tier models are recognized", () => {

package/src/resources/extensions/gsd/tests/phase-anchor.test.ts ADDED

@@ -0,0 +1,83 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, rmSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { writePhaseAnchor, readPhaseAnchor, formatAnchorForPrompt } from "../phase-anchor.js";
+import type { PhaseAnchor } from "../phase-anchor.js";
+function makeTempBase(): string {
+  const tmp = mkdtempSync(join(tmpdir(), "gsd-anchor-test-"));
+  mkdirSync(join(tmp, ".gsd", "milestones", "M001", "anchors"), { recursive: true });
+  return tmp;
+}
+test("writePhaseAnchor creates anchor file in correct location", () => {
+  const base = makeTempBase();
+  try {
+    const anchor: PhaseAnchor = {
+      phase: "discuss",
+      milestoneId: "M001",
+      generatedAt: new Date().toISOString(),
+      intent: "Define authentication requirements",
+      decisions: ["Use JWT tokens", "Session expiry 24h"],
+      blockers: [],
+      nextSteps: ["Plan the implementation slices"],
+    };
+    writePhaseAnchor(base, "M001", anchor);
+    assert.ok(existsSync(join(base, ".gsd", "milestones", "M001", "anchors", "discuss.json")));
+  } finally {
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test("readPhaseAnchor returns written anchor", () => {
+  const base = makeTempBase();
+  try {
+    const anchor: PhaseAnchor = {
+      phase: "plan",
+      milestoneId: "M001",
+      generatedAt: new Date().toISOString(),
+      intent: "Break work into slices",
+      decisions: ["3 slices: auth, UI, tests"],
+      blockers: ["Need DB schema first"],
+      nextSteps: ["Execute S01"],
+    };
+    writePhaseAnchor(base, "M001", anchor);
+    const read = readPhaseAnchor(base, "M001", "plan");
+    assert.ok(read);
+    assert.equal(read!.intent, "Break work into slices");
+    assert.deepEqual(read!.decisions, ["3 slices: auth, UI, tests"]);
+    assert.deepEqual(read!.blockers, ["Need DB schema first"]);
+  } finally {
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test("readPhaseAnchor returns null when no anchor exists", () => {
+  const base = makeTempBase();
+  try {
+    const read = readPhaseAnchor(base, "M001", "discuss");
+    assert.equal(read, null);
+  } finally {
+    rmSync(base, { recursive: true, force: true });
+  }
+});
+test("formatAnchorForPrompt produces markdown block", () => {
+  const anchor: PhaseAnchor = {
+    phase: "discuss",
+    milestoneId: "M001",
+    generatedAt: "2026-04-03T00:00:00.000Z",
+    intent: "Define requirements",
+    decisions: ["Use JWT"],
+    blockers: [],
+    nextSteps: ["Plan slices"],
+  };
+  const md = formatAnchorForPrompt(anchor);
+  assert.ok(md.includes("## Handoff from discuss"));
+  assert.ok(md.includes("Define requirements"));
+  assert.ok(md.includes("Use JWT"));
+  assert.ok(md.includes("Plan slices"));
+});

package/src/resources/extensions/gsd/tests/status-guards.test.ts CHANGED

@@ -13,6 +13,10 @@ test('isClosedStatus: "done" returns true', () => {
   assert.equal(isClosedStatus('done'), true);
 });
+test('isClosedStatus: "skipped" returns true', () => {
+  assert.equal(isClosedStatus('skipped'), true);
+});
 test('isClosedStatus: "pending" returns false', () => {
   assert.equal(isClosedStatus('pending'), false);
 });

package/src/resources/extensions/gsd/tests/stop-backtrack.test.ts ADDED

@@ -0,0 +1,216 @@
+/**
+ * Unit tests for stop/backtrack capture classifications and milestone regression (#3487).
+ *
+ * Tests:
+ * - "stop" and "backtrack" are valid classification types
+ * - loadStopCaptures returns unexecuted stop+backtrack captures
+ * - loadBacktrackCaptures returns only backtrack captures
+ * - revertExecutorResolvedCaptures reverts silenced captures
+ * - executeBacktrack writes trigger and regression markers
+ * - readBacktrackTrigger parses trigger file
+ */
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, readFileSync, writeFileSync, rmSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { isClosedStatus } from "../status-guards.ts";
+import {
+  appendCapture,
+  loadAllCaptures,
+  loadStopCaptures,
+  loadBacktrackCaptures,
+  markCaptureResolved,
+  revertExecutorResolvedCaptures,
+  hasPendingCaptures,
+} from "../captures.ts";
+import {
+  executeBacktrack,
+  readBacktrackTrigger,
+} from "../triage-resolution.ts";
+function makeTempDir(prefix: string): string {
+  const dir = join(
+    tmpdir(),
+    `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+  );
+  mkdirSync(dir, { recursive: true });
+  return dir;
+}
+function setupGsdDir(tmp: string): void {
+  mkdirSync(join(tmp, ".gsd"), { recursive: true });
+}
+// ─── Classification Types ─────────────────────────────────────────────────────
+test("stop is a valid classification", () => {
+  const tmp = makeTempDir("stop-class");
+  setupGsdDir(tmp);
+  const id = appendCapture(tmp, "stop running immediately");
+  markCaptureResolved(tmp, id, "stop", "Halt auto-mode", "User said stop", "M005");
+  const all = loadAllCaptures(tmp);
+  const cap = all.find(c => c.id === id);
+  assert.equal(cap?.classification, "stop");
+  rmSync(tmp, { recursive: true, force: true });
+});
+test("backtrack is a valid classification", () => {
+  const tmp = makeTempDir("bt-class");
+  setupGsdDir(tmp);
+  const id = appendCapture(tmp, "restart from M003");
+  markCaptureResolved(tmp, id, "backtrack", "Backtrack to M003", "User wants to restart", "M005");
+  const all = loadAllCaptures(tmp);
+  const cap = all.find(c => c.id === id);
+  assert.equal(cap?.classification, "backtrack");
+  rmSync(tmp, { recursive: true, force: true });
+});
+// ─── loadStopCaptures ─────────────────────────────────────────────────────────
+test("loadStopCaptures returns unexecuted stop and backtrack captures", () => {
+  const tmp = makeTempDir("load-stop");
+  setupGsdDir(tmp);
+  const stopId = appendCapture(tmp, "halt execution");
+  const btId = appendCapture(tmp, "go back to M003");
+  const noteId = appendCapture(tmp, "just a note");
+  markCaptureResolved(tmp, stopId, "stop", "Halt", "User stop", "M005");
+  markCaptureResolved(tmp, btId, "backtrack", "Backtrack to M003", "User backtrack", "M005");
+  markCaptureResolved(tmp, noteId, "note", "Info only", "Not actionable", "M005");
+  const stops = loadStopCaptures(tmp);
+  assert.equal(stops.length, 2);
+  assert.ok(stops.some(c => c.classification === "stop"));
+  assert.ok(stops.some(c => c.classification === "backtrack"));
+  rmSync(tmp, { recursive: true, force: true });
+});
+test("loadBacktrackCaptures returns only backtrack captures", () => {
+  const tmp = makeTempDir("load-bt");
+  setupGsdDir(tmp);
+  const stopId = appendCapture(tmp, "halt execution");
+  const btId = appendCapture(tmp, "go back to M003");
+  markCaptureResolved(tmp, stopId, "stop", "Halt", "User stop", "M005");
+  markCaptureResolved(tmp, btId, "backtrack", "Backtrack to M003", "User backtrack", "M005");
+  const bts = loadBacktrackCaptures(tmp);
+  assert.equal(bts.length, 1);
+  assert.equal(bts[0].classification, "backtrack");
+  rmSync(tmp, { recursive: true, force: true });
+});
+// ─── revertExecutorResolvedCaptures ───────────────────────────────────────────
+test("revertExecutorResolvedCaptures reverts captures resolved without classification", () => {
+  const tmp = makeTempDir("revert-exec");
+  setupGsdDir(tmp);
+  const id = appendCapture(tmp, "stop everything");
+  // Simulate an executor writing Status: resolved directly (no classification)
+  const capPath = join(tmp, ".gsd", "CAPTURES.md");
+  let content = readFileSync(capPath, "utf-8");
+  content = content.replace("**Status:** pending", "**Status:** resolved");
+  writeFileSync(capPath, content, "utf-8");
+  // Verify it's now "resolved" without classification
+  assert.equal(hasPendingCaptures(tmp), false);
+  // Revert should detect and fix it
+  const reverted = revertExecutorResolvedCaptures(tmp);
+  assert.equal(reverted, 1);
+  // Should be pending again
+  assert.equal(hasPendingCaptures(tmp), true);
+  rmSync(tmp, { recursive: true, force: true });
+});
+test("revertExecutorResolvedCaptures does NOT revert properly triaged captures", () => {
+  const tmp = makeTempDir("revert-skip");
+  setupGsdDir(tmp);
+  const id = appendCapture(tmp, "restart from M003");
+  markCaptureResolved(tmp, id, "backtrack", "Backtrack to M003", "User wants restart", "M005");
+  // This capture was properly triaged — should NOT be reverted
+  const reverted = revertExecutorResolvedCaptures(tmp);
+  assert.equal(reverted, 0);
+  rmSync(tmp, { recursive: true, force: true });
+});
+// ─── executeBacktrack ─────────────────────────────────────────────────────────
+test("executeBacktrack writes trigger and regression markers", () => {
+  const tmp = makeTempDir("exec-bt");
+  setupGsdDir(tmp);
+  // Create target milestone directory
+  mkdirSync(join(tmp, ".gsd", "milestones", "M003"), { recursive: true });
+  const targetMid = executeBacktrack(tmp, "M005", {
+    id: "CAP-test123",
+    text: "restart from M003 — milestones after 2 failed",
+    timestamp: new Date().toISOString(),
+    status: "resolved",
+    classification: "backtrack",
+    resolution: "Backtrack to M003",
+    rationale: "User directive",
+  });
+  assert.equal(targetMid, "M003");
+  // Check trigger file exists
+  const triggerPath = join(tmp, ".gsd", "BACKTRACK-TRIGGER.md");
+  assert.ok(existsSync(triggerPath));
+  const triggerContent = readFileSync(triggerPath, "utf-8");
+  assert.ok(triggerContent.includes("M005"));
+  assert.ok(triggerContent.includes("M003"));
+  // Check regression marker exists on target milestone
+  const regressionPath = join(tmp, ".gsd", "milestones", "M003", "M003-REGRESSION.md");
+  assert.ok(existsSync(regressionPath));
+  const regressionContent = readFileSync(regressionPath, "utf-8");
+  assert.ok(regressionContent.includes("M005"));
+  rmSync(tmp, { recursive: true, force: true });
+});
+// ─── readBacktrackTrigger ─────────────────────────────────────────────────────
+test("readBacktrackTrigger parses trigger file", () => {
+  const tmp = makeTempDir("read-bt");
+  setupGsdDir(tmp);
+  mkdirSync(join(tmp, ".gsd", "milestones", "M003"), { recursive: true });
+  executeBacktrack(tmp, "M005", {
+    id: "CAP-abc",
+    text: "go back to M003",
+    timestamp: new Date().toISOString(),
+    status: "resolved",
+    classification: "backtrack",
+    resolution: "Backtrack to M003",
+    rationale: "Regression",
+  });
+  const trigger = readBacktrackTrigger(tmp);
+  assert.ok(trigger);
+  assert.equal(trigger.target, "M003");
+  assert.equal(trigger.from, "M005");
+  rmSync(tmp, { recursive: true, force: true });
+});
+test("readBacktrackTrigger returns null when no trigger exists", () => {
+  const tmp = makeTempDir("no-bt");
+  setupGsdDir(tmp);
+  const trigger = readBacktrackTrigger(tmp);
+  assert.equal(trigger, null);
+  rmSync(tmp, { recursive: true, force: true });
+});
+// ─── Slice Skip Status (#3477) ──────────────────────────────────────────────
+test("isClosedStatus treats 'skipped' as closed", () => {
+  assert.equal(isClosedStatus("skipped"), true);
+  assert.equal(isClosedStatus("complete"), true);
+  assert.equal(isClosedStatus("done"), true);
+  assert.equal(isClosedStatus("pending"), false);
+  assert.equal(isClosedStatus("active"), false);
+});

package/src/resources/extensions/gsd/tests/tool-naming.test.ts CHANGED

@@ -45,7 +45,7 @@ console.log('\n── Tool naming: registration count ──');
 const pi = makeMockPi();
 registerDbTools(pi);
-assert.deepStrictEqual(pi.tools.length, 29, 'Should register exactly 29 tools (14 canonical + 14 aliases + 1 gate tool)');
+assert.deepStrictEqual(pi.tools.length, 30, 'Should register exactly 30 tools (14 canonical + 14 aliases + 1 gate tool + 1 gsd_skip_slice)');
 // ─── Both names exist for each pair ──────────────────────────────────────────