npm - gsd-pi - Versions diffs - 2.78.1-dev.e9d88a536 → 2.78.1-dev.eccf86e27 - Mend

gsd-pi 2.78.1-dev.e9d88a536 → 2.78.1-dev.eccf86e27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (212) hide show

package/src/resources/extensions/gsd/metrics.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+// GSD-2 + metrics.ts: token & cost tracking for auto-mode units
 /**
  * GSD Metrics — Token & Cost Tracking
  *
@@ -14,6 +15,7 @@
  */
 import { join } from "node:path";
+import { openSync, closeSync, unlinkSync, statSync, writeFileSync } from "node:fs";
 import type { ExtensionContext } from "@gsd/pi-coding-agent";
 import { gsdRoot } from "./paths.js";
 import { getAndClearSkills } from "./skill-telemetry.js";
@@ -21,6 +23,8 @@ import { loadJsonFile, loadJsonFileOrNull, saveJsonFile } from "./json-persisten
 import { parseUnitId } from "./unit-id.js";
 import { buildAuditEnvelope, emitUokAuditEvent } from "./uok/audit.js";
 import { isUnifiedAuditEnabled } from "./uok/audit-toggle.js";
+import type { MilestoneScope } from "./workspace.js";
+import { logWarning } from "./workflow-logger.js";
 // Re-export from shared — import directly from format-utils to avoid pulling
 // in the full barrel (mod.js → ui.js → @gsd/pi-tui) which breaks when loaded
@@ -108,11 +112,17 @@ export function classifyUnitPhase(unitType: string): MetricsPhase {
 let ledger: MetricsLedger | null = null;
 let basePath: string = "";
+// Per-workspace ledger map, keyed by workspace.identityKey.
+// Populated by initMetricsByScope; independent of the module singleton.
+const scopedLedgers = new Map<string, MetricsLedger>();
 // ─── Public API ───────────────────────────────────────────────────────────────
 /**
  * Initialize the metrics system for a given project.
  * Loads existing ledger from disk if present.
+ *
+ * @deprecated TODO(C-future): remove module singleton. Use initMetricsByScope instead.
  */
 export function initMetrics(base: string): void {
   basePath = base;
@@ -121,6 +131,8 @@ export function initMetrics(base: string): void {
 /**
  * Reset in-memory state. Called when auto-mode stops.
+ *
+ * @deprecated TODO(C-future): remove module singleton. Use resetMetricsByScope instead.
  */
 export function resetMetrics(): void {
   ledger = null;
@@ -130,6 +142,8 @@ export function resetMetrics(): void {
 /**
  * Snapshot usage metrics from the current session before it's wiped.
  * Scans session entries for AssistantMessage usage data.
+ *
+ * @deprecated TODO(C-future): remove module singleton. Use snapshotUnitMetricsByScope instead.
  */
 export function snapshotUnitMetrics(
   ctx: ExtensionContext,
@@ -272,6 +286,182 @@ export function getLedger(): MetricsLedger | null {
   return ledger;
 }
+// ─── Scope-aware API (canonical) ─────────────────────────────────────────────
+/**
+ * Initialize the metrics system for a given workspace scope.
+ * Loads existing ledger from disk into the per-scope ledger map.
+ * Does NOT touch the module-level singleton.
+ */
+export function initMetricsByScope(scope: MilestoneScope): void {
+  const base = scope.workspace.projectRoot;
+  const loaded = loadLedger(base);
+  scopedLedgers.set(scope.workspace.identityKey, loaded);
+}
+/**
+ * Get the in-memory ledger for the given scope, or null if not initialized.
+ */
+export function getLedgerByScope(scope: MilestoneScope): MetricsLedger | null {
+  return scopedLedgers.get(scope.workspace.identityKey) ?? null;
+}
+/**
+ * Reset scoped in-memory state for a workspace. Called when auto-mode stops.
+ */
+export function resetMetricsByScope(scope: MilestoneScope): void {
+  scopedLedgers.delete(scope.workspace.identityKey);
+}
+/**
+ * Snapshot usage metrics using an explicit workspace scope.
+ *
+ * This is the canonical variant. It derives the metrics path from
+ * scope.workspace.projectRoot rather than the module singleton, so it
+ * remains correct across session resume and in multi-workspace processes.
+ *
+ * Preserves the atomic write-merge logic from saveLedger so concurrent
+ * workers cannot silently discard each other's entries.
+ *
+ * If initMetricsByScope has not been called, the ledger is loaded from
+ * disk on first call (lazy init).
+ */
+export function snapshotUnitMetricsByScope(
+  scope: MilestoneScope,
+  ctx: ExtensionContext,
+  unitType: string,
+  unitId: string,
+  startedAt: number,
+  model: string,
+  opts?: {
+    tier?: string;
+    modelDowngraded?: boolean;
+    contextWindowTokens?: number;
+    truncationSections?: number;
+    continueHereFired?: boolean;
+    promptCharCount?: number;
+    baselineCharCount?: number;
+    autoSessionKey?: string;
+    traceId?: string;
+    turnId?: string;
+    causedBy?: string;
+  },
+): UnitMetrics | null {
+  const base = scope.workspace.projectRoot;
+  const key = scope.workspace.identityKey;
+  // Lazy init: load from disk if not yet in scoped map.
+  if (!scopedLedgers.has(key)) {
+    scopedLedgers.set(key, loadLedger(base));
+  }
+  const scopedLedger = scopedLedgers.get(key)!;
+  const entries = ctx.sessionManager.getEntries();
+  if (!entries || entries.length === 0) return null;
+  const tokens: TokenCounts = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
+  let cost = 0;
+  let toolCalls = 0;
+  let assistantMessages = 0;
+  let userMessages = 0;
+  for (const entry of entries) {
+    if (entry.type !== "message") continue;
+    const msg = (entry as any).message;
+    if (!msg) continue;
+    if (msg.role === "assistant") {
+      assistantMessages++;
+      if (msg.usage) {
+        tokens.input += msg.usage.input ?? 0;
+        tokens.output += msg.usage.output ?? 0;
+        tokens.cacheRead += msg.usage.cacheRead ?? 0;
+        tokens.cacheWrite += msg.usage.cacheWrite ?? 0;
+        tokens.total += msg.usage.totalTokens ?? 0;
+        if (msg.usage.cost != null) {
+          const c = msg.usage.cost;
+          cost += typeof c === "number" ? c : (c.total ?? 0);
+        }
+      }
+      if (msg.content && Array.isArray(msg.content)) {
+        for (const block of msg.content) {
+          if (block.type === "toolCall") toolCalls++;
+        }
+      }
+    } else if (msg.role === "user") {
+      userMessages++;
+    }
+  }
+  const unit: UnitMetrics = {
+    type: unitType,
+    id: unitId,
+    model,
+    startedAt,
+    finishedAt: Date.now(),
+    ...(opts?.autoSessionKey ? { autoSessionKey: opts.autoSessionKey } : {}),
+    tokens,
+    cost,
+    toolCalls,
+    assistantMessages,
+    userMessages,
+    apiRequests: assistantMessages,
+    ...(opts?.tier ? { tier: opts.tier } : {}),
+    ...(opts?.modelDowngraded !== undefined ? { modelDowngraded: opts.modelDowngraded } : {}),
+    ...(opts?.contextWindowTokens !== undefined ? { contextWindowTokens: opts.contextWindowTokens } : {}),
+    ...(opts?.truncationSections !== undefined ? { truncationSections: opts.truncationSections } : {}),
+    ...(opts?.continueHereFired !== undefined ? { continueHereFired: opts.continueHereFired } : {}),
+    ...(opts?.promptCharCount != null ? { promptCharCount: opts.promptCharCount } : {}),
+    ...(opts?.baselineCharCount != null ? { baselineCharCount: opts.baselineCharCount } : {}),
+  };
+  // Auto-capture skill telemetry (#599)
+  const skills = getAndClearSkills();
+  if (skills.length > 0) {
+    unit.skills = skills;
+  }
+  // Compute cache hit rate
+  if (tokens.cacheRead > 0 || tokens.input > 0) {
+    const totalInput = tokens.cacheRead + tokens.input;
+    unit.cacheHitRate = totalInput > 0 ? Math.round((tokens.cacheRead / totalInput) * 100) : 0;
+  }
+  // Idempotency guard: update in-place on duplicate, append otherwise.
+  const dupeIdx = scopedLedger.units.findIndex(
+    (u) => u.type === unit.type && u.id === unit.id && u.startedAt === unit.startedAt,
+  );
+  if (dupeIdx >= 0) {
+    scopedLedger.units[dupeIdx] = unit;
+  } else {
+    scopedLedger.units.push(unit);
+  }
+  saveLedger(base, scopedLedger);
+  if (isUnifiedAuditEnabled()) {
+    emitUokAuditEvent(
+      base,
+      buildAuditEnvelope({
+        traceId: opts?.traceId ?? `metrics:${unitType}:${unitId}`,
+        turnId: opts?.turnId,
+        causedBy: opts?.causedBy,
+        category: "metrics",
+        type: "unit-metrics-snapshot",
+        payload: {
+          unitType,
+          unitId,
+          model,
+          tokens: unit.tokens,
+          cost: unit.cost,
+          toolCalls: unit.toolCalls,
+        },
+      }),
+    );
+  }
+  return unit;
+}
 // ─── Aggregation helpers ──────────────────────────────────────────────────────
 export interface PhaseAggregate {
@@ -593,6 +783,12 @@ export function pruneMetricsLedger(base: string, keepCount: number): number {
   if (ledger) {
     ledger.units = ledger.units.slice(-keepCount);
   }
+  // Invalidate all scoped ledger cache entries. Prune is rare; clearing the
+  // entire map is simpler than tracking which entry belongs to `base`. Without
+  // this, scopedLedgers entries for the pruned workspace hold a pre-prune
+  // MetricsLedger that snapshotUnitMetricsByScope would merge back in, causing
+  // pruned units to reappear in subsequent snapshots.
+  scopedLedgers.clear();
   return removed;
 }
@@ -635,6 +831,130 @@ function deduplicateUnits(units: UnitMetrics[]): UnitMetrics[] {
   return Array.from(map.values());
 }
+// How long a lock file must be untouched (in ms) before it is considered
+// orphaned from a crashed process. Set to 2× the acquire timeout.
+export const STALE_LOCK_THRESHOLD_MS = 4000;
+// Retry interval between lock acquire attempts (ms). With the default 2s
+// timeout this allows at most ~400 attempts (2000 ms / 5 ms) instead of the
+// ~20,000 estimated for a loop without any sleep. Exposed for tests.
+export const LOCK_RETRY_INTERVAL_MS = 5;
+// Sync sleep via Atomics.wait — true OS-level sleep, no CPU spin.
+// Int32Array must reference a SharedArrayBuffer; we wait on index 0 which
+// will never be woken by an Atomics.notify, so the wait always times out.
+const _lockSleepBuf = new Int32Array(new SharedArrayBuffer(4));
+function syncSleep(ms: number): void {
+  Atomics.wait(_lockSleepBuf, 0, 0, ms);
+}
+// Counts the number of sleepy retries (non-stale-evicting) made by acquireLock
+// across all calls since the last reset. Exported for test instrumentation only.
+let _lockSleepyRetries = 0;
+export function getLockSleepyRetries(): number { return _lockSleepyRetries; }
+export function resetLockSleepyRetries(): void { _lockSleepyRetries = 0; }
+/**
+ * Acquire an exclusive .lock sentinel file via O_EXCL.
+ *
+ * Improvements over the original:
+ *  - No busy spin: the inner `while (Date.now() < waitUntil) {}` spin that
+ *    burned CPU doing nothing useful is removed. Each failed attempt now makes
+ *    one `openSync` call plus a `statSync` staleness check, then sleeps (see
+ *    "Retry sleep" below) before re-checking the deadline.
+ *  - Stale-lock detection: if the existing lock file's mtime is older than
+ *    STALE_LOCK_THRESHOLD_MS, the lock is considered orphaned (e.g. the
+ *    writing process crashed) and is forcibly removed before retrying.
+ *    A warning is logged so operators can detect crash patterns.
+ *  - PID stamp: on success, writes the acquiring process's PID and a
+ *    timestamp into the lock file so external monitors can identify orphans.
+ *  - Retry sleep: after each non-stale-evicting retry, sleeps
+ *    LOCK_RETRY_INTERVAL_MS (5ms) via Atomics.wait so the process yields to
+ *    the OS. This caps syscall rate at ~200–400/s under contention instead of
+ *    the ~20,000/s that would result from a tight openSync loop.
+ *    After a stale-lock eviction (lock already removed), no sleep is injected
+ *    — we retry immediately to close the short race window.
+ *
+ * Returns true on success, false on timeout.
+ */
+function acquireLock(lockPath: string, timeoutMs = 2000): boolean {
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    try {
+      const fd = openSync(lockPath, "wx"); // O_WRONLY | O_CREAT | O_EXCL
+      closeSync(fd);
+      // Write PID stamp so external monitors can identify the lock owner.
+      try {
+        writeFileSync(lockPath, `${process.pid}\n${new Date().toISOString()}\n`, "utf-8");
+      } catch { /* non-fatal — stamp is diagnostic only */ }
+      return true;
+    } catch {
+      // Lock held by another process — check for staleness before retrying.
+      try {
+        const st = statSync(lockPath);
+        if (Date.now() - st.mtimeMs > STALE_LOCK_THRESHOLD_MS) {
+          logWarning(
+            "fs",
+            `stale metrics lock at ${lockPath} (age ${Date.now() - st.mtimeMs}ms); forcibly removing and retrying`,
+          );
+          try { unlinkSync(lockPath); } catch { /* already gone */ }
+          // Do NOT sleep after stale-lock eviction — retry the open
+          // immediately. The lock file was just removed; a short race window
+          // exists and sleeping here would unnecessarily delay recovery.
+          continue;
+        }
+      } catch { /* lock file disappeared between the failed open and stat — retry */ }
+      // Sleep between retries to yield to the OS and cap syscall rate.
+      // Uses Atomics.wait for a true blocking sleep (no CPU spin).
+      _lockSleepyRetries++;
+      syncSleep(LOCK_RETRY_INTERVAL_MS);
+    }
+  }
+  return false;
+}
+function releaseLock(lockPath: string): void {
+  try { unlinkSync(lockPath); } catch { /* ignore */ }
+}
+/**
+ * Save the ledger with cross-process merge semantics.
+ *
+ * Acquires a .lock sentinel file, reads the current on-disk ledger,
+ * merges worker units with existing peer units (worker's entry wins on
+ * type+id+startedAt conflict since it has the latest finishedAt),
+ * then writes atomically. This prevents parallel auto-mode workers from
+ * silently discarding each other's metrics entries.
+ *
+ * Falls back to a direct write (no merge) if the lock cannot be acquired
+ * within the timeout — better to potentially overwrite than to lose data
+ * entirely.
+ */
 function saveLedger(base: string, data: MetricsLedger): void {
-  saveJsonFile(metricsPath(base), data);
+  const path = metricsPath(base);
+  const lockPath = `${path}.lock`;
+  const acquired = acquireLock(lockPath);
+  if (acquired) {
+    try {
+      // Read current on-disk state and merge with worker's in-memory units.
+      // Worker units take precedence on conflict (by finishedAt in deduplicateUnits).
+      const onDisk = loadJsonFileOrNull(path, isMetricsLedger);
+      if (onDisk && onDisk.units.length > 0) {
+        const merged = deduplicateUnits([...onDisk.units, ...data.units]);
+        saveJsonFile(path, { ...data, units: merged });
+      } else {
+        saveJsonFile(path, data);
+      }
+    } finally {
+      releaseLock(lockPath);
+    }
+  } else {
+    // Lock could not be acquired within the timeout. Fall back to a direct
+    // write (no cross-process merge) so this worker's data is not lost
+    // entirely. Without the lock this write can overwrite entries that a
+    // concurrent writer has just merged in, and a concurrent writer may in
+    // turn overwrite ours; that is accepted as better than dropping the data.
+    logWarning("fs", "saveLedger: lock not acquired — falling back to direct write (no merge)");
+    saveJsonFile(path, data);
+  }
 }

package/src/resources/extensions/gsd/paths.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+// GSD-2 — ID-based path resolution for GSD project files and directories
 /**
  * GSD Paths — ID-based path resolution
  *
@@ -128,9 +129,15 @@ function cachedReaddir(dirPath: string): string[] {
 }
 /**
- * Clear the directory listing cache.
+ * Clear the volatile directory listing caches.
  * Call after milestone transitions, file creation in planning directories,
  * or at the start/end of a dispatch cycle.
+ *
+ * NOTE: This does NOT clear gsdRootCache. The project root is stable for
+ * the lifetime of a process; clearing it on every agent turn-end caused a
+ * 250–2500 ms regression per session (git rev-parse + dir walk per turn).
+ * Use _clearGsdRootCache() at session-reset boundaries (workspace switch,
+ * process exit) when the project root may genuinely change.
  */
 export function clearPathCache(): void {
   dirEntryCache.clear();
@@ -285,6 +292,14 @@ const LEGACY_GSD_ROOT_FILES: Record<GSDRootFileKey, string> = {
 // ─── GSD Root Discovery ───────────────────────────────────────────────────────
+// Process-lifetime cache for gsdRoot() results.
+// Keys are realpath-normalized (via normCacheKey) so /foo and /foo/ share the
+// same entry and so do case-variant paths on case-insensitive volumes. This
+// normalization is the safety net that prevents cache poisoning from the
+// ~/.gsd walk-up bug (fixed in c46cf4786 + b35e070eb), making it safe to
+// hold this cache for the entire process lifetime.
+// Use _clearGsdRootCache() only at session-reset boundaries (workspace switch,
+// process exit) — NOT inside clearPathCache(), which runs on every agent turn.
 const gsdRootCache = new Map<string, string>();
 export interface GsdPathContract {
@@ -337,11 +352,37 @@ export function resolveGsdPathContract(
   };
 }
-/** Exported for tests only — do not call in production code. */
+/**
+ * Invalidate the gsdRoot cache.
+ * Use ONLY at session-reset boundaries: workspace switch, process exit, or
+ * any context where the project root itself may genuinely change.
+ * Do NOT call this on every agent turn — use clearPathCache() for volatile
+ * directory listing invalidation instead.
+ */
 export function _clearGsdRootCache(): void {
   gsdRootCache.clear();
 }
+/**
+ * Resolve a path to its canonical real path using the native resolver.
+ * On macOS case-insensitive (HFS+/APFS) volumes, realpathSync.native normalizes
+ * case — ensuring that /foo/Bar and /foo/bar resolve to the same string.
+ * Falls back to resolve(p) for non-existent paths.
+ *
+ * Use this helper everywhere a path is used as an identity/cache key so that
+ * all callers agree on the canonical form.
+ */
+export function normalizeRealPath(p: string): string {
+  try { return realpathSync.native(p); } catch { return resolve(p); }
+}
+/** Normalize a path for use as a gsdRootCache key (realpath, forward slashes, no trailing slash, lowercased on win32). */
+function normCacheKey(p: string): string {
+  const r = normalizeRealPath(p);
+  const s = r.replaceAll("\\", "/").replace(/\/+$/, "");
+  return process.platform === "win32" ? s.toLowerCase() : s;
+}
 /**
  * Resolve the `.gsd` directory for a given project base path.
  *
@@ -351,13 +392,19 @@ export function _clearGsdRootCache(): void {
  *   3. Walk up from basePath — handles moved .gsd in an ancestor (bounded by git root)
  *   4. basePath/.gsd         — creation fallback (init scenario)
  *
- * Result is cached per basePath for the process lifetime.
+ * Result is cached per normalized basePath for the process lifetime.
+ * Keys are realpath-normalized so /foo and /foo/ share the same cache entry.
  */
 export function gsdRoot(basePath: string): string {
-  const cached = gsdRootCache.get(basePath);
+  const cacheKey = normCacheKey(basePath);
+  const cached = gsdRootCache.get(cacheKey);
   if (cached) return cached;
-  const result = probeGsdRoot(basePath);
+  // Canonicalize result via realpath before asserting and caching so that
+  // callers always receive a canonical path regardless of whether probeGsdRoot
+  // returned a path through a symlink. Without this, the cached value can
+  // diverge from other realpath-normalized paths (e.g. workspace.identityKey).
+  const result = normalizeRealPath(probeGsdRoot(basePath));
   // Defense-in-depth: if basePath resolves to the user's home directory and
   // the result equals gsdHome(), refuse — project-scoped writes must never
@@ -365,7 +412,7 @@ export function gsdRoot(basePath: string): string {
   // valid (their basePath does not equal homedir).
   assertNotGlobalGsdHome(basePath, result);
-  gsdRootCache.set(basePath, result);
+  gsdRootCache.set(cacheKey, result);
   return result;
 }
@@ -466,9 +513,21 @@ function probeGsdRoot(rawBasePath: string): string {
     }
   } catch { /* git not available */ }
+  // Compute gsdHome once for the skip-check used in steps 2 and 3.
+  const normPath = (p: string): string => {
+    let r: string;
+    try { r = realpathSync.native(p); } catch { r = p; }
+    const s = r.replaceAll("\\", "/").replace(/\/+$/, "");
+    return process.platform === "win32" ? s.toLowerCase() : s;
+  };
+  let gsdHomeNorm: string;
+  try { gsdHomeNorm = normPath(gsdHome()); } catch { gsdHomeNorm = ""; }
   if (gitRoot) {
     const candidate = join(gitRoot, ".gsd");
-    if (existsSync(candidate)) return candidate;
+    // Skip if the candidate resolves to the global GSD home — a subdir basePath
+    // must not be anchored to ~/.gsd just because $HOME is a git repo.
+    if (existsSync(candidate) && normPath(candidate) !== gsdHomeNorm) return candidate;
   }
   // 3. Walk up from basePath to the git root (only if we are in a subdirectory)
@@ -476,7 +535,7 @@ function probeGsdRoot(rawBasePath: string): string {
     let cur = dirname(basePath);
     while (cur !== basePath) {
       const candidate = join(cur, ".gsd");
-      if (existsSync(candidate)) return candidate;
+      if (existsSync(candidate) && normPath(candidate) !== gsdHomeNorm) return candidate;
       if (cur === gitRoot) break;
       basePath = cur;
       cur = dirname(cur);

package/src/resources/extensions/gsd/prompts/complete-slice.md CHANGED Viewed

@@ -28,7 +28,7 @@ This unit runs under the `planning-dispatch` tools-policy: you may use the `suba
 - **Touched auth, network, parsing, file IO, shell exec, or crypto** → dispatch the **security** agent for an OWASP-style audit.
 - **Added or modified tests** → dispatch the **tester** agent to assess coverage gaps relative to the slice plan.
-Subagents read the diff and report findings — they do **not** write user source. You remain responsible for acting on their feedback before calling `gsd_complete_slice` with `milestoneId` and `sliceId`.
+Subagents read the diff and report findings — they do **not** write user source. You remain responsible for acting on their feedback before calling `gsd_slice_complete` with `milestoneId` and `sliceId`.
 Then:
 1. Use the **Slice Summary** and **UAT** output templates from the inlined context above
@@ -37,11 +37,11 @@ Then:
 4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections.
 5. Address every gate listed in the **Gates to Close** section above — each gate maps to a specific slice-summary section the handler inspects (for example, Q8 maps to **Operational Readiness**: health signal, failure signal, recovery procedure, and monitoring gaps). Leaving a section empty records the gate as `omitted`.
 6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `gsd_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.gsd/REQUIREMENTS.md` directly — the engine renders it from the database.
-7. Prepare the slice completion content you will pass to `gsd_complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts.
+7. Prepare the slice completion content you will pass to `gsd_slice_complete` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts.
 8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built. Fill the `UAT Type` and `Not Proven By This UAT` sections explicitly so the artifact states what class of acceptance it covers and what still remains unproven (e.g. live integration paths, performance under load, scenarios deferred to a later slice).
 9. Review task summaries for `key_decisions`. For each significant decision, call `capture_thought` with `category: "architecture"` (or `"pattern"`) and a `structuredFields` payload of `{ scope, decision, choice, rationale, made_by: "agent", revisable }`.
 10. Review task summaries for patterns, gotchas, or non-obvious lessons learned. For each one that would save future agents from repeating investigation, call `capture_thought` with the matching category (`gotcha`, `convention`, `pattern`, `environment`). The memory store is the single source of truth (ADR-013); do not append to `.gsd/DECISIONS.md` or `.gsd/KNOWLEDGE.md` directly.
-11. Call `gsd_complete_slice` with the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`, plus any optional enrichment fields you have. Do NOT manually mark the roadmap checkbox — the tool writes to the DB, renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}`, and updates the ROADMAP.md projection automatically.
+11. Call `gsd_slice_complete` with the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`, plus any optional enrichment fields you have. Do NOT manually mark the roadmap checkbox — the tool writes to the DB, renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}`, and updates the ROADMAP.md projection automatically.
 12. Do not run git commands — the system commits your changes and handles any merge after this unit succeeds.
 13. Update `.gsd/PROJECT.md` if it exists — refresh current state if needed: use the `write` tool with `path: ".gsd/PROJECT.md"` and `content` containing the full updated document reflecting current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
@@ -49,6 +49,6 @@ Then:
 **File system safety:** Task summaries are preloaded in the inlined context above. Task artifacts use a **flat file layout** — files such as `T01-SUMMARY.md` and `T02-SUMMARY.md` live directly inside the `tasks/` directory, not inside per-task subdirectories like `tasks/T01/SUMMARY.md`. If you need to re-read any of them, use `find .gsd/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first. Never use `tasks/*/SUMMARY.md`, and never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories.
-**You MUST call `gsd_complete_slice` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.**
+**You MUST call `gsd_slice_complete` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.**
 When done, say: "Slice {{sliceId}} complete."

package/src/resources/extensions/gsd/prompts/execute-task.md CHANGED Viewed

@@ -85,14 +85,14 @@ Then:
 17. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, call `capture_thought` with `category: "architecture"` (or `"pattern"`). For decisions, populate `structuredFields` with `{ scope, decision, choice, rationale, made_by: "agent", revisable }` so future projection back to a human-visible decisions register stays lossless. Not every task produces decisions — only capture when a meaningful choice was made.
 18. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, call `capture_thought` with `category: "gotcha"`, `"convention"`, `"pattern"`, or `"environment"` as appropriate. Only capture entries that would save future agents from repeating your investigation — don't capture obvious things. The memory store is the single source of truth for cross-session knowledge (ADR-013); do not append to `.gsd/DECISIONS.md` or `.gsd/KNOWLEDGE.md` directly.
 19. Read the template at `~/.gsd/agent/extensions/gsd/templates/task-summary.md`
-20. Use that template to prepare the completion content you will pass to `gsd_complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you.
-21. Call `gsd_complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically.
+20. Use that template to prepare the completion content you will pass to `gsd_task_complete` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you.
+21. Call `gsd_task_complete` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically.
 22. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message.
 All work stays in your working directory: `{{workingDirectory}}`.
 **Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in auto-mode — there is no human available to answer questions. Make reasonable assumptions and document them in the task summary. If a decision genuinely requires human input, note it in the summary and proceed with the best available option.
-**You MUST call `gsd_complete_task` before finishing. Do not manually write `{{taskSummaryPath}}`.**
+**You MUST call `gsd_task_complete` before finishing. Do not manually write `{{taskSummaryPath}}`.**
 When done, say: "Task {{taskId}} complete."

package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md CHANGED Viewed

@@ -12,6 +12,13 @@ Discuss milestone {{milestoneId}} ("{{milestoneTitle}}"). Identify gray areas, a
 {{fastPathInstruction}}
+### Read project shape
+Before your first question round, read `.gsd/PROJECT.md` and look for `## Project Shape` → `**Complexity:**`. The verdict is either **`simple`** or **`complex`** (default to `complex` if PROJECT.md is missing the section, predates this convention, or the value is unclear). The verdict scales the rest of this stage:
+- **`simple`** — favor 1–2 plain-text question rounds. Skip the parallel-research investigation. Skip `ask_user_questions` unless presenting concrete alternatives.
+- **`complex`** — full investigation, 3–4-option structured questions, multi-round.
 ### Before your first question round
 Do a lightweight targeted investigation so your questions are grounded in reality:
@@ -36,7 +43,7 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
 **Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing.
-**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
+**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). In **`complex`** mode, each multi-choice question MUST present **3 or 4 concrete, researched options** plus a final **"Other — let me discuss"** option; options must be grounded in the investigation above (codebase signals, library docs, prior `.gsd/` artifacts), not generic placeholders. In **`simple`** mode, 2 options are fine when the alternatives are genuinely binary. Binary depth-check / wrap-up gates are exempt from the 3-or-4 rule. When the user picks "Other — let me discuss" or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
 **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.

package/src/resources/extensions/gsd/prompts/guided-discuss-project.md CHANGED Viewed

@@ -26,6 +26,18 @@ Ask the user a single freeform question in plain text, not structured: **"What d
 Wait for their response. This grounds every follow-up in their own terminology.
+### Classify project shape
+After the opening answer, classify the project as **`simple`** or **`complex`** before continuing. Print the verdict in chat as one line: `Project shape: simple` or `Project shape: complex` followed by a one-line rationale.
+**`simple`** — most of these apply: single primary user (the user themselves or one immediate team), no external integrations beyond well-known SDKs/libs, greenfield or self-contained, scope describable in 1–2 sentences without ambiguity, no compliance/regulatory needs, ≤5 distinct capabilities.
+**`complex`** — any of these apply: multi-user with roles/permissions, non-trivial brownfield codebase, external integrations with auth/data exchange, compliance/security/regulated domain (PII, payments, healthcare), >5 capabilities or unclear scope, cross-team/cross-org coordination, novel domain where assumptions need validation.
+**Default to `complex` when uncertain.** The user can override the verdict in plain text; if they do, accept it and proceed.
+The verdict drives the rest of this stage and gets persisted to PROJECT.md → `## Project Shape`. Downstream stages (`discuss-requirements`, `discuss-milestone`, `discuss-slice`) read it from there.
 ### Before deeper rounds
 Do a lightweight targeted investigation so your questions are grounded in reality:
@@ -50,9 +62,11 @@ Ask **1–3 questions per round**. Each round targets one of:
 **Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing.
-**Plain-text default:** Project discovery is open-ended. Ask question rounds in plain text unless you are presenting 2–3 concrete alternatives with clear tradeoffs.
+**Cadence is shape-dependent:**
+- **`simple`** — favor 1–2 plain-text rounds. Skip `ask_user_questions` unless you are presenting concrete alternatives. Get to the depth checklist fast.
+- **`complex`** — full investigation, multiple rounds, structured questions when meaningful alternatives exist.
-**If `{{structuredQuestionsAvailable}}` is `true` and you use `ask_user_questions`:** ask 1–3 questions per call. Every question object MUST include a stable lowercase `id`. Keep option labels short (3–5 words). Do not add a separate "Other" option; the question UI provides a freeform path automatically. Wait for each tool result before asking the next round.
+**If `{{structuredQuestionsAvailable}}` is `true` and you use `ask_user_questions`:** ask 1–3 questions per call. Every question object MUST include a stable lowercase `id`. Keep option labels short (3–5 words). In **`complex`** mode, each multi-choice question MUST present **3 or 4 concrete, researched options** plus a final **"Other — let me discuss"** option; options must be grounded in your investigation (codebase signals, library docs, prior `.gsd/` artifacts), not generic placeholders. In **`simple`** mode, 2 options are fine. Binary depth-check / wrap-up gates are exempt from the 3-or-4 rule. Wait for each tool result before asking the next round.
 **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions.
@@ -126,8 +140,9 @@ Once the user confirms depth:
 1. Use the **Project** output template (inlined above).
 2. Call `gsd_summary_save` with `artifact_type: "PROJECT"` and the full project markdown as `content`; omit `milestone_id`. The tool writes `.gsd/PROJECT.md` to disk and persists to DB. Preserve the user's exact terminology, emphasis, and framing.
-3. The `## Capability Contract` section MUST reference `.gsd/REQUIREMENTS.md` — that file does not yet exist; the next stage (`discuss-requirements`) will produce it.
-4. The `## Milestone Sequence` MUST list at least M001 with title and one-liner. Subsequent milestones may be listed as known intents; they will be elaborated in their own discuss-milestone stages.
-5. Do NOT use `artifact_type: "CONTEXT"` and do NOT pass `milestone_id: "PROJECT"`; that creates a fake milestone named PROJECT.
-6. {{commitInstruction}}
-7. Say exactly: `"Project context written."` — nothing else.
+3. The `## Project Shape` section MUST contain `**Complexity:** simple` or `**Complexity:** complex` (matching the verdict you announced) plus a one-line `**Why:**` rationale. Downstream stages read this line.
+4. The `## Capability Contract` section MUST reference `.gsd/REQUIREMENTS.md` — that file does not yet exist; the next stage (`discuss-requirements`) will produce it.
+5. The `## Milestone Sequence` MUST list at least M001 with title and one-liner. Subsequent milestones may be listed as known intents; they will be elaborated in their own discuss-milestone stages.
+6. Do NOT use `artifact_type: "CONTEXT"` and do NOT pass `milestone_id: "PROJECT"`; that creates a fake milestone named PROJECT.
+7. {{commitInstruction}}
+8. Say exactly: `"Project context written."` — nothing else.