npm - gsd-pi - Versions diffs - 2.18.0 → 2.19.0 - Mend

gsd-pi 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/dist/resources/extensions/gsd/complexity-classifier.ts ADDED Viewed

@@ -0,0 +1,322 @@
+// GSD Extension — Complexity Classifier
+// Classifies unit complexity for dynamic model routing.
+// Pure heuristics + adaptive learning — no LLM calls. Sub-millisecond classification.
+import { existsSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import { gsdRoot } from "./paths.js";
+import { getAdaptiveTierAdjustment } from "./routing-history.js";
+// ─── Types ───────────────────────────────────────────────────────────────────
+export type ComplexityTier = "light" | "standard" | "heavy"; // ordered light < standard < heavy (see tierOrdinal)
+export interface ClassificationResult {
+  tier: ComplexityTier;   // tier selected for the unit
+  reason: string;         // human-readable explanation of how the tier was chosen
+  downgraded: boolean;   // true if budget pressure lowered the tier
+}
+export interface TaskMetadata {
+  fileCount?: number;           // distinct files listed under "Files:" in the task plan
+  dependencyCount?: number;     // 3 or more classifies the task as heavy; not parsed from the plan by extractTaskMetadata
+  isNewFile?: boolean;          // plan mentions create / new file / scaffold / bootstrap
+  tags?: string[];              // coarse labels derived from plan keywords (refactor, test, docs, config, rename)
+  estimatedLines?: number;      // first "<N> lines" figure found in the plan
+  codeBlockCount?: number;      // number of fenced code blocks in plan
+  complexityKeywords?: string[]; // detected complexity signals
+}
+// ─── Unit Type → Default Tier Mapping ────────────────────────────────────────
+const UNIT_TYPE_TIERS: Record<string, ComplexityTier> = { // unit types not listed here fall back to "standard" in classifyUnitComplexity
+  // Tier 1 — Light: structured summaries, completion, UAT
+  "complete-slice": "light",
+  "run-uat": "light",
+  // Tier 2 — Standard: research, routine planning
+  "research-milestone": "standard",
+  "research-slice": "standard",
+  "plan-milestone": "standard",
+  "plan-slice": "standard",
+  // Tier 3 — Heavy: replanning and roadmap reassessment (requires deep reasoning); execute-task starts lower
+  "execute-task": "standard",   // default standard; classifyUnitComplexity replaces it with the task-analysis result
+  "replan-slice": "heavy",
+  "reassess-roadmap": "heavy",
+};
+// ─── Public API ──────────────────────────────────────────────────────────────
+/**
+ * Classify unit complexity to determine which model tier to use.
+ *
+ * Resolution order:
+ *  1. Hook units ("hook/…") are always "light".
+ *  2. Otherwise start from the unit type's default tier ("standard" if unlisted).
+ *  3. execute-task units take the tier produced by task-metadata analysis.
+ *  4. plan-slice / plan-milestone units may be overridden by plan analysis.
+ *  5. Routing history may raise (never lower) the tier.
+ *  6. Budget pressure may lower the final tier.
+ *
+ * The on-disk task plan is parsed lazily and at most once per call; it is not
+ * touched at all when `metadata` already supplies everything that is needed.
+ *
+ * @param unitType    The type of unit being dispatched
+ * @param unitId      The unit ID (e.g. "M001/S01/T01")
+ * @param basePath    Project base path (for reading task plans)
+ * @param budgetPct   Current budget usage as fraction (0.0-1.0+), or undefined if no budget
+ * @param metadata    Optional pre-parsed task metadata
+ * @returns The selected tier, the reason for it, and whether budget pressure lowered it
+ */
+export function classifyUnitComplexity(
+  unitType: string,
+  unitId: string,
+  basePath: string,
+  budgetPct?: number,
+  metadata?: TaskMetadata,
+): ClassificationResult {
+  // Hook units default to light
+  if (unitType.startsWith("hook/")) {
+    const result: ClassificationResult = { tier: "light", reason: "hook unit", downgraded: false };
+    return applyBudgetPressure(result, budgetPct);
+  }
+  // Memoised reader for the on-disk task plan: both the execute-task analysis and the
+  // adaptive-learning tag lookup may need it, and it should be read only once.
+  let diskMetadata: TaskMetadata | undefined;
+  const loadDiskMetadata = (): TaskMetadata => {
+    if (diskMetadata === undefined) {
+      diskMetadata = extractTaskMetadata(unitId, basePath);
+    }
+    return diskMetadata;
+  };
+  // Start with the default tier for this unit type
+  let tier = UNIT_TYPE_TIERS[unitType] ?? "standard";
+  let reason = `unit type: ${unitType}`;
+  // For execute-task, analyze task metadata for complexity signals
+  if (unitType === "execute-task") {
+    const taskAnalysis = analyzeTaskComplexity(unitId, basePath, metadata ?? loadDiskMetadata());
+    tier = taskAnalysis.tier;
+    reason = taskAnalysis.reason;
+  }
+  // For plan-slice and plan-milestone, let plan analysis override the default tier
+  if (unitType === "plan-slice" || unitType === "plan-milestone") {
+    const planAnalysis = analyzePlanComplexity(unitId, basePath);
+    if (planAnalysis) {
+      tier = planAnalysis.tier;
+      reason = planAnalysis.reason;
+    }
+  }
+  // Adaptive learning: check if history suggests bumping the tier.
+  // Caller-supplied tags win; otherwise fall back to tags parsed from the task plan.
+  const tags = metadata?.tags ?? loadDiskMetadata().tags;
+  const adaptiveAdjustment = getAdaptiveTierAdjustment(unitType, tier, tags);
+  // Only upward adjustments are honoured here; downgrades are left to budget pressure.
+  if (adaptiveAdjustment && tierOrdinal(adaptiveAdjustment) > tierOrdinal(tier)) {
+    reason = `${reason} (adaptive: high failure rate at ${tier})`;
+    tier = adaptiveAdjustment;
+  }
+  const result: ClassificationResult = { tier, reason, downgraded: false };
+  return applyBudgetPressure(result, budgetPct);
+}
+/**
+ * One-letter abbreviation of a tier, used for dashboard display.
+ *
+ * @param tier  Tier to abbreviate
+ * @returns "L" for light, "S" for standard, "H" for heavy
+ */
+export function tierLabel(tier: ComplexityTier): string {
+  const abbreviations: Record<ComplexityTier, string> = {
+    light: "L",
+    standard: "S",
+    heavy: "H",
+  };
+  return abbreviations[tier];
+}
+/**
+ * Numeric rank of a tier so that tiers can be compared with < and >.
+ *
+ * @param tier  Tier to rank
+ * @returns 0 for light, 1 for standard, 2 for heavy
+ */
+export function tierOrdinal(tier: ComplexityTier): number {
+  const ranks: Record<ComplexityTier, number> = {
+    light: 0,
+    standard: 1,
+    heavy: 2,
+  };
+  return ranks[tier];
+}
+// ─── Task Complexity Analysis ────────────────────────────────────────────────
+interface TaskAnalysis {
+  tier: ComplexityTier;
+  reason: string;
+}
+/**
+ * Derive a tier for an execute-task unit from its task metadata.
+ *
+ * Rules are checked top to bottom and the first match wins: heavy signals,
+ * then the single-keyword standard signal, then light signals, and finally
+ * the "standard" default.
+ *
+ * @param unitId    Unit ID used to locate the task plan when no metadata is given
+ * @param basePath  Project base path used to locate the task plan
+ * @param metadata  Pre-parsed metadata; when omitted it is obtained from extractTaskMetadata
+ * @returns The chosen tier together with the reason string for it
+ */
+function analyzeTaskComplexity(
+  unitId: string,
+  basePath: string,
+  metadata?: TaskMetadata,
+): TaskAnalysis {
+  const info = metadata ?? extractTaskMetadata(unitId, basePath);
+  const keywords = info.complexityKeywords ?? [];
+  const tags = info.tags ?? [];
+  const simpleTag = /^(docs?|readme|comment|config|typo|rename)$/i;
+  const verdict = (tier: ComplexityTier, reason: string): TaskAnalysis => ({ tier, reason });
+  // Heavy: any single one of these signals is sufficient
+  if ((info.dependencyCount ?? 0) >= 3) {
+    return verdict("heavy", `${info.dependencyCount} dependencies`);
+  }
+  if ((info.fileCount ?? 0) >= 6) {
+    return verdict("heavy", `${info.fileCount} files to modify`);
+  }
+  if ((info.estimatedLines ?? 0) >= 500) {
+    return verdict("heavy", `~${info.estimatedLines} lines estimated`);
+  }
+  if (keywords.length >= 2) {
+    return verdict("heavy", `complex: ${keywords.join(", ")}`);
+  }
+  if ((info.codeBlockCount ?? 0) >= 5) {
+    return verdict("heavy", `${info.codeBlockCount} code blocks in plan`);
+  }
+  // Standard: exactly one complexity keyword
+  if (keywords.length === 1) {
+    return verdict("standard", `${keywords[0]} task`);
+  }
+  // Light: a tag that marks the task as simple
+  if (tags.some(tag => simpleTag.test(tag))) {
+    return verdict("light", `simple task: ${tags.join(", ")}`);
+  }
+  // Light: at most one known file and nothing new being created
+  const touchesAtMostOneFile = info.fileCount !== undefined && info.fileCount <= 1;
+  if (touchesAtMostOneFile && !info.isNewFile) {
+    return verdict("light", "single file modification");
+  }
+  // Nothing matched
+  return verdict("standard", "standard execution task");
+}
+/**
+ * Gauge the complexity of a planning unit.
+ *
+ * A unit ID with a single segment is treated as milestone-level planning and
+ * is reported as "standard". For longer IDs the slice's RESEARCH.md is read,
+ * and more than 200 lines of research marks the slice as "heavy".
+ *
+ * @param unitId    Unit ID, "/"-separated (milestone, then slice)
+ * @param basePath  Project base path used to locate the research file
+ * @returns An override for the tier, or null to keep the caller's default
+ */
+function analyzePlanComplexity(
+  unitId: string,
+  basePath: string,
+): TaskAnalysis | null {
+  const segments = unitId.split("/");
+  if (segments.length === 1) {
+    // Milestone-level planning is always at least standard
+    return { tier: "standard", reason: "milestone-level planning" };
+  }
+  const milestoneId = segments[0];
+  const sliceId = segments[1];
+  const researchFile = join(gsdRoot(basePath), milestoneId, "slices", sliceId, "RESEARCH.md");
+  try {
+    if (!existsSync(researchFile)) return null;
+    const researchLineCount = readFileSync(researchFile, "utf-8").split("\n").length;
+    return researchLineCount > 200
+      ? { tier: "heavy", reason: `complex slice: ${researchLineCount}-line research` }
+      : null;
+  } catch {
+    // Reading research is best-effort; fall back to the default tier
+    return null;
+  }
+}
+/**
+ * Extract task metadata from the task plan file on disk.
+ *
+ * The plan is looked up at <gsdRoot>/<milestone>/slices/<slice>/tasks/<task>-PLAN.md.
+ * Extraction is best-effort: a unit ID that does not have exactly three
+ * "/"-separated segments, a missing file, or any read error yields whatever
+ * metadata has been collected so far (possibly an empty object).
+ *
+ * @param unitId    Unit ID of the form "<milestone>/<slice>/<task>"
+ * @param basePath  Project base path passed to gsdRoot
+ * @returns Heuristic metadata parsed from the plan text
+ */
+function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata {
+  const meta: TaskMetadata = {};
+  const parts = unitId.split("/");
+  if (parts.length !== 3) return meta;
+  const [mid, sid, tid] = parts;
+  const taskPlanPath = join(gsdRoot(basePath), mid, "slices", sid, "tasks", `${tid}-PLAN.md`);
+  try {
+    if (!existsSync(taskPlanPath)) return meta;
+    const content = readFileSync(taskPlanPath, "utf-8");
+    const lines = content.split("\n");
+    // Count files mentioned in "Files:" or "- Files:" lines, either inline
+    // (comma/semicolon separated) or as a bullet list directly below a bare header.
+    const filesHeader = /^\s*-?\s*files?\s*:\s*/i;
+    const bulletItem = /^[-*+]\s+(.+)$/;
+    const allFiles = new Set<string>();
+    for (let i = 0; i < lines.length; i++) {
+      if (!filesHeader.test(lines[i])) continue;
+      const inlineFiles = lines[i].replace(filesHeader, "").split(/[,;]/).map(f => f.trim()).filter(Boolean);
+      inlineFiles.forEach(f => allFiles.add(f));
+      if (inlineFiles.length > 0) continue;
+      // Bare "Files:" header — consume the contiguous bullet items that follow it
+      for (let j = i + 1; j < lines.length; j++) {
+        if (filesHeader.test(lines[j])) break;
+        const bullet = bulletItem.exec(lines[j].trim());
+        if (!bullet) break;
+        allFiles.add(bullet[1].trim());
+      }
+    }
+    // Leave fileCount unset when nothing could be parsed: a count of 0 would
+    // otherwise be read downstream as a trivial single-file change.
+    if (allFiles.size > 0) {
+      meta.fileCount = allFiles.size;
+    }
+    // Check for "new file" or "create" keywords
+    meta.isNewFile = lines.some(l => /\b(create|new file|scaffold|bootstrap)\b/i.test(l));
+    // Look for tags/labels in frontmatter or content
+    const tags: string[] = [];
+    if (content.match(/\b(refactor|migration|architect)/i)) tags.push("refactor");
+    if (content.match(/\b(test|spec|coverage)\b/i)) tags.push("test");
+    if (content.match(/\b(doc|readme|comment|jsdoc)\b/i)) tags.push("docs");
+    if (content.match(/\b(config|env|setting)\b/i)) tags.push("config");
+    if (content.match(/\b(rename|typo|spelling)\b/i)) tags.push("rename");
+    meta.tags = tags;
+    // Try to extract estimated lines from content (first "<N> lines" occurrence)
+    const estimateMatch = content.match(/~?\s*(\d+)\s*lines?\b/i);
+    if (estimateMatch) {
+      meta.estimatedLines = parseInt(estimateMatch[1], 10);
+    }
+    // Phase 4: Deeper introspection signals
+    // Count fenced code blocks (```) — more code blocks = more complex implementation.
+    // Each block contributes an opening and a closing fence, hence the division by two.
+    const codeBlockMatches = content.match(/^```/gm);
+    meta.codeBlockCount = codeBlockMatches ? Math.floor(codeBlockMatches.length / 2) : 0;
+    // Detect complexity keywords that suggest harder tasks.
+    // Word stems (architect…, deprecat…, backward-compat…) carry \w* so that the
+    // trailing \b still matches inflected forms such as "architecture" or "deprecated".
+    const complexityKeywords: string[] = [];
+    if (content.match(/\b(migration|migrate|schema change)\b/i)) complexityKeywords.push("migration");
+    if (content.match(/\b(architect\w*|design pattern|system design)\b/i)) complexityKeywords.push("architecture");
+    if (content.match(/\b(security|auth|encrypt|credential|vulnerability)\b/i)) complexityKeywords.push("security");
+    if (content.match(/\b(performance|optimize|cache|index)\b/i)) complexityKeywords.push("performance");
+    if (content.match(/\b(concurrent|parallel|race condition|mutex|lock)\b/i)) complexityKeywords.push("concurrency");
+    if (content.match(/\b(backward.?compat\w*|breaking change|deprecat\w*)\b/i)) complexityKeywords.push("compatibility");
+    meta.complexityKeywords = complexityKeywords;
+  } catch {
+    // Non-fatal — metadata extraction is best-effort
+  }
+  return meta;
+}
+// ─── Budget Pressure ─────────────────────────────────────────────────────────
+/**
+ * Apply budget pressure to a classification result.
+ * As budget usage increases, more aggressively downgrade tiers.
+ *
+ * - below 50%, undefined, or NaN: classification returned unchanged
+ * - 50% up to (not including) 90%: standard → light; heavy and light are kept
+ * - 90% and above: heavy → standard, everything else → light
+ *
+ * The input object is never modified. When a downgrade happens, a new result
+ * is returned with `downgraded: true` and the rounded budget percentage
+ * appended to the reason.
+ *
+ * @param result     Classification produced before budget is considered
+ * @param budgetPct  Budget usage as a fraction (0.0-1.0+), or undefined if no budget
+ * @returns The original result when nothing changed, otherwise a downgraded copy
+ */
+function applyBudgetPressure(
+  result: ClassificationResult,
+  budgetPct?: number,
+): ClassificationResult {
+  // NaN compares false against every threshold and would otherwise fall through
+  // to a downgrade, so it is treated the same as "no budget information".
+  if (budgetPct === undefined || Number.isNaN(budgetPct) || budgetPct < 0.5) return result;
+  let tier = result.tier;
+  if (budgetPct >= 0.9) {
+    // Critical: even heavy work drops one step, everything else goes to light
+    tier = result.tier === "heavy" ? "standard" : "light";
+  } else if (result.tier === "standard") {
+    // 50% up to 90%: only standard is lowered; heavy keeps its tier
+    tier = "light";
+  }
+  if (tier === result.tier) return result;
+  return {
+    tier,
+    reason: `${result.reason} (budget pressure: ${Math.round(budgetPct * 100)}%)`,
+    downgraded: true,
+  };
+}

package/dist/resources/extensions/gsd/dashboard-overlay.ts CHANGED Viewed

@@ -39,6 +39,9 @@ function unitLabel(type: string): string {
     case "execute-task": return "Execute";
     case "complete-slice": return "Complete";
     case "reassess-roadmap": return "Reassess";
+    case "triage-captures": return "Triage";
+    case "quick-task": return "Quick Task";
+    case "replan-slice": return "Replan";
     default: return type;
   }
 }
@@ -345,6 +348,13 @@ export class GSDDashboardOverlay {
       lines.push(blank());
     }
+    // Pending captures badge — only shown when captures are waiting for triage
+    if (this.dashData.pendingCaptureCount > 0) {
+      const count = this.dashData.pendingCaptureCount;
+      lines.push(row(th.fg("warning", `📌 ${count} pending capture${count === 1 ? "" : "s"} awaiting triage`)));
+      lines.push(blank());
+    }
     if (this.loading) {
       lines.push(centered(th.fg("dim", "Loading dashboard…")));
       return lines;

package/dist/resources/extensions/gsd/metrics.ts CHANGED Viewed

@@ -39,6 +39,8 @@ export interface UnitMetrics {
   toolCalls: number;
   assistantMessages: number;
   userMessages: number;
+  tier?: string;           // complexity tier (light/standard/heavy) if dynamic routing active
+  modelDowngraded?: boolean; // true if dynamic routing used a cheaper model
 }
 export interface MetricsLedger {
@@ -104,6 +106,7 @@ export function snapshotUnitMetrics(
   unitId: string,
   startedAt: number,
   model: string,
+  extras?: { tier?: string; modelDowngraded?: boolean },
 ): UnitMetrics | null {
   if (!ledger) return null;
@@ -156,6 +159,8 @@ export function snapshotUnitMetrics(
     toolCalls,
     assistantMessages,
     userMessages,
+    ...(extras?.tier ? { tier: extras.tier } : {}),
+    ...(extras?.modelDowngraded !== undefined ? { modelDowngraded: extras.modelDowngraded } : {}),
   };
   ledger.units.push(unit);
@@ -294,6 +299,49 @@ export function getProjectTotals(units: UnitMetrics[]): ProjectTotals {
   return totals;
 }
+// ─── Tier Aggregation ────────────────────────────────────────────────────────
+export interface TierAggregate {
+  tier: string;         // tier name, or "unknown" for units recorded without a tier
+  units: number;        // number of units counted for this tier
+  tokens: TokenCounts;  // token counts summed over those units
+  cost: number;         // cost summed over those units
+  downgraded: number;   // units that were downgraded by dynamic routing
+}
+/**
+ * Group unit metrics by complexity tier and total them.
+ *
+ * Units without a tier are collected under "unknown". The result lists
+ * light, standard, heavy and unknown in that order (omitting tiers with no
+ * units), followed by any other tier strings found in the data in first-seen
+ * order, so that no recorded unit is left out of the totals.
+ *
+ * @param units  Unit metrics to aggregate
+ * @returns One aggregate per tier that has at least one unit
+ */
+export function aggregateByTier(units: UnitMetrics[]): TierAggregate[] {
+  const map = new Map<string, TierAggregate>();
+  for (const u of units) {
+    const tier = u.tier ?? "unknown";
+    let agg = map.get(tier);
+    if (!agg) {
+      agg = { tier, units: 0, tokens: emptyTokens(), cost: 0, downgraded: 0 };
+      map.set(tier, agg);
+    }
+    agg.units++;
+    agg.tokens = addTokens(agg.tokens, u.tokens);
+    agg.cost += u.cost;
+    if (u.modelDowngraded) agg.downgraded++;
+  }
+  const order = ["light", "standard", "heavy", "unknown"];
+  const known = order.map(t => map.get(t)).filter((a): a is TierAggregate => !!a);
+  // `tier` is a free-form string on UnitMetrics; keep unrecognised values
+  // instead of silently dropping their units, tokens and cost.
+  const unrecognised = [...map.values()].filter(a => !order.includes(a.tier));
+  return [...known, ...unrecognised];
+}
+/**
+ * Build the one-line dynamic-routing summary for a set of units.
+ *
+ * The percentage is the number of downgraded units relative to the number of
+ * units that carry a tier, and the cost is the summed cost of the downgraded units.
+ *
+ * @param units  Unit metrics to summarise
+ * @returns The summary line, or an empty string when no unit was downgraded
+ */
+export function formatTierSavings(units: UnitMetrics[]): string {
+  let downgradedCount = 0;
+  let downgradedCost = 0;
+  let tieredCount = 0;
+  for (const unit of units) {
+    if (unit.tier) tieredCount++;
+    if (unit.modelDowngraded) {
+      downgradedCount++;
+      downgradedCost += unit.cost;
+    }
+  }
+  if (downgradedCount === 0) return "";
+  let pct = 0;
+  if (tieredCount > 0) {
+    pct = Math.round((downgradedCount / tieredCount) * 100);
+  }
+  return `Dynamic routing: ${downgradedCount}/${tieredCount} units downgraded (${pct}%), cost: ${formatCost(downgradedCost)}`;
+}
 // ─── Formatting helpers ───────────────────────────────────────────────────────
 export function formatCost(cost: number): string {

package/dist/resources/extensions/gsd/model-cost-table.ts ADDED Viewed

@@ -0,0 +1,65 @@
+// GSD Extension — Model Cost Table
+// Static cost reference for known models, used by the dynamic router
+// for cross-provider cost comparison.
+//
+// Costs are approximate per-1K-token rates in USD (separate input and output rates).
+// Updated with GSD releases. Users can override via preferences.
+export interface ModelCostEntry {
+  /** Model ID (bare, without provider prefix) */
+  id: string;
+  /** Approximate cost per 1K input tokens in USD */
+  inputPer1k: number;
+  /** Approximate cost per 1K output tokens in USD */
+  outputPer1k: number;
+  /** Date this entry was last updated, as a YYYY-MM-DD string */
+  updatedAt: string;
+}
+/**
+ * Bundled cost table for known models.
+ * Updated periodically with GSD releases.
+ *
+ * lookupModelCost scans this array in order, so entry order can affect which
+ * entry a non-exact model ID resolves to.
+ */
+export const BUNDLED_COST_TABLE: ModelCostEntry[] = [
+  // Anthropic
+  { id: "claude-opus-4-6", inputPer1k: 0.015, outputPer1k: 0.075, updatedAt: "2025-03-15" },
+  { id: "claude-sonnet-4-6", inputPer1k: 0.003, outputPer1k: 0.015, updatedAt: "2025-03-15" },
+  { id: "claude-haiku-4-5", inputPer1k: 0.0008, outputPer1k: 0.004, updatedAt: "2025-03-15" },
+  { id: "claude-sonnet-4-5-20250514", inputPer1k: 0.003, outputPer1k: 0.015, updatedAt: "2025-03-15" },
+  { id: "claude-3-5-sonnet-latest", inputPer1k: 0.003, outputPer1k: 0.015, updatedAt: "2025-03-15" },
+  { id: "claude-3-5-haiku-latest", inputPer1k: 0.0008, outputPer1k: 0.004, updatedAt: "2025-03-15" },
+  { id: "claude-3-opus-latest", inputPer1k: 0.015, outputPer1k: 0.075, updatedAt: "2025-03-15" },
+  // OpenAI
+  { id: "gpt-4o", inputPer1k: 0.0025, outputPer1k: 0.01, updatedAt: "2025-03-15" },
+  { id: "gpt-4o-mini", inputPer1k: 0.00015, outputPer1k: 0.0006, updatedAt: "2025-03-15" },
+  { id: "o1", inputPer1k: 0.015, outputPer1k: 0.06, updatedAt: "2025-03-15" },
+  { id: "o3", inputPer1k: 0.015, outputPer1k: 0.06, updatedAt: "2025-03-15" },
+  { id: "gpt-4-turbo", inputPer1k: 0.01, outputPer1k: 0.03, updatedAt: "2025-03-15" },
+  // Google
+  { id: "gemini-2.0-flash", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2025-03-15" },
+  { id: "gemini-flash-2.0", inputPer1k: 0.0001, outputPer1k: 0.0004, updatedAt: "2025-03-15" }, // same rates as gemini-2.0-flash; presumably an alternate spelling of the same model — TODO confirm
+  { id: "gemini-2.5-pro", inputPer1k: 0.00125, outputPer1k: 0.005, updatedAt: "2025-03-15" },
+  // DeepSeek
+  { id: "deepseek-chat", inputPer1k: 0.00014, outputPer1k: 0.00028, updatedAt: "2025-03-15" },
+];
+/**
+ * Lookup cost for a model ID. Returns undefined if not found.
+ *
+ * Any provider prefix ("provider/model") is stripped first. Matching then
+ * proceeds from most to least specific:
+ *  1. an entry whose id equals the bare ID;
+ *  2. the longest entry id contained in the bare ID, so a dated or suffixed
+ *     variant such as "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini"
+ *     rather than to the shorter "gpt-4o";
+ *  3. the first entry whose id contains the bare ID.
+ *
+ * @param modelId  Model ID, with or without a provider prefix
+ * @returns The matching table entry, or undefined for an empty or unknown ID
+ */
+export function lookupModelCost(modelId: string): ModelCostEntry | undefined {
+  const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
+  // Every string contains "", so an empty ID would otherwise match the first entry.
+  if (bareId === "") return undefined;
+  const exact = BUNDLED_COST_TABLE.find(e => e.id === bareId);
+  if (exact) return exact;
+  let longestContained: ModelCostEntry | undefined;
+  for (const entry of BUNDLED_COST_TABLE) {
+    const isLonger = longestContained === undefined || entry.id.length > longestContained.id.length;
+    if (isLonger && bareId.includes(entry.id)) {
+      longestContained = entry;
+    }
+  }
+  return longestContained ?? BUNDLED_COST_TABLE.find(e => e.id.includes(bareId));
+}
+/**
+ * Order two models by their input-token cost.
+ *
+ * A model that cannot be found in the cost table is priced at 999, which
+ * places it after every known model.
+ *
+ * @param modelIdA  First model ID
+ * @param modelIdB  Second model ID
+ * @returns Negative if A is cheaper, positive if B is cheaper, 0 if equal
+ */
+export function compareModelCost(modelIdA: string, modelIdB: string): number {
+  const inputCostOf = (modelId: string): number => {
+    const entry = lookupModelCost(modelId);
+    return entry?.inputPer1k ?? 999;
+  };
+  const costA = inputCostOf(modelIdA);
+  const costB = inputCostOf(modelIdB);
+  return costA - costB;
+}