gsd-pi 2.59.0-dev.3de3832 → 2.59.0-dev.d77b3dd

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/dist/resources/extensions/gsd/auto/phases.js +54 -1
  2. package/dist/resources/extensions/gsd/auto-model-selection.js +8 -3
  3. package/dist/resources/extensions/gsd/auto-post-unit.js +40 -1
  4. package/dist/resources/extensions/gsd/auto-prompts.js +13 -0
  5. package/dist/resources/extensions/gsd/bootstrap/db-tools.js +70 -0
  6. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +51 -5
  7. package/dist/resources/extensions/gsd/captures.js +54 -1
  8. package/dist/resources/extensions/gsd/complexity-classifier.js +1 -1
  9. package/dist/resources/extensions/gsd/context-masker.js +68 -0
  10. package/dist/resources/extensions/gsd/docs/preferences-reference.md +7 -0
  11. package/dist/resources/extensions/gsd/gsd-db.js +2 -2
  12. package/dist/resources/extensions/gsd/model-router.js +123 -4
  13. package/dist/resources/extensions/gsd/phase-anchor.js +56 -0
  14. package/dist/resources/extensions/gsd/preferences-types.js +1 -0
  15. package/dist/resources/extensions/gsd/preferences-validation.js +46 -0
  16. package/dist/resources/extensions/gsd/prompts/execute-task.md +2 -0
  17. package/dist/resources/extensions/gsd/prompts/rethink.md +7 -0
  18. package/dist/resources/extensions/gsd/prompts/triage-captures.md +6 -1
  19. package/dist/resources/extensions/gsd/rethink.js +5 -2
  20. package/dist/resources/extensions/gsd/state.js +1 -1
  21. package/dist/resources/extensions/gsd/status-guards.js +4 -3
  22. package/dist/resources/extensions/gsd/triage-resolution.js +128 -1
  23. package/dist/resources/extensions/gsd/triage-ui.js +12 -3
  24. package/dist/resources/skills/btw/SKILL.md +42 -0
  25. package/dist/web/standalone/.next/BUILD_ID +1 -1
  26. package/dist/web/standalone/.next/app-path-routes-manifest.json +20 -20
  27. package/dist/web/standalone/.next/build-manifest.json +2 -2
  28. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  29. package/dist/web/standalone/.next/required-server-files.json +1 -1
  30. package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
  31. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  32. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  33. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  34. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  35. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  36. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  37. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  38. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  39. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  40. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  41. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  42. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  43. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  44. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  45. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  46. package/dist/web/standalone/.next/server/app/index.html +1 -1
  47. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  48. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  49. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  50. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  51. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  52. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  53. package/dist/web/standalone/.next/server/app-paths-manifest.json +20 -20
  54. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  55. package/dist/web/standalone/.next/server/pages/500.html +2 -2
  56. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  57. package/dist/web/standalone/server.js +1 -1
  58. package/package.json +1 -1
  59. package/src/resources/extensions/gsd/auto/phases.ts +60 -1
  60. package/src/resources/extensions/gsd/auto-model-selection.ts +12 -3
  61. package/src/resources/extensions/gsd/auto-post-unit.ts +48 -1
  62. package/src/resources/extensions/gsd/auto-prompts.ts +17 -0
  63. package/src/resources/extensions/gsd/bootstrap/db-tools.ts +78 -0
  64. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +53 -4
  65. package/src/resources/extensions/gsd/captures.ts +71 -2
  66. package/src/resources/extensions/gsd/complexity-classifier.ts +1 -1
  67. package/src/resources/extensions/gsd/context-masker.ts +74 -0
  68. package/src/resources/extensions/gsd/docs/preferences-reference.md +7 -0
  69. package/src/resources/extensions/gsd/gsd-db.ts +2 -2
  70. package/src/resources/extensions/gsd/model-router.ts +171 -8
  71. package/src/resources/extensions/gsd/phase-anchor.ts +71 -0
  72. package/src/resources/extensions/gsd/preferences-types.ts +9 -0
  73. package/src/resources/extensions/gsd/preferences-validation.ts +38 -0
  74. package/src/resources/extensions/gsd/prompts/execute-task.md +2 -0
  75. package/src/resources/extensions/gsd/prompts/rethink.md +7 -0
  76. package/src/resources/extensions/gsd/prompts/triage-captures.md +6 -1
  77. package/src/resources/extensions/gsd/rethink.ts +5 -2
  78. package/src/resources/extensions/gsd/state.ts +1 -1
  79. package/src/resources/extensions/gsd/status-guards.ts +4 -3
  80. package/src/resources/extensions/gsd/tests/context-masker.test.ts +122 -0
  81. package/src/resources/extensions/gsd/tests/model-router.test.ts +87 -1
  82. package/src/resources/extensions/gsd/tests/phase-anchor.test.ts +83 -0
  83. package/src/resources/extensions/gsd/tests/status-guards.test.ts +4 -0
  84. package/src/resources/extensions/gsd/tests/stop-backtrack.test.ts +216 -0
  85. package/src/resources/extensions/gsd/tests/tool-naming.test.ts +1 -1
  86. package/src/resources/extensions/gsd/triage-resolution.ts +144 -1
  87. package/src/resources/extensions/gsd/triage-ui.ts +12 -3
  88. package/src/resources/skills/btw/SKILL.md +42 -0
  89. /package/dist/web/standalone/.next/static/{Y_HG7cJVptjBpkVSQQiFi → t_cBZAENjaOJIRST3dw08}/_buildManifest.js +0 -0
  90. /package/dist/web/standalone/.next/static/{Y_HG7cJVptjBpkVSQQiFi → t_cBZAENjaOJIRST3dw08}/_ssgManifest.js +0 -0
@@ -883,6 +883,84 @@ export function registerDbTools(pi: ExtensionAPI): void {
883
883
  pi.registerTool(sliceCompleteTool);
884
884
  registerAlias(pi, sliceCompleteTool, "gsd_complete_slice", "gsd_slice_complete");
885
885
 
886
+ // ─── gsd_skip_slice (#3477 / #3487) ───────────────────────────────────
887
+
888
+ const skipSliceExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {
889
+ const dbAvailable = await ensureDbOpen();
890
+ if (!dbAvailable) {
891
+ return {
892
+ content: [{ type: "text" as const, text: "Error: GSD database is not available. Cannot skip slice." }],
893
+ details: { operation: "skip_slice", error: "db_unavailable" } as any,
894
+ };
895
+ }
896
+ try {
897
+ const { getSlice, updateSliceStatus } = await import("../gsd-db.js");
898
+ const { invalidateStateCache } = await import("../state.js");
899
+
900
+ const slice = getSlice(params.milestoneId, params.sliceId);
901
+ if (!slice) {
902
+ return {
903
+ content: [{ type: "text" as const, text: `Error: Slice ${params.sliceId} not found in milestone ${params.milestoneId}` }],
904
+ details: { operation: "skip_slice", error: "slice_not_found" } as any,
905
+ };
906
+ }
907
+
908
+ if (slice.status === "complete" || slice.status === "done") {
909
+ return {
910
+ content: [{ type: "text" as const, text: `Error: Slice ${params.sliceId} is already complete — cannot skip.` }],
911
+ details: { operation: "skip_slice", error: "already_complete" } as any,
912
+ };
913
+ }
914
+
915
+ if (slice.status === "skipped") {
916
+ return {
917
+ content: [{ type: "text" as const, text: `Slice ${params.sliceId} is already skipped.` }],
918
+ details: { operation: "skip_slice", sliceId: params.sliceId, milestoneId: params.milestoneId } as any,
919
+ };
920
+ }
921
+
922
+ updateSliceStatus(params.milestoneId, params.sliceId, "skipped");
923
+ invalidateStateCache();
924
+
925
+ return {
926
+ content: [{ type: "text" as const, text: `Skipped slice ${params.sliceId} (${params.milestoneId}). Reason: ${params.reason ?? "User-directed skip"}. Auto-mode will advance past this slice.` }],
927
+ details: {
928
+ operation: "skip_slice",
929
+ sliceId: params.sliceId,
930
+ milestoneId: params.milestoneId,
931
+ reason: params.reason,
932
+ } as any,
933
+ };
934
+ } catch (err) {
935
+ const msg = err instanceof Error ? err.message : String(err);
936
+ logError("tool", `skip_slice tool failed: ${msg}`, { tool: "gsd_skip_slice", error: String(err) });
937
+ return {
938
+ content: [{ type: "text" as const, text: `Error skipping slice: ${msg}` }],
939
+ details: { operation: "skip_slice", error: msg } as any,
940
+ };
941
+ }
942
+ };
943
+
944
+ pi.registerTool({
945
+ name: "gsd_skip_slice",
946
+ label: "Skip Slice",
947
+ description:
948
+ "Mark a slice as skipped so auto-mode advances past it without executing. " +
949
+ "The slice data is preserved for reference. The state machine treats skipped slices like completed ones for dependency satisfaction.",
950
+ promptSnippet: "Skip a GSD slice (mark as skipped, auto-mode will advance past it)",
951
+ promptGuidelines: [
952
+ "Use gsd_skip_slice when a slice should be bypassed — descoped, superseded, or no longer relevant.",
953
+ "Cannot skip a slice that is already complete.",
954
+ "Skipped slices satisfy downstream dependencies just like completed slices.",
955
+ ],
956
+ parameters: Type.Object({
957
+ sliceId: Type.String({ description: "Slice ID (e.g. S02)" }),
958
+ milestoneId: Type.String({ description: "Milestone ID (e.g. M003)" }),
959
+ reason: Type.Optional(Type.String({ description: "Reason for skipping this slice" })),
960
+ }),
961
+ execute: skipSliceExecute,
962
+ });
963
+
886
964
  // ─── gsd_complete_milestone ────────────────────────────────────────────
887
965
 
888
966
  const milestoneCompleteExecute = async (_toolCallId: string, params: any, _signal: AbortSignal | undefined, _onUpdate: unknown, _ctx: unknown) => {
@@ -263,13 +263,62 @@ export function registerHooks(pi: ExtensionAPI): void {
263
263
  });
264
264
 
265
265
  pi.on("before_provider_request", async (event) => {
266
+ const payload = event.payload as Record<string, unknown> | null;
267
+ if (!payload || typeof payload !== "object") return;
268
+
269
+ // ── Observation Masking ─────────────────────────────────────────────
270
+ // Replace old tool results with placeholders to reduce context bloat.
271
+ // Only active during auto-mode when context_management.observation_masking is enabled.
272
+ if (isAutoActive()) {
273
+ try {
274
+ const { loadEffectiveGSDPreferences } = await import("../preferences.js");
275
+ const prefs = loadEffectiveGSDPreferences();
276
+ const cmConfig = prefs?.preferences.context_management;
277
+
278
+ // Observation masking: replace old tool results with placeholders
279
+ if (cmConfig?.observation_masking !== false) {
280
+ const keepTurns = cmConfig?.observation_mask_turns ?? 8;
281
+ const { createObservationMask } = await import("../context-masker.js");
282
+ const mask = createObservationMask(keepTurns);
283
+ const messages = payload.messages;
284
+ if (Array.isArray(messages)) {
285
+ payload.messages = mask(messages);
286
+ }
287
+ }
288
+
289
+ // Tool result truncation: cap individual tool result content length.
290
+ // In pi-ai format, toolResult messages have role: "toolResult" and content: TextContent[].
291
+ // Creates new objects to avoid mutating shared conversation state.
292
+ const maxChars = cmConfig?.tool_result_max_chars ?? 800;
293
+ const msgs = payload.messages;
294
+ if (Array.isArray(msgs)) {
295
+ payload.messages = msgs.map((msg: Record<string, unknown>) => {
296
+ // Match toolResult messages (role: "toolResult", content is array of content blocks)
297
+ if (msg?.role === "toolResult" && Array.isArray(msg.content)) {
298
+ const blocks = msg.content as Array<Record<string, unknown>>;
299
+ const totalLen = blocks.reduce((sum: number, b) => sum + (typeof b.text === "string" ? b.text.length : 0), 0);
300
+ if (totalLen > maxChars) {
301
+ const truncated = blocks.map(b => {
302
+ if (typeof b.text === "string" && b.text.length > maxChars) {
303
+ return { ...b, text: b.text.slice(0, maxChars) + "\n…[truncated]" };
304
+ }
305
+ return b;
306
+ });
307
+ return { ...msg, content: truncated };
308
+ }
309
+ }
310
+ return msg;
311
+ });
312
+ }
313
+ } catch { /* non-fatal */ }
314
+ }
315
+
316
+ // ── Service Tier ────────────────────────────────────────────────────
266
317
  const modelId = event.model?.id;
267
- if (!modelId) return;
318
+ if (!modelId) return payload;
268
319
  const { getEffectiveServiceTier, supportsServiceTier } = await import("../service-tier.js");
269
320
  const tier = getEffectiveServiceTier();
270
- if (!tier || !supportsServiceTier(modelId)) return;
271
- const payload = event.payload as Record<string, unknown> | null;
272
- if (!payload || typeof payload !== "object") return;
321
+ if (!tier || !supportsServiceTier(modelId)) return payload;
273
322
  payload.service_tier = tier;
274
323
  return payload;
275
324
  });
@@ -15,7 +15,7 @@ import { gsdRoot } from "./paths.js";
15
15
 
16
16
  // ─── Types ────────────────────────────────────────────────────────────────────
17
17
 
18
- export type Classification = "quick-task" | "inject" | "defer" | "replan" | "note";
18
+ export type Classification = "quick-task" | "inject" | "defer" | "replan" | "note" | "stop" | "backtrack";
19
19
 
20
20
  export interface CaptureEntry {
21
21
  id: string;
@@ -42,7 +42,7 @@ export interface TriageResult {
42
42
 
43
43
  const CAPTURES_FILENAME = "CAPTURES.md";
44
44
  const VALID_CLASSIFICATIONS: readonly string[] = [
45
- "quick-task", "inject", "defer", "replan", "note",
45
+ "quick-task", "inject", "defer", "replan", "note", "stop", "backtrack",
46
46
  ];
47
47
 
48
48
  // ─── Path Resolution ──────────────────────────────────────────────────────────
@@ -285,6 +285,75 @@ export function loadActionableCaptures(basePath: string, currentMilestoneId?: st
285
285
  );
286
286
  }
287
287
 
/**
 * Collect the captures that should halt auto-mode.
 *
 * A capture qualifies when it has been resolved, has not been executed yet,
 * and is classified as either "stop" or "backtrack" (a backtrack also
 * requires halting, so both classifications are returned here).
 */
export function loadStopCaptures(basePath: string): CaptureEntry[] {
  const halting: CaptureEntry[] = [];
  for (const entry of loadAllCaptures(basePath)) {
    // Only resolved captures that have not been acted on yet are relevant.
    if (entry.status !== "resolved" || entry.executed) continue;
    if (entry.classification === "stop" || entry.classification === "backtrack") {
      halting.push(entry);
    }
  }
  return halting;
}
299
+
/**
 * Collect only the "backtrack" captures that are still waiting to be acted on:
 * resolved, not yet executed, and classified as "backtrack".
 */
export function loadBacktrackCaptures(basePath: string): CaptureEntry[] {
  const pendingBacktracks: CaptureEntry[] = [];
  for (const entry of loadAllCaptures(basePath)) {
    const isOpen = entry.status === "resolved" && !entry.executed;
    if (isOpen && entry.classification === "backtrack") {
      pendingBacktracks.push(entry);
    }
  }
  return pendingBacktracks;
}
309
+
/**
 * Revert captures that were marked resolved without going through triage.
 *
 * A properly triaged capture carries both a resolved status and a
 * classification. A capture that is resolved but has no classification is
 * treated as having been silenced outside the triage pipeline, and its
 * `**Status:**` line in the captures file is flipped back to `pending`.
 *
 * The file is rewritten only when at least one section actually changed.
 *
 * @param basePath Project base path used to locate the captures file.
 * @returns The number of capture sections whose status was reverted.
 */
export function revertExecutorResolvedCaptures(basePath: string): number {
  const filePath = resolveCapturesPath(basePath);
  if (!existsSync(filePath)) return 0;

  let content = readFileSync(filePath, "utf-8");
  let reverted = 0;

  for (const capture of loadAllCaptures(basePath)) {
    // Resolved + classified means triage handled it; leave those alone.
    if (capture.status !== "resolved" || capture.classification) continue;

    // Match the capture's section: its heading up to the next "### " or EOF.
    const sectionRegex = new RegExp(
      `(### ${escapeRegex(capture.id)}\\n(?:(?!### ).)*?)(?=### |$)`,
      "s",
    );
    const match = sectionRegex.exec(content);
    if (!match) continue;

    const originalSection = match[1];
    const updatedSection = originalSection.replace(
      /\*\*Status:\*\*\s*resolved/i,
      "**Status:** pending",
    );
    // Nothing to revert in the file text — do not count it or trigger a write.
    if (updatedSection === originalSection) continue;

    // Splice by index instead of String.replace(regex, text): capture bodies are
    // free-form user text, and "$&" / "$1" / "$$" in a replacement string would
    // be expanded as special patterns and corrupt the file.
    content =
      content.slice(0, match.index) +
      updatedSection +
      content.slice(match.index + originalSection.length);
    reverted++;
  }

  if (reverted > 0) {
    writeFileSync(filePath, content, "utf-8");
  }

  return reverted;
}
356
+
288
357
  /**
289
358
  * Retroactively stamp a capture with a milestone ID.
290
359
  *
@@ -212,7 +212,7 @@ function analyzePlanComplexity(
212
212
  /**
213
213
  * Extract task metadata from the task plan file on disk.
214
214
  */
215
- function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata {
215
+ export function extractTaskMetadata(unitId: string, basePath: string): TaskMetadata {
216
216
  const meta: TaskMetadata = {};
217
217
  const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
218
218
  if (!mid || !sid || !tid) return meta;
@@ -0,0 +1,74 @@
/**
 * Observation masking for GSD auto-mode sessions.
 *
 * Replaces tool result content older than N turns with a placeholder.
 * Reduces context bloat between compactions with zero LLM overhead.
 * Preserves message ordering, roles, and all assistant/user messages.
 *
 * Operates on the pi-ai Message[] format (post-convertToLlm, pre-provider):
 * - toolResult messages: { role: "toolResult", content: TextContent[] }
 * - bash results are already converted to: { role: "user", content: [{type:"text",text:"..."}] }
 *   and start with "Ran `" from bashExecutionToText.
 */

interface MaskableMessage {
  role: string;
  content: unknown;
  type?: string;
  [key: string]: unknown;
}

const MASK_PLACEHOLDER = "[result masked — within summarized history]";

/**
 * Build the placeholder content for one masked message.
 * A fresh array is created per call so masked messages never share a mutable
 * content array (an in-place edit of one must not affect the others).
 */
function createMaskContent(): Array<{ type: "text"; text: string }> {
  return [{ type: "text", text: MASK_PLACEHOLDER }];
}

/**
 * Walk backwards and return the index of the `keepRecentTurns`-th most recent
 * genuine user message. Messages at or after that index are kept verbatim.
 * Returns 0 when there are fewer genuine user turns than requested.
 */
function findTurnBoundary(messages: MaskableMessage[], keepRecentTurns: number): number {
  let turnsSeen = 0;
  for (let i = messages.length - 1; i >= 0; i--) {
    const m = messages[i];
    // Tool results have role "toolResult"; only role "user" can be a turn.
    if (m?.role !== "user") continue;
    // Bash results are delivered as user messages but are not real user turns.
    if (isBashResultUserMessage(m)) continue;
    turnsSeen++;
    if (turnsSeen >= keepRecentTurns) return i;
  }
  return 0;
}

/**
 * Detect user messages that originated from bashExecution.
 * After convertToLlm, these are {role: "user", content: [{type:"text", text:"Ran `cmd`\n..."}]}.
 * The bashExecutionToText format always starts with "Ran `".
 *
 * Always returns a real boolean, including for empty content arrays and
 * null/non-object first blocks.
 */
function isBashResultUserMessage(m: MaskableMessage): boolean {
  if (m?.role !== "user" || !Array.isArray(m.content)) return false;
  const first: unknown = m.content[0];
  if (typeof first !== "object" || first === null) return false;
  const text = (first as { text?: unknown }).text;
  return typeof text === "string" && text.startsWith("Ran `");
}

/** True for messages whose content may be replaced by the placeholder. */
function isMaskableMessage(m: MaskableMessage): boolean {
  // Tool result messages (role: "toolResult" in pi-ai format)
  if (m?.role === "toolResult") return true;
  // Bash-result user messages (converted from bashExecution by convertToLlm)
  return isBashResultUserMessage(m);
}

/**
 * Create a masking function that keeps the most recent `keepRecentTurns` user
 * turns verbatim and replaces the content of older tool results (and bash
 * result messages) with a placeholder block.
 *
 * The returned function never mutates its input: masked messages are shallow
 * copies, untouched messages are returned by reference, and when nothing is
 * old enough to mask the original array itself is returned.
 */
export function createObservationMask(keepRecentTurns: number = 8) {
  return (messages: MaskableMessage[]): MaskableMessage[] => {
    const boundary = findTurnBoundary(messages, keepRecentTurns);
    if (boundary === 0) return messages;

    return messages.map((m, i) => {
      if (i >= boundary || !isMaskableMessage(m)) return m;
      // Content may be string or array of content blocks — always replace with array
      return { ...m, content: createMaskContent() };
    });
  };
}
@@ -189,6 +189,13 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
189
189
  - `budget_pressure`: boolean — downgrade model tier when budget is under pressure. Default: `true`.
190
190
  - `cross_provider`: boolean — allow routing across different providers. Default: `true`.
191
191
  - `hooks`: boolean — enable routing hooks. Default: `true`.
192
+ - `capability_routing`: boolean — enable capability-profile scoring for model selection within a tier. Requires `enabled: true`. Default: `false`.
193
+
194
+ - `context_management`: configures context hygiene for auto-mode sessions. Keys:
195
+ - `observation_masking`: boolean — mask old tool results to reduce context bloat. Default: `true`.
196
+ - `observation_mask_turns`: number — keep this many recent turns verbatim (1-50). Default: `8`.
197
+ - `compaction_threshold_percent`: number — trigger compaction once context usage reaches this fraction of the context window (0.5-0.95, i.e. 50%-95%). Lower values fire compaction earlier, reducing drift. Default: `0.70`.
198
+ - `tool_result_max_chars`: number — max chars per tool result in GSD sessions (200-10000). Default: `800`.
192
199
 
193
200
  - `auto_visualize`: boolean — show a visualizer hint after each milestone completion in auto-mode. Default: `false`.
194
201
 
@@ -1661,11 +1661,11 @@ export function getActiveSliceFromDb(milestoneId: string): SliceRow | null {
1661
1661
  const row = currentDb.prepare(
1662
1662
  `SELECT s.* FROM slices s
1663
1663
  WHERE s.milestone_id = :mid
1664
- AND s.status NOT IN ('complete', 'done')
1664
+ AND s.status NOT IN ('complete', 'done', 'skipped')
1665
1665
  AND NOT EXISTS (
1666
1666
  SELECT 1 FROM json_each(s.depends) AS dep
1667
1667
  WHERE dep.value NOT IN (
1668
- SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done')
1668
+ SELECT id FROM slices WHERE milestone_id = :mid AND status IN ('complete', 'done', 'skipped')
1669
1669
  )
1670
1670
  )
1671
1671
  ORDER BY s.sequence, s.id
@@ -10,6 +10,7 @@ import type { ResolvedModelConfig } from "./preferences.js";
10
10
 
11
11
  export interface DynamicRoutingConfig {
12
12
  enabled?: boolean;
13
+ capability_routing?: boolean; // default: false — enable capability profile scoring
13
14
  tier_models?: {
14
15
  light?: string;
15
16
  standard?: string;
@@ -32,6 +33,12 @@ export interface RoutingDecision {
32
33
  wasDowngraded: boolean;
33
34
  /** Human-readable reason for this decision */
34
35
  reason: string;
36
+ /** How the model was selected. */
37
+ selectionMethod?: "tier-only" | "capability-scored";
38
+ /** Capability scores per model (when capability-scored). */
39
+ capabilityScores?: Record<string, number>;
40
+ /** Task requirement vector (when capability-scored). */
41
+ taskRequirements?: Partial<Record<string, number>>;
35
42
  }
36
43
 
37
44
  // ─── Known Model Tiers ───────────────────────────────────────────────────────
@@ -114,6 +121,91 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
114
121
  "deepseek-chat": 0.00014,
115
122
  };
116
123
 
// ─── Capability Profiles (ADR-004 Phase 2) ──────────────────────────────────
// 7-dimension profiles, 0–100 normalized. Models without a profile
// score 50 uniformly — capability scoring is a no-op for them.

export interface ModelCapabilities {
  coding: number;
  debugging: number;
  research: number;
  reasoning: number;
  speed: number;
  longContext: number;
  instruction: number;
}

export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
  "claude-opus-4-6": { coding: 95, debugging: 90, research: 85, reasoning: 95, speed: 30, longContext: 80, instruction: 90 },
  "claude-sonnet-4-6": { coding: 85, debugging: 80, research: 75, reasoning: 80, speed: 60, longContext: 75, instruction: 85 },
  "claude-haiku-4-5": { coding: 60, debugging: 50, research: 45, reasoning: 50, speed: 95, longContext: 50, instruction: 75 },
  "gpt-4o": { coding: 80, debugging: 75, research: 70, reasoning: 75, speed: 65, longContext: 70, instruction: 80 },
  "gpt-4o-mini": { coding: 55, debugging: 45, research: 40, reasoning: 45, speed: 90, longContext: 45, instruction: 70 },
  "gemini-2.5-pro": { coding: 75, debugging: 70, research: 85, reasoning: 75, speed: 55, longContext: 90, instruction: 75 },
  "gemini-2.0-flash": { coding: 50, debugging: 40, research: 50, reasoning: 40, speed: 95, longContext: 60, instruction: 65 },
  "deepseek-chat": { coding: 75, debugging: 65, research: 55, reasoning: 70, speed: 70, longContext: 55, instruction: 65 },
  "o3": { coding: 80, debugging: 85, research: 80, reasoning: 92, speed: 25, longContext: 70, instruction: 85 },
};

// Per-unit-type weight vectors (0–1) over the capability dimensions.
const BASE_REQUIREMENTS: Record<string, Partial<Record<keyof ModelCapabilities, number>>> = {
  "execute-task": { coding: 0.9, instruction: 0.7, speed: 0.3 },
  "research-milestone": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
  "research-slice": { research: 0.9, longContext: 0.7, reasoning: 0.5 },
  "plan-milestone": { reasoning: 0.9, coding: 0.5 },
  "plan-slice": { reasoning: 0.9, coding: 0.5 },
  "replan-slice": { reasoning: 0.9, debugging: 0.6, coding: 0.5 },
  "reassess-roadmap": { reasoning: 0.9, research: 0.5 },
  "complete-slice": { instruction: 0.8, speed: 0.7 },
  "run-uat": { instruction: 0.7, speed: 0.8 },
  "discuss-milestone": { reasoning: 0.6, instruction: 0.7 },
  "complete-milestone": { instruction: 0.8, reasoning: 0.5 },
};

/**
 * Compute a task requirement vector from unit type and optional metadata.
 *
 * Unknown unit types fall back to `{ reasoning: 0.5 }`. For "execute-task",
 * the first matching metadata rule (tags, then complexity keywords, then
 * size) overrides selected weights on top of the base vector.
 *
 * @param unitType Unit type key, e.g. "execute-task" or "plan-slice".
 * @param metadata Optional task hints; only consulted for "execute-task".
 * @returns A fresh weight vector; callers may mutate it freely.
 */
export function computeTaskRequirements(
  unitType: string,
  metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
): Partial<Record<keyof ModelCapabilities, number>> {
  // Own-property lookup only: a plain index would resolve inherited members
  // such as "constructor" or "toString" and bypass the default vector.
  const preset = Object.prototype.hasOwnProperty.call(BASE_REQUIREMENTS, unitType)
    ? BASE_REQUIREMENTS[unitType]
    : undefined;
  const base: Partial<Record<keyof ModelCapabilities, number>> = { ...(preset ?? { reasoning: 0.5 }) };

  if (unitType === "execute-task" && metadata) {
    if (metadata.tags?.some(t => /^(docs?|readme|comment|config|typo|rename)$/i.test(t))) {
      return { ...base, instruction: 0.9, coding: 0.3, speed: 0.7 };
    }
    if (metadata.complexityKeywords?.some(k => k === "concurrency" || k === "compatibility")) {
      return { ...base, debugging: 0.9, reasoning: 0.8 };
    }
    if (metadata.complexityKeywords?.some(k => k === "migration" || k === "architecture")) {
      return { ...base, reasoning: 0.9, coding: 0.8 };
    }
    if ((metadata.fileCount ?? 0) >= 6 || (metadata.estimatedLines ?? 0) >= 500) {
      return { ...base, coding: 0.9, reasoning: 0.7 };
    }
  }

  return base;
}

/**
 * Score a model against a task requirement vector.
 *
 * Returns the weighted average of the model's capabilities over the required
 * dimensions. Returns 50 when the requirements carry no positive total weight
 * (including the empty vector). Weights that are missing, `undefined`, or not
 * finite are ignored so a sparse vector can never turn the score into NaN.
 *
 * @param capabilities Capability profile of the model being scored.
 * @param requirements Weights per capability dimension.
 */
export function scoreModel(
  capabilities: ModelCapabilities,
  requirements: Partial<Record<keyof ModelCapabilities, number>>,
): number {
  let weightedSum = 0;
  let weightSum = 0;
  for (const [dim, weight] of Object.entries(requirements)) {
    if (typeof weight !== "number" || !Number.isFinite(weight)) continue;
    // Dimensions missing from the profile count as a neutral 50.
    const capability = capabilities[dim as keyof ModelCapabilities] ?? 50;
    weightedSum += weight * capability;
    weightSum += weight;
  }
  return weightSum > 0 ? weightedSum / weightSum : 50;
}
208
+
117
209
  // ─── Public API ──────────────────────────────────────────────────────────────
118
210
 
119
211
  /**
@@ -132,6 +224,8 @@ export function resolveModelForComplexity(
132
224
  phaseConfig: ResolvedModelConfig | undefined,
133
225
  routingConfig: DynamicRoutingConfig,
134
226
  availableModelIds: string[],
227
+ unitType?: string,
228
+ metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
135
229
  ): RoutingDecision {
136
230
  // If no phase config or routing disabled, pass through
137
231
  if (!phaseConfig || !routingConfig.enabled) {
@@ -175,25 +269,40 @@ export function resolveModelForComplexity(
175
269
  }
176
270
 
177
271
  // Find the best model for the requested tier
178
- const targetModelId = findModelForTier(
179
- requestedTier,
180
- routingConfig,
181
- availableModelIds,
182
- routingConfig.cross_provider !== false,
183
- );
272
+ const useCapabilityScoring = routingConfig.capability_routing && unitType;
273
+
274
+ let targetModelId: string | null;
275
+ let capabilityScores: Record<string, number> | undefined;
276
+ let taskRequirements: Partial<Record<string, number>> | undefined;
277
+ let selectionMethod: "tier-only" | "capability-scored" = "tier-only";
278
+
279
+ if (useCapabilityScoring) {
280
+ const result = findModelForTierWithCapability(
281
+ requestedTier, routingConfig, availableModelIds,
282
+ routingConfig.cross_provider !== false, unitType, metadata,
283
+ );
284
+ targetModelId = result.modelId;
285
+ capabilityScores = Object.keys(result.scores).length > 0 ? result.scores : undefined;
286
+ taskRequirements = Object.keys(result.requirements).length > 0 ? result.requirements : undefined;
287
+ selectionMethod = capabilityScores ? "capability-scored" : "tier-only";
288
+ } else {
289
+ targetModelId = findModelForTier(
290
+ requestedTier, routingConfig, availableModelIds,
291
+ routingConfig.cross_provider !== false,
292
+ );
293
+ }
184
294
 
185
295
  if (!targetModelId) {
186
- // No suitable model found — use configured primary
187
296
  return {
188
297
  modelId: configuredPrimary,
189
298
  fallbacks: phaseConfig.fallbacks,
190
299
  tier: requestedTier,
191
300
  wasDowngraded: false,
192
301
  reason: `no ${requestedTier}-tier model available`,
302
+ selectionMethod,
193
303
  };
194
304
  }
195
305
 
196
- // Build fallback chain: [downgraded_model, ...configured_fallbacks, configured_primary]
197
306
  const fallbacks = [
198
307
  ...phaseConfig.fallbacks.filter(f => f !== targetModelId),
199
308
  configuredPrimary,
@@ -205,6 +314,9 @@ export function resolveModelForComplexity(
205
314
  tier: requestedTier,
206
315
  wasDowngraded: true,
207
316
  reason: classification.reason,
317
+ selectionMethod,
318
+ capabilityScores,
319
+ taskRequirements,
208
320
  };
209
321
  }
210
322
 
@@ -226,6 +338,7 @@ export function escalateTier(currentTier: ComplexityTier): ComplexityTier | null
226
338
  export function defaultRoutingConfig(): DynamicRoutingConfig {
227
339
  return {
228
340
  enabled: true,
341
+ capability_routing: false,
229
342
  escalate_on_failure: true,
230
343
  budget_pressure: true,
231
344
  cross_provider: true,
@@ -298,6 +411,56 @@ function findModelForTier(
298
411
  return candidates[0] ?? null;
299
412
  }
300
413
 
414
/**
 * Picks a model for `tier`, using capability scores to rank the candidates.
 *
 * Resolution order:
 *  1. If `config.tier_models[tier]` names a model that is available (matched
 *     by full ID or by the bare ID after the last "/"), it is returned as-is
 *     with empty `scores` and `requirements`.
 *  2. Otherwise every available model whose tier equals `tier` is scored
 *     against the requirements computed from `unitType` and `metadata`.
 *  3. All candidates within 2 points of the best score are treated as
 *     equivalent; among those the cheapest wins when `crossProvider` is true,
 *     and remaining ties are broken by `localeCompare` on the model ID.
 *
 * Step 3 deliberately avoids `Array.prototype.sort` with a "within 2 points is
 * a tie" comparator: such a comparator is not transitive (A≈B, B≈C, yet A>C),
 * so the element that ends up first would depend on the engine's sort.
 *
 * @returns `modelId` is `null` when no available model belongs to `tier`;
 *          `scores` maps each scored candidate ID to its score;
 *          `requirements` is the requirement vector the scores were computed from.
 */
function findModelForTierWithCapability(
  tier: ComplexityTier,
  config: DynamicRoutingConfig,
  availableModelIds: string[],
  crossProvider: boolean,
  unitType: string,
  metadata?: { tags?: string[]; complexityKeywords?: string[]; fileCount?: number; estimatedLines?: number },
): { modelId: string | null; scores: Record<string, number>; requirements: Partial<Record<string, number>> } {
  // Strips an optional "provider/" prefix, keeping the segment after the last "/".
  const bareIdOf = (id: string): string => (id.includes("/") ? id.split("/").pop()! : id);

  const explicitModel = config.tier_models?.[tier];
  if (explicitModel) {
    const bareExplicit = bareIdOf(explicitModel);
    const match = availableModelIds.find(id => id === explicitModel || bareIdOf(id) === bareExplicit);
    if (match) return { modelId: match, scores: {}, requirements: {} };
  }

  const requirements = computeTaskRequirements(unitType, metadata);
  const candidates = availableModelIds.filter(id => getModelTier(id) === tier);
  if (candidates.length === 0) return { modelId: null, scores: {}, requirements };

  const scores: Record<string, number> = {};
  for (const id of candidates) {
    scores[id] = scoreModel(getModelProfile(bareIdOf(id)), requirements);
  }

  // Candidates whose score is within this many points of the best are
  // considered equally capable and compete on cost / ID instead.
  const scoreTieBand = 2;
  const bestScore = Math.max(...candidates.map(id => scores[id]));
  const contenders = candidates.filter(id => bestScore - scores[id] <= scoreTieBand);
  // If a score is NaN the band filter rejects everything; fall back to all candidates.
  const pool = contenders.length > 0 ? contenders : candidates;

  pool.sort((a, b) => {
    if (crossProvider) {
      const costDiff = getModelCost(a) - getModelCost(b);
      if (costDiff !== 0) return costDiff;
    }
    return a.localeCompare(b);
  });

  return { modelId: pool[0] ?? null, scores, requirements };
}
455
+
456
/**
 * Looks up the capability profile for a bare model ID (no "provider/" prefix).
 *
 * Lookup order:
 *  1. Exact own-key match in `MODEL_CAPABILITY_PROFILES`.
 *  2. Fuzzy match: any known ID that contains `bareId` or is contained in it.
 *     When several known IDs match, the one whose length is closest to
 *     `bareId` wins (the tightest overlap), instead of whichever entry happens
 *     to come first in the table. Equal-distance matches keep table order.
 *  3. A neutral profile with every capability set to 50.
 *
 * An empty `bareId` skips the fuzzy step: `"x".includes("")` is always true,
 * so it would otherwise match every known model.
 */
function getModelProfile(bareId: string): ModelCapabilities {
  // Own-property check so IDs such as "constructor" cannot resolve to
  // members inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(MODEL_CAPABILITY_PROFILES, bareId)) {
    return MODEL_CAPABILITY_PROFILES[bareId];
  }

  if (bareId.length > 0) {
    let closest: ModelCapabilities | undefined;
    let closestDistance = Number.POSITIVE_INFINITY;
    for (const [knownId, profile] of Object.entries(MODEL_CAPABILITY_PROFILES)) {
      if (!bareId.includes(knownId) && !knownId.includes(bareId)) continue;
      const distance = Math.abs(knownId.length - bareId.length);
      if (distance < closestDistance) {
        closest = profile;
        closestDistance = distance;
      }
    }
    if (closest) return closest;
  }

  return { coding: 50, debugging: 50, research: 50, reasoning: 50, speed: 50, longContext: 50, instruction: 50 };
}
463
+
301
464
  function getModelCost(modelId: string): number {
302
465
  const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
303
466
 
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Phase handoff anchors — compact structured summaries written between
3
+ * GSD auto-mode phases so downstream agents inherit decisions, blockers,
4
+ * and intent without re-inferring from scratch.
5
+ */
6
+
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
8
+ import { join } from "node:path";
9
+ import { gsdRoot } from "./paths.js";
10
+
11
/**
 * Structured handoff summary persisted as JSON between phases.
 */
export interface PhaseAnchor {
  /** Phase name; also used as the anchor's file name (`<phase>.json`) and in the prompt heading. */
  phase: string;
  /**
   * Milestone identifier stored with the anchor. The helpers in this file do
   * not read it — they receive the milestone ID as a separate argument.
   */
  milestoneId: string;
  /** Creation timestamp as a string; the format is not enforced here (presumably ISO 8601 — confirm against the writer). */
  generatedAt: string;
  /** Free-text intent, rendered on the "**Intent:**" line of the prompt block. */
  intent: string;
  /** Rendered as a bullet list under "**Decisions:**"; the section is omitted when empty. */
  decisions: string[];
  /** Rendered as a bullet list under "**Blockers:**"; the section is omitted when empty. */
  blockers: string[];
  /** Rendered as a bullet list under "**Next steps:**"; the section is omitted when empty. */
  nextSteps: string[];
}
20
+
21
/**
 * Builds the directory path that holds a milestone's anchor files:
 * `<gsdRoot(basePath)>/milestones/<milestoneId>/anchors`.
 * Only computes the path; nothing is created on disk.
 */
function anchorsDir(basePath: string, milestoneId: string): string {
  const root = gsdRoot(basePath);
  return join(root, "milestones", milestoneId, "anchors");
}
24
+
25
/**
 * Builds the path of one phase's anchor file: `<phase>.json` inside the
 * milestone's anchors directory. Only computes the path.
 */
function anchorPath(basePath: string, milestoneId: string, phase: string): string {
  const directory = anchorsDir(basePath, milestoneId);
  const fileName = `${phase}.json`;
  return join(directory, fileName);
}
28
+
29
/**
 * Persists `anchor` as pretty-printed JSON (2-space indent, UTF-8) at the
 * anchor path for `milestoneId` and `anchor.phase`, creating the anchors
 * directory first if needed. An existing file for the same phase is overwritten.
 *
 * The file location is derived from the `milestoneId` argument, not from
 * `anchor.milestoneId`.
 *
 * @throws Whatever `mkdirSync` / `writeFileSync` throw on file-system errors.
 */
export function writePhaseAnchor(basePath: string, milestoneId: string, anchor: PhaseAnchor): void {
  // With `recursive: true`, mkdirSync succeeds silently when the directory
  // already exists, so an existsSync pre-check would only add a race window.
  mkdirSync(anchorsDir(basePath, milestoneId), { recursive: true });

  const serialized = JSON.stringify(anchor, null, 2);
  writeFileSync(anchorPath(basePath, milestoneId, anchor.phase), serialized, "utf-8");
}
36
+
37
/**
 * Runtime check that a parsed JSON value has the full `PhaseAnchor` shape:
 * four string fields and three arrays of strings.
 */
function isPhaseAnchor(value: unknown): value is PhaseAnchor {
  if (typeof value !== "object" || value === null) return false;
  const record = value as Record<string, unknown>;
  const isStringList = (candidate: unknown): boolean =>
    Array.isArray(candidate) && candidate.every(item => typeof item === "string");
  return (
    typeof record.phase === "string" &&
    typeof record.milestoneId === "string" &&
    typeof record.generatedAt === "string" &&
    typeof record.intent === "string" &&
    isStringList(record.decisions) &&
    isStringList(record.blockers) &&
    isStringList(record.nextSteps)
  );
}

/**
 * Loads the anchor stored for `phase` under `milestoneId`.
 *
 * @returns The parsed anchor, or `null` when the file does not exist, cannot
 *          be read, is not valid JSON, or does not have the `PhaseAnchor`
 *          shape. Validating the shape here keeps a hand-edited or truncated
 *          file from reaching callers that index into the array fields.
 */
export function readPhaseAnchor(basePath: string, milestoneId: string, phase: string): PhaseAnchor | null {
  const path = anchorPath(basePath, milestoneId, phase);
  if (!existsSync(path)) return null;
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    return isPhaseAnchor(parsed) ? parsed : null;
  } catch {
    // Unreadable or corrupt anchors are treated the same as missing ones.
    return null;
  }
}
46
+
47
/**
 * Renders an anchor as a Markdown block for inclusion in a prompt.
 *
 * Layout: a "## Handoff from <phase>" heading, the intent line, then one
 * bullet list each for decisions, blockers and next steps (a list is left out
 * entirely when it has no entries), closed by a "---" rule. Lines are joined
 * with "\n" and there is no trailing newline.
 */
export function formatAnchorForPrompt(anchor: PhaseAnchor): string {
  // One optional section: blank separator, bold heading, then a bullet per entry.
  const bulletSection = (heading: string, entries: string[]): string[] =>
    entries.length > 0 ? ["", heading, ...Array.from(entries, entry => `- ${entry}`)] : [];

  const rendered: string[] = [
    `## Handoff from ${anchor.phase}`,
    "",
    `**Intent:** ${anchor.intent}`,
    ...bulletSection("**Decisions:**", anchor.decisions),
    ...bulletSection("**Blockers:**", anchor.blockers),
    ...bulletSection("**Next steps:**", anchor.nextSteps),
    "",
    "---",
  ];

  return rendered.join("\n");
}