gsd-pi 2.60.0-dev.d9052f5 → 2.61.0-dev.7aed0bf

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/dist/resources/extensions/ask-user-questions.js +7 -4
  2. package/dist/resources/extensions/gsd/auto/phases.js +15 -7
  3. package/dist/resources/extensions/gsd/auto-dashboard.js +21 -8
  4. package/dist/resources/extensions/gsd/auto-dispatch.js +6 -3
  5. package/dist/resources/extensions/gsd/auto-model-selection.js +58 -9
  6. package/dist/resources/extensions/gsd/auto-post-unit.js +3 -2
  7. package/dist/resources/extensions/gsd/auto-prompts.js +36 -20
  8. package/dist/resources/extensions/gsd/auto-recovery.js +37 -18
  9. package/dist/resources/extensions/gsd/auto-start.js +9 -5
  10. package/dist/resources/extensions/gsd/auto-timers.js +11 -5
  11. package/dist/resources/extensions/gsd/auto-unit-closeout.js +5 -3
  12. package/dist/resources/extensions/gsd/auto-verification.js +3 -2
  13. package/dist/resources/extensions/gsd/auto-worktree.js +120 -55
  14. package/dist/resources/extensions/gsd/auto.js +39 -17
  15. package/dist/resources/extensions/gsd/bootstrap/agent-end-recovery.js +6 -3
  16. package/dist/resources/extensions/gsd/bootstrap/db-tools.js +2 -2
  17. package/dist/resources/extensions/gsd/bootstrap/dynamic-tools.js +4 -10
  18. package/dist/resources/extensions/gsd/bootstrap/journal-tools.js +2 -1
  19. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +7 -0
  20. package/dist/resources/extensions/gsd/bootstrap/system-context.js +11 -10
  21. package/dist/resources/extensions/gsd/commands/catalog.js +2 -0
  22. package/dist/resources/extensions/gsd/commands-codebase.js +48 -21
  23. package/dist/resources/extensions/gsd/commands-inspect.js +2 -1
  24. package/dist/resources/extensions/gsd/commands-maintenance.js +32 -19
  25. package/dist/resources/extensions/gsd/complexity-classifier.js +8 -4
  26. package/dist/resources/extensions/gsd/custom-verification.js +3 -2
  27. package/dist/resources/extensions/gsd/gsd-db.js +33 -13
  28. package/dist/resources/extensions/gsd/guided-flow.js +19 -9
  29. package/dist/resources/extensions/gsd/init-wizard.js +12 -0
  30. package/dist/resources/extensions/gsd/markdown-renderer.js +11 -9
  31. package/dist/resources/extensions/gsd/md-importer.js +5 -4
  32. package/dist/resources/extensions/gsd/milestone-actions.js +3 -2
  33. package/dist/resources/extensions/gsd/milestone-ids.js +2 -1
  34. package/dist/resources/extensions/gsd/model-router.js +156 -121
  35. package/dist/resources/extensions/gsd/parallel-merge.js +5 -3
  36. package/dist/resources/extensions/gsd/parallel-orchestrator.js +26 -14
  37. package/dist/resources/extensions/gsd/preferences-types.js +1 -0
  38. package/dist/resources/extensions/gsd/preferences-validation.js +45 -0
  39. package/dist/resources/extensions/gsd/preferences.js +15 -3
  40. package/dist/resources/extensions/gsd/prompt-loader.js +3 -2
  41. package/dist/resources/extensions/gsd/prompts/rethink.md +1 -1
  42. package/dist/resources/extensions/gsd/rule-registry.js +7 -6
  43. package/dist/resources/extensions/gsd/safe-fs.js +6 -8
  44. package/dist/resources/extensions/gsd/tools/complete-milestone.js +3 -2
  45. package/dist/resources/extensions/gsd/tools/complete-slice.js +3 -2
  46. package/dist/resources/extensions/gsd/tools/complete-task.js +3 -2
  47. package/dist/resources/extensions/gsd/tools/plan-milestone.js +3 -2
  48. package/dist/resources/extensions/gsd/tools/plan-slice.js +3 -2
  49. package/dist/resources/extensions/gsd/tools/plan-task.js +2 -1
  50. package/dist/resources/extensions/gsd/tools/reassess-roadmap.js +4 -4
  51. package/dist/resources/extensions/gsd/tools/reopen-slice.js +2 -1
  52. package/dist/resources/extensions/gsd/tools/reopen-task.js +2 -1
  53. package/dist/resources/extensions/gsd/tools/replan-slice.js +2 -1
  54. package/dist/resources/extensions/gsd/tools/validate-milestone.js +2 -1
  55. package/dist/resources/extensions/gsd/triage-resolution.js +11 -4
  56. package/dist/resources/extensions/gsd/workflow-events.js +2 -1
  57. package/dist/resources/extensions/gsd/workflow-logger.js +37 -4
  58. package/dist/resources/extensions/gsd/workflow-migration.js +14 -12
  59. package/dist/resources/extensions/gsd/workflow-projections.js +2 -2
  60. package/dist/resources/extensions/gsd/workflow-reconcile.js +2 -2
  61. package/dist/resources/extensions/gsd/worktree-manager.js +26 -14
  62. package/dist/resources/extensions/shared/interview-ui.js +3 -1
  63. package/dist/web/standalone/.next/BUILD_ID +1 -1
  64. package/dist/web/standalone/.next/app-path-routes-manifest.json +19 -19
  65. package/dist/web/standalone/.next/build-manifest.json +2 -2
  66. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  67. package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
  68. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  69. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  70. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  71. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  72. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  73. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  74. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  75. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  76. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  77. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  78. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  79. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  80. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  81. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  82. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  83. package/dist/web/standalone/.next/server/app/index.html +1 -1
  84. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  85. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  86. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  87. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  88. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  89. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  90. package/dist/web/standalone/.next/server/app-paths-manifest.json +19 -19
  91. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  92. package/dist/web/standalone/.next/server/pages/500.html +2 -2
  93. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  94. package/package.json +1 -1
  95. package/packages/pi-coding-agent/dist/core/extensions/loader.d.ts.map +1 -1
  96. package/packages/pi-coding-agent/dist/core/extensions/loader.js +5 -0
  97. package/packages/pi-coding-agent/dist/core/extensions/loader.js.map +1 -1
  98. package/packages/pi-coding-agent/dist/core/extensions/runner.d.ts +2 -1
  99. package/packages/pi-coding-agent/dist/core/extensions/runner.d.ts.map +1 -1
  100. package/packages/pi-coding-agent/dist/core/extensions/runner.js +16 -0
  101. package/packages/pi-coding-agent/dist/core/extensions/runner.js.map +1 -1
  102. package/packages/pi-coding-agent/dist/core/extensions/types.d.ts +26 -0
  103. package/packages/pi-coding-agent/dist/core/extensions/types.d.ts.map +1 -1
  104. package/packages/pi-coding-agent/dist/core/extensions/types.js.map +1 -1
  105. package/packages/pi-coding-agent/dist/core/lsp/config.d.ts.map +1 -1
  106. package/packages/pi-coding-agent/dist/core/lsp/config.js +6 -1
  107. package/packages/pi-coding-agent/dist/core/lsp/config.js.map +1 -1
  108. package/packages/pi-coding-agent/dist/core/lsp/defaults.json +2 -2
  109. package/packages/pi-coding-agent/dist/core/lsp/lsp-legacy-alias.test.d.ts +2 -0
  110. package/packages/pi-coding-agent/dist/core/lsp/lsp-legacy-alias.test.d.ts.map +1 -0
  111. package/packages/pi-coding-agent/dist/core/lsp/lsp-legacy-alias.test.js +47 -0
  112. package/packages/pi-coding-agent/dist/core/lsp/lsp-legacy-alias.test.js.map +1 -0
  113. package/packages/pi-coding-agent/package.json +1 -1
  114. package/packages/pi-coding-agent/src/core/extensions/loader.ts +6 -0
  115. package/packages/pi-coding-agent/src/core/extensions/runner.ts +19 -0
  116. package/packages/pi-coding-agent/src/core/extensions/types.ts +26 -0
  117. package/packages/pi-coding-agent/src/core/lsp/config.ts +7 -1
  118. package/packages/pi-coding-agent/src/core/lsp/defaults.json +2 -2
  119. package/packages/pi-coding-agent/src/core/lsp/lsp-legacy-alias.test.ts +70 -0
  120. package/pkg/package.json +1 -1
  121. package/src/resources/extensions/ask-user-questions.ts +7 -3
  122. package/src/resources/extensions/gsd/auto/phases.ts +17 -7
  123. package/src/resources/extensions/gsd/auto-dashboard.ts +22 -8
  124. package/src/resources/extensions/gsd/auto-dispatch.ts +7 -3
  125. package/src/resources/extensions/gsd/auto-model-selection.ts +77 -15
  126. package/src/resources/extensions/gsd/auto-post-unit.ts +4 -4
  127. package/src/resources/extensions/gsd/auto-prompts.ts +37 -20
  128. package/src/resources/extensions/gsd/auto-recovery.ts +38 -18
  129. package/src/resources/extensions/gsd/auto-start.ts +10 -9
  130. package/src/resources/extensions/gsd/auto-timers.ts +12 -5
  131. package/src/resources/extensions/gsd/auto-unit-closeout.ts +6 -2
  132. package/src/resources/extensions/gsd/auto-verification.ts +3 -6
  133. package/src/resources/extensions/gsd/auto-worktree.ts +121 -55
  134. package/src/resources/extensions/gsd/auto.ts +40 -17
  135. package/src/resources/extensions/gsd/bootstrap/agent-end-recovery.ts +4 -3
  136. package/src/resources/extensions/gsd/bootstrap/db-tools.ts +2 -2
  137. package/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts +4 -16
  138. package/src/resources/extensions/gsd/bootstrap/journal-tools.ts +2 -1
  139. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +8 -0
  140. package/src/resources/extensions/gsd/bootstrap/system-context.ts +11 -10
  141. package/src/resources/extensions/gsd/commands/catalog.ts +2 -0
  142. package/src/resources/extensions/gsd/commands-codebase.ts +52 -20
  143. package/src/resources/extensions/gsd/commands-inspect.ts +2 -1
  144. package/src/resources/extensions/gsd/commands-maintenance.ts +28 -19
  145. package/src/resources/extensions/gsd/complexity-classifier.ts +9 -4
  146. package/src/resources/extensions/gsd/custom-verification.ts +3 -2
  147. package/src/resources/extensions/gsd/gsd-db.ts +12 -14
  148. package/src/resources/extensions/gsd/guided-flow.ts +9 -8
  149. package/src/resources/extensions/gsd/init-wizard.ts +12 -0
  150. package/src/resources/extensions/gsd/markdown-renderer.ts +11 -17
  151. package/src/resources/extensions/gsd/md-importer.ts +5 -4
  152. package/src/resources/extensions/gsd/milestone-actions.ts +3 -2
  153. package/src/resources/extensions/gsd/milestone-ids.ts +2 -1
  154. package/src/resources/extensions/gsd/model-router.ts +199 -173
  155. package/src/resources/extensions/gsd/parallel-merge.ts +5 -3
  156. package/src/resources/extensions/gsd/parallel-orchestrator.ts +18 -14
  157. package/src/resources/extensions/gsd/preferences-types.ts +13 -0
  158. package/src/resources/extensions/gsd/preferences-validation.ts +45 -0
  159. package/src/resources/extensions/gsd/preferences.ts +16 -3
  160. package/src/resources/extensions/gsd/prompt-loader.ts +3 -2
  161. package/src/resources/extensions/gsd/prompts/rethink.md +1 -1
  162. package/src/resources/extensions/gsd/rule-registry.ts +7 -6
  163. package/src/resources/extensions/gsd/safe-fs.ts +6 -5
  164. package/src/resources/extensions/gsd/tests/capability-router.test.ts +347 -0
  165. package/src/resources/extensions/gsd/tests/codebase-generator.test.ts +63 -0
  166. package/src/resources/extensions/gsd/tests/complexity-classifier.test.ts +27 -2
  167. package/src/resources/extensions/gsd/tests/db-path-worktree-symlink.test.ts +4 -4
  168. package/src/resources/extensions/gsd/tests/integration/state-machine-edge-cases.test.ts +1188 -0
  169. package/src/resources/extensions/gsd/tests/integration/state-machine-runtime-failures.test.ts +841 -0
  170. package/src/resources/extensions/gsd/tests/model-router.test.ts +403 -3
  171. package/src/resources/extensions/gsd/tests/preferences.test.ts +62 -0
  172. package/src/resources/extensions/gsd/tests/remote-questions.test.ts +21 -0
  173. package/src/resources/extensions/gsd/tests/silent-catch-diagnostics.test.ts +284 -0
  174. package/src/resources/extensions/gsd/tests/workflow-logger-audit.test.ts +120 -0
  175. package/src/resources/extensions/gsd/tests/workflow-logger.test.ts +6 -6
  176. package/src/resources/extensions/gsd/tools/complete-milestone.ts +3 -6
  177. package/src/resources/extensions/gsd/tools/complete-slice.ts +3 -6
  178. package/src/resources/extensions/gsd/tools/complete-task.ts +3 -6
  179. package/src/resources/extensions/gsd/tools/plan-milestone.ts +3 -6
  180. package/src/resources/extensions/gsd/tools/plan-slice.ts +3 -6
  181. package/src/resources/extensions/gsd/tools/plan-task.ts +2 -3
  182. package/src/resources/extensions/gsd/tools/reassess-roadmap.ts +4 -6
  183. package/src/resources/extensions/gsd/tools/reopen-slice.ts +2 -3
  184. package/src/resources/extensions/gsd/tools/reopen-task.ts +2 -3
  185. package/src/resources/extensions/gsd/tools/replan-slice.ts +2 -3
  186. package/src/resources/extensions/gsd/tools/validate-milestone.ts +2 -3
  187. package/src/resources/extensions/gsd/triage-resolution.ts +11 -4
  188. package/src/resources/extensions/gsd/types.ts +1 -0
  189. package/src/resources/extensions/gsd/workflow-events.ts +2 -1
  190. package/src/resources/extensions/gsd/workflow-logger.ts +52 -5
  191. package/src/resources/extensions/gsd/workflow-migration.ts +14 -12
  192. package/src/resources/extensions/gsd/workflow-projections.ts +2 -2
  193. package/src/resources/extensions/gsd/workflow-reconcile.ts +2 -2
  194. package/src/resources/extensions/gsd/worktree-manager.ts +16 -14
  195. package/src/resources/extensions/shared/interview-ui.ts +3 -1
  196. package/src/resources/extensions/shared/tests/interview-notes-loop.test.ts +144 -0
  197. /package/dist/web/standalone/.next/static/{JVkoVYumy0cDhOQISEYdG → b7FOoMHaUb3FPoLNbxar4}/_buildManifest.js +0 -0
  198. /package/dist/web/standalone/.next/static/{JVkoVYumy0cDhOQISEYdG → b7FOoMHaUb3FPoLNbxar4}/_ssgManifest.js +0 -0
@@ -1,4 +1,4 @@
1
- import test from "node:test";
1
+ import test, { describe } from "node:test";
2
2
  import assert from "node:assert/strict";
3
3
 
4
4
  import {
@@ -7,6 +7,8 @@ import {
7
7
  defaultRoutingConfig,
8
8
  scoreModel,
9
9
  computeTaskRequirements,
10
+ scoreEligibleModels,
11
+ getEligibleModels,
10
12
  MODEL_CAPABILITY_PROFILES,
11
13
  } from "../model-router.js";
12
14
  import type { DynamicRoutingConfig, RoutingDecision, ModelCapabilities } from "../model-router.js";
@@ -211,9 +213,9 @@ test("#2192: known model is still downgraded normally", () => {
211
213
 
212
214
  // ─── Capability Scoring (ADR-004 Phase 2) ───────────────────────────────────
213
215
 
214
- test("defaultRoutingConfig includes capability_routing: false", () => {
216
+ test("defaultRoutingConfig includes capability_routing: true", () => {
215
217
  const config = defaultRoutingConfig();
216
- assert.equal(config.capability_routing, false);
218
+ assert.equal(config.capability_routing, true);
217
219
  });
218
220
 
219
221
  test("scoreModel computes weighted average of capability × requirement", () => {
@@ -356,3 +358,401 @@ test("#2885: heavy openai-codex model downgrades to light for light task", () =>
356
358
  // Should pick a light-tier model
357
359
  assert.notEqual(result.modelId, "gpt-5.4", "should not use the heavy model for light task");
358
360
  });
361
+ // ─── scoreModel ──────────────────────────────────────────────────────────────
362
+
363
+ describe("scoreModel", () => {
364
+ const sonnetProfile: ModelCapabilities = MODEL_CAPABILITY_PROFILES["claude-sonnet-4-6"]!;
365
+
366
+ test("produces correct weighted average for two dimensions (coding:0.9, instruction:0.7)", () => {
367
+ // (0.9*85 + 0.7*85) / (0.9+0.7) = (76.5+59.5)/1.6 = 136/1.6 = 85.0
368
+ const score = scoreModel(sonnetProfile, { coding: 0.9, instruction: 0.7 });
369
+ assert.ok(Math.abs(score - 85.0) < 0.01, `Expected ~85.0, got ${score}`);
370
+ });
371
+
372
+ test("returns 50 when requirements is empty", () => {
373
+ const score = scoreModel(sonnetProfile, {});
374
+ assert.equal(score, 50);
375
+ });
376
+
377
+ test("returns correct score for single dimension coding:1.0", () => {
378
+ // coding=95 for claude-opus-4-6
379
+ const opusProfile = MODEL_CAPABILITY_PROFILES["claude-opus-4-6"]!;
380
+ const score = scoreModel(opusProfile, { coding: 1.0 });
381
+ assert.equal(score, 95);
382
+ });
383
+
384
+ test("handles all 7 dimensions correctly", () => {
385
+ // Uniform weight 1.0 on every dim → average of all dim values
386
+ const profile: ModelCapabilities = {
387
+ coding: 60, debugging: 60, research: 60, reasoning: 60,
388
+ speed: 60, longContext: 60, instruction: 60,
389
+ };
390
+ const reqs: Partial<Record<keyof ModelCapabilities, number>> = {
391
+ coding: 1.0, debugging: 1.0, research: 1.0, reasoning: 1.0,
392
+ speed: 1.0, longContext: 1.0, instruction: 1.0,
393
+ };
394
+ const score = scoreModel(profile, reqs);
395
+ assert.equal(score, 60);
396
+ });
397
+ });
398
+
399
+ // ─── computeTaskRequirements ─────────────────────────────────────────────────
400
+
401
+ describe("computeTaskRequirements", () => {
402
+ test("execute-task with no metadata returns base vector", () => {
403
+ const req = computeTaskRequirements("execute-task", undefined);
404
+ assert.deepStrictEqual(req, { coding: 0.9, instruction: 0.7, speed: 0.3 });
405
+ });
406
+
407
+ test("execute-task with tags:['docs'] adjusts requirements", () => {
408
+ const req = computeTaskRequirements("execute-task", { tags: ["docs"] });
409
+ assert.equal(req.instruction, 0.9);
410
+ assert.equal(req.coding, 0.3);
411
+ assert.equal(req.speed, 0.7);
412
+ });
413
+
414
+ test("execute-task with tags:['config'] adjusts requirements", () => {
415
+ const req = computeTaskRequirements("execute-task", { tags: ["config"] });
416
+ assert.equal(req.instruction, 0.9);
417
+ });
418
+
419
+ test("execute-task with complexityKeywords:['concurrency'] boosts debugging and reasoning", () => {
420
+ const req = computeTaskRequirements("execute-task", { complexityKeywords: ["concurrency"] });
421
+ assert.equal(req.debugging, 0.9);
422
+ assert.equal(req.reasoning, 0.8);
423
+ });
424
+
425
+ test("execute-task with complexityKeywords:['migration'] boosts reasoning and coding", () => {
426
+ const req = computeTaskRequirements("execute-task", { complexityKeywords: ["migration"] });
427
+ assert.equal(req.reasoning, 0.9);
428
+ assert.equal(req.coding, 0.8);
429
+ });
430
+
431
+ test("execute-task with fileCount:8 boosts coding and reasoning", () => {
432
+ const req = computeTaskRequirements("execute-task", { fileCount: 8 });
433
+ assert.equal(req.coding, 0.9);
434
+ assert.equal(req.reasoning, 0.7);
435
+ });
436
+
437
+ test("execute-task with estimatedLines:600 boosts coding and reasoning", () => {
438
+ const req = computeTaskRequirements("execute-task", { estimatedLines: 600 });
439
+ assert.equal(req.coding, 0.9);
440
+ assert.equal(req.reasoning, 0.7);
441
+ });
442
+
443
+ test("research-milestone returns correct base vector", () => {
444
+ const req = computeTaskRequirements("research-milestone");
445
+ assert.deepStrictEqual(req, { research: 0.9, longContext: 0.7, reasoning: 0.5 });
446
+ });
447
+
448
+ test("plan-slice returns correct base vector", () => {
449
+ const req = computeTaskRequirements("plan-slice");
450
+ assert.deepStrictEqual(req, { reasoning: 0.9, coding: 0.5 });
451
+ });
452
+
453
+ test("unknown-unit-type returns default reasoning requirement", () => {
454
+ const req = computeTaskRequirements("unknown-unit-type");
455
+ assert.deepStrictEqual(req, { reasoning: 0.5 });
456
+ });
457
+
458
+ test("non-execute-task with metadata ignores metadata refinements", () => {
459
+ // research-milestone should return the same vector regardless of metadata
460
+ const reqWithMeta = computeTaskRequirements("research-milestone", { tags: ["docs"], fileCount: 10 });
461
+ const reqWithout = computeTaskRequirements("research-milestone");
462
+ assert.deepStrictEqual(reqWithMeta, reqWithout);
463
+ });
464
+ });
465
+
466
+ // ─── scoreEligibleModels ─────────────────────────────────────────────────────
467
+
468
+ describe("scoreEligibleModels", () => {
469
+ test("ranks models by score descending when scores differ by more than 2", () => {
470
+ // research: heavily weights research dimension. gemini-2.5-pro has 85 research vs sonnet's 75
471
+ const requirements = { research: 0.9, longContext: 0.7, reasoning: 0.5 };
472
+ const results = scoreEligibleModels(["claude-sonnet-4-6", "gemini-2.5-pro"], requirements);
473
+ assert.equal(results.length, 2);
474
+ assert.ok(results[0].score >= results[1].score, "Should be sorted by score descending");
475
+ });
476
+
477
+ test("within 2-point threshold, prefers cheaper model", () => {
478
+ // Use models without built-in profiles (both get score 50) so tie-break applies
479
+ // Equal scores are forced here by using a single pair of unknown models
480
+ const requirements = { coding: 1.0 };
481
+ // model-a and model-b are both unknown → score=50, cost=Infinity → lexicographic
482
+ const results = scoreEligibleModels(["model-z", "model-a"], requirements);
483
+ // Both unknown: score=50 (within 2), cost=Infinity (equal) → lex: model-a first
484
+ assert.equal(results[0].modelId, "model-a");
485
+ });
486
+
487
+ test("single model returns array of one", () => {
488
+ const results = scoreEligibleModels(["claude-sonnet-4-6"], { coding: 0.9 });
489
+ assert.equal(results.length, 1);
490
+ assert.equal(results[0].modelId, "claude-sonnet-4-6");
491
+ });
492
+
493
+ test("unknown model with no profile gets score of 50", () => {
494
+ const results = scoreEligibleModels(["totally-unknown-model"], { coding: 1.0 });
495
+ assert.equal(results[0].score, 50);
496
+ });
497
+
498
+ test("capabilityOverrides deep-merges with built-in profile", () => {
499
+ const requirements = { coding: 1.0 };
500
+ // Override sonnet's coding to 30 — gpt-4o (coding=80) should win
501
+ const results = scoreEligibleModels(
502
+ ["claude-sonnet-4-6", "gpt-4o"],
503
+ requirements,
504
+ { "claude-sonnet-4-6": { coding: 30 } },
505
+ );
506
+ assert.equal(results[0].modelId, "gpt-4o", "gpt-4o should rank first after coding override");
507
+ });
508
+ });
509
+
510
+ // ─── getEligibleModels ───────────────────────────────────────────────────────
511
+
512
+ describe("getEligibleModels", () => {
513
+ const ALL_MODELS = [
514
+ "claude-opus-4-6", // heavy
515
+ "claude-sonnet-4-6", // standard
516
+ "claude-haiku-4-5", // light
517
+ "gpt-4o-mini", // light
518
+ "gpt-4o", // standard
519
+ ];
520
+
521
+ test("returns light-tier models from available list sorted by cost", () => {
522
+ const config: DynamicRoutingConfig = defaultRoutingConfig();
523
+ const result = getEligibleModels("light", ALL_MODELS, config);
524
+ assert.ok(result.length >= 1);
525
+ for (const id of result) {
526
+ assert.ok(
527
+ ["claude-haiku-4-5", "gpt-4o-mini"].includes(id),
528
+ `Expected light-tier model, got ${id}`,
529
+ );
530
+ }
531
+ });
532
+
533
+ test("returns standard-tier models from available list sorted by cost", () => {
534
+ const config: DynamicRoutingConfig = defaultRoutingConfig();
535
+ const result = getEligibleModels("standard", ALL_MODELS, config);
536
+ assert.ok(result.length >= 1);
537
+ for (const id of result) {
538
+ assert.ok(
539
+ ["claude-sonnet-4-6", "gpt-4o"].includes(id),
540
+ `Expected standard-tier model, got ${id}`,
541
+ );
542
+ }
543
+ });
544
+
545
+ test("tier_models pinned model returns single-element array", () => {
546
+ const config: DynamicRoutingConfig = {
547
+ ...defaultRoutingConfig(),
548
+ tier_models: { light: "gpt-4o-mini" },
549
+ };
550
+ const result = getEligibleModels("light", ALL_MODELS, config);
551
+ assert.deepStrictEqual(result, ["gpt-4o-mini"]);
552
+ });
553
+
554
+ test("empty available list returns empty array", () => {
555
+ const config: DynamicRoutingConfig = defaultRoutingConfig();
556
+ const result = getEligibleModels("light", [], config);
557
+ assert.equal(result.length, 0);
558
+ });
559
+
560
+ test("unknown models classified as standard appear in standard tier results", () => {
561
+ const config: DynamicRoutingConfig = defaultRoutingConfig();
562
+ // unknown-model-xyz has no entry → defaults to standard tier
563
+ const result = getEligibleModels("standard", ["unknown-model-xyz"], config);
564
+ assert.ok(result.includes("unknown-model-xyz"), "Unknown model should appear in standard tier");
565
+ });
566
+ });
567
+
568
+ // ─── capability-aware routing integration ────────────────────────────────────
569
+
570
+ describe("capability-aware routing integration", () => {
571
+ // All standard-tier models available alongside heavy (opus)
572
+ const MULTI_MODEL_AVAILABLE = [
573
+ "claude-opus-4-6",
574
+ "claude-sonnet-4-6",
575
+ "gpt-4o",
576
+ "gemini-2.5-pro",
577
+ "claude-haiku-4-5",
578
+ "gpt-4o-mini",
579
+ ];
580
+
581
+ // 1. Full pipeline with capability scoring active
582
+ test("full pipeline with capability_routing: true returns capability-scored decision", () => {
583
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
584
+ // Configured primary is opus (heavy) — standard tier should trigger capability scoring
585
+ const result = resolveModelForComplexity(
586
+ { tier: "standard", reason: "test", downgraded: false },
587
+ { primary: "claude-opus-4-6", fallbacks: [] },
588
+ config,
589
+ MULTI_MODEL_AVAILABLE,
590
+ "execute-task",
591
+ { tags: [], complexityKeywords: [], fileCount: 3, estimatedLines: 100, codeBlockCount: 0 },
592
+ );
593
+ assert.equal(result.selectionMethod, "capability-scored", "should use capability scoring when enabled with multiple eligible models");
594
+ assert.ok(result.capabilityScores !== undefined, "capabilityScores should be populated");
595
+ assert.ok(Object.keys(result.capabilityScores!).length > 1, "should have scores for multiple models");
596
+ assert.equal(result.wasDowngraded, true, "should be downgraded from opus");
597
+ });
598
+
599
+ // 2. capability_routing: false falls back to tier-only
600
+ test("capability_routing: false skips scoring and uses tier-only", () => {
601
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: false };
602
+ const result = resolveModelForComplexity(
603
+ { tier: "standard", reason: "test", downgraded: false },
604
+ { primary: "claude-opus-4-6", fallbacks: [] },
605
+ config,
606
+ MULTI_MODEL_AVAILABLE,
607
+ "execute-task",
608
+ undefined,
609
+ );
610
+ assert.equal(result.selectionMethod, "tier-only", "capability_routing: false should use tier-only");
611
+ assert.equal(result.capabilityScores, undefined, "capabilityScores should be undefined for tier-only");
612
+ });
613
+
614
+ // 3. Single eligible model skips scoring
615
+ test("single eligible model skips capability scoring and uses tier-only", () => {
616
+ const config: DynamicRoutingConfig = {
617
+ ...defaultRoutingConfig(),
618
+ enabled: true,
619
+ capability_routing: true,
620
+ tier_models: { standard: "claude-sonnet-4-6" },
621
+ };
622
+ // Pin to single standard model — eligible.length === 1 → skips STEP 2
623
+ const result = resolveModelForComplexity(
624
+ { tier: "standard", reason: "test", downgraded: false },
625
+ { primary: "claude-opus-4-6", fallbacks: [] },
626
+ config,
627
+ MULTI_MODEL_AVAILABLE,
628
+ "execute-task",
629
+ undefined,
630
+ );
631
+ // Single pinned model → tier-only (no scoring needed)
632
+ assert.equal(result.selectionMethod, "tier-only", "single eligible model should use tier-only");
633
+ assert.equal(result.modelId, "claude-sonnet-4-6", "should use the pinned model");
634
+ });
635
+
636
+ // 4. Unknown model with no profile gets uniform 50s and competes
637
+ test("unknown model with no profile gets uniform score of 50 and can compete", () => {
638
+ const unknownModel = "unknown-future-model-xyz";
639
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
640
+ // Add unknown model to available list at standard tier (unknown → standard per D-15)
641
+ // scoring should still work with score=50 for the unknown model
642
+ const requirements = { coding: 0.9, instruction: 0.7, speed: 0.3 };
643
+ const scored = scoreEligibleModels([unknownModel, "claude-sonnet-4-6"], requirements);
644
+ const unknownEntry = scored.find(s => s.modelId === unknownModel);
645
+ assert.ok(unknownEntry !== undefined, "unknown model should be in scored results");
646
+ // Unknown model gets uniform 50s: (0.9*50 + 0.7*50 + 0.3*50) / (0.9+0.7+0.3) ≈ 50
647
+ assert.ok(Math.abs(unknownEntry!.score - 50) < 0.01, `expected score ~50, got ${unknownEntry!.score}`);
648
+ });
649
+
650
+ // 5. Capability overrides change scoring outcome
651
+ test("capabilityOverrides boost a model above another for same task", () => {
652
+ // sonnet: coding=85, gpt-4o: coding=80. Override gpt-4o coding to 99 → gpt-4o should win.
653
+ const requirements = { coding: 1.0 };
654
+ const overrides = { "gpt-4o": { coding: 99 } };
655
+ const scored = scoreEligibleModels(["claude-sonnet-4-6", "gpt-4o"], requirements, overrides);
656
+ assert.equal(scored[0].modelId, "gpt-4o", "overridden model should win for coding-heavy task");
657
+ assert.ok(scored[0].score > 90, `expected score > 90 after override, got ${scored[0].score}`);
658
+ });
659
+
660
+ // 5b. Capability overrides pass through resolveModelForComplexity to scoreEligibleModels
661
+ test("resolveModelForComplexity passes capabilityOverrides to scoring step", () => {
662
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
663
+ // sonnet coding=85, gpt-4o coding=80. Override gpt-4o coding to 99 → gpt-4o should win.
664
+ const overrides: Record<string, Partial<ModelCapabilities>> = { "gpt-4o": { coding: 99 } };
665
+ const result = resolveModelForComplexity(
666
+ { tier: "standard", reason: "test", downgraded: false },
667
+ { primary: "claude-opus-4-6", fallbacks: [] },
668
+ config,
669
+ ["claude-opus-4-6", "claude-sonnet-4-6", "gpt-4o"],
670
+ "execute-task",
671
+ undefined,
672
+ overrides,
673
+ );
674
+ assert.equal(result.selectionMethod, "capability-scored");
675
+ assert.equal(result.modelId, "gpt-4o", "gpt-4o should win with coding override");
676
+ });
677
+
678
+ // 6. Regression: existing routing guards unchanged
679
+ test("regression: routing-disabled passthrough still returns tier-only", () => {
680
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: false };
681
+ const result = resolveModelForComplexity(
682
+ { tier: "light", reason: "test", downgraded: false },
683
+ { primary: "claude-opus-4-6", fallbacks: [] },
684
+ config,
685
+ MULTI_MODEL_AVAILABLE,
686
+ "execute-task",
687
+ undefined,
688
+ );
689
+ assert.equal(result.selectionMethod, "tier-only");
690
+ assert.equal(result.wasDowngraded, false);
691
+ assert.equal(result.modelId, "claude-opus-4-6");
692
+ });
693
+
694
+ test("regression: unknown-model bypass returns tier-only and does not downgrade", () => {
695
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true };
696
+ const result = resolveModelForComplexity(
697
+ { tier: "light", reason: "test", downgraded: false },
698
+ { primary: "totally-unknown-custom-model", fallbacks: [] },
699
+ config,
700
+ ["totally-unknown-custom-model", ...MULTI_MODEL_AVAILABLE],
701
+ "execute-task",
702
+ undefined,
703
+ );
704
+ assert.equal(result.selectionMethod, "tier-only");
705
+ assert.equal(result.wasDowngraded, false);
706
+ assert.equal(result.modelId, "totally-unknown-custom-model");
707
+ });
708
+
709
+ test("regression: no-downgrade-needed path returns tier-only", () => {
710
+ const config: DynamicRoutingConfig = { ...defaultRoutingConfig(), enabled: true, capability_routing: true };
711
+ // Configured model is sonnet (standard), requesting standard → no downgrade needed
712
+ const result = resolveModelForComplexity(
713
+ { tier: "standard", reason: "test", downgraded: false },
714
+ { primary: "claude-sonnet-4-6", fallbacks: [] },
715
+ config,
716
+ MULTI_MODEL_AVAILABLE,
717
+ "execute-task",
718
+ undefined,
719
+ );
720
+ assert.equal(result.selectionMethod, "tier-only");
721
+ assert.equal(result.wasDowngraded, false);
722
+ assert.equal(result.modelId, "claude-sonnet-4-6");
723
+ });
724
+ });
725
+
726
+ // ─── getModelTier unknown default ────────────────────────────────────────────
727
+
728
+ describe("getModelTier unknown default", () => {
729
+ test("unknown model returns standard tier (not heavy) via downgrade behavior", () => {
730
+ // We can verify this indirectly: resolveModelForComplexity for a standard classification
731
+ // with an unknown primary model should NOT downgrade (because unknown → standard, not heavy)
732
+ const config = { ...defaultRoutingConfig(), enabled: true };
733
+ // Primary here is the known standard-tier model "claude-sonnet-4-6", not an unknown model
734
+ // Classification is "standard" → primary tier is already standard → no downgrade
735
+ // But unknown models use the isKnownModel() guard, so they pass through anyway
736
+ // Test the positive: an unknown model is NOT treated as heavy
737
+ const result = resolveModelForComplexity(
738
+ makeClassification("standard"),
739
+ { primary: "claude-sonnet-4-6", fallbacks: [] },
740
+ config,
741
+ ["claude-sonnet-4-6", "claude-haiku-4-5", "gpt-4o-mini"],
742
+ );
743
+ // standard classification with standard model (sonnet) → no downgrade
744
+ assert.equal(result.wasDowngraded, false, "standard model should not downgrade for standard task");
745
+ assert.equal(result.modelId, "claude-sonnet-4-6");
746
+ });
747
+
748
+ test("unknown model in getEligibleModels defaults to standard tier", () => {
749
+ // Per D-15: getModelTier returns "standard" for unknown models
750
+ const config: DynamicRoutingConfig = defaultRoutingConfig();
751
+ const standardModels = getEligibleModels("standard", ["totally-unknown-model-abc"], config);
752
+ const lightModels = getEligibleModels("light", ["totally-unknown-model-abc"], config);
753
+ const heavyModels = getEligibleModels("heavy", ["totally-unknown-model-abc"], config);
754
+ assert.ok(standardModels.includes("totally-unknown-model-abc"), "Unknown model should be in standard tier");
755
+ assert.equal(lightModels.length, 0, "Unknown model should NOT be in light tier");
756
+ assert.equal(heavyModels.length, 0, "Unknown model should NOT be in heavy tier");
757
+ });
758
+ });
@@ -461,3 +461,65 @@ test("experimental.rtk defaults to off in new project preferences", () => {
461
461
  assert.notEqual(prefs, null);
462
462
  assert.equal(prefs!.experimental?.rtk, undefined);
463
463
  });
464
+
465
+ // ── Codebase Map Preferences ─────────────────────────────────────────────────
466
+
467
+ test("codebase preferences validate and pass through correctly", () => { // happy path: valid values for all three codebase keys
468
+ const result = validatePreferences({
469
+ codebase: {
470
+ exclude_patterns: ["docs/", "fixtures/"],
471
+ max_files: 1000,
472
+ collapse_threshold: 15,
473
+ },
474
+ });
475
+ assert.equal(result.errors.length, 0); // valid input must produce no validation errors
476
+ assert.deepEqual(result.preferences.codebase?.exclude_patterns, ["docs/", "fixtures/"]); // each value is expected back unchanged
477
+ assert.equal(result.preferences.codebase?.max_files, 1000);
478
+ assert.equal(result.preferences.codebase?.collapse_threshold, 15);
479
+ });
480
+
481
+ test("codebase preferences reject invalid types", () => { // each codebase key is given one invalid value; one error per key is expected
482
+ const result = validatePreferences({
483
+ codebase: {
484
+ exclude_patterns: "not-an-array" as any, // cast bypasses compile-time typing so the validator receives a string
485
+ max_files: -5, // negative value
486
+ collapse_threshold: 0, // zero — the assertion below expects it to be rejected as non-positive
487
+ },
488
+ });
489
+ assert.ok(result.errors.some(e => e.includes("exclude_patterns must be an array"))); // substring match, not full-message equality
490
+ assert.ok(result.errors.some(e => e.includes("max_files must be a positive")));
491
+ assert.ok(result.errors.some(e => e.includes("collapse_threshold must be a positive")));
492
+ });
493
+
494
+ test("codebase preferences warn on unknown keys", () => { // an unrecognised key should yield a warning, not an error
495
+ const result = validatePreferences({
496
+ codebase: {
497
+ exclude_patterns: ["docs/"],
498
+ unknown_key: true,
499
+ } as any, // presumably needed because unknown_key is not part of the declared codebase type — confirm
500
+ });
501
+ assert.equal(result.errors.length, 0); // unknown keys must not fail validation
502
+ assert.ok(result.warnings.some(w => w.includes('unknown codebase key "unknown_key"')));
503
+ assert.deepEqual(result.preferences.codebase?.exclude_patterns, ["docs/"]); // the valid sibling key is still expected in the result
504
+ });
505
+
506
+ test("codebase preferences parse from markdown frontmatter", () => { // round trip: frontmatter text → parsePreferencesMarkdown → validatePreferences
507
+ const content = [
508
+ "---",
509
+ "version: 1",
510
+ "codebase:",
511
+ " exclude_patterns:",
512
+ ' - "docs/"',
513
+ ' - ".cache/"',
514
+ " max_files: 800",
515
+ " collapse_threshold: 10",
516
+ "---",
517
+ ].join("\n");
518
+ const prefs = parsePreferencesMarkdown(content);
519
+ assert.notEqual(prefs, null); // parsing must succeed before the non-null assertion on the next line
520
+ const result = validatePreferences(prefs!);
521
+ assert.equal(result.errors.length, 0);
522
+ assert.deepEqual(result.preferences.codebase?.exclude_patterns, ["docs/", ".cache/"]); // list entries expected as an array, in order
523
+ assert.equal(result.preferences.codebase?.max_files, 800); // compared against the number 800
524
+ assert.equal(result.preferences.codebase?.collapse_threshold, 10);
525
+ });
@@ -739,6 +739,27 @@ test("config source-level: hydration skips api_key entries with empty keys", ()
739
739
  );
740
740
  });
741
741
 
742
+ test("ask-user-questions source-level: tryRemoteQuestions is called before the hasUI guard", () => {
743
+ // Regression test for #3480 — remote questions were silently skipped in interactive
744
+ // mode because tryRemoteQuestions was gated behind `if (!ctx.hasUI)`.
745
+ // The fix moved the remote call before that guard so configured channels
746
+ // (Telegram/Slack/Discord) fire regardless of UI availability.
747
+ const src = readFileSync( // source-level check: reads the .ts file as text; nothing is executed
748
+ join(__dirname, "..", "..", "ask-user-questions.ts"),
749
+ "utf-8",
750
+ );
751
+
752
+ const remoteCallIdx = src.indexOf("tryRemoteQuestions(params.questions"); // offset of the FIRST textual match, -1 if absent
753
+ const hasUIGuardIdx = src.indexOf("if (!ctx.hasUI)"); // likewise first match only; later occurrences are not examined
754
+
755
+ assert.ok(remoteCallIdx !== -1, "tryRemoteQuestions call should exist in ask-user-questions.ts");
756
+ assert.ok(hasUIGuardIdx !== -1, "!ctx.hasUI guard should exist in ask-user-questions.ts");
757
+ assert.ok( // compares textual position in the file, used here as a proxy for call order
758
+ remoteCallIdx < hasUIGuardIdx,
759
+ "tryRemoteQuestions must be called before the !ctx.hasUI guard — otherwise remote questions are skipped in interactive mode",
760
+ );
761
+ });
762
+
742
763
  test("config source-level: removeProviderToken uses auth.remove not auth.set with empty key", () => {
743
764
  const commandSrc = readFileSync(
744
765
  join(__dirname, "..", "..", "remote-questions", "remote-command.ts"),