@nathapp/nax 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. package/.claude/settings.json +15 -0
  2. package/.mcp.json +8 -0
  3. package/docs/20260304-review-nax.md +492 -0
  4. package/docs/ROADMAP.md +65 -18
  5. package/docs/adr/ADR-005-implementation-plan.md +655 -0
  6. package/docs/adr/ADR-005-pipeline-re-architecture.md +464 -0
  7. package/docs/specs/bug-039-orphan-processes.md +131 -0
  8. package/docs/specs/bug-040-review-rectification.md +82 -0
  9. package/docs/specs/bug-041-cross-story-test-isolation.md +88 -0
  10. package/docs/specs/bug-042-verifier-failure-capture.md +117 -0
  11. package/docs/specs/feat-010-smart-runner-git-history.md +96 -0
  12. package/docs/specs/feat-011-file-context-strategy.md +73 -0
  13. package/docs/specs/feat-012-tdd-writer-tier.md +79 -0
  14. package/docs/specs/feat-013-test-after-review.md +89 -0
  15. package/docs/specs/feat-014-heartbeat-observability.md +127 -0
  16. package/memory/topic/feat-010-baseref.md +28 -0
  17. package/memory/topic/feat-013-test-after-deprecation.md +22 -0
  18. package/nax/config.json +7 -4
  19. package/nax/features/bug-039-medium/prd.json +45 -0
  20. package/package.json +2 -2
  21. package/src/agents/claude.ts +109 -15
  22. package/src/config/types.ts +11 -0
  23. package/src/context/builder.ts +9 -1
  24. package/src/execution/dry-run.ts +81 -0
  25. package/src/execution/escalation/tier-outcome.ts +29 -44
  26. package/src/execution/executor-types.ts +65 -0
  27. package/src/execution/index.ts +0 -17
  28. package/src/execution/iteration-runner.ts +132 -0
  29. package/src/execution/lifecycle/index.ts +0 -1
  30. package/src/execution/lifecycle/run-regression.ts +5 -5
  31. package/src/execution/pipeline-result-handler.ts +51 -254
  32. package/src/execution/sequential-executor.ts +72 -315
  33. package/src/execution/story-selector.ts +75 -0
  34. package/src/pipeline/event-bus.ts +276 -0
  35. package/src/pipeline/runner.ts +51 -77
  36. package/src/pipeline/stages/autofix.ts +133 -0
  37. package/src/pipeline/stages/completion.ts +22 -30
  38. package/src/pipeline/stages/index.ts +30 -13
  39. package/src/pipeline/stages/rectify.ts +93 -0
  40. package/src/pipeline/stages/regression.ts +88 -0
  41. package/src/pipeline/stages/review.ts +19 -153
  42. package/src/pipeline/stages/verify.ts +19 -3
  43. package/src/pipeline/subscribers/hooks.ts +133 -0
  44. package/src/pipeline/subscribers/interaction.ts +68 -0
  45. package/src/pipeline/subscribers/reporters.ts +174 -0
  46. package/src/pipeline/types.ts +12 -1
  47. package/src/review/orchestrator.ts +105 -0
  48. package/src/review/runner.ts +39 -4
  49. package/src/routing/router.ts +3 -3
  50. package/src/routing/strategies/keyword.ts +5 -2
  51. package/src/routing/strategies/llm.ts +27 -1
  52. package/src/tdd/prompts.ts +1 -1
  53. package/src/utils/git.ts +49 -25
  54. package/src/verification/executor.ts +8 -2
  55. package/src/verification/index.ts +1 -1
  56. package/src/verification/orchestrator-types.ts +145 -0
  57. package/src/verification/orchestrator.ts +76 -0
  58. package/src/{execution/post-verify-rectification.ts → verification/rectification-loop.ts} +13 -20
  59. package/src/verification/{gate.ts → runners.ts} +17 -105
  60. package/src/verification/smart-runner.ts +6 -10
  61. package/src/verification/strategies/acceptance.ts +133 -0
  62. package/src/verification/strategies/regression.ts +90 -0
  63. package/src/verification/strategies/scoped.ts +123 -0
  64. package/test/COVERAGE-GAPS.md +333 -0
  65. package/test/{acceptance → e2e}/cm-003-default-view.test.ts +1 -0
  66. package/test/{integration/e2e.test.ts → e2e/plan-analyze-run.test.ts} +1 -0
  67. package/test/integration/{agent-validation.test.ts → cli/agent-validation.test.ts} +3 -3
  68. package/test/integration/{cli-config-default-edge-cases.test.ts → cli/cli-config-default-edge-cases.test.ts} +6 -5
  69. package/test/integration/{cli-config-default-view.test.ts → cli/cli-config-default-view.test.ts} +8 -7
  70. package/test/integration/{cli-config-diff.test.ts → cli/cli-config-diff.test.ts} +3 -2
  71. package/test/integration/{cli-config.test.ts → cli/cli-config.test.ts} +3 -2
  72. package/test/integration/{cli-diagnose.test.ts → cli/cli-diagnose.test.ts} +5 -4
  73. package/test/integration/{cli-logs.test.ts → cli/cli-logs.test.ts} +12 -3
  74. package/test/integration/{cli-plugins.test.ts → cli/cli-plugins.test.ts} +4 -3
  75. package/test/integration/{cli-precheck.test.ts → cli/cli-precheck.test.ts} +4 -3
  76. package/test/integration/{cli-run-headless.test.ts → cli/cli-run-headless.test.ts} +3 -2
  77. package/test/integration/{cli.test.ts → cli/cli.test.ts} +2 -1
  78. package/test/integration/{precheck-integration.test.ts → cli/precheck-integration.test.ts} +10 -9
  79. package/test/integration/{precheck-orchestrator.test.ts → cli/precheck-orchestrator.test.ts} +4 -3
  80. package/test/integration/{precheck.test.ts → cli/precheck.test.ts} +5 -4
  81. package/test/integration/{config-loader.test.ts → config/config-loader.test.ts} +2 -1
  82. package/test/integration/{config.test.ts → config/config.test.ts} +2 -2
  83. package/test/integration/config/merger.test.ts +1 -0
  84. package/test/integration/config/paths.test.ts +1 -0
  85. package/test/integration/{security-loader.test.ts → config/security-loader.test.ts} +2 -2
  86. package/test/integration/{context-integration.test.ts → context/context-integration.test.ts} +7 -6
  87. package/test/integration/{path-security.test.ts → context/context-path-security.test.ts} +2 -2
  88. package/test/integration/{context-provider-injection.test.ts → context/context-provider-injection.test.ts} +7 -6
  89. package/test/integration/{context-verification-integration.test.ts → context/context-verification-integration.test.ts} +5 -4
  90. package/test/integration/{s5-greenfield-fallback.test.ts → context/s5-greenfield-fallback.test.ts} +4 -3
  91. package/test/integration/{isolation.test.ts → execution/execution-isolation.test.ts} +1 -1
  92. package/test/integration/{execution.test.ts → execution/execution.test.ts} +8 -8
  93. package/test/integration/{parallel.test.ts → execution/parallel.test.ts} +2 -1
  94. package/test/integration/{prd-pause.test.ts → execution/prd-pause.test.ts} +2 -2
  95. package/test/integration/{prd-resolvers.test.ts → execution/prd-resolvers.test.ts} +3 -2
  96. package/test/integration/{progress.test.ts → execution/progress.test.ts} +1 -1
  97. package/test/integration/execution/runner-batching.test.ts +682 -0
  98. package/test/integration/{runner-config-plugins.test.ts → execution/runner-config-plugins.test.ts} +3 -2
  99. package/test/integration/execution/runner-escalation.test.ts +561 -0
  100. package/test/integration/{runner-fixes.test.ts → execution/runner-fixes.test.ts} +4 -3
  101. package/test/integration/{runner-plugin-integration.test.ts → execution/runner-plugin-integration.test.ts} +6 -5
  102. package/test/integration/execution/runner-queue-and-attempts.test.ts +476 -0
  103. package/test/integration/{status-file-integration.test.ts → execution/status-file-integration.test.ts} +9 -8
  104. package/test/integration/{status-file.test.ts → execution/status-file.test.ts} +3 -2
  105. package/test/integration/{status-writer.test.ts → execution/status-writer.test.ts} +5 -4
  106. package/test/integration/{story-id-in-events.test.ts → execution/story-id-in-events.test.ts} +9 -8
  107. package/test/integration/{interaction-chain-pipeline.test.ts → interaction/interaction-chain-pipeline.test.ts} +26 -14
  108. package/test/integration/{hooks.test.ts → pipeline/hooks.test.ts} +4 -2
  109. package/test/integration/{pipeline-acceptance.test.ts → pipeline/pipeline-acceptance.test.ts} +7 -6
  110. package/test/integration/{pipeline-events.test.ts → pipeline/pipeline-events.test.ts} +7 -6
  111. package/test/integration/{pipeline.test.ts → pipeline/pipeline.test.ts} +9 -7
  112. package/test/integration/{reporter-lifecycle.test.ts → pipeline/reporter-lifecycle.test.ts} +9 -7
  113. package/test/integration/{verify-stage.test.ts → pipeline/verify-stage.test.ts} +7 -5
  114. package/test/integration/{analyze-integration.test.ts → plan/analyze-integration.test.ts} +3 -2
  115. package/test/integration/{analyze-scanner.test.ts → plan/analyze-scanner.test.ts} +8 -7
  116. package/test/integration/{logger.test.ts → plan/logger.test.ts} +1 -1
  117. package/test/integration/{plan.test.ts → plan/plan.test.ts} +3 -3
  118. package/test/integration/plugins/config-integration.test.ts +1 -0
  119. package/test/integration/plugins/config-resolution.test.ts +1 -0
  120. package/test/integration/plugins/loader.test.ts +1 -0
  121. package/test/integration/plugins/{registry.test.ts → plugins-registry.test.ts} +1 -0
  122. package/test/integration/plugins/validator.test.ts +1 -0
  123. package/test/integration/{review-config-commands.test.ts → review/review-config-commands.test.ts} +4 -3
  124. package/test/integration/{review-config-schema.test.ts → review/review-config-schema.test.ts} +3 -2
  125. package/test/integration/{review-plugin-integration.test.ts → review/review-plugin-integration.test.ts} +5 -4
  126. package/test/integration/{review.test.ts → review/review.test.ts} +3 -2
  127. package/test/integration/routing/plugin-routing-advanced.test.ts +461 -0
  128. package/test/integration/{plugin-routing.test.ts → routing/plugin-routing-core.test.ts} +10 -404
  129. package/test/integration/{routing-stage-bug-021.test.ts → routing/routing-stage-bug-021.test.ts} +8 -7
  130. package/test/integration/{routing-stage-greenfield.test.ts → routing/routing-stage-greenfield.test.ts} +7 -6
  131. package/test/integration/{tdd-cleanup.test.ts → tdd/tdd-cleanup.test.ts} +1 -1
  132. package/test/integration/tdd/tdd-orchestrator-core.test.ts +565 -0
  133. package/test/integration/tdd/tdd-orchestrator-failureCategory.test.ts +355 -0
  134. package/test/integration/tdd/tdd-orchestrator-fallback.test.ts +311 -0
  135. package/test/integration/tdd/tdd-orchestrator-lite.test.ts +289 -0
  136. package/test/integration/tdd/tdd-orchestrator-prompts.test.ts +260 -0
  137. package/test/integration/tdd/tdd-orchestrator-verdict.test.ts +536 -0
  138. package/test/integration/tmp/headless-test/test.jsonl +30 -0
  139. package/test/integration/{test-scanner.test.ts → verification/test-scanner.test.ts} +1 -1
  140. package/test/integration/{verification-asset-check.test.ts → verification/verification-asset-check.test.ts} +3 -2
  141. package/test/unit/acceptance.test.ts +1 -0
  142. package/test/unit/agent-stderr-capture.test.ts +1 -0
  143. package/test/unit/agents/claude.test.ts +107 -0
  144. package/test/unit/analyze-classifier.test.ts +1 -0
  145. package/test/unit/auto-detect.test.ts +1 -0
  146. package/test/unit/cli-status.test.ts +1 -0
  147. package/test/unit/commands/common.test.ts +1 -0
  148. package/test/unit/commands/logs.test.ts +1 -0
  149. package/test/unit/commands/unlock.test.ts +1 -0
  150. package/test/unit/config/defaults.test.ts +1 -0
  151. package/test/unit/config/regression-gate-schema.test.ts +1 -0
  152. package/test/unit/config/smart-runner-flag.test.ts +1 -0
  153. package/test/unit/constitution-generators.test.ts +1 -0
  154. package/test/unit/constitution.test.ts +1 -0
  155. package/test/unit/context/context-autodetect.test.ts +297 -0
  156. package/test/unit/context/context-build.test.ts +575 -0
  157. package/test/unit/context/context-coverage.test.ts +236 -0
  158. package/test/unit/context/context-error.test.ts +93 -0
  159. package/test/unit/context/context-estimate-tokens.test.ts +201 -0
  160. package/test/unit/context/context-format.test.ts +302 -0
  161. package/test/unit/context/context-isolation.test.ts +267 -0
  162. package/test/unit/context/context-sort.test.ts +93 -0
  163. package/test/unit/context/context-story.test.ts +108 -0
  164. package/test/{context → unit/context}/prior-failures.test.ts +5 -4
  165. package/test/unit/context.test.ts +7 -3
  166. package/test/unit/crash-recovery.test.ts +1 -0
  167. package/test/unit/escalation.test.ts +1 -0
  168. package/test/unit/execution/lifecycle/run-completion.test.ts +1 -0
  169. package/test/unit/execution/lifecycle/run-regression.test.ts +2 -0
  170. package/test/{execution → unit/execution}/pid-registry.test.ts +2 -1
  171. package/test/{execution → unit/execution}/structured-failure.test.ts +3 -2
  172. package/test/unit/execution-logging-stderr.test.ts +1 -0
  173. package/test/unit/execution-stage.test.ts +1 -0
  174. package/test/unit/fix-generator.test.ts +1 -0
  175. package/test/unit/greenfield.test.ts +1 -0
  176. package/test/unit/interaction/human-review-trigger.test.ts +1 -0
  177. package/test/unit/interaction-network-failures.test.ts +1 -0
  178. package/test/unit/interaction-plugins.test.ts +1 -0
  179. package/test/unit/logging/formatter.test.ts +1 -0
  180. package/test/unit/merge.test.ts +1 -0
  181. package/test/unit/pipeline/event-bus.test.ts +105 -0
  182. package/test/unit/pipeline/routing-partial-override.test.ts +1 -0
  183. package/test/unit/pipeline/runner-retry.test.ts +89 -0
  184. package/test/unit/pipeline/stages/autofix.test.ts +97 -0
  185. package/test/unit/pipeline/stages/rectify.test.ts +101 -0
  186. package/test/unit/pipeline/stages/regression-stage.test.ts +69 -0
  187. package/test/unit/pipeline/stages/verify.test.ts +1 -0
  188. package/test/unit/pipeline/subscribers/hooks.test.ts +45 -0
  189. package/test/unit/pipeline/subscribers/interaction.test.ts +31 -0
  190. package/test/unit/pipeline/subscribers/reporters.test.ts +90 -0
  191. package/test/unit/pipeline/verify-smart-runner.test.ts +2 -1
  192. package/test/unit/prd-auto-default.test.ts +3 -2
  193. package/test/unit/prd-failure-category.test.ts +1 -0
  194. package/test/unit/prd-get-next-story.test.ts +1 -0
  195. package/test/unit/precheck-checks.test.ts +1 -0
  196. package/test/unit/precheck-story-size-gate.test.ts +1 -0
  197. package/test/unit/precheck-types.test.ts +1 -0
  198. package/test/unit/prompts.test.ts +1 -0
  199. package/test/unit/rectification.test.ts +2 -1
  200. package/test/unit/registry.test.ts +1 -0
  201. package/test/unit/routing/routing-stability.test.ts +2 -1
  202. package/test/unit/routing/strategies/llm.test.ts +251 -0
  203. package/test/unit/routing-advanced.test.ts +313 -0
  204. package/test/unit/routing-core.test.ts +341 -0
  205. package/test/unit/routing-strategies.test.ts +442 -0
  206. package/test/unit/storyid-events.test.ts +1 -0
  207. package/test/{ui → unit/ui}/tui-controls.test.ts +8 -7
  208. package/test/{ui → unit/ui}/tui-cost-and-pty.test.ts +4 -3
  209. package/test/{ui → unit/ui}/tui-layout.test.ts +5 -4
  210. package/test/{ui → unit/ui}/tui-stories.test.ts +5 -4
  211. package/test/unit/{isolation.test.ts → unit-isolation.test.ts} +1 -0
  212. package/test/unit/{helpers.test.ts → utils-helpers.test.ts} +1 -0
  213. package/test/unit/verdict.test.ts +1 -0
  214. package/test/unit/verification/orchestrator-types.test.ts +54 -0
  215. package/test/unit/verification/orchestrator.test.ts +66 -0
  216. package/test/unit/verification/smart-runner-config.test.ts +1 -0
  217. package/test/unit/verification/smart-runner-discovery.test.ts +8 -7
  218. package/test/unit/verification/strategies/acceptance.test.ts +33 -0
  219. package/test/unit/verification/strategies/regression.test.ts +87 -0
  220. package/test/unit/verification/strategies/scoped.test.ts +100 -0
  221. package/test/unit/worktree-manager.test.ts +1 -0
  222. package/src/execution/lifecycle/story-hooks.ts +0 -38
  223. package/src/execution/post-verify.ts +0 -193
  224. package/src/execution/rectification.ts +0 -13
  225. package/src/execution/verification.ts +0 -72
  226. package/test/integration/rectification-flow.test.ts +0 -512
  227. package/test/integration/runner.test.ts +0 -1679
  228. package/test/integration/tdd-orchestrator.test.ts +0 -1762
  229. package/test/unit/execution/post-verify-regression.test.ts +0 -362
  230. package/test/unit/execution/post-verify.test.ts +0 -236
  231. package/test/unit/routing.test.ts +0 -1039
  232. /package/test/{integration → helpers}/helpers.test.ts +0 -0
  233. /package/test/integration/worktree/{merge.test.ts → worktree-merge.test.ts} +0 -0
@@ -1,1039 +0,0 @@
1
- /**
2
- * Routing Tests
3
- *
4
- * Consolidated test suite for routing system including:
5
- * - Core routing logic (classifyComplexity, determineTestStrategy, routeTask)
6
- * - Routing strategies (keyword, llm, manual, adaptive)
7
- * - Strategy chain execution
8
- * - Async support and chain delegation
9
- */
10
-
11
- import { beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
12
- import { DEFAULT_CONFIG } from "../../src/config";
13
- import type { NaxConfig } from "../../src/config";
14
- import { escalateTier } from "../../src/execution/runner";
15
- import type { AggregateMetrics } from "../../src/metrics/types";
16
- import type { UserStory } from "../../src/prd/types";
17
- import { classifyComplexity, determineTestStrategy, routeTask } from "../../src/routing";
18
- import { buildStrategyChain } from "../../src/routing/builder";
19
- import { StrategyChain } from "../../src/routing/chain";
20
- import { keywordStrategy, llmStrategy, manualStrategy } from "../../src/routing/strategies";
21
- import { adaptiveStrategy } from "../../src/routing/strategies/adaptive";
22
- import {
23
- buildBatchPrompt,
24
- buildRoutingPrompt,
25
- clearCache,
26
- clearCacheForStory,
27
- getCacheSize,
28
- llmStrategy as llmStrategyFull,
29
- parseRoutingResponse,
30
- routeBatch,
31
- stripCodeFences,
32
- validateRoutingDecision,
33
- } from "../../src/routing/strategies/llm";
34
- import type { RoutingContext, RoutingDecision, RoutingStrategy } from "../../src/routing/strategy";
35
-
36
- // ============================================================================
37
- // Core Routing Logic Tests
38
- // ============================================================================
39
-
40
- describe("classifyComplexity", () => {
41
- test("simple: few criteria, no keywords", () => {
42
- expect(classifyComplexity("Fix typo", "Fix a typo in error message", ["Typo is fixed"], [])).toBe("simple");
43
- });
44
-
45
- test("medium: moderate criteria count", () => {
46
- expect(classifyComplexity("Add validation", "Add DTO validation", ["a", "b", "c", "d", "e"], [])).toBe("medium");
47
- });
48
-
49
- test("complex: security keyword", () => {
50
- expect(classifyComplexity("Auth refactor", "Refactor JWT authentication", ["Token works"], ["security"])).toBe(
51
- "complex",
52
- );
53
- });
54
-
55
- test("expert: distributed keyword", () => {
56
- expect(classifyComplexity("Real-time sync", "Real-time distributed consensus", ["Sync works"], [])).toBe("expert");
57
- });
58
-
59
- test("4 ACs should classify as simple (BUG-19 regression)", () => {
60
- const complexity = classifyComplexity(
61
- "Add validation",
62
- "Add basic input validation",
63
- ["AC1", "AC2", "AC3", "AC4"],
64
- [],
65
- );
66
- expect(complexity).toBe("simple");
67
- });
68
-
69
- test("5 ACs should classify as medium (BUG-19 regression)", () => {
70
- const complexity = classifyComplexity(
71
- "Add validation",
72
- "Add comprehensive input validation",
73
- ["AC1", "AC2", "AC3", "AC4", "AC5"],
74
- [],
75
- );
76
- expect(complexity).toBe("medium");
77
- });
78
-
79
- test("9 ACs should classify as complex (BUG-19 regression)", () => {
80
- const complexity = classifyComplexity(
81
- "Add validation",
82
- "Add extensive input validation",
83
- ["AC1", "AC2", "AC3", "AC4", "AC5", "AC6", "AC7", "AC8", "AC9"],
84
- [],
85
- );
86
- expect(complexity).toBe("complex");
87
- });
88
- });
89
-
90
- describe("determineTestStrategy", () => {
91
- test("simple → test-after", () => {
92
- expect(determineTestStrategy("simple", "Fix typo", "Fix a typo", [])).toBe("test-after");
93
- });
94
-
95
- test("complex → three-session-tdd", () => {
96
- expect(determineTestStrategy("complex", "Refactor module", "Complex refactor", [])).toBe("three-session-tdd");
97
- });
98
-
99
- test("security keyword → three-session-tdd even if simple", () => {
100
- expect(determineTestStrategy("simple", "Fix auth bypass", "Security fix for JWT token", ["security"])).toBe(
101
- "three-session-tdd",
102
- );
103
- });
104
-
105
- test("public api keyword → three-session-tdd even if simple", () => {
106
- expect(determineTestStrategy("simple", "Add endpoint", "New public api endpoint for users", [])).toBe(
107
- "three-session-tdd",
108
- );
109
- });
110
-
111
- describe("tddStrategy overrides", () => {
112
- test("strategy='strict' always returns three-session-tdd", () => {
113
- expect(determineTestStrategy("simple", "Update button", "Change color", [], "strict")).toBe("three-session-tdd");
114
- expect(determineTestStrategy("medium", "Update button", "Change color", [], "strict")).toBe("three-session-tdd");
115
- expect(determineTestStrategy("complex", "Refactor module", "Big refactor", [], "strict")).toBe(
116
- "three-session-tdd",
117
- );
118
- });
119
-
120
- test("strategy='lite' always returns three-session-tdd-lite", () => {
121
- expect(determineTestStrategy("simple", "Update button", "Change color", [], "lite")).toBe(
122
- "three-session-tdd-lite",
123
- );
124
- expect(determineTestStrategy("medium", "Update form", "Add validation", [], "lite")).toBe(
125
- "three-session-tdd-lite",
126
- );
127
- expect(determineTestStrategy("complex", "Refactor module", "Big refactor", [], "lite")).toBe(
128
- "three-session-tdd-lite",
129
- );
130
- });
131
-
132
- test("strategy='off' always returns test-after", () => {
133
- expect(determineTestStrategy("simple", "Update button", "Change color", [], "off")).toBe("test-after");
134
- expect(determineTestStrategy("complex", "Refactor auth", "JWT refactor", ["security"], "off")).toBe("test-after");
135
- expect(determineTestStrategy("expert", "Real-time sync", "Distributed consensus", [], "off")).toBe("test-after");
136
- });
137
-
138
- test("strategy='auto' returns three-session-tdd-lite for UI-tagged complex stories", () => {
139
- expect(determineTestStrategy("complex", "Redesign dashboard", "UI overhaul", ["ui"], "auto")).toBe(
140
- "three-session-tdd-lite",
141
- );
142
- });
143
-
144
- test("strategy='auto' returns three-session-tdd-lite for layout-tagged stories", () => {
145
- expect(determineTestStrategy("complex", "Fix layout", "Responsive layout fix", ["layout"], "auto")).toBe(
146
- "three-session-tdd-lite",
147
- );
148
- });
149
-
150
- test("strategy='auto' security-critical stories always return three-session-tdd even with ui tag", () => {
151
- expect(determineTestStrategy("complex", "Auth UI", "JWT token security screen", ["ui", "security"], "auto")).toBe(
152
- "three-session-tdd",
153
- );
154
- });
155
-
156
- test("strategy='auto' lite tags are case-insensitive", () => {
157
- expect(determineTestStrategy("complex", "Build UI", "Create UI", ["UI"], "auto")).toBe("three-session-tdd-lite");
158
- expect(determineTestStrategy("complex", "Build CLI", "Create CLI", ["CLI"], "auto")).toBe(
159
- "three-session-tdd-lite",
160
- );
161
- });
162
- });
163
- });
164
-
165
- describe("routeTask", () => {
166
- test("routes simple task to fast model with test-after", () => {
167
- const result = routeTask("Fix typo", "Fix a typo", ["Typo fixed"], [], DEFAULT_CONFIG);
168
- expect(result.complexity).toBe("simple");
169
- expect(result.modelTier).toBe("fast");
170
- expect(result.testStrategy).toBe("test-after");
171
- });
172
-
173
- test("routes security task to powerful with three-session-tdd", () => {
174
- const result = routeTask("Auth fix", "Fix JWT auth bypass", ["Auth works"], ["security"], DEFAULT_CONFIG);
175
- expect(result.complexity).toBe("complex");
176
- expect(result.modelTier).toBe("powerful");
177
- expect(result.testStrategy).toBe("three-session-tdd");
178
- });
179
-
180
- test("routes all complexity levels correctly", () => {
181
- const simpleResult = routeTask("Fix typo", "Fix a typo", ["Typo fixed"], [], DEFAULT_CONFIG);
182
- expect(simpleResult.complexity).toBe("simple");
183
- expect(simpleResult.modelTier).toBe("fast");
184
-
185
- const mediumResult = routeTask(
186
- "Add validation",
187
- "Add DTO validation",
188
- ["a", "b", "c", "d", "e"],
189
- [],
190
- DEFAULT_CONFIG,
191
- );
192
- expect(mediumResult.complexity).toBe("medium");
193
- expect(mediumResult.modelTier).toBe("balanced");
194
-
195
- const complexResult = routeTask(
196
- "Auth refactor",
197
- "Refactor JWT authentication",
198
- ["Token works"],
199
- ["security"],
200
- DEFAULT_CONFIG,
201
- );
202
- expect(complexResult.complexity).toBe("complex");
203
- expect(complexResult.modelTier).toBe("powerful");
204
-
205
- const expertResult = routeTask(
206
- "Real-time sync",
207
- "Real-time distributed consensus",
208
- ["Sync works"],
209
- [],
210
- DEFAULT_CONFIG,
211
- );
212
- expect(expertResult.complexity).toBe("expert");
213
- expect(expertResult.modelTier).toBe("powerful");
214
- });
215
-
216
- test("complexity → modelTier mapping respects config (BUG-19 regression)", () => {
217
- const simpleResult = routeTask("Simple task", "Simple description", ["AC1"], [], DEFAULT_CONFIG);
218
- expect(simpleResult.complexity).toBe("simple");
219
- expect(simpleResult.modelTier).toBe("fast");
220
-
221
- const mediumResult = routeTask(
222
- "Medium task",
223
- "Medium description",
224
- ["AC1", "AC2", "AC3", "AC4", "AC5"],
225
- [],
226
- DEFAULT_CONFIG,
227
- );
228
- expect(mediumResult.complexity).toBe("medium");
229
- expect(mediumResult.modelTier).toBe("balanced");
230
-
231
- const complexResult = routeTask(
232
- "Complex task",
233
- "Complex description",
234
- ["AC1", "AC2", "AC3", "AC4", "AC5", "AC6", "AC7", "AC8", "AC9"],
235
- [],
236
- DEFAULT_CONFIG,
237
- );
238
- expect(complexResult.complexity).toBe("complex");
239
- expect(complexResult.modelTier).toBe("powerful");
240
- });
241
-
242
- describe("tddStrategy config integration", () => {
243
- const makeConfig = (strategy: NaxConfig["tdd"]["strategy"]): NaxConfig => ({
244
- ...DEFAULT_CONFIG,
245
- tdd: { ...DEFAULT_CONFIG.tdd, strategy },
246
- });
247
-
248
- test("config.tdd.strategy='strict' forces three-session-tdd on simple task", () => {
249
- const result = routeTask("Fix typo", "Fix a typo", ["Typo fixed"], [], makeConfig("strict"));
250
- expect(result.testStrategy).toBe("three-session-tdd");
251
- expect(result.reasoning).toContain("strategy:strict");
252
- });
253
-
254
- test("config.tdd.strategy='lite' forces three-session-tdd-lite on any task", () => {
255
- const result = routeTask("Fix typo", "Fix a typo", ["Typo fixed"], [], makeConfig("lite"));
256
- expect(result.testStrategy).toBe("three-session-tdd-lite");
257
- expect(result.reasoning).toContain("strategy:lite");
258
- });
259
-
260
- test("config.tdd.strategy='off' forces test-after even on complex/security tasks", () => {
261
- const result = routeTask("Auth refactor", "JWT auth security", ["Token works"], ["security"], makeConfig("off"));
262
- expect(result.testStrategy).toBe("test-after");
263
- });
264
-
265
- test("default config (strategy='auto') preserves existing routing behavior", () => {
266
- const simpleResult = routeTask("Fix typo", "Fix a typo", ["Typo fixed"], [], DEFAULT_CONFIG);
267
- expect(simpleResult.testStrategy).toBe("test-after");
268
-
269
- const complexResult = routeTask(
270
- "Auth refactor",
271
- "Refactor JWT authentication",
272
- ["Token works"],
273
- ["security"],
274
- DEFAULT_CONFIG,
275
- );
276
- expect(complexResult.testStrategy).toBe("three-session-tdd");
277
- });
278
- });
279
- });
280
-
281
- describe("escalateTier", () => {
282
- const defaultTiers = [
283
- { tier: "fast", attempts: 5 },
284
- { tier: "balanced", attempts: 3 },
285
- { tier: "powerful", attempts: 2 },
286
- ];
287
-
288
- test("escalates fast → balanced", () => {
289
- expect(escalateTier("fast", defaultTiers)).toBe("balanced");
290
- });
291
-
292
- test("escalates balanced → powerful", () => {
293
- expect(escalateTier("balanced", defaultTiers)).toBe("powerful");
294
- });
295
-
296
- test("escalates powerful → null (max reached)", () => {
297
- expect(escalateTier("powerful", defaultTiers)).toBeNull();
298
- });
299
-
300
- test("explicit 3-tier escalation chain: fast → balanced → powerful → null", () => {
301
- let tier: string | null = escalateTier("fast", defaultTiers);
302
- expect(tier).toBe("balanced");
303
-
304
- tier = escalateTier(tier!, defaultTiers);
305
- expect(tier).toBe("powerful");
306
-
307
- tier = escalateTier(tier!, defaultTiers);
308
- expect(tier).toBeNull();
309
- });
310
- });
311
-
312
- // ============================================================================
313
- // Strategy System Tests
314
- // ============================================================================
315
-
316
- describe("StrategyChain", () => {
317
- test("uses first strategy that returns non-null", async () => {
318
- const alwaysNullStrategy: RoutingStrategy = {
319
- name: "always-null",
320
- route: () => null,
321
- };
322
-
323
- const alwaysReturnStrategy: RoutingStrategy = {
324
- name: "always-return",
325
- route: () => ({
326
- complexity: "simple",
327
- modelTier: "fast",
328
- testStrategy: "test-after",
329
- reasoning: "Always return strategy",
330
- }),
331
- };
332
-
333
- const chain = new StrategyChain([alwaysNullStrategy, alwaysReturnStrategy]);
334
-
335
- const story: UserStory = {
336
- id: "US-001",
337
- title: "Test story",
338
- description: "Test",
339
- acceptanceCriteria: [],
340
- tags: [],
341
- dependencies: [],
342
- status: "pending",
343
- passes: false,
344
- escalations: [],
345
- attempts: 0,
346
- };
347
-
348
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
349
- const context: RoutingContext = { config: configWithoutLlm };
350
- const decision = await chain.route(story, context);
351
-
352
- expect(decision.reasoning).toBe("Always return strategy");
353
- });
354
-
355
- test("throws error if all strategies return null", async () => {
356
- const alwaysNullStrategy: RoutingStrategy = {
357
- name: "always-null",
358
- route: () => null,
359
- };
360
-
361
- const chain = new StrategyChain([alwaysNullStrategy]);
362
-
363
- const story: UserStory = {
364
- id: "US-001",
365
- title: "Test story",
366
- description: "Test",
367
- acceptanceCriteria: [],
368
- tags: [],
369
- dependencies: [],
370
- status: "pending",
371
- passes: false,
372
- escalations: [],
373
- attempts: 0,
374
- };
375
-
376
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
377
- const context: RoutingContext = { config: configWithoutLlm };
378
-
379
- await expect(chain.route(story, context)).rejects.toThrow("No routing strategy returned a decision");
380
- });
381
-
382
- test("getStrategyNames returns strategy names", () => {
383
- const chain = new StrategyChain([keywordStrategy, llmStrategy]);
384
- expect(chain.getStrategyNames()).toEqual(["keyword", "llm"]);
385
- });
386
-
387
- describe("async support", () => {
388
- test("handles async strategy that returns decision", async () => {
389
- const asyncStrategy: RoutingStrategy = {
390
- name: "async-test",
391
- route: async () => {
392
- await new Promise((resolve) => setTimeout(resolve, 10));
393
- return {
394
- complexity: "medium",
395
- modelTier: "balanced",
396
- testStrategy: "test-after",
397
- reasoning: "Async strategy result",
398
- };
399
- },
400
- };
401
-
402
- const chain = new StrategyChain([asyncStrategy]);
403
-
404
- const story: UserStory = {
405
- id: "US-001",
406
- title: "Test async story",
407
- description: "Test async routing",
408
- acceptanceCriteria: [],
409
- tags: [],
410
- dependencies: [],
411
- status: "pending",
412
- passes: false,
413
- escalations: [],
414
- attempts: 0,
415
- };
416
-
417
- const context: RoutingContext = { config: DEFAULT_CONFIG };
418
- const decision = await chain.route(story, context);
419
-
420
- expect(decision.reasoning).toBe("Async strategy result");
421
- expect(decision.complexity).toBe("medium");
422
- expect(decision.modelTier).toBe("balanced");
423
- });
424
-
425
- test("handles mixed sync and async strategies", async () => {
426
- const syncStrategy: RoutingStrategy = {
427
- name: "sync-first",
428
- route: () => null,
429
- };
430
-
431
- const asyncStrategy: RoutingStrategy = {
432
- name: "async-second",
433
- route: async () => {
434
- await new Promise((resolve) => setTimeout(resolve, 10));
435
- return {
436
- complexity: "complex",
437
- modelTier: "powerful",
438
- testStrategy: "three-session-tdd",
439
- reasoning: "Mixed chain result",
440
- };
441
- },
442
- };
443
-
444
- const chain = new StrategyChain([syncStrategy, asyncStrategy]);
445
-
446
- const story: UserStory = {
447
- id: "US-003",
448
- title: "Test mixed",
449
- description: "Test mixed sync/async",
450
- acceptanceCriteria: [],
451
- tags: [],
452
- dependencies: [],
453
- status: "pending",
454
- passes: false,
455
- escalations: [],
456
- attempts: 0,
457
- };
458
-
459
- const context: RoutingContext = { config: DEFAULT_CONFIG };
460
- const decision = await chain.route(story, context);
461
-
462
- expect(decision.reasoning).toBe("Mixed chain result");
463
- expect(decision.testStrategy).toBe("three-session-tdd");
464
- });
465
- });
466
- });
467
-
468
- describe("keywordStrategy", () => {
469
- test("classifies simple story correctly", () => {
470
- const story: UserStory = {
471
- id: "US-001",
472
- title: "Update button color",
473
- description: "Change button to blue",
474
- acceptanceCriteria: ["Button is blue"],
475
- tags: [],
476
- dependencies: [],
477
- status: "pending",
478
- passes: false,
479
- escalations: [],
480
- attempts: 0,
481
- };
482
-
483
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
484
- const context: RoutingContext = { config: configWithoutLlm };
485
- const decision = keywordStrategy.route(story, context);
486
-
487
- expect(decision).not.toBeNull();
488
- expect(decision!.complexity).toBe("simple");
489
- expect(decision!.modelTier).toBe("fast");
490
- expect(decision!.testStrategy).toBe("test-after");
491
- });
492
-
493
- test("classifies complex story with security keywords", () => {
494
- const story: UserStory = {
495
- id: "US-002",
496
- title: "Add JWT authentication",
497
- description: "Implement JWT auth with refresh tokens",
498
- acceptanceCriteria: ["Token storage", "Refresh logic", "Expiry"],
499
- tags: ["security", "auth"],
500
- dependencies: [],
501
- status: "pending",
502
- passes: false,
503
- escalations: [],
504
- attempts: 0,
505
- };
506
-
507
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
508
- const context: RoutingContext = { config: configWithoutLlm };
509
- const decision = keywordStrategy.route(story, context);
510
-
511
- expect(decision).not.toBeNull();
512
- expect(decision!.complexity).toBe("complex");
513
- expect(decision!.modelTier).toBe("powerful");
514
- expect(decision!.testStrategy).toBe("three-session-tdd");
515
- expect(decision!.reasoning).toContain("security-critical");
516
- });
517
-
518
- test("uses three-session-tdd for public API", () => {
519
- const story: UserStory = {
520
- id: "US-005",
521
- title: "Add public API endpoint",
522
- description: "Create external API for consumers",
523
- acceptanceCriteria: ["Endpoint returns JSON"],
524
- tags: ["public api"],
525
- dependencies: [],
526
- status: "pending",
527
- passes: false,
528
- escalations: [],
529
- attempts: 0,
530
- };
531
-
532
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
533
- const context: RoutingContext = { config: configWithoutLlm };
534
- const decision = keywordStrategy.route(story, context);
535
-
536
- expect(decision).not.toBeNull();
537
- expect(decision!.testStrategy).toBe("three-session-tdd");
538
- expect(decision!.reasoning).toContain("public-api");
539
- });
540
- });
541
-
542
- describe("manualStrategy", () => {
543
- test("returns decision from story.routing metadata", () => {
544
- const story: UserStory = {
545
- id: "US-006",
546
- title: "Manual override test",
547
- description: "Story with manual routing",
548
- acceptanceCriteria: [],
549
- tags: [],
550
- dependencies: [],
551
- status: "pending",
552
- passes: false,
553
- escalations: [],
554
- attempts: 0,
555
- routing: {
556
- complexity: "expert",
557
- modelTier: "powerful",
558
- testStrategy: "three-session-tdd",
559
- reasoning: "Manual override for critical task",
560
- },
561
- };
562
-
563
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
564
- const context: RoutingContext = { config: configWithoutLlm };
565
- const decision = manualStrategy.route(story, context);
566
-
567
- expect(decision).not.toBeNull();
568
- expect(decision!.complexity).toBe("expert");
569
- expect(decision!.modelTier).toBe("powerful");
570
- expect(decision!.testStrategy).toBe("three-session-tdd");
571
- expect(decision!.reasoning).toBe("Manual override for critical task");
572
- });
573
-
574
- test("returns null when no routing metadata", () => {
575
- const story: UserStory = {
576
- id: "US-007",
577
- title: "No manual routing",
578
- description: "Story without routing metadata",
579
- acceptanceCriteria: [],
580
- tags: [],
581
- dependencies: [],
582
- status: "pending",
583
- passes: false,
584
- escalations: [],
585
- attempts: 0,
586
- };
587
-
588
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
589
- const context: RoutingContext = { config: configWithoutLlm };
590
- const decision = manualStrategy.route(story, context);
591
-
592
- expect(decision).toBeNull();
593
- });
594
- });
595
-
596
- describe("buildStrategyChain", () => {
597
- test("builds keyword-only chain by default", async () => {
598
- const chain = await buildStrategyChain(DEFAULT_CONFIG, "/tmp");
599
- expect(chain.getStrategyNames()).toEqual(["keyword"]);
600
- });
601
-
602
- test("builds manual + keyword chain when strategy=manual", async () => {
603
- const config = {
604
- ...DEFAULT_CONFIG,
605
- routing: { strategy: "manual" as const },
606
- };
607
- const chain = await buildStrategyChain(config, "/tmp");
608
- expect(chain.getStrategyNames()).toEqual(["manual", "keyword"]);
609
- });
610
-
611
- test("builds llm + keyword chain when strategy=llm", async () => {
612
- const config = {
613
- ...DEFAULT_CONFIG,
614
- routing: { strategy: "llm" as const },
615
- };
616
- const chain = await buildStrategyChain(config, "/tmp");
617
- expect(chain.getStrategyNames()).toEqual(["llm", "keyword"]);
618
- });
619
-
620
- test("throws error when custom strategy without path", async () => {
621
- const config = {
622
- ...DEFAULT_CONFIG,
623
- routing: { strategy: "custom" as const },
624
- };
625
- await expect(buildStrategyChain(config, "/tmp")).rejects.toThrow("routing.customStrategyPath is required");
626
- });
627
- });
628
-
629
- // ============================================================================
630
- // LLM Strategy Tests
631
- // ============================================================================
632
-
633
- // Test user stories for LLM tests
634
- const simpleStory: UserStory = {
635
- id: "US-001",
636
- title: "Fix typo in README",
637
- description: "Correct spelling mistake",
638
- acceptanceCriteria: ["Update README.md with correct spelling"],
639
- tags: ["docs"],
640
- dependencies: [],
641
- status: "pending",
642
- passes: false,
643
- };
644
-
645
- const complexStory: UserStory = {
646
- id: "US-002",
647
- title: "Add JWT authentication",
648
- description: "Implement JWT authentication with refresh tokens",
649
- acceptanceCriteria: ["Secure token storage", "Token refresh endpoint", "Expiry handling", "Logout functionality"],
650
- tags: ["security", "auth"],
651
- dependencies: [],
652
- status: "pending",
653
- passes: false,
654
- };
655
-
656
- const testContext: RoutingContext = {
657
- config: DEFAULT_CONFIG,
658
- };
659
-
660
- describe("LLM Routing Strategy - Prompt Building", () => {
661
- test("buildRoutingPrompt formats story correctly", () => {
662
- const prompt = buildRoutingPrompt(simpleStory, DEFAULT_CONFIG);
663
-
664
- expect(prompt).toContain("Title: Fix typo in README");
665
- expect(prompt).toContain("Description: Correct spelling mistake");
666
- expect(prompt).toContain("1. Update README.md with correct spelling");
667
- expect(prompt).toContain("Tags: docs");
668
- expect(prompt).toContain("fast: Simple changes");
669
- expect(prompt).toContain("balanced: Standard features");
670
- expect(prompt).toContain("powerful: Complex architecture");
671
- expect(prompt).toContain("test-after: Write implementation first");
672
- expect(prompt).toContain("three-session-tdd: Separate test-writer");
673
- });
674
-
675
- test("buildBatchPrompt formats multiple stories", () => {
676
- const stories = [simpleStory, complexStory];
677
- const prompt = buildBatchPrompt(stories, DEFAULT_CONFIG);
678
-
679
- expect(prompt).toContain("1. US-001: Fix typo in README");
680
- expect(prompt).toContain("2. US-002: Add JWT authentication");
681
- expect(prompt).toContain("Tags: docs");
682
- expect(prompt).toContain("Tags: security, auth");
683
- expect(prompt).toContain('{"id":"US-001"');
684
- });
685
- });
686
-
687
- describe("LLM Routing Strategy - Response Parsing", () => {
688
- test("parseRoutingResponse handles valid JSON", () => {
689
- const output =
690
- '{"complexity":"simple","modelTier":"fast","testStrategy":"test-after","reasoning":"Simple documentation fix"}';
691
- const decision = parseRoutingResponse(output, simpleStory, DEFAULT_CONFIG);
692
-
693
- expect(decision.complexity).toBe("simple");
694
- expect(decision.modelTier).toBe("fast");
695
- expect(decision.testStrategy).toBe("test-after");
696
- expect(decision.reasoning).toBe("Simple documentation fix");
697
- });
698
-
699
- test("parseRoutingResponse strips markdown code blocks", () => {
700
- const output =
701
- '```json\n{"complexity":"complex","modelTier":"powerful","testStrategy":"three-session-tdd","reasoning":"Security-critical"}\n```';
702
- const decision = parseRoutingResponse(output, complexStory, DEFAULT_CONFIG);
703
-
704
- expect(decision.complexity).toBe("complex");
705
- expect(decision.modelTier).toBe("powerful");
706
- expect(decision.testStrategy).toBe("three-session-tdd");
707
- });
708
-
709
- test("parseRoutingResponse throws on invalid JSON", () => {
710
- const output = "This is not JSON";
711
- expect(() => parseRoutingResponse(output, simpleStory, DEFAULT_CONFIG)).toThrow();
712
- });
713
-
714
- test("parseRoutingResponse throws on missing fields", () => {
715
- const output = '{"complexity":"simple","modelTier":"fast"}';
716
- expect(() => parseRoutingResponse(output, simpleStory, DEFAULT_CONFIG)).toThrow("Missing required fields");
717
- });
718
- });
719
-
720
- describe("stripCodeFences", () => {
721
- test("returns plain JSON unchanged", () => {
722
- const input = '{"complexity":"simple"}';
723
- expect(stripCodeFences(input)).toBe('{"complexity":"simple"}');
724
- });
725
-
726
- test("strips ```json ... ``` fences", () => {
727
- const input = '```json\n{"complexity":"simple"}\n```';
728
- expect(stripCodeFences(input)).toBe('{"complexity":"simple"}');
729
- });
730
-
731
- test("strips leading 'json' keyword (no backticks)", () => {
732
- const input = 'json\n{"complexity":"simple"}';
733
- expect(stripCodeFences(input)).toBe('{"complexity":"simple"}');
734
- });
735
- });
736
-
737
- describe("validateRoutingDecision", () => {
738
- test("returns valid decision for correct input", () => {
739
- const input = { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "trivial" };
740
- const result = validateRoutingDecision(input, DEFAULT_CONFIG);
741
- expect(result).toEqual({
742
- complexity: "simple",
743
- modelTier: "fast",
744
- testStrategy: "test-after",
745
- reasoning: "trivial",
746
- });
747
- });
748
-
749
- test("throws on missing complexity", () => {
750
- const input = { modelTier: "fast", testStrategy: "test-after", reasoning: "test" };
751
- expect(() => validateRoutingDecision(input, DEFAULT_CONFIG)).toThrow("Missing required fields");
752
- });
753
-
754
- test("throws on invalid complexity value", () => {
755
- const input = { complexity: "mega", modelTier: "fast", testStrategy: "test-after", reasoning: "test" };
756
- expect(() => validateRoutingDecision(input, DEFAULT_CONFIG)).toThrow("Invalid complexity: mega");
757
- });
758
- });
759
-
760
- // ============================================================================
761
- // Adaptive Strategy Tests (Pure Logic)
762
- // ============================================================================
763
-
764
- function createStory(
765
- id: string,
766
- title: string,
767
- description: string,
768
- acceptanceCriteria: string[] = [],
769
- tags: string[] = [],
770
- ): UserStory {
771
- return {
772
- id,
773
- title,
774
- description,
775
- acceptanceCriteria,
776
- tags,
777
- status: "pending",
778
- dependencies: [],
779
- passes: false,
780
- escalations: [],
781
- attempts: 0,
782
- };
783
- }
784
-
785
- function createContext(metrics?: AggregateMetrics, config: NaxConfig = DEFAULT_CONFIG): RoutingContext {
786
- return {
787
- config,
788
- metrics,
789
- };
790
- }
791
-
792
- function createMockMetrics(
793
- complexityData: Record<string, { predicted: number; actualTierUsed: string; mismatchRate: number }>,
794
- ): AggregateMetrics {
795
- return {
796
- totalRuns: 10,
797
- totalCost: 5.0,
798
- totalStories: 100,
799
- firstPassRate: 0.75,
800
- escalationRate: 0.25,
801
- avgCostPerStory: 0.05,
802
- avgCostPerFeature: 0.5,
803
- modelEfficiency: {
804
- "claude-haiku-4-5": {
805
- attempts: 60,
806
- successes: 50,
807
- passRate: 0.833,
808
- avgCost: 0.005,
809
- totalCost: 0.25,
810
- },
811
- "claude-sonnet-4.5": {
812
- attempts: 30,
813
- successes: 28,
814
- passRate: 0.933,
815
- avgCost: 0.02,
816
- totalCost: 0.56,
817
- },
818
- "claude-opus-4-6": {
819
- attempts: 10,
820
- successes: 10,
821
- passRate: 1.0,
822
- avgCost: 0.08,
823
- totalCost: 0.8,
824
- },
825
- },
826
- complexityAccuracy: complexityData,
827
- };
828
- }
829
-
830
- describe("Adaptive Routing Strategy", () => {
831
- describe("No metrics available", () => {
832
- test("should fallback to configured strategy when no metrics", async () => {
833
- const story = createStory("US-001", "Add user login", "Implement user authentication", [
834
- "User can log in with email and password",
835
- ]);
836
-
837
- const context = createContext(undefined);
838
- const decision = await adaptiveStrategy.route(story, context);
839
-
840
- expect(decision).not.toBeNull();
841
- expect(decision?.reasoning).toContain("no metrics available");
842
- expect(decision?.reasoning).toContain("fallback to");
843
- });
844
- });
845
-
846
- describe("Insufficient data fallback", () => {
847
- test("should fallback when samples below minSamples threshold", async () => {
848
- const metrics = createMockMetrics({
849
- simple: {
850
- predicted: 5,
851
- actualTierUsed: "fast",
852
- mismatchRate: 0.2,
853
- },
854
- });
855
-
856
- const story = createStory("US-002", "Fix typo", "Fix typo in README", ["Typo is fixed"]);
857
-
858
- const context = createContext(metrics);
859
- const decision = await adaptiveStrategy.route(story, context);
860
-
861
- expect(decision).not.toBeNull();
862
- expect(decision?.reasoning).toContain("insufficient data");
863
- expect(decision?.reasoning).toContain("5/10 samples");
864
- expect(decision?.reasoning).toContain("fallback to");
865
- });
866
- });
867
-
868
- describe("Sufficient data - adaptive routing", () => {
869
- test("should route to fast tier when low mismatch rate", async () => {
870
- const metrics = createMockMetrics({
871
- simple: {
872
- predicted: 50,
873
- actualTierUsed: "fast",
874
- mismatchRate: 0.1,
875
- },
876
- });
877
-
878
- const story = createStory("US-004", "Add button", "Add a submit button to the form", [
879
- "Button is visible",
880
- "Button triggers submit",
881
- ]);
882
-
883
- const context = createContext(metrics);
884
- const decision = await adaptiveStrategy.route(story, context);
885
-
886
- expect(decision).not.toBeNull();
887
- expect(decision?.complexity).toBe("simple");
888
- expect(decision?.modelTier).toBe("fast");
889
- expect(decision?.reasoning).toContain("adaptive");
890
- expect(decision?.reasoning).toContain("simple → fast");
891
- expect(decision?.reasoning).toContain("samples: 50");
892
- expect(decision?.reasoning).toContain("mismatch: 10.0%");
893
- });
894
-
895
- test("should include cost information in reasoning", async () => {
896
- const metrics = createMockMetrics({
897
- complex: {
898
- predicted: 15,
899
- actualTierUsed: "powerful",
900
- mismatchRate: 0.2,
901
- },
902
- });
903
-
904
- const story = createStory(
905
- "US-006",
906
- "Refactor authentication",
907
- "Refactor the auth module to use JWT",
908
- Array.from({ length: 10 }, (_, i) => `Criterion ${i + 1}`),
909
- ["security", "breaking-change"],
910
- );
911
-
912
- const context = createContext(metrics);
913
- const decision = await adaptiveStrategy.route(story, context);
914
-
915
- expect(decision).not.toBeNull();
916
- expect(decision?.reasoning).toContain("cost:");
917
- expect(decision?.reasoning).toMatch(/\$\d+\.\d{4}/);
918
- });
919
- });
920
-
921
- describe("Edge cases", () => {
922
- test("should handle zero mismatch rate gracefully", async () => {
923
- const metrics = createMockMetrics({
924
- simple: {
925
- predicted: 100,
926
- actualTierUsed: "fast",
927
- mismatchRate: 0.0,
928
- },
929
- });
930
-
931
- const story = createStory("US-014", "Add text", "Add help text", ["Text added"]);
932
- const context = createContext(metrics);
933
- const decision = await adaptiveStrategy.route(story, context);
934
-
935
- expect(decision).not.toBeNull();
936
- expect(decision?.modelTier).toBe("fast");
937
- });
938
- });
939
- });
940
-
941
- // ============================================================================
942
- // LLM Cache Clearing Tests (BUG-028 fix)
943
- // ============================================================================
944
-
945
- describe("LLM Cache Clearing on Tier Escalation", () => {
946
- beforeEach(() => {
947
- // Clear cache before each test
948
- clearCache();
949
- });
950
-
951
- test("cache hit returns cached decision", () => {
952
- const story: UserStory = {
953
- id: "US-cache-001",
954
- title: "Test story",
955
- description: "Test story for cache",
956
- acceptanceCriteria: ["AC1"],
957
- tags: [],
958
- dependencies: [],
959
- status: "pending",
960
- passes: false,
961
- escalations: [],
962
- attempts: 0,
963
- };
964
-
965
- const originalDecision: RoutingDecision = {
966
- complexity: "simple",
967
- modelTier: "fast",
968
- testStrategy: "test-after",
969
- reasoning: "Original decision",
970
- };
971
-
972
- const configWithoutLlm = { ...DEFAULT_CONFIG, routing: { ...DEFAULT_CONFIG.routing, llm: undefined } };
973
- const context: RoutingContext = { config: configWithoutLlm };
974
-
975
- // Simulate cached decision
976
- const cachedDecisions = new Map<string, RoutingDecision>();
977
- cachedDecisions.set(story.id, originalDecision);
978
-
979
- // Verify initial cache state
980
- expect(getCacheSize()).toBe(0);
981
-
982
- // Note: We're testing the behavior through the exported functions
983
- // In a real scenario, the LLM strategy would populate the cache
984
- // For this test, we verify the cache clearing mechanism works
985
- });
986
-
987
- test("clearCacheForStory removes cache entry", () => {
988
- const storyId = "US-cache-002";
989
-
990
- // Clear cache first
991
- clearCache();
992
- expect(getCacheSize()).toBe(0);
993
-
994
- // Clear non-existent entry should not throw
995
- clearCacheForStory(storyId);
996
- expect(getCacheSize()).toBe(0);
997
- });
998
-
999
- test("clearCacheForStory after tier escalation forces re-routing", () => {
1000
- const storyId = "US-cache-003";
1001
-
1002
- // Clear all caches
1003
- clearCache();
1004
- expect(getCacheSize()).toBe(0);
1005
-
1006
- // Simulate clearing for escalation
1007
- clearCacheForStory(storyId);
1008
-
1009
- // Cache should still be empty
1010
- expect(getCacheSize()).toBe(0);
1011
- });
1012
-
1013
- test("clearing one story does not affect other cached stories", () => {
1014
- clearCache();
1015
-
1016
- const story1Id = "US-escalate-1";
1017
- const story2Id = "US-escalate-2";
1018
-
1019
- // Verify we can clear individual stories
1020
- clearCacheForStory(story1Id);
1021
- clearCacheForStory(story2Id);
1022
-
1023
- expect(getCacheSize()).toBe(0);
1024
- });
1025
-
1026
- test("clearCacheForStory is idempotent", () => {
1027
- const storyId = "US-idempotent";
1028
-
1029
- clearCache();
1030
- expect(getCacheSize()).toBe(0);
1031
-
1032
- // Clear multiple times should be safe
1033
- clearCacheForStory(storyId);
1034
- clearCacheForStory(storyId);
1035
- clearCacheForStory(storyId);
1036
-
1037
- expect(getCacheSize()).toBe(0);
1038
- });
1039
- });