@su-record/vibe 2.7.18 → 2.7.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345)
  1. package/.env.example +37 -37
  2. package/CLAUDE.md +153 -153
  3. package/LICENSE +21 -21
  4. package/README.md +451 -449
  5. package/agents/architect-low.md +41 -41
  6. package/agents/architect-medium.md +59 -59
  7. package/agents/architect.md +80 -80
  8. package/agents/build-error-resolver.md +115 -115
  9. package/agents/compounder.md +261 -261
  10. package/agents/diagrammer.md +178 -178
  11. package/agents/docs/api-documenter.md +99 -99
  12. package/agents/docs/changelog-writer.md +93 -93
  13. package/agents/e2e-tester.md +294 -294
  14. package/agents/event/event-comms.md +78 -0
  15. package/agents/event/event-content.md +68 -0
  16. package/agents/event/event-image.md +95 -0
  17. package/agents/event/event-ops.md +84 -0
  18. package/agents/event/event-scheduler.md +69 -0
  19. package/agents/event/event-speaker.md +86 -0
  20. package/agents/explorer-low.md +42 -42
  21. package/agents/explorer-medium.md +59 -59
  22. package/agents/explorer.md +48 -48
  23. package/agents/implementer-low.md +43 -43
  24. package/agents/implementer-medium.md +52 -52
  25. package/agents/implementer.md +54 -54
  26. package/agents/junior-mentor.md +141 -141
  27. package/agents/planning/requirements-analyst.md +84 -84
  28. package/agents/planning/ux-advisor.md +83 -83
  29. package/agents/qa/acceptance-tester.md +86 -86
  30. package/agents/qa/edge-case-finder.md +93 -93
  31. package/agents/refactor-cleaner.md +143 -143
  32. package/agents/research/best-practices-agent.md +199 -199
  33. package/agents/research/codebase-patterns-agent.md +157 -157
  34. package/agents/research/framework-docs-agent.md +188 -188
  35. package/agents/research/security-advisory-agent.md +213 -213
  36. package/agents/review/architecture-reviewer.md +107 -107
  37. package/agents/review/complexity-reviewer.md +116 -116
  38. package/agents/review/data-integrity-reviewer.md +88 -88
  39. package/agents/review/git-history-reviewer.md +103 -103
  40. package/agents/review/performance-reviewer.md +86 -86
  41. package/agents/review/python-reviewer.md +150 -150
  42. package/agents/review/rails-reviewer.md +139 -139
  43. package/agents/review/react-reviewer.md +144 -144
  44. package/agents/review/security-reviewer.md +80 -80
  45. package/agents/review/simplicity-reviewer.md +140 -140
  46. package/agents/review/test-coverage-reviewer.md +116 -116
  47. package/agents/review/typescript-reviewer.md +127 -127
  48. package/agents/searcher.md +54 -54
  49. package/agents/simplifier.md +120 -120
  50. package/agents/tester.md +49 -49
  51. package/agents/ui/ui-a11y-auditor.md +93 -93
  52. package/agents/ui/ui-antipattern-detector.md +94 -94
  53. package/agents/ui/ui-dataviz-advisor.md +69 -69
  54. package/agents/ui/ui-design-system-gen.md +57 -57
  55. package/agents/ui/ui-industry-analyzer.md +49 -49
  56. package/agents/ui/ui-layout-architect.md +65 -65
  57. package/agents/ui/ui-stack-implementer.md +68 -68
  58. package/agents/ui/ux-compliance-reviewer.md +81 -81
  59. package/agents/ui-previewer.md +258 -258
  60. package/commands/vibe.analyze.md +379 -379
  61. package/commands/vibe.event.md +163 -0
  62. package/commands/vibe.review.md +607 -607
  63. package/commands/vibe.run.md +2217 -2124
  64. package/commands/vibe.spec.md +1195 -1195
  65. package/commands/vibe.spec.review.md +569 -569
  66. package/commands/vibe.trace.md +50 -0
  67. package/commands/vibe.utils.md +413 -413
  68. package/commands/vibe.verify.md +484 -484
  69. package/dist/__tests__/architecture.test.d.ts +2 -0
  70. package/dist/__tests__/architecture.test.d.ts.map +1 -0
  71. package/dist/__tests__/architecture.test.js +207 -0
  72. package/dist/__tests__/architecture.test.js.map +1 -0
  73. package/dist/cli/auth.js +3 -3
  74. package/dist/cli/auth.js.map +1 -1
  75. package/dist/cli/collaborator.js +52 -52
  76. package/dist/cli/commands/evolution.js +12 -12
  77. package/dist/cli/commands/info.d.ts.map +1 -1
  78. package/dist/cli/commands/info.js +45 -81
  79. package/dist/cli/commands/info.js.map +1 -1
  80. package/dist/cli/commands/init.js +5 -5
  81. package/dist/cli/commands/remove.js +14 -14
  82. package/dist/cli/commands/sentinel.js +27 -27
  83. package/dist/cli/commands/skills.js +5 -5
  84. package/dist/cli/commands/slack.js +10 -10
  85. package/dist/cli/commands/telegram.js +12 -12
  86. package/dist/cli/detect.d.ts.map +1 -1
  87. package/dist/cli/detect.js +55 -32
  88. package/dist/cli/detect.js.map +1 -1
  89. package/dist/cli/index.d.ts +1 -1
  90. package/dist/cli/index.d.ts.map +1 -1
  91. package/dist/cli/index.js +52 -52
  92. package/dist/cli/index.js.map +1 -1
  93. package/dist/cli/llm/claude-commands.js +16 -16
  94. package/dist/cli/llm/config.js +18 -18
  95. package/dist/cli/llm/gemini-commands.js +16 -16
  96. package/dist/cli/llm/gpt-commands.js +19 -19
  97. package/dist/cli/llm/help.js +21 -21
  98. package/dist/cli/postinstall/constants.d.ts.map +1 -1
  99. package/dist/cli/postinstall/constants.js +24 -0
  100. package/dist/cli/postinstall/constants.js.map +1 -1
  101. package/dist/cli/postinstall/cursor-agents.js +32 -32
  102. package/dist/cli/postinstall/cursor-rules.js +83 -83
  103. package/dist/cli/postinstall/cursor-skills.js +743 -743
  104. package/dist/cli/setup/Provisioner.js +42 -42
  105. package/dist/infra/lib/AutomationLevel.d.ts +48 -0
  106. package/dist/infra/lib/AutomationLevel.d.ts.map +1 -0
  107. package/dist/infra/lib/AutomationLevel.js +157 -0
  108. package/dist/infra/lib/AutomationLevel.js.map +1 -0
  109. package/dist/infra/lib/DecisionTracer.d.ts +81 -0
  110. package/dist/infra/lib/DecisionTracer.d.ts.map +1 -0
  111. package/dist/infra/lib/DecisionTracer.js +135 -0
  112. package/dist/infra/lib/DecisionTracer.js.map +1 -0
  113. package/dist/infra/lib/DeepInit.js +24 -24
  114. package/dist/infra/lib/InteractiveCheckpoint.d.ts +75 -0
  115. package/dist/infra/lib/InteractiveCheckpoint.d.ts.map +1 -0
  116. package/dist/infra/lib/InteractiveCheckpoint.js +179 -0
  117. package/dist/infra/lib/InteractiveCheckpoint.js.map +1 -0
  118. package/dist/infra/lib/IterationTracker.d.ts +44 -0
  119. package/dist/infra/lib/IterationTracker.d.ts.map +1 -1
  120. package/dist/infra/lib/IterationTracker.js +267 -12
  121. package/dist/infra/lib/IterationTracker.js.map +1 -1
  122. package/dist/infra/lib/LoopBreaker.d.ts +56 -0
  123. package/dist/infra/lib/LoopBreaker.d.ts.map +1 -0
  124. package/dist/infra/lib/LoopBreaker.js +109 -0
  125. package/dist/infra/lib/LoopBreaker.js.map +1 -0
  126. package/dist/infra/lib/PythonParser.js +108 -108
  127. package/dist/infra/lib/ReviewRace.js +96 -96
  128. package/dist/infra/lib/SkillFrontmatter.js +28 -28
  129. package/dist/infra/lib/SkillQualityGate.js +9 -9
  130. package/dist/infra/lib/SkillRepository.js +159 -159
  131. package/dist/infra/lib/UltraQA.js +99 -99
  132. package/dist/infra/lib/VerificationLoop.d.ts +105 -0
  133. package/dist/infra/lib/VerificationLoop.d.ts.map +1 -0
  134. package/dist/infra/lib/VerificationLoop.js +189 -0
  135. package/dist/infra/lib/VerificationLoop.js.map +1 -0
  136. package/dist/infra/lib/__tests__/AutomationLevel.test.d.ts +2 -0
  137. package/dist/infra/lib/__tests__/AutomationLevel.test.d.ts.map +1 -0
  138. package/dist/infra/lib/__tests__/AutomationLevel.test.js +297 -0
  139. package/dist/infra/lib/__tests__/AutomationLevel.test.js.map +1 -0
  140. package/dist/infra/lib/__tests__/DecisionTracer.test.d.ts +2 -0
  141. package/dist/infra/lib/__tests__/DecisionTracer.test.d.ts.map +1 -0
  142. package/dist/infra/lib/__tests__/DecisionTracer.test.js +274 -0
  143. package/dist/infra/lib/__tests__/DecisionTracer.test.js.map +1 -0
  144. package/dist/infra/lib/__tests__/InteractiveCheckpoint.test.d.ts +2 -0
  145. package/dist/infra/lib/__tests__/InteractiveCheckpoint.test.d.ts.map +1 -0
  146. package/dist/infra/lib/__tests__/InteractiveCheckpoint.test.js +350 -0
  147. package/dist/infra/lib/__tests__/InteractiveCheckpoint.test.js.map +1 -0
  148. package/dist/infra/lib/__tests__/LoopBreaker.test.d.ts +2 -0
  149. package/dist/infra/lib/__tests__/LoopBreaker.test.d.ts.map +1 -0
  150. package/dist/infra/lib/__tests__/LoopBreaker.test.js +340 -0
  151. package/dist/infra/lib/__tests__/LoopBreaker.test.js.map +1 -0
  152. package/dist/infra/lib/__tests__/VerificationLoop.test.d.ts +2 -0
  153. package/dist/infra/lib/__tests__/VerificationLoop.test.d.ts.map +1 -0
  154. package/dist/infra/lib/__tests__/VerificationLoop.test.js +486 -0
  155. package/dist/infra/lib/__tests__/VerificationLoop.test.js.map +1 -0
  156. package/dist/infra/lib/autonomy/AuditStore.js +41 -41
  157. package/dist/infra/lib/autonomy/ConfirmationStore.js +30 -30
  158. package/dist/infra/lib/autonomy/EventOutbox.js +38 -38
  159. package/dist/infra/lib/autonomy/PolicyEngine.d.ts +3 -3
  160. package/dist/infra/lib/autonomy/PolicyEngine.js +18 -18
  161. package/dist/infra/lib/autonomy/SecuritySentinel.js +1 -1
  162. package/dist/infra/lib/autonomy/SuggestionStore.js +33 -33
  163. package/dist/infra/lib/embedding/VectorStore.js +22 -22
  164. package/dist/infra/lib/embedding/__tests__/EmbeddingProvider.test.js +4 -0
  165. package/dist/infra/lib/embedding/__tests__/EmbeddingProvider.test.js.map +1 -1
  166. package/dist/infra/lib/evolution/AgentAnalyzer.js +10 -10
  167. package/dist/infra/lib/evolution/DeprecationDetector.d.ts +68 -0
  168. package/dist/infra/lib/evolution/DeprecationDetector.d.ts.map +1 -0
  169. package/dist/infra/lib/evolution/DeprecationDetector.js +207 -0
  170. package/dist/infra/lib/evolution/DeprecationDetector.js.map +1 -0
  171. package/dist/infra/lib/evolution/DescriptionOptimizer.js +21 -21
  172. package/dist/infra/lib/evolution/GenerationRegistry.js +36 -36
  173. package/dist/infra/lib/evolution/InsightStore.js +90 -90
  174. package/dist/infra/lib/evolution/ParityTester.d.ts +74 -0
  175. package/dist/infra/lib/evolution/ParityTester.d.ts.map +1 -0
  176. package/dist/infra/lib/evolution/ParityTester.js +238 -0
  177. package/dist/infra/lib/evolution/ParityTester.js.map +1 -0
  178. package/dist/infra/lib/evolution/RollbackManager.js +5 -5
  179. package/dist/infra/lib/evolution/SkillBenchmark.js +23 -23
  180. package/dist/infra/lib/evolution/SkillEvalRunner.js +50 -50
  181. package/dist/infra/lib/evolution/SkillGapDetector.js +10 -10
  182. package/dist/infra/lib/evolution/UsageTracker.js +28 -28
  183. package/dist/infra/lib/evolution/__tests__/deprecation.test.d.ts +2 -0
  184. package/dist/infra/lib/evolution/__tests__/deprecation.test.d.ts.map +1 -0
  185. package/dist/infra/lib/evolution/__tests__/deprecation.test.js +251 -0
  186. package/dist/infra/lib/evolution/__tests__/deprecation.test.js.map +1 -0
  187. package/dist/infra/lib/evolution/__tests__/parity.test.d.ts +2 -0
  188. package/dist/infra/lib/evolution/__tests__/parity.test.d.ts.map +1 -0
  189. package/dist/infra/lib/evolution/__tests__/parity.test.js +319 -0
  190. package/dist/infra/lib/evolution/__tests__/parity.test.js.map +1 -0
  191. package/dist/infra/lib/evolution/index.d.ts +4 -0
  192. package/dist/infra/lib/evolution/index.d.ts.map +1 -1
  193. package/dist/infra/lib/evolution/index.js +3 -0
  194. package/dist/infra/lib/evolution/index.js.map +1 -1
  195. package/dist/infra/lib/gemini/orchestration.js +5 -5
  196. package/dist/infra/lib/gpt/orchestration.js +4 -4
  197. package/dist/infra/lib/gpt/specializations.d.ts +1 -1
  198. package/dist/infra/lib/gpt/specializations.js +1 -1
  199. package/dist/infra/lib/memory/KnowledgeGraph.js +4 -4
  200. package/dist/infra/lib/memory/MemorySearch.js +57 -57
  201. package/dist/infra/lib/memory/MemoryStorage.js +181 -181
  202. package/dist/infra/lib/memory/ObservationStore.js +28 -28
  203. package/dist/infra/lib/memory/ReflectionStore.js +30 -30
  204. package/dist/infra/lib/memory/SessionRAGRetriever.js +7 -7
  205. package/dist/infra/lib/memory/SessionRAGStore.js +225 -225
  206. package/dist/infra/lib/memory/SessionSummarizer.js +9 -9
  207. package/dist/infra/lib/telemetry/SkillTelemetry.d.ts +6 -0
  208. package/dist/infra/lib/telemetry/SkillTelemetry.d.ts.map +1 -1
  209. package/dist/infra/lib/telemetry/SkillTelemetry.js +11 -0
  210. package/dist/infra/lib/telemetry/SkillTelemetry.js.map +1 -1
  211. package/dist/infra/orchestrator/AgentManager.js +12 -12
  212. package/dist/infra/orchestrator/AgentRegistry.js +65 -65
  213. package/dist/infra/orchestrator/BackgroundManager.d.ts.map +1 -1
  214. package/dist/infra/orchestrator/BackgroundManager.js +2 -0
  215. package/dist/infra/orchestrator/BackgroundManager.js.map +1 -1
  216. package/dist/infra/orchestrator/MultiLlmResearch.js +8 -8
  217. package/dist/infra/orchestrator/PhasePipeline.js +1 -1
  218. package/dist/infra/orchestrator/PhasePipeline.js.map +1 -1
  219. package/dist/infra/orchestrator/SwarmOrchestrator.test.js +16 -16
  220. package/dist/infra/orchestrator/parallelResearch.js +24 -24
  221. package/dist/tools/convention/analyzeComplexity.test.js +115 -115
  222. package/dist/tools/convention/validateCodeQuality.test.js +104 -104
  223. package/dist/tools/index.d.ts +16 -19
  224. package/dist/tools/index.d.ts.map +1 -1
  225. package/dist/tools/index.js +15 -27
  226. package/dist/tools/index.js.map +1 -1
  227. package/dist/tools/memory/createMemoryTimeline.js +10 -10
  228. package/dist/tools/memory/getMemoryGraph.js +12 -12
  229. package/dist/tools/memory/getSessionContext.js +9 -9
  230. package/dist/tools/memory/linkMemories.js +14 -14
  231. package/dist/tools/memory/listMemories.js +4 -4
  232. package/dist/tools/memory/recallMemory.js +4 -4
  233. package/dist/tools/memory/saveMemory.js +4 -4
  234. package/dist/tools/memory/searchMemoriesAdvanced.js +23 -23
  235. package/dist/tools/memory/startSession.js +1 -1
  236. package/dist/tools/memory/startSession.js.map +1 -1
  237. package/dist/tools/semantic/analyzeDependencyGraph.js +12 -12
  238. package/dist/tools/semantic/astGrep.test.js +6 -6
  239. package/dist/tools/spec/index.d.ts +0 -4
  240. package/dist/tools/spec/index.d.ts.map +1 -1
  241. package/dist/tools/spec/index.js +0 -4
  242. package/dist/tools/spec/index.js.map +1 -1
  243. package/dist/tools/spec/prdParser.test.js +171 -171
  244. package/dist/tools/spec/specGenerator.js +169 -169
  245. package/dist/tools/spec/traceabilityMatrix.js +64 -64
  246. package/dist/tools/spec/traceabilityMatrix.test.js +28 -28
  247. package/hooks/gemini-hooks.json +73 -73
  248. package/hooks/hooks.json +137 -137
  249. package/hooks/scripts/code-check.js +77 -77
  250. package/hooks/scripts/context-save.js +212 -212
  251. package/hooks/scripts/evolution-engine.js +69 -0
  252. package/hooks/scripts/hud-status.js +291 -291
  253. package/hooks/scripts/keyword-detector.js +214 -214
  254. package/hooks/scripts/llm-orchestrate.js +475 -475
  255. package/hooks/scripts/post-edit.js +32 -32
  256. package/hooks/scripts/pre-tool-guard.js +125 -125
  257. package/hooks/scripts/prompt-dispatcher.js +185 -185
  258. package/hooks/scripts/sentinel-guard.js +104 -104
  259. package/hooks/scripts/session-start.js +106 -106
  260. package/hooks/scripts/skill-injector.js +83 -0
  261. package/hooks/scripts/stop-notify.js +209 -209
  262. package/hooks/scripts/utils.js +100 -100
  263. package/languages/csharp-unity.md +515 -515
  264. package/languages/gdscript-godot.md +470 -470
  265. package/languages/ruby-rails.md +489 -489
  266. package/languages/typescript-angular.md +433 -433
  267. package/languages/typescript-astro.md +416 -416
  268. package/languages/typescript-electron.md +406 -406
  269. package/languages/typescript-nestjs.md +524 -524
  270. package/languages/typescript-svelte.md +407 -407
  271. package/languages/typescript-tauri.md +365 -365
  272. package/package.json +101 -123
  273. package/skills/agents-md/SKILL.md +120 -120
  274. package/skills/arch-guard/SKILL.md +180 -180
  275. package/skills/brand-assets/SKILL.md +146 -146
  276. package/skills/capability-loop/SKILL.md +167 -167
  277. package/skills/characterization-test/SKILL.md +206 -206
  278. package/skills/commerce-patterns/SKILL.md +63 -63
  279. package/skills/commit-push-pr/SKILL.md +75 -75
  280. package/skills/context7-usage/SKILL.md +105 -105
  281. package/skills/core-capabilities/SKILL.md +13 -13
  282. package/skills/e2e-commerce/SKILL.md +61 -61
  283. package/skills/event-comms/SKILL.md +161 -0
  284. package/skills/event-ops/SKILL.md +197 -0
  285. package/skills/event-planning/SKILL.md +131 -0
  286. package/skills/exec-plan/SKILL.md +147 -147
  287. package/skills/frontend-design/SKILL.md +12 -12
  288. package/skills/git-worktree/SKILL.md +72 -72
  289. package/skills/handoff/SKILL.md +109 -109
  290. package/skills/parallel-research/SKILL.md +87 -87
  291. package/skills/priority-todos/SKILL.md +63 -63
  292. package/skills/seo-checklist/SKILL.md +57 -57
  293. package/skills/techdebt/SKILL.md +122 -122
  294. package/skills/tool-fallback/SKILL.md +103 -103
  295. package/skills/typescript-advanced-types/SKILL.md +66 -66
  296. package/skills/ui-ux-pro-max/SKILL.md +221 -221
  297. package/skills/vercel-react-best-practices/SKILL.md +59 -59
  298. package/skills/video-production/SKILL.md +51 -51
  299. package/vibe/config.json +29 -29
  300. package/vibe/constitution.md +227 -227
  301. package/vibe/rules/principles/communication-guide.md +98 -98
  302. package/vibe/rules/principles/development-philosophy.md +52 -52
  303. package/vibe/rules/principles/quick-start.md +102 -102
  304. package/vibe/rules/quality/bdd-contract-testing.md +393 -393
  305. package/vibe/rules/quality/checklist.md +276 -276
  306. package/vibe/rules/quality/performance.md +236 -236
  307. package/vibe/rules/quality/testing-strategy.md +440 -440
  308. package/vibe/rules/standards/anti-patterns.md +541 -541
  309. package/vibe/rules/standards/code-structure.md +291 -291
  310. package/vibe/rules/standards/complexity-metrics.md +313 -313
  311. package/vibe/rules/standards/git-workflow.md +237 -237
  312. package/vibe/rules/standards/naming-conventions.md +198 -198
  313. package/vibe/rules/standards/security.md +305 -305
  314. package/vibe/rules/writing/document-style.md +74 -74
  315. package/vibe/setup.sh +31 -31
  316. package/vibe/templates/constitution-template.md +252 -252
  317. package/vibe/templates/contract-backend-template.md +526 -526
  318. package/vibe/templates/contract-frontend-template.md +599 -599
  319. package/vibe/templates/feature-template.md +96 -96
  320. package/vibe/templates/spec-template.md +221 -221
  321. package/vibe/ui-ux-data/charts.csv +26 -26
  322. package/vibe/ui-ux-data/colors.csv +97 -97
  323. package/vibe/ui-ux-data/icons.csv +101 -101
  324. package/vibe/ui-ux-data/landing.csv +31 -31
  325. package/vibe/ui-ux-data/products.csv +96 -96
  326. package/vibe/ui-ux-data/react-performance.csv +45 -45
  327. package/vibe/ui-ux-data/stacks/astro.csv +54 -54
  328. package/vibe/ui-ux-data/stacks/flutter.csv +53 -53
  329. package/vibe/ui-ux-data/stacks/html-tailwind.csv +56 -56
  330. package/vibe/ui-ux-data/stacks/jetpack-compose.csv +53 -53
  331. package/vibe/ui-ux-data/stacks/nextjs.csv +53 -53
  332. package/vibe/ui-ux-data/stacks/nuxt-ui.csv +51 -51
  333. package/vibe/ui-ux-data/stacks/nuxtjs.csv +59 -59
  334. package/vibe/ui-ux-data/stacks/react-native.csv +52 -52
  335. package/vibe/ui-ux-data/stacks/react.csv +54 -54
  336. package/vibe/ui-ux-data/stacks/shadcn.csv +61 -61
  337. package/vibe/ui-ux-data/stacks/svelte.csv +54 -54
  338. package/vibe/ui-ux-data/stacks/swiftui.csv +51 -51
  339. package/vibe/ui-ux-data/stacks/vue.csv +50 -50
  340. package/vibe/ui-ux-data/styles.csv +68 -68
  341. package/vibe/ui-ux-data/typography.csv +57 -57
  342. package/vibe/ui-ux-data/ui-reasoning.csv +101 -101
  343. package/vibe/ui-ux-data/ux-guidelines.csv +99 -99
  344. package/vibe/ui-ux-data/version.json +31 -31
  345. package/vibe/ui-ux-data/web-interface.csv +31 -31
@@ -0,0 +1,74 @@
1
+ import { MemoryStorage } from '../memory/MemoryStorage.js';
2
+ export interface ModelVersion { // A model version as returned by ParityTester.registerModel() / getModels()
3
+ id: string; // Primary key of the model_versions table
4
+ name: string; // Display name supplied to registerModel()
5
+ registeredAt: string; // ISO-8601 timestamp produced by registerModel()
6
+ }
7
+ export interface ParityTestResult { // Outcome of one ParityTester.runParityTest() call, as persisted in parity_tests
8
+ id: string; // Generated as `parity-<base36 ms timestamp>-<8 hex chars>`
9
+ skillName: string; // Skill whose eval results were compared
10
+ oldModel: string; // modelId used to look up the old baseline rows
11
+ newModel: string; // modelId used to look up the new baseline rows
12
+ /** Old model's baseline pass rate as a fraction in [0, 1]; 0 when no rows were recorded */
13
+ oldBaselinePassRate: number;
14
+ /** New model's baseline pass rate (without skill) as a fraction in [0, 1]; 0 when no rows were recorded */
15
+ newBaselinePassRate: number;
16
+ /** With-skill pass rate (reference), computed from rows stored under modelId 'with_skill' */
17
+ withSkillPassRate: number;
18
+ /** Parity score in [0, 1]: newBaselinePassRate / withSkillPassRate capped at 1; if withSkillPassRate is 0 it is 1 when newBaselinePassRate > 0, else 0 */
19
+ parityScore: number;
20
+ /** True when parityScore >= PARITY_THRESHOLDS.OBSOLESCENCE_RATIO */
21
+ obsoleteCandidate: boolean;
22
+ /** Detailed per-eval comparison, one entry per distinct evalId seen in any of the three row sets */
23
+ evalComparisons: EvalComparison[];
24
+ timestamp: string; // ISO-8601 creation time (the createdAt column when loaded from the database)
25
+ }
26
+ export interface EvalComparison { // Per-eval pass/fail comparison across old baseline, new baseline and with-skill
27
+ evalId: string; // Identifier shared by the rows being compared
28
+ prompt: string; // Taken from the new-model row, then the old-model row, then the with-skill row; falls back to evalId
29
+ oldBaselinePassed: boolean; // false when the old model has no row for this eval
30
+ newBaselinePassed: boolean; // false when the new model has no row for this eval
31
+ withSkillPassed: boolean; // false when there is no with-skill row for this eval
32
+ /** True only when the new baseline passed and the old baseline did not */
33
+ improved: boolean;
34
+ }
35
+ export declare const PARITY_THRESHOLDS: { // Tuning constants for parity testing
36
+ /** parityScore (new baseline / with-skill, capped at 1) >= this value → obsolete candidate */
37
+ readonly OBSOLESCENCE_RATIO: 0.85;
38
+ /** Minimum improvement in baseline to consider significant. NOTE(review): not referenced by ParityTester.js itself; presumably consumed elsewhere, confirm */
39
+ readonly MIN_IMPROVEMENT: 0.1;
40
+ /** Minimum eval cases for reliable parity test. NOTE(review): runParityTest() does not check this; presumably enforced by callers, confirm */
41
+ readonly MIN_EVAL_CASES: 3;
42
+ };
43
+ export declare class ParityTester { // Records per-model eval baselines and derives parity tests from them
44
+ private db; // Handle returned by storage.getDatabase()
45
+ constructor(storage: MemoryStorage); // Creates the required tables and indexes if they do not exist
46
+ private initializeTables;
47
+ /** Register a model version; an existing row with the same id is replaced */
48
+ registerModel(id: string, name: string): ModelVersion;
49
+ /** Get all registered models, ordered by registeredAt ascending */
50
+ getModels(): ModelVersion[];
51
+ /** Record eval results for a specific model (one row per result, inserted in a single transaction) */
52
+ recordModelBaseline(skillName: string, modelId: string, evalResults: Array<{
53
+ evalId: string;
54
+ passed: boolean;
55
+ output: string;
56
+ durationMs: number;
57
+ tokenCount: number;
58
+ prompt?: string;
59
+ }>): void;
60
+ /**
61
+ * Run a parity test by reading existing baseline data from model_baseline_results, then persist and return the result.
62
+ * With-skill reference data is read from the same table using the reserved modelId 'with_skill'.
63
+ */
64
+ runParityTest(skillName: string, oldModel: string, newModel: string): ParityTestResult;
65
+ /** Get parity test history for a skill, oldest first */
66
+ getHistory(skillName: string): ParityTestResult[];
67
+ /** Get the most recently created parity test for a skill, or null when none exists */
68
+ getLatest(skillName: string): ParityTestResult | null;
69
+ /** Format parity test as markdown report */
70
+ formatReport(result: ParityTestResult): string;
71
+ private getBaselineRows;
72
+ private buildComparisons;
73
+ }
74
+ //# sourceMappingURL=ParityTester.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ParityTester.d.ts","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/ParityTester.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAE3D,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,qCAAqC;IACrC,mBAAmB,EAAE,MAAM,CAAC;IAC5B,qDAAqD;IACrD,mBAAmB,EAAE,MAAM,CAAC;IAC5B,uCAAuC;IACvC,iBAAiB,EAAE,MAAM,CAAC;IAC1B,+EAA+E;IAC/E,WAAW,EAAE,MAAM,CAAC;IACpB,6CAA6C;IAC7C,iBAAiB,EAAE,OAAO,CAAC;IAC3B,mCAAmC;IACnC,eAAe,EAAE,cAAc,EAAE,CAAC;IAClC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,eAAe,EAAE,OAAO,CAAC;IACzB,+CAA+C;IAC/C,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,iBAAiB;IAC5B,uEAAuE;;IAEvE,8DAA8D;;IAE9D,kDAAkD;;CAE1C,CAAC;AAqCX,qBAAa,YAAY;IACvB,OAAO,CAAC,EAAE,CAA2C;gBAEzC,OAAO,EAAE,aAAa;IAKlC,OAAO,CAAC,gBAAgB;IA0CxB,+BAA+B;IACxB,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,YAAY;IAS5D,gCAAgC;IACzB,SAAS,IAAI,YAAY,EAAE;IAOlC,wDAAwD;IACjD,mBAAmB,CACxB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,GAC/H,IAAI;IAiBP;;;OAGG;IACI,aAAa,CAClB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,GACf,gBAAgB;IAqCnB,0CAA0C;IACnC,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,gBAAgB,EAAE;IAOxD,6BAA6B;IACtB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI;IAO5D,4CAA4C;IACrC,YAAY,CAAC,MAAM,EAAE,gBAAgB,GAAG,MAAM;IA8CrD,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,gBAAgB;CAkCzB"}
@@ -0,0 +1,238 @@
1
+ // ParityTester — Model version parity testing for skill obsolescence detection
2
+ //
3
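+ // On a model upgrade:
4
+ // 1. Measure the new model's baseline (without the skill) using the existing eval cases
5
+ // 2. Compare it with the previous model's baseline
6
+ // 3. If the new model's baseline approaches the existing with-skill level → deprecation candidate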
7
+ import { randomUUID } from 'crypto';
8
/**
 * Thresholds used when judging parity between a new model's baseline and the
 * with-skill reference.
 *
 * The object is frozen so that its runtime behavior matches the `readonly`
 * members declared for it in the type declarations: accidental writes throw
 * in strict-mode (ES module) code instead of silently changing the thresholds
 * for every consumer.
 */
export const PARITY_THRESHOLDS = Object.freeze({
    /** New baseline >= this fraction of with-skill → obsolete candidate */
    OBSOLESCENCE_RATIO: 0.85,
    /** Minimum improvement in baseline to consider significant */
    MIN_IMPROVEMENT: 0.1,
    /** Minimum eval cases for reliable parity test */
    MIN_EVAL_CASES: 3,
});
16
/**
 * Records per-model eval results and derives "parity tests" from them.
 *
 * A parity test compares a new model's baseline pass rate (skill not applied)
 * with the with-skill reference pass rate; when the ratio reaches
 * PARITY_THRESHOLDS.OBSOLESCENCE_RATIO the skill is flagged as an
 * obsolescence candidate. All state lives in the database handle returned by
 * `storage.getDatabase()`.
 */
export class ParityTester {
    /** Database handle; this class calls exec(), prepare() and transaction() on it. */
    db;
    /**
     * @param storage Object whose getDatabase() returns the database handle.
     *   The tables and indexes this class needs are created immediately if absent.
     */
    constructor(storage) {
        this.db = storage.getDatabase();
        this.initializeTables();
    }
    /** Create the model_versions, model_baseline_results and parity_tests tables plus indexes (all IF NOT EXISTS). */
    initializeTables() {
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS model_versions (
        id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        registeredAt TEXT NOT NULL
      );

      CREATE TABLE IF NOT EXISTS model_baseline_results (
        id TEXT PRIMARY KEY,
        skillName TEXT NOT NULL,
        modelId TEXT NOT NULL,
        evalId TEXT NOT NULL,
        prompt TEXT NOT NULL DEFAULT '',
        passed INTEGER NOT NULL DEFAULT 0,
        output TEXT NOT NULL DEFAULT '',
        durationMs INTEGER NOT NULL DEFAULT 0,
        tokenCount INTEGER NOT NULL DEFAULT 0,
        createdAt TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_mbr_skill_model ON model_baseline_results(skillName, modelId);
      CREATE INDEX IF NOT EXISTS idx_mbr_eval ON model_baseline_results(evalId);

      CREATE TABLE IF NOT EXISTS parity_tests (
        id TEXT PRIMARY KEY,
        skillName TEXT NOT NULL,
        oldModel TEXT NOT NULL,
        newModel TEXT NOT NULL,
        oldBaselinePassRate REAL NOT NULL,
        newBaselinePassRate REAL NOT NULL,
        withSkillPassRate REAL NOT NULL,
        parityScore REAL NOT NULL,
        obsoleteCandidate INTEGER NOT NULL DEFAULT 0,
        evalComparisons TEXT NOT NULL DEFAULT '[]',
        createdAt TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_pt_skill ON parity_tests(skillName);
    `);
    }
    /**
     * Register a model version.
     * Uses INSERT OR REPLACE, so registering an existing id overwrites its
     * name and registeredAt.
     * @returns The stored record, with registeredAt as an ISO-8601 string.
     */
    registerModel(id, name) {
        const now = new Date().toISOString();
        this.db.prepare(`
      INSERT OR REPLACE INTO model_versions (id, name, registeredAt)
      VALUES (?, ?, ?)
    `).run(id, name, now);
        return { id, name, registeredAt: now };
    }
    /** Get all registered models, oldest registration first. */
    getModels() {
        const rows = this.db.prepare(`
      SELECT * FROM model_versions ORDER BY registeredAt ASC
    `).all();
        return rows.map(r => ({ id: r.id, name: r.name, registeredAt: r.registeredAt }));
    }
    /**
     * Record eval results for a specific model.
     *
     * One row is inserted per result, all inside a single transaction and all
     * sharing the same createdAt. `passed` is stored as 1/0 and a missing
     * prompt is stored as ''. To record the with-skill reference that
     * runParityTest() reads, pass 'with_skill' as modelId.
     */
    recordModelBaseline(skillName, modelId, evalResults) {
        const now = new Date().toISOString();
        const insertStmt = this.db.prepare(`
      INSERT INTO model_baseline_results (id, skillName, modelId, evalId, prompt, passed, output, durationMs, tokenCount, createdAt)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
        const insertMany = this.db.transaction(() => {
            for (const r of evalResults) {
                const id = `mbr-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
                insertStmt.run(id, skillName, modelId, r.evalId, r.prompt ?? '', r.passed ? 1 : 0, r.output, r.durationMs, r.tokenCount, now);
            }
        });
        insertMany();
    }
    /**
     * Run a parity test from rows already stored in model_baseline_results,
     * persist the outcome in parity_tests, and return it.
     *
     * The with-skill reference rows are read from the same table using the
     * reserved modelId 'with_skill'.
     *
     * NOTE(review): pass rates are computed over every stored row for the
     * model, while the per-eval comparisons keep only one row per evalId, so
     * the two can disagree when a baseline was recorded more than once.
     * NOTE(review): this method does not check PARITY_THRESHOLDS.MIN_EVAL_CASES,
     * and with no with-skill rows any non-zero new baseline yields a score of 1.
     */
    runParityTest(skillName, oldModel, newModel) {
        const oldRows = this.getBaselineRows(skillName, oldModel);
        const newRows = this.getBaselineRows(skillName, newModel);
        const withSkillRows = this.getBaselineRows(skillName, 'with_skill');
        const comparisons = this.buildComparisons(oldRows, newRows, withSkillRows);
        const oldBaselinePassRate = computePassRate(oldRows);
        const newBaselinePassRate = computePassRate(newRows);
        const withSkillPassRate = computePassRate(withSkillRows);
        // Ratio of new baseline to with-skill, capped at 1. A zero with-skill
        // rate cannot be divided by, so it maps to 1 or 0 depending on whether
        // the new baseline passes anything at all.
        const parityScore = withSkillPassRate > 0
            ? Math.min(1.0, newBaselinePassRate / withSkillPassRate)
            : (newBaselinePassRate > 0 ? 1.0 : 0.0);
        const obsoleteCandidate = parityScore >= PARITY_THRESHOLDS.OBSOLESCENCE_RATIO;
        const id = `parity-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
        const now = new Date().toISOString();
        this.db.prepare(`
      INSERT INTO parity_tests (id, skillName, oldModel, newModel, oldBaselinePassRate, newBaselinePassRate, withSkillPassRate, parityScore, obsoleteCandidate, evalComparisons, createdAt)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(id, skillName, oldModel, newModel, oldBaselinePassRate, newBaselinePassRate, withSkillPassRate, parityScore, obsoleteCandidate ? 1 : 0, JSON.stringify(comparisons), now);
        return {
            id,
            skillName,
            oldModel,
            newModel,
            oldBaselinePassRate,
            newBaselinePassRate,
            withSkillPassRate,
            parityScore,
            obsoleteCandidate,
            evalComparisons: comparisons,
            timestamp: now,
        };
    }
    /** Get parity test history for a skill, oldest first. */
    getHistory(skillName) {
        const rows = this.db.prepare(`
      SELECT * FROM parity_tests WHERE skillName = ? ORDER BY createdAt ASC
    `).all(skillName);
        return rows.map(rowToParityResult);
    }
    /** Get the most recently created parity test for a skill, or null when there is none. */
    getLatest(skillName) {
        const row = this.db.prepare(`
      SELECT * FROM parity_tests WHERE skillName = ? ORDER BY createdAt DESC LIMIT 1
    `).get(skillName);
        return row ? rowToParityResult(row) : null;
    }
    /**
     * Format a parity test as a markdown report: a summary table, a per-eval
     * table when comparisons exist, and a recommendation section when the
     * skill is an obsolescence candidate.
     */
    formatReport(result) {
        const lines = [
            `# Parity Report: ${result.skillName}`,
            '',
            `**Timestamp**: ${result.timestamp}`,
            `**Old Model**: ${result.oldModel}`,
            `**New Model**: ${result.newModel}`,
            '',
            '## Summary',
            '',
            '| Metric | Value |',
            '|--------|-------|',
            `| Old Baseline Pass Rate | ${pct(result.oldBaselinePassRate)} |`,
            `| New Baseline Pass Rate | ${pct(result.newBaselinePassRate)} |`,
            `| With-Skill Pass Rate | ${pct(result.withSkillPassRate)} |`,
            `| Parity Score | ${result.parityScore.toFixed(3)} |`,
            `| Obsolete Candidate | ${result.obsoleteCandidate ? 'YES' : 'No'} |`,
            '',
        ];
        if (result.evalComparisons.length > 0) {
            lines.push('## Per-Eval Comparison', '');
            lines.push('| Eval ID | Old Baseline | New Baseline | With Skill | Improved |');
            lines.push('|---------|-------------|--------------|------------|----------|');
            for (const c of result.evalComparisons) {
                lines.push(`| ${c.evalId} | ${c.oldBaselinePassed ? 'PASS' : 'FAIL'} | ${c.newBaselinePassed ? 'PASS' : 'FAIL'} | ${c.withSkillPassed ? 'PASS' : 'FAIL'} | ${c.improved ? 'Yes' : 'No'} |`);
            }
            lines.push('');
        }
        if (result.obsoleteCandidate) {
            lines.push('## Recommendation', '', `The new model (${result.newModel}) baseline achieves ${pct(result.newBaselinePassRate)} pass rate,`, `reaching ${pct(result.parityScore)} of the with-skill pass rate (${pct(result.withSkillPassRate)}).`, 'This skill is a **deprecation candidate** — consider retiring it.', '');
        }
        return lines.join('\n');
    }
    /** Fetch every stored result row for a skill/model pair, oldest first. */
    getBaselineRows(skillName, modelId) {
        return this.db.prepare(`
      SELECT * FROM model_baseline_results
      WHERE skillName = ? AND modelId = ?
      ORDER BY createdAt ASC
    `).all(skillName, modelId);
    }
    /**
     * Build one comparison entry per distinct evalId found in any row set.
     *
     * When a row set holds several rows for the same evalId, the Map keeps the
     * last one in array order. A missing row counts as "not passed".
     */
    buildComparisons(oldRows, newRows, withSkillRows) {
        const allEvalIds = new Set([
            ...oldRows.map(r => r.evalId),
            ...newRows.map(r => r.evalId),
            ...withSkillRows.map(r => r.evalId),
        ]);
        const oldByEval = new Map(oldRows.map(r => [r.evalId, r]));
        const newByEval = new Map(newRows.map(r => [r.evalId, r]));
        const wsById = new Map(withSkillRows.map(r => [r.evalId, r]));
        return Array.from(allEvalIds).map(evalId => {
            const oldRow = oldByEval.get(evalId);
            const newRow = newByEval.get(evalId);
            const wsRow = wsById.get(evalId);
            const oldBaselinePassed = oldRow?.passed === 1;
            const newBaselinePassed = newRow?.passed === 1;
            const withSkillPassed = wsRow?.passed === 1;
            // Prompts are persisted as '' when the caller omitted them, so the
            // fallback chain must skip empty strings (||), not only
            // null/undefined (??). Otherwise an empty prompt on one row hides
            // a real prompt on another and the evalId fallback never applies.
            const prompt = newRow?.prompt || oldRow?.prompt || wsRow?.prompt || evalId;
            return {
                evalId,
                prompt,
                oldBaselinePassed,
                newBaselinePassed,
                withSkillPassed,
                improved: newBaselinePassed && !oldBaselinePassed,
            };
        });
    }
}
213
+ // --- Utility functions ---
214
/**
 * Fraction of rows whose `passed` column is exactly 1.
 * @param rows Result rows carrying a numeric `passed` flag.
 * @returns A value in [0, 1]; 0 for an empty list.
 */
function computePassRate(rows) {
    const total = rows.length;
    if (total === 0) {
        return 0;
    }
    let passCount = 0;
    for (const row of rows) {
        if (row.passed === 1) {
            passCount += 1;
        }
    }
    return passCount / total;
}
220
/**
 * Render a fraction as a percentage string with one decimal place.
 * @param value Fraction to format (0.5 becomes "50.0%").
 */
function pct(value) {
    const scaled = value * 100;
    const digits = scaled.toFixed(1);
    return `${digits}%`;
}
223
/**
 * Convert a parity_tests database row into a parity test result object.
 * The integer obsoleteCandidate column becomes a boolean, the evalComparisons
 * JSON text is parsed, and createdAt is exposed as `timestamp`.
 */
function rowToParityResult(row) {
    const {
        id,
        skillName,
        oldModel,
        newModel,
        oldBaselinePassRate,
        newBaselinePassRate,
        withSkillPassRate,
        parityScore,
    } = row;
    const obsoleteCandidate = row.obsoleteCandidate === 1;
    const evalComparisons = JSON.parse(row.evalComparisons);
    return {
        id,
        skillName,
        oldModel,
        newModel,
        oldBaselinePassRate,
        newBaselinePassRate,
        withSkillPassRate,
        parityScore,
        obsoleteCandidate,
        evalComparisons,
        timestamp: row.createdAt,
    };
}
238
+ //# sourceMappingURL=ParityTester.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ParityTester.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/ParityTester.ts"],"names":[],"mappings":"AAAA,+EAA+E;AAC/E,EAAE;AACF,cAAc;AACd,8CAA8C;AAC9C,wBAAwB;AACxB,4DAA4D;AAE5D,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAuCpC,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,uEAAuE;IACvE,kBAAkB,EAAE,IAAI;IACxB,8DAA8D;IAC9D,eAAe,EAAE,GAAG;IACpB,kDAAkD;IAClD,cAAc,EAAE,CAAC;CACT,CAAC;AAqCX,MAAM,OAAO,YAAY;IACf,EAAE,CAA2C;IAErD,YAAY,OAAsB;QAChC,IAAI,CAAC,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAEO,gBAAgB;QACtB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAsCZ,CAAC,CAAC;IACL,CAAC;IAED,+BAA+B;IACxB,aAAa,CAAC,EAAU,EAAE,IAAY;QAC3C,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;QACtB,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC;IACzC,CAAC;IAED,gCAAgC;IACzB,SAAS;QACd,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,EAAuB,CAAC;QAC9B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;IACnF,CAAC;IAED,wDAAwD;IACjD,mBAAmB,CACxB,SAAiB,EACjB,OAAe,EACf,WAAgI;QAEhI,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGlC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC1C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;gBAC5B,MAAM,EAAE,GAAG,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC1F,UAAU,CAAC,GAAG,CAAC,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAChI,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,UAAU,EAAE,CAAC;IACf,CAAC;IAED;;;OAGG;IACI,aAAa,CAClB,
SAAiB,EACjB,QAAgB,EAChB,QAAgB;QAEhB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QAEpE,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,aAAa,CAAC,CAAC;QAC3E,MAAM,mBAAmB,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,mBAAmB,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,iBAAiB,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;QACzD,MAAM,WAAW,GAAG,iBAAiB,GAAG,CAAC;YACvC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,mBAAmB,GAAG,iBAAiB,CAAC;YACxD,CAAC,CAAC,CAAC,mBAAmB,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC1C,MAAM,iBAAiB,GAAG,WAAW,IAAI,iBAAiB,CAAC,kBAAkB,CAAC;QAE9E,MAAM,EAAE,GAAG,UAAU,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC7F,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,WAAW,EAAE,iBAAiB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,GAAG,CAAC,CAAC;QAEjL,OAAO;YACL,EAAE;YACF,SAAS;YACT,QAAQ;YACR,QAAQ;YACR,mBAAmB;YACnB,mBAAmB;YACnB,iBAAiB;YACjB,WAAW;YACX,iBAAiB;YACjB,eAAe,EAAE,WAAW;YAC5B,SAAS,EAAE,GAAG;SACf,CAAC;IACJ,CAAC;IAED,0CAA0C;IACnC,UAAU,CAAC,SAAiB;QACjC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAoB,CAAC;QACrC,OAAO,IAAI,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;IACrC,CAAC;IAED,6BAA6B;IACtB,SAAS,CAAC,SAAiB;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,SAAS,CAA8B,CAAC;QAC/C,OAAO,GAAG,CAAC,CAAC,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;IAED,4CAA4C;IACrC,YAAY,CAAC,MAAwB;QAC1C,MAAM,KAAK,GAAa;YACtB,oBAAoB,MAAM,CAAC,SAAS,EAAE;YACtC,EAAE;YACF,kBAAkB,MAAM,CAAC,SAAS,EAAE;YACpC,kBAAkB,MAAM,CAAC,QAAQ,EAAE;YACnC,kBAAkB,MAAM,CAAC,QAAQ,EAAE;YACnC,EAAE;YACF,YAAY;YACZ,EAAE;YACF,oBAAoB;YACpB,
oBAAoB;YACpB,8BAA8B,GAAG,CAAC,MAAM,CAAC,mBAAmB,CAAC,IAAI;YACjE,8BAA8B,GAAG,CAAC,MAAM,CAAC,mBAAmB,CAAC,IAAI;YACjE,4BAA4B,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI;YAC7D,oBAAoB,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YACrD,0BAA0B,MAAM,CAAC,iBAAiB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI;YACrE,EAAE;SACH,CAAC;QAEF,IAAI,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;YACzC,KAAK,CAAC,IAAI,CAAC,mEAAmE,CAAC,CAAC;YAChF,KAAK,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;YAC/E,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CACR,KAAK,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAChL,CAAC;YACJ,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;QAED,IAAI,MAAM,CAAC,iBAAiB,EAAE,CAAC;YAC7B,KAAK,CAAC,IAAI,CACR,mBAAmB,EACnB,EAAE,EACF,kBAAkB,MAAM,CAAC,QAAQ,uBAAuB,GAAG,CAAC,MAAM,CAAC,mBAAmB,CAAC,aAAa,EACpG,YAAY,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,iCAAiC,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,EACrG,mEAAmE,EACnE,EAAE,CACH,CAAC;QACJ,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAEO,eAAe,CAAC,SAAiB,EAAE,OAAe;QACxD,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAItB,CAAC,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAuB,CAAC;IACnD,CAAC;IAEO,gBAAgB,CACtB,OAA2B,EAC3B,OAA2B,EAC3B,aAAiC;QAEjC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;YACzB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;YAC7B,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;YAC7B,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;SACpC,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,a
AAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAE9D,OAAO,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;YACzC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACrC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACjC,MAAM,iBAAiB,GAAG,MAAM,EAAE,MAAM,KAAK,CAAC,CAAC;YAC/C,MAAM,iBAAiB,GAAG,MAAM,EAAE,MAAM,KAAK,CAAC,CAAC;YAC/C,MAAM,eAAe,GAAG,KAAK,EAAE,MAAM,KAAK,CAAC,CAAC;YAC5C,MAAM,MAAM,GAAG,MAAM,EAAE,MAAM,IAAI,MAAM,EAAE,MAAM,IAAI,KAAK,EAAE,MAAM,IAAI,MAAM,CAAC;YAE3E,OAAO;gBACL,MAAM;gBACN,MAAM;gBACN,iBAAiB;gBACjB,iBAAiB;gBACjB,eAAe;gBACf,QAAQ,EAAE,iBAAiB,IAAI,CAAC,iBAAiB;aAClD,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,4BAA4B;AAE5B,SAAS,eAAe,CAAC,IAAwB;IAC/C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;IACvD,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;AAC9B,CAAC;AAED,SAAS,GAAG,CAAC,KAAa;IACxB,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACxC,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAkB;IAC3C,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;QAC5C,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;QAC5C,iBAAiB,EAAE,GAAG,CAAC,iBAAiB;QACxC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,iBAAiB,EAAE,GAAG,CAAC,iBAAiB,KAAK,CAAC;QAC9C,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC;QAChD,SAAS,EAAE,GAAG,CAAC,SAAS;KACzB,CAAC;AACJ,CAAC"}
@@ -78,14 +78,14 @@ export class RollbackManager {
78
78
  let disabled = 0;
79
79
  const transaction = this.db.transaction(() => {
80
80
  // Update all non-disabled/non-deleted in DB
81
- const result = this.db.prepare(`
82
- UPDATE generations SET status = 'disabled', updatedAt = ?
83
- WHERE status IN ('draft', 'testing', 'active')
81
+ const result = this.db.prepare(`
82
+ UPDATE generations SET status = 'disabled', updatedAt = ?
83
+ WHERE status IN ('draft', 'testing', 'active')
84
84
  `).run(new Date().toISOString());
85
85
  disabled = result.changes;
86
86
  // Rename all active files
87
- const activeGens = this.db.prepare(`
88
- SELECT filePath FROM generations WHERE status = 'disabled' AND filePath IS NOT NULL
87
+ const activeGens = this.db.prepare(`
88
+ SELECT filePath FROM generations WHERE status = 'disabled' AND filePath IS NOT NULL
89
89
  `).all();
90
90
  for (const gen of activeGens) {
91
91
  if (gen.filePath && existsSync(gen.filePath) && !gen.filePath.endsWith('.disabled')) {
@@ -16,18 +16,18 @@ export class SkillBenchmark {
16
16
  this.initializeTables();
17
17
  }
18
18
  initializeTables() {
19
- this.db.exec(`
20
- CREATE TABLE IF NOT EXISTS skill_benchmarks (
21
- id TEXT PRIMARY KEY,
22
- skillName TEXT NOT NULL,
23
- iteration INTEGER NOT NULL,
24
- summary TEXT NOT NULL,
25
- evalBreakdowns TEXT NOT NULL,
26
- createdAt TEXT NOT NULL
27
- );
28
-
29
- CREATE INDEX IF NOT EXISTS idx_sb_skill ON skill_benchmarks(skillName);
30
- CREATE INDEX IF NOT EXISTS idx_sb_iter ON skill_benchmarks(skillName, iteration);
19
+ this.db.exec(`
20
+ CREATE TABLE IF NOT EXISTS skill_benchmarks (
21
+ id TEXT PRIMARY KEY,
22
+ skillName TEXT NOT NULL,
23
+ iteration INTEGER NOT NULL,
24
+ summary TEXT NOT NULL,
25
+ evalBreakdowns TEXT NOT NULL,
26
+ createdAt TEXT NOT NULL
27
+ );
28
+
29
+ CREATE INDEX IF NOT EXISTS idx_sb_skill ON skill_benchmarks(skillName);
30
+ CREATE INDEX IF NOT EXISTS idx_sb_iter ON skill_benchmarks(skillName, iteration);
31
31
  `);
32
32
  }
33
33
  /**
@@ -74,9 +74,9 @@ export class SkillBenchmark {
74
74
  };
75
75
  const id = `bench-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
76
76
  const now = new Date().toISOString();
77
- this.db.prepare(`
78
- INSERT INTO skill_benchmarks (id, skillName, iteration, summary, evalBreakdowns, createdAt)
79
- VALUES (?, ?, ?, ?, ?, ?)
77
+ this.db.prepare(`
78
+ INSERT INTO skill_benchmarks (id, skillName, iteration, summary, evalBreakdowns, createdAt)
79
+ VALUES (?, ?, ?, ?, ?, ?)
80
80
  `).run(id, skillName, iteration, JSON.stringify(summary), JSON.stringify(breakdowns), now);
81
81
  return { id, skillName, iteration, timestamp: now, summary, evalBreakdowns: breakdowns };
82
82
  }
@@ -84,8 +84,8 @@ export class SkillBenchmark {
84
84
  * Get benchmark history for a skill
85
85
  */
86
86
  getHistory(skillName) {
87
- const rows = this.db.prepare(`
88
- SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration ASC
87
+ const rows = this.db.prepare(`
88
+ SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration ASC
89
89
  `).all(skillName);
90
90
  return rows.map(this.rowToBenchmark);
91
91
  }
@@ -93,8 +93,8 @@ export class SkillBenchmark {
93
93
  * Get the latest benchmark for a skill
94
94
  */
95
95
  getLatest(skillName) {
96
- const row = this.db.prepare(`
97
- SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration DESC LIMIT 1
96
+ const row = this.db.prepare(`
97
+ SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration DESC LIMIT 1
98
98
  `).get(skillName);
99
99
  return row ? this.rowToBenchmark(row) : null;
100
100
  }
@@ -158,14 +158,14 @@ export class SkillBenchmark {
158
158
  return lines.join('\n');
159
159
  }
160
160
  getBenchmarkByIteration(skillName, iteration) {
161
- const row = this.db.prepare(`
162
- SELECT * FROM skill_benchmarks WHERE skillName = ? AND iteration = ?
161
+ const row = this.db.prepare(`
162
+ SELECT * FROM skill_benchmarks WHERE skillName = ? AND iteration = ?
163
163
  `).get(skillName, iteration);
164
164
  return row ? this.rowToBenchmark(row) : null;
165
165
  }
166
166
  getNextIteration(skillName) {
167
- const row = this.db.prepare(`
168
- SELECT MAX(iteration) as maxIter FROM skill_benchmarks WHERE skillName = ?
167
+ const row = this.db.prepare(`
168
+ SELECT MAX(iteration) as maxIter FROM skill_benchmarks WHERE skillName = ?
169
169
  `).get(skillName);
170
170
  return (row.maxIter ?? 0) + 1;
171
171
  }
@@ -14,36 +14,36 @@ export class SkillEvalRunner {
14
14
  this.initializeTables();
15
15
  }
16
16
  initializeTables() {
17
- this.db.exec(`
18
- CREATE TABLE IF NOT EXISTS skill_eval_cases (
19
- id TEXT PRIMARY KEY,
20
- skillName TEXT NOT NULL,
21
- prompt TEXT NOT NULL,
22
- expectedOutput TEXT NOT NULL,
23
- files TEXT DEFAULT '[]',
24
- assertions TEXT DEFAULT '[]',
25
- createdAt TEXT NOT NULL,
26
- updatedAt TEXT NOT NULL
27
- );
28
-
29
- CREATE INDEX IF NOT EXISTS idx_sec_skill ON skill_eval_cases(skillName);
30
-
31
- CREATE TABLE IF NOT EXISTS skill_eval_runs (
32
- id TEXT PRIMARY KEY,
33
- evalId TEXT NOT NULL,
34
- skillName TEXT NOT NULL,
35
- variant TEXT NOT NULL CHECK(variant IN ('with_skill','baseline')),
36
- status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending','running','passed','failed','error')),
37
- output TEXT DEFAULT '',
38
- grades TEXT DEFAULT '[]',
39
- durationMs INTEGER DEFAULT 0,
40
- tokenCount INTEGER DEFAULT 0,
41
- createdAt TEXT NOT NULL
42
- );
43
-
44
- CREATE INDEX IF NOT EXISTS idx_ser_eval ON skill_eval_runs(evalId);
45
- CREATE INDEX IF NOT EXISTS idx_ser_skill ON skill_eval_runs(skillName);
46
- CREATE INDEX IF NOT EXISTS idx_ser_variant ON skill_eval_runs(variant);
17
+ this.db.exec(`
18
+ CREATE TABLE IF NOT EXISTS skill_eval_cases (
19
+ id TEXT PRIMARY KEY,
20
+ skillName TEXT NOT NULL,
21
+ prompt TEXT NOT NULL,
22
+ expectedOutput TEXT NOT NULL,
23
+ files TEXT DEFAULT '[]',
24
+ assertions TEXT DEFAULT '[]',
25
+ createdAt TEXT NOT NULL,
26
+ updatedAt TEXT NOT NULL
27
+ );
28
+
29
+ CREATE INDEX IF NOT EXISTS idx_sec_skill ON skill_eval_cases(skillName);
30
+
31
+ CREATE TABLE IF NOT EXISTS skill_eval_runs (
32
+ id TEXT PRIMARY KEY,
33
+ evalId TEXT NOT NULL,
34
+ skillName TEXT NOT NULL,
35
+ variant TEXT NOT NULL CHECK(variant IN ('with_skill','baseline')),
36
+ status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending','running','passed','failed','error')),
37
+ output TEXT DEFAULT '',
38
+ grades TEXT DEFAULT '[]',
39
+ durationMs INTEGER DEFAULT 0,
40
+ tokenCount INTEGER DEFAULT 0,
41
+ createdAt TEXT NOT NULL
42
+ );
43
+
44
+ CREATE INDEX IF NOT EXISTS idx_ser_eval ON skill_eval_runs(evalId);
45
+ CREATE INDEX IF NOT EXISTS idx_ser_skill ON skill_eval_runs(skillName);
46
+ CREATE INDEX IF NOT EXISTS idx_ser_variant ON skill_eval_runs(variant);
47
47
  `);
48
48
  }
49
49
  /**
@@ -52,9 +52,9 @@ export class SkillEvalRunner {
52
52
  createEvalSet(input) {
53
53
  const cases = [];
54
54
  const now = new Date().toISOString();
55
- const insertStmt = this.db.prepare(`
56
- INSERT INTO skill_eval_cases (id, skillName, prompt, expectedOutput, files, assertions, createdAt, updatedAt)
57
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
55
+ const insertStmt = this.db.prepare(`
56
+ INSERT INTO skill_eval_cases (id, skillName, prompt, expectedOutput, files, assertions, createdAt, updatedAt)
57
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
58
58
  `);
59
59
  const insertMany = this.db.transaction((evals) => {
60
60
  for (const evalCase of evals) {
@@ -83,8 +83,8 @@ export class SkillEvalRunner {
83
83
  * Get all eval cases for a skill
84
84
  */
85
85
  getEvalCases(skillName) {
86
- const rows = this.db.prepare(`
87
- SELECT * FROM skill_eval_cases WHERE skillName = ? ORDER BY createdAt ASC
86
+ const rows = this.db.prepare(`
87
+ SELECT * FROM skill_eval_cases WHERE skillName = ? ORDER BY createdAt ASC
88
88
  `).all(skillName);
89
89
  return rows.map(this.rowToEvalCase);
90
90
  }
@@ -92,8 +92,8 @@ export class SkillEvalRunner {
92
92
  * Get a single eval case by ID
93
93
  */
94
94
  getEvalCase(evalId) {
95
- const row = this.db.prepare(`
96
- SELECT * FROM skill_eval_cases WHERE id = ?
95
+ const row = this.db.prepare(`
96
+ SELECT * FROM skill_eval_cases WHERE id = ?
97
97
  `).get(evalId);
98
98
  return row ? this.rowToEvalCase(row) : null;
99
99
  }
@@ -103,9 +103,9 @@ export class SkillEvalRunner {
103
103
  startRun(evalId, skillName, variant) {
104
104
  const id = `run-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
105
105
  const now = new Date().toISOString();
106
- this.db.prepare(`
107
- INSERT INTO skill_eval_runs (id, evalId, skillName, variant, status, createdAt)
108
- VALUES (?, ?, ?, ?, 'running', ?)
106
+ this.db.prepare(`
107
+ INSERT INTO skill_eval_runs (id, evalId, skillName, variant, status, createdAt)
108
+ VALUES (?, ?, ?, ?, 'running', ?)
109
109
  `).run(id, evalId, skillName, variant, now);
110
110
  return id;
111
111
  }
@@ -115,18 +115,18 @@ export class SkillEvalRunner {
115
115
  completeRun(runId, output, grades, durationMs, tokenCount) {
116
116
  const allPassed = grades.length === 0 || grades.every(g => g.passed);
117
117
  const status = allPassed ? 'passed' : 'failed';
118
- this.db.prepare(`
119
- UPDATE skill_eval_runs
120
- SET status = ?, output = ?, grades = ?, durationMs = ?, tokenCount = ?
121
- WHERE id = ?
118
+ this.db.prepare(`
119
+ UPDATE skill_eval_runs
120
+ SET status = ?, output = ?, grades = ?, durationMs = ?, tokenCount = ?
121
+ WHERE id = ?
122
122
  `).run(status, output, JSON.stringify(grades), durationMs, tokenCount, runId);
123
123
  }
124
124
  /**
125
125
  * Mark a run as errored
126
126
  */
127
127
  failRun(runId, errorMessage) {
128
- this.db.prepare(`
129
- UPDATE skill_eval_runs SET status = 'error', output = ? WHERE id = ?
128
+ this.db.prepare(`
129
+ UPDATE skill_eval_runs SET status = 'error', output = ? WHERE id = ?
130
130
  `).run(errorMessage, runId);
131
131
  }
132
132
  /**
@@ -181,8 +181,8 @@ export class SkillEvalRunner {
181
181
  * Get all runs for an eval case
182
182
  */
183
183
  getRunsForEval(evalId) {
184
- const rows = this.db.prepare(`
185
- SELECT * FROM skill_eval_runs WHERE evalId = ? ORDER BY createdAt DESC
184
+ const rows = this.db.prepare(`
185
+ SELECT * FROM skill_eval_runs WHERE evalId = ? ORDER BY createdAt DESC
186
186
  `).all(evalId);
187
187
  return rows.map(this.rowToRunResult);
188
188
  }
@@ -190,8 +190,8 @@ export class SkillEvalRunner {
190
190
  * Get all runs for a skill
191
191
  */
192
192
  getRunsForSkill(skillName) {
193
- const rows = this.db.prepare(`
194
- SELECT * FROM skill_eval_runs WHERE skillName = ? ORDER BY createdAt DESC
193
+ const rows = this.db.prepare(`
194
+ SELECT * FROM skill_eval_runs WHERE skillName = ? ORDER BY createdAt DESC
195
195
  `).all(skillName);
196
196
  return rows.map(this.rowToRunResult);
197
197
  }
@@ -17,9 +17,9 @@ export class SkillGapDetector {
17
17
  const truncated = prompt.slice(0, 200);
18
18
  const normalized = truncated.toLowerCase().replace(/\s+/g, ' ').trim();
19
19
  try {
20
- this.db.prepare(`
21
- INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
22
- VALUES (?, ?, ?, ?, ?)
20
+ this.db.prepare(`
21
+ INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
22
+ VALUES (?, ?, ?, ?, ?)
23
23
  `).run(id, truncated, normalized, sessionId || null, new Date().toISOString());
24
24
  }
25
25
  catch {
@@ -33,13 +33,13 @@ export class SkillGapDetector {
33
33
  const result = { newGaps: [], totalClusters: 0 };
34
34
  try {
35
35
  // Cluster by normalizedPrompt
36
- const clusters = this.db.prepare(`
37
- SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
38
- FROM skill_gaps
39
- GROUP BY normalizedPrompt
40
- HAVING count >= 3
41
- ORDER BY count DESC
42
- LIMIT ?
36
+ const clusters = this.db.prepare(`
37
+ SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
38
+ FROM skill_gaps
39
+ GROUP BY normalizedPrompt
40
+ HAVING count >= 3
41
+ ORDER BY count DESC
42
+ LIMIT ?
43
43
  `).all(limit);
44
44
  result.totalClusters = clusters.length;
45
45
  for (const cluster of clusters) {