@su-record/vibe 2.7.10 → 2.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. package/.env.example +37 -37
  2. package/CLAUDE.md +126 -222
  3. package/LICENSE +21 -21
  4. package/README.md +580 -580
  5. package/agents/architect-low.md +41 -41
  6. package/agents/architect-medium.md +59 -59
  7. package/agents/architect.md +80 -80
  8. package/agents/build-error-resolver.md +115 -115
  9. package/agents/compounder.md +261 -261
  10. package/agents/diagrammer.md +178 -178
  11. package/agents/docs/api-documenter.md +99 -99
  12. package/agents/docs/changelog-writer.md +93 -93
  13. package/agents/e2e-tester.md +266 -266
  14. package/agents/explorer-low.md +42 -42
  15. package/agents/explorer-medium.md +59 -59
  16. package/agents/explorer.md +48 -48
  17. package/agents/implementer-low.md +43 -43
  18. package/agents/implementer-medium.md +52 -52
  19. package/agents/implementer.md +54 -54
  20. package/agents/junior-mentor.md +141 -141
  21. package/agents/planning/requirements-analyst.md +84 -84
  22. package/agents/planning/ux-advisor.md +83 -83
  23. package/agents/qa/acceptance-tester.md +86 -86
  24. package/agents/qa/edge-case-finder.md +93 -93
  25. package/agents/refactor-cleaner.md +143 -143
  26. package/agents/research/best-practices-agent.md +199 -199
  27. package/agents/research/codebase-patterns-agent.md +157 -157
  28. package/agents/research/framework-docs-agent.md +188 -188
  29. package/agents/research/security-advisory-agent.md +213 -213
  30. package/agents/review/architecture-reviewer.md +107 -107
  31. package/agents/review/complexity-reviewer.md +116 -116
  32. package/agents/review/data-integrity-reviewer.md +88 -88
  33. package/agents/review/git-history-reviewer.md +103 -103
  34. package/agents/review/performance-reviewer.md +86 -86
  35. package/agents/review/python-reviewer.md +150 -150
  36. package/agents/review/rails-reviewer.md +139 -139
  37. package/agents/review/react-reviewer.md +144 -144
  38. package/agents/review/security-reviewer.md +80 -80
  39. package/agents/review/simplicity-reviewer.md +140 -140
  40. package/agents/review/test-coverage-reviewer.md +116 -116
  41. package/agents/review/typescript-reviewer.md +127 -127
  42. package/agents/searcher.md +54 -54
  43. package/agents/simplifier.md +120 -120
  44. package/agents/tester.md +49 -49
  45. package/agents/ui/ui-a11y-auditor.md +93 -93
  46. package/agents/ui/ui-antipattern-detector.md +94 -94
  47. package/agents/ui/ui-dataviz-advisor.md +69 -69
  48. package/agents/ui/ui-design-system-gen.md +57 -57
  49. package/agents/ui/ui-industry-analyzer.md +49 -49
  50. package/agents/ui/ui-layout-architect.md +65 -65
  51. package/agents/ui/ui-stack-implementer.md +68 -68
  52. package/agents/ui/ux-compliance-reviewer.md +81 -81
  53. package/agents/ui-previewer.md +260 -260
  54. package/commands/vibe.run.md +83 -0
  55. package/commands/vibe.spec.review.md +558 -558
  56. package/commands/vibe.utils.md +413 -413
  57. package/commands/vibe.voice.md +79 -79
  58. package/dist/cli/auth.d.ts +1 -1
  59. package/dist/cli/auth.d.ts.map +1 -1
  60. package/dist/cli/auth.js +15 -7
  61. package/dist/cli/auth.js.map +1 -1
  62. package/dist/cli/collaborator.js +52 -52
  63. package/dist/cli/commands/evolution.js +12 -12
  64. package/dist/cli/commands/index.d.ts +1 -0
  65. package/dist/cli/commands/index.d.ts.map +1 -1
  66. package/dist/cli/commands/index.js +1 -0
  67. package/dist/cli/commands/index.js.map +1 -1
  68. package/dist/cli/commands/info.d.ts.map +1 -1
  69. package/dist/cli/commands/info.js +62 -56
  70. package/dist/cli/commands/info.js.map +1 -1
  71. package/dist/cli/commands/init.d.ts.map +1 -1
  72. package/dist/cli/commands/init.js +9 -6
  73. package/dist/cli/commands/init.js.map +1 -1
  74. package/dist/cli/commands/remove.js +14 -14
  75. package/dist/cli/commands/sentinel.js +27 -27
  76. package/dist/cli/commands/skills.d.ts +13 -0
  77. package/dist/cli/commands/skills.d.ts.map +1 -0
  78. package/dist/cli/commands/skills.js +83 -0
  79. package/dist/cli/commands/skills.js.map +1 -0
  80. package/dist/cli/commands/slack.js +10 -10
  81. package/dist/cli/commands/telegram.js +12 -12
  82. package/dist/cli/commands/update.d.ts.map +1 -1
  83. package/dist/cli/commands/update.js +3 -0
  84. package/dist/cli/commands/update.js.map +1 -1
  85. package/dist/cli/detect.js +32 -32
  86. package/dist/cli/index.d.ts.map +1 -1
  87. package/dist/cli/index.js +64 -47
  88. package/dist/cli/index.js.map +1 -1
  89. package/dist/cli/llm/claude-commands.js +16 -16
  90. package/dist/cli/llm/config.js +18 -18
  91. package/dist/cli/llm/gemini-commands.js +47 -47
  92. package/dist/cli/llm/gpt-commands.js +19 -19
  93. package/dist/cli/llm/help.js +21 -21
  94. package/dist/cli/postinstall/constants.d.ts +8 -0
  95. package/dist/cli/postinstall/constants.d.ts.map +1 -1
  96. package/dist/cli/postinstall/constants.js +33 -0
  97. package/dist/cli/postinstall/constants.js.map +1 -1
  98. package/dist/cli/postinstall/cursor-agents.js +32 -32
  99. package/dist/cli/postinstall/cursor-rules.js +83 -83
  100. package/dist/cli/postinstall/cursor-skills.js +743 -743
  101. package/dist/cli/postinstall/index.d.ts +1 -1
  102. package/dist/cli/postinstall/index.d.ts.map +1 -1
  103. package/dist/cli/postinstall/index.js +1 -1
  104. package/dist/cli/postinstall/index.js.map +1 -1
  105. package/dist/cli/setup/ProjectSetup.d.ts.map +1 -1
  106. package/dist/cli/setup/ProjectSetup.js +5 -0
  107. package/dist/cli/setup/ProjectSetup.js.map +1 -1
  108. package/dist/cli/setup/Provisioner.js +42 -42
  109. package/dist/cli/types.d.ts +1 -0
  110. package/dist/cli/types.d.ts.map +1 -1
  111. package/dist/infra/lib/DeepInit.js +24 -24
  112. package/dist/infra/lib/IterationTracker.js +11 -11
  113. package/dist/infra/lib/PythonParser.js +108 -108
  114. package/dist/infra/lib/ReviewRace.js +96 -96
  115. package/dist/infra/lib/SkillFrontmatter.js +28 -28
  116. package/dist/infra/lib/SkillQualityGate.js +9 -9
  117. package/dist/infra/lib/SkillRepository.js +159 -159
  118. package/dist/infra/lib/UltraQA.js +99 -99
  119. package/dist/infra/lib/autonomy/AuditStore.js +41 -41
  120. package/dist/infra/lib/autonomy/ConfirmationStore.js +30 -30
  121. package/dist/infra/lib/autonomy/EventOutbox.js +38 -38
  122. package/dist/infra/lib/autonomy/PolicyEngine.js +18 -18
  123. package/dist/infra/lib/autonomy/SecuritySentinel.js +1 -1
  124. package/dist/infra/lib/autonomy/SuggestionStore.js +33 -33
  125. package/dist/infra/lib/embedding/VectorStore.js +22 -22
  126. package/dist/infra/lib/evolution/AgentAnalyzer.js +10 -10
  127. package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts +79 -0
  128. package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts.map +1 -0
  129. package/dist/infra/lib/evolution/DescriptionOptimizer.js +259 -0
  130. package/dist/infra/lib/evolution/DescriptionOptimizer.js.map +1 -0
  131. package/dist/infra/lib/evolution/GenerationRegistry.js +36 -36
  132. package/dist/infra/lib/evolution/InsightStore.js +90 -90
  133. package/dist/infra/lib/evolution/RollbackManager.js +5 -5
  134. package/dist/infra/lib/evolution/SkillBenchmark.d.ts +81 -0
  135. package/dist/infra/lib/evolution/SkillBenchmark.d.ts.map +1 -0
  136. package/dist/infra/lib/evolution/SkillBenchmark.js +233 -0
  137. package/dist/infra/lib/evolution/SkillBenchmark.js.map +1 -0
  138. package/dist/infra/lib/evolution/SkillClassifier.d.ts +35 -0
  139. package/dist/infra/lib/evolution/SkillClassifier.d.ts.map +1 -0
  140. package/dist/infra/lib/evolution/SkillClassifier.js +167 -0
  141. package/dist/infra/lib/evolution/SkillClassifier.js.map +1 -0
  142. package/dist/infra/lib/evolution/SkillEvalRunner.d.ts +102 -0
  143. package/dist/infra/lib/evolution/SkillEvalRunner.d.ts.map +1 -0
  144. package/dist/infra/lib/evolution/SkillEvalRunner.js +256 -0
  145. package/dist/infra/lib/evolution/SkillEvalRunner.js.map +1 -0
  146. package/dist/infra/lib/evolution/SkillGapDetector.js +10 -10
  147. package/dist/infra/lib/evolution/UsageTracker.js +28 -28
  148. package/dist/infra/lib/evolution/__tests__/eval.test.d.ts +2 -0
  149. package/dist/infra/lib/evolution/__tests__/eval.test.d.ts.map +1 -0
  150. package/dist/infra/lib/evolution/__tests__/eval.test.js +539 -0
  151. package/dist/infra/lib/evolution/__tests__/eval.test.js.map +1 -0
  152. package/dist/infra/lib/evolution/index.d.ts +8 -0
  153. package/dist/infra/lib/evolution/index.d.ts.map +1 -1
  154. package/dist/infra/lib/evolution/index.js +5 -0
  155. package/dist/infra/lib/evolution/index.js.map +1 -1
  156. package/dist/infra/lib/gemini/constants.js +14 -14
  157. package/dist/infra/lib/gemini/orchestration.js +5 -5
  158. package/dist/infra/lib/gpt/oauth.js +44 -44
  159. package/dist/infra/lib/gpt/orchestration.js +4 -4
  160. package/dist/infra/lib/memory/KnowledgeGraph.js +4 -4
  161. package/dist/infra/lib/memory/MemorySearch.js +57 -57
  162. package/dist/infra/lib/memory/MemoryStorage.js +181 -181
  163. package/dist/infra/lib/memory/ObservationStore.js +28 -28
  164. package/dist/infra/lib/memory/ReflectionStore.js +30 -30
  165. package/dist/infra/lib/memory/SessionRAGRetriever.js +7 -7
  166. package/dist/infra/lib/memory/SessionRAGStore.js +225 -225
  167. package/dist/infra/lib/memory/SessionSummarizer.js +9 -9
  168. package/dist/infra/orchestrator/AgentManager.js +12 -12
  169. package/dist/infra/orchestrator/AgentRegistry.js +65 -65
  170. package/dist/infra/orchestrator/MultiLlmResearch.js +8 -8
  171. package/dist/infra/orchestrator/SwarmOrchestrator.test.js +16 -16
  172. package/dist/infra/orchestrator/parallelResearch.js +24 -24
  173. package/dist/tools/convention/analyzeComplexity.test.js +115 -115
  174. package/dist/tools/convention/validateCodeQuality.test.js +104 -104
  175. package/dist/tools/memory/createMemoryTimeline.js +10 -10
  176. package/dist/tools/memory/getMemoryGraph.js +12 -12
  177. package/dist/tools/memory/getSessionContext.js +9 -9
  178. package/dist/tools/memory/linkMemories.js +14 -14
  179. package/dist/tools/memory/listMemories.js +4 -4
  180. package/dist/tools/memory/recallMemory.js +4 -4
  181. package/dist/tools/memory/saveMemory.js +4 -4
  182. package/dist/tools/memory/searchMemoriesAdvanced.js +23 -23
  183. package/dist/tools/semantic/analyzeDependencyGraph.js +12 -12
  184. package/dist/tools/semantic/astGrep.test.js +6 -6
  185. package/dist/tools/spec/prdParser.test.js +171 -171
  186. package/dist/tools/spec/specGenerator.js +169 -169
  187. package/dist/tools/spec/traceabilityMatrix.js +64 -64
  188. package/dist/tools/spec/traceabilityMatrix.test.js +28 -28
  189. package/hooks/gemini-hooks.json +73 -73
  190. package/hooks/hooks.json +137 -137
  191. package/hooks/scripts/code-check.js +70 -70
  192. package/hooks/scripts/context-save.js +212 -212
  193. package/hooks/scripts/hud-status.js +291 -291
  194. package/hooks/scripts/keyword-detector.js +214 -214
  195. package/hooks/scripts/llm-orchestrate.js +646 -646
  196. package/hooks/scripts/post-edit.js +32 -32
  197. package/hooks/scripts/pre-tool-guard.js +125 -125
  198. package/hooks/scripts/prompt-dispatcher.js +185 -185
  199. package/hooks/scripts/sentinel-guard.js +104 -104
  200. package/hooks/scripts/session-start.js +106 -106
  201. package/hooks/scripts/stop-notify.js +209 -209
  202. package/hooks/scripts/utils.js +100 -100
  203. package/languages/csharp-unity.md +515 -515
  204. package/languages/gdscript-godot.md +470 -470
  205. package/languages/ruby-rails.md +489 -489
  206. package/languages/typescript-angular.md +433 -433
  207. package/languages/typescript-astro.md +416 -416
  208. package/languages/typescript-electron.md +406 -406
  209. package/languages/typescript-nestjs.md +524 -524
  210. package/languages/typescript-svelte.md +407 -407
  211. package/languages/typescript-tauri.md +365 -365
  212. package/package.json +121 -121
  213. package/skills/agents-md/SKILL.md +120 -120
  214. package/skills/arch-guard/SKILL.md +180 -0
  215. package/skills/brand-assets/SKILL.md +146 -146
  216. package/skills/capability-loop/SKILL.md +167 -0
  217. package/skills/characterization-test/SKILL.md +206 -206
  218. package/skills/commerce-patterns/SKILL.md +59 -59
  219. package/skills/commit-push-pr/SKILL.md +75 -75
  220. package/skills/context7-usage/SKILL.md +105 -105
  221. package/skills/core-capabilities/SKILL.md +48 -48
  222. package/skills/e2e-commerce/SKILL.md +57 -57
  223. package/skills/exec-plan/SKILL.md +147 -0
  224. package/skills/frontend-design/SKILL.md +73 -73
  225. package/skills/git-worktree/SKILL.md +72 -72
  226. package/skills/handoff/SKILL.md +109 -109
  227. package/skills/parallel-research/SKILL.md +87 -87
  228. package/skills/priority-todos/SKILL.md +63 -63
  229. package/skills/seo-checklist/SKILL.md +57 -57
  230. package/skills/techdebt/SKILL.md +122 -122
  231. package/skills/tool-fallback/SKILL.md +103 -103
  232. package/skills/typescript-advanced-types/SKILL.md +65 -65
  233. package/skills/ui-ux-pro-max/SKILL.md +206 -206
  234. package/skills/vercel-react-best-practices/SKILL.md +59 -59
  235. package/skills/video-production/SKILL.md +51 -51
  236. package/vibe/config.json +29 -29
  237. package/vibe/constitution.md +227 -227
  238. package/vibe/rules/principles/communication-guide.md +98 -98
  239. package/vibe/rules/principles/development-philosophy.md +52 -52
  240. package/vibe/rules/principles/quick-start.md +102 -102
  241. package/vibe/rules/quality/bdd-contract-testing.md +393 -393
  242. package/vibe/rules/quality/checklist.md +276 -276
  243. package/vibe/rules/quality/performance.md +236 -236
  244. package/vibe/rules/quality/testing-strategy.md +440 -440
  245. package/vibe/rules/standards/anti-patterns.md +541 -541
  246. package/vibe/rules/standards/code-structure.md +291 -291
  247. package/vibe/rules/standards/complexity-metrics.md +313 -313
  248. package/vibe/rules/standards/git-workflow.md +237 -237
  249. package/vibe/rules/standards/naming-conventions.md +198 -198
  250. package/vibe/rules/standards/security.md +305 -305
  251. package/vibe/rules/writing/document-style.md +74 -74
  252. package/vibe/setup.sh +31 -31
  253. package/vibe/templates/constitution-template.md +252 -252
  254. package/vibe/templates/contract-backend-template.md +526 -526
  255. package/vibe/templates/contract-frontend-template.md +599 -599
  256. package/vibe/templates/feature-template.md +96 -96
  257. package/vibe/templates/spec-template.md +221 -221
  258. package/vibe/ui-ux-data/charts.csv +26 -26
  259. package/vibe/ui-ux-data/colors.csv +97 -97
  260. package/vibe/ui-ux-data/icons.csv +101 -101
  261. package/vibe/ui-ux-data/landing.csv +31 -31
  262. package/vibe/ui-ux-data/products.csv +96 -96
  263. package/vibe/ui-ux-data/react-performance.csv +45 -45
  264. package/vibe/ui-ux-data/stacks/astro.csv +54 -54
  265. package/vibe/ui-ux-data/stacks/flutter.csv +53 -53
  266. package/vibe/ui-ux-data/stacks/html-tailwind.csv +56 -56
  267. package/vibe/ui-ux-data/stacks/jetpack-compose.csv +53 -53
  268. package/vibe/ui-ux-data/stacks/nextjs.csv +53 -53
  269. package/vibe/ui-ux-data/stacks/nuxt-ui.csv +51 -51
  270. package/vibe/ui-ux-data/stacks/nuxtjs.csv +59 -59
  271. package/vibe/ui-ux-data/stacks/react-native.csv +52 -52
  272. package/vibe/ui-ux-data/stacks/react.csv +54 -54
  273. package/vibe/ui-ux-data/stacks/shadcn.csv +61 -61
  274. package/vibe/ui-ux-data/stacks/svelte.csv +54 -54
  275. package/vibe/ui-ux-data/stacks/swiftui.csv +51 -51
  276. package/vibe/ui-ux-data/stacks/vue.csv +50 -50
  277. package/vibe/ui-ux-data/styles.csv +68 -68
  278. package/vibe/ui-ux-data/typography.csv +57 -57
  279. package/vibe/ui-ux-data/ui-reasoning.csv +101 -101
  280. package/vibe/ui-ux-data/ux-guidelines.csv +99 -99
  281. package/vibe/ui-ux-data/version.json +31 -31
  282. package/vibe/ui-ux-data/web-interface.csv +31 -31
@@ -0,0 +1,167 @@
1
+ // Skill Classifier - Phase 5: Capability Uplift vs Encoded Preference
2
+ //
3
+ // Two types of skills (from Anthropic's taxonomy):
4
+ //
5
+ // 1. Capability Uplift: Compensates for what the model can't do well.
6
+ // - Becomes obsolete as models improve
7
+ // - Eval: if baseline (no skill) starts passing, the skill has served its purpose
8
+ //
9
+ // 2. Encoded Preference: Encodes team-specific workflow, style, or process.
10
+ // - Durable regardless of model improvements
11
+ // - Eval: baseline will never pass because the model can't know your preferences
12
+ import { SkillBenchmark } from './SkillBenchmark.js';
13
/**
 * Thresholds for classification
 */
// Baseline pass rate at or above which the model is treated as handling the task without the skill.
const BASELINE_HIGH_THRESHOLD = 0.7;
// Baseline pass rate at or below which the model is treated as failing without the skill.
const BASELINE_LOW_THRESHOLD = 0.3;
// Minimum with-skill/baseline gap considered meaningful; half of it is the "stable" band for trends.
const CONVERGENCE_THRESHOLD = 0.15;
// Number of benchmarks required before a trend can be computed.
const MIN_BENCHMARKS_FOR_TREND = 2;

/**
 * Classifies a skill as "capability uplift" or "encoded preference" by comparing
 * with-skill and baseline pass rates, and how the gap between them moves over time.
 */
export class SkillClassifier {
    benchmark;

    /**
     * @param storage - storage handle forwarded unchanged to SkillBenchmark.
     */
    constructor(storage) {
        this.benchmark = new SkillBenchmark(storage);
    }

    /**
     * Classify a skill based on benchmark history.
     *
     * Uses the most recent benchmark for the pass rates and the first/last
     * benchmarks for the trend. With no history an 'unknown' result is returned.
     *
     * @param {string} skillName
     * @returns {object} classification (category, confidence, reasoning, rates, trend, recommendation)
     */
    classify(skillName) {
        const history = this.benchmark.getHistory(skillName);
        if (history.length === 0) {
            return {
                skillName,
                category: 'unknown',
                confidence: 0,
                reasoning: 'No benchmark data available. Run evals first.',
                baselinePassRate: 0,
                withSkillPassRate: 0,
                trend: 'insufficient_data',
                recommendation: 'Create eval cases and run benchmarks to classify this skill.',
            };
        }
        const latest = history[history.length - 1];
        const { withSkill, baseline } = latest.summary;
        const trend = this.computeTrend(history);
        return this.determineCategory(skillName, withSkill.passRate, baseline.passRate, trend, history.length);
    }

    /**
     * Classify based on explicit pass rates (no DB lookup).
     *
     * @param {string} skillName
     * @param {number} withSkillPassRate - pass rate with the skill, as a 0..1 fraction
     * @param {number} baselinePassRate - pass rate without the skill, as a 0..1 fraction
     * @param {string} [trend='insufficient_data'] - 'stable' | 'diverging' | 'converging' | 'insufficient_data'
     * @returns {object} classification, same shape as classify()
     */
    classifyFromRates(skillName, withSkillPassRate, baselinePassRate, trend = 'insufficient_data') {
        // A caller that supplies a real trend already has trend data. Reporting a
        // single benchmark in that case made determineCategory ignore a
        // 'converging' trend and label the skill an encoded preference.
        const benchmarkCount = trend === 'insufficient_data' ? 1 : MIN_BENCHMARKS_FOR_TREND;
        return this.determineCategory(skillName, withSkillPassRate, baselinePassRate, trend, benchmarkCount);
    }

    /**
     * Check if a skill is becoming obsolete (capability uplift that model now handles).
     *
     * @param {string} skillName
     * @returns {{ obsolete: boolean, reason: string }}
     */
    isBecomingObsolete(skillName) {
        const result = this.classify(skillName);
        if (result.category === 'capability_uplift' && result.trend === 'converging') {
            return {
                obsolete: true,
                reason: `Baseline pass rate (${this.pct(result.baselinePassRate)}) is converging with skill pass rate (${this.pct(result.withSkillPassRate)}). The model may now handle this without the skill.`,
            };
        }
        if (result.baselinePassRate >= BASELINE_HIGH_THRESHOLD) {
            return {
                obsolete: true,
                reason: `Baseline pass rate is ${this.pct(result.baselinePassRate)} — the model handles this well without the skill.`,
            };
        }
        // With no usable data we cannot claim the skill is valuable; pass on the
        // classifier's own explanation instead of an unsupported statement.
        if (result.category === 'unknown') {
            return { obsolete: false, reason: result.reasoning };
        }
        return { obsolete: false, reason: 'Skill still provides significant value.' };
    }

    /**
     * Decide the category from pass rates and trend.
     *
     * @param {string} skillName
     * @param {number} wsRate - with-skill pass rate (0..1)
     * @param {number} blRate - baseline pass rate (0..1)
     * @param {string} trend - trend label as produced by computeTrend()
     * @param {number} benchmarkCount - number of benchmarks behind the rates
     * @returns {object} classification result
     */
    determineCategory(skillName, wsRate, blRate, trend, benchmarkCount) {
        const gap = wsRate - blRate;
        // Case 1: Baseline already performs well → capability uplift (possibly obsolete)
        if (blRate >= BASELINE_HIGH_THRESHOLD) {
            return {
                skillName,
                category: 'capability_uplift',
                confidence: Math.min(0.9, 0.5 + blRate * 0.4),
                reasoning: `Baseline pass rate is high (${this.pct(blRate)}), indicating the model can handle this well without the skill. This is a capability uplift skill that may be nearing obsolescence.`,
                baselinePassRate: blRate,
                withSkillPassRate: wsRate,
                trend,
                recommendation: gap < CONVERGENCE_THRESHOLD
                    ? 'Consider retiring this skill — the model handles this natively now.'
                    : 'Monitor baseline trend. The skill still adds some value.',
            };
        }
        // Case 2: Baseline performs poorly and skill helps a lot → likely encoded preference
        if (blRate <= BASELINE_LOW_THRESHOLD && gap > CONVERGENCE_THRESHOLD) {
            // Too few benchmarks to see a trend is treated the same as a non-closing gap.
            const isEncoded = trend === 'stable' || trend === 'diverging' || benchmarkCount < MIN_BENCHMARKS_FOR_TREND;
            return {
                skillName,
                category: isEncoded ? 'encoded_preference' : 'capability_uplift',
                confidence: isEncoded ? Math.min(0.85, 0.4 + gap * 0.5) : 0.5,
                reasoning: isEncoded
                    ? `Low baseline (${this.pct(blRate)}) with stable gap suggests team-specific preferences the model cannot infer.`
                    : `Low baseline (${this.pct(blRate)}) but converging trend suggests model capability gap that is closing.`,
                baselinePassRate: blRate,
                withSkillPassRate: wsRate,
                trend,
                recommendation: isEncoded
                    ? 'This skill encodes team preferences. Keep and maintain it.'
                    : 'Capability uplift skill. Monitor baseline improvements across model updates.',
            };
        }
        // Case 3: Middle ground — need more data or trend analysis
        if (trend === 'converging') {
            return {
                skillName,
                category: 'capability_uplift',
                confidence: 0.6,
                reasoning: `Baseline trend is converging toward skill performance, suggesting a capability gap that is closing.`,
                baselinePassRate: blRate,
                withSkillPassRate: wsRate,
                trend,
                recommendation: 'Likely capability uplift. Re-evaluate after model updates.',
            };
        }
        if (trend === 'stable' || trend === 'diverging') {
            return {
                skillName,
                category: 'encoded_preference',
                confidence: 0.55,
                reasoning: `Baseline-to-skill gap is stable/diverging, suggesting persistent team-specific knowledge.`,
                baselinePassRate: blRate,
                withSkillPassRate: wsRate,
                trend,
                recommendation: 'Likely encoded preference. Maintain and refine.',
            };
        }
        return {
            skillName,
            category: 'unknown',
            confidence: 0.3,
            reasoning: `Not enough data to classify confidently. Baseline: ${this.pct(blRate)}, With-skill: ${this.pct(wsRate)}.`,
            baselinePassRate: blRate,
            withSkillPassRate: wsRate,
            trend,
            recommendation: 'Run more benchmark iterations to gather trend data.',
        };
    }

    /**
     * Derive a trend label from the change in the with-skill/baseline gap
     * between the first and the last benchmark in `history`.
     *
     * @param {Array<object>} history - benchmarks; the first entry is compared with the last
     * @returns {string} 'insufficient_data' | 'stable' | 'converging' | 'diverging'
     */
    computeTrend(history) {
        if (history.length < MIN_BENCHMARKS_FOR_TREND) {
            return 'insufficient_data';
        }
        // Compare gap between first and last benchmarks
        const first = history[0];
        const last = history[history.length - 1];
        const firstGap = first.summary.withSkill.passRate - first.summary.baseline.passRate;
        const lastGap = last.summary.withSkill.passRate - last.summary.baseline.passRate;
        const gapChange = lastGap - firstGap;
        if (Math.abs(gapChange) < CONVERGENCE_THRESHOLD / 2) {
            return 'stable';
        }
        // Gap is shrinking → baseline is catching up → converging
        if (gapChange < 0) {
            return 'converging';
        }
        // Gap is growing → skill is pulling ahead → diverging
        return 'diverging';
    }

    /**
     * Format a 0..1 fraction as a percentage string with one decimal place.
     *
     * @param {number} value
     * @returns {string} e.g. 0.25 → "25.0%"
     */
    pct(value) {
        return `${(value * 100).toFixed(1)}%`;
    }
}
167
+ //# sourceMappingURL=SkillClassifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SkillClassifier.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillClassifier.ts"],"names":[],"mappings":"AAAA,sEAAsE;AACtE,EAAE;AACF,mDAAmD;AACnD,EAAE;AACF,sEAAsE;AACtE,0CAA0C;AAC1C,qFAAqF;AACrF,EAAE;AACF,4EAA4E;AAC5E,gDAAgD;AAChD,oFAAoF;AAGpF,OAAO,EAAE,cAAc,EAAmB,MAAM,qBAAqB,CAAC;AAetE;;GAEG;AACH,MAAM,uBAAuB,GAAG,GAAG,CAAC;AACpC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AACnC,MAAM,qBAAqB,GAAG,IAAI,CAAC;AACnC,MAAM,wBAAwB,GAAG,CAAC,CAAC;AAEnC,MAAM,OAAO,eAAe;IAClB,SAAS,CAAiB;IAElC,YAAY,OAAsB;QAChC,IAAI,CAAC,SAAS,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACI,QAAQ,CAAC,SAAiB;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAErD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,SAAS;gBACnB,UAAU,EAAE,CAAC;gBACb,SAAS,EAAE,+CAA+C;gBAC1D,gBAAgB,EAAE,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,KAAK,EAAE,mBAAmB;gBAC1B,cAAc,EAAE,8DAA8D;aAC/E,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC;QAC/C,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAEzC,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,SAAS,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACzG,CAAC;IAED;;OAEG;IACI,iBAAiB,CACtB,SAAiB,EACjB,iBAAyB,EACzB,gBAAwB,EACxB,QAAuC,mBAAmB;QAE1D,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1F,CAAC;IAED;;OAEG;IACI,kBAAkB,CAAC,SAAiB;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAExC,IAAI,MAAM,CAAC,QAAQ,KAAK,mBAAmB,IAAI,MAAM,CAAC,KAAK,KAAK,YAAY,EAAE,CAAC;YAC7E,OAAO;gBACL,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,uBAAuB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC,yCAAyC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,qDAAqD;aACjM,CAAC;QACJ,CAAC;QAED,IAAI,MAAM,CAAC,gBAAgB,IAAI,uBAAuB,EAAE,CAAC;YACvD,OAAO;gBACL,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,yBAAyB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC,mDAAmD;aACtH,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,yCA
AyC,EAAE,CAAC;IAChF,CAAC;IAEO,iBAAiB,CACvB,SAAiB,EACjB,MAAc,EACd,MAAc,EACd,KAAoC,EACpC,cAAsB;QAEtB,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAE5B,iFAAiF;QACjF,IAAI,MAAM,IAAI,uBAAuB,EAAE,CAAC;YACtC,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,mBAAmB;gBAC7B,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,GAAG,GAAG,CAAC;gBAC7C,SAAS,EAAE,+BAA+B,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,qIAAqI;gBAC/L,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,GAAG,GAAG,qBAAqB;oBACzC,CAAC,CAAC,qEAAqE;oBACvE,CAAC,CAAC,0DAA0D;aAC/D,CAAC;QACJ,CAAC;QAED,qFAAqF;QACrF,IAAI,MAAM,IAAI,sBAAsB,IAAI,GAAG,GAAG,qBAAqB,EAAE,CAAC;YACpE,MAAM,SAAS,GAAG,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,WAAW,IAAI,cAAc,GAAG,wBAAwB,CAAC;YAC3G,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,mBAAmB;gBAChE,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG;gBAC7D,SAAS,EAAE,SAAS;oBAClB,CAAC,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,8EAA8E;oBACjH,CAAC,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,uEAAuE;gBAC5G,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,SAAS;oBACvB,CAAC,CAAC,4DAA4D;oBAC9D,CAAC,CAAC,8EAA8E;aACnF,CAAC;QACJ,CAAC;QAED,2DAA2D;QAC3D,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;YAC3B,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,mBAAmB;gBAC7B,UAAU,EAAE,GAAG;gBACf,SAAS,EAAE,qGAAqG;gBAChH,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,4DAA4D;aAC7E,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,WAAW,EAAE,CAAC;YAChD,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,oBAAoB;gBAC9B,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,2FAA2F;gBACtG,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,iDAAiD;aAClE,CAAC;QACJ,CAAC;QAED,OAAO;YACL,SAAS;YACT,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,GAAG;YACf,SAAS,EAAE,sDAAsD,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG;YACrH,gBAAgB,EAAE,MAAM;YACxB,iBAAiB,EAAE,MAAM;YACzB,KAAK;YACL,cAAc,EAAE,qDAAqD;SACtE,CAAC;IACJ,CAAC;IAEO,YAAY,CAClB,OAA0B;QAE1B,IAAI,OAAO,CAAC,MAAM,GAAG,wBAAwB,EAAE,CAAC;
YAC9C,OAAO,mBAAmB,CAAC;QAC7B,CAAC;QAED,gDAAgD;QAChD,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEzC,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACpF,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAEjF,MAAM,SAAS,GAAG,OAAO,GAAG,QAAQ,CAAC;QAErC,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,qBAAqB,GAAG,CAAC,EAAE,CAAC;YACpD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,0DAA0D;QAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,sDAAsD;QACtD,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,GAAG,CAAC,KAAa;QACvB,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACxC,CAAC;CACF"}
@@ -0,0 +1,102 @@
1
+ import { MemoryStorage } from '../memory/MemoryStorage.js';
2
+ export type EvalStatus = 'pending' | 'running' | 'passed' | 'failed' | 'error'; // Status values an eval run can carry (used by EvalRunResult.status).
3
+ export interface SkillEvalCase { // One eval case for a skill: a prompt, its expected output, and the assertions attached to it.
4
+ id: string;
5
+ skillName: string;
6
+ prompt: string;
7
+ expectedOutput: string;
8
+ files: string[]; // NOTE(review): presumably paths of files supplied with the prompt — confirm against the implementation.
9
+ assertions: EvalAssertion[];
10
+ }
11
+ export interface EvalAssertion { // A single check attached to an eval case.
12
+ id: string;
13
+ description: string;
14
+ type: 'contains' | 'not_contains' | 'matches_regex' | 'custom'; // Selects which kind of check is applied.
15
+ value: string; // Operand for the check; presumably a substring or a regex source depending on `type` — confirm in gradeOutput.
16
+ }
17
+ export interface EvalRunResult { // One recorded run of an eval case, for either the with-skill or the baseline variant.
18
+ evalId: string;
19
+ runId: string;
20
+ skillName: string;
21
+ variant: 'with_skill' | 'baseline'; // Which side of the with-skill vs baseline comparison this run belongs to.
22
+ status: EvalStatus;
23
+ output: string;
24
+ grades: AssertionGrade[]; // Per-assertion grading results for `output`.
25
+ durationMs: number;
26
+ tokenCount: number;
27
+ createdAt: string; // NOTE(review): presumably an ISO-8601 timestamp — confirm against the implementation.
28
+ }
29
+ export interface AssertionGrade { // Outcome of grading one assertion against an output.
30
+ assertionId: string;
31
+ description: string;
32
+ passed: boolean;
33
+ evidence: string; // NOTE(review): presumably a short explanation of why the assertion passed or failed — confirm in gradeOutput.
34
+ }
35
+ export interface EvalSetInput { // Input accepted by SkillEvalRunner.createEvalSet: a skill name plus the evals to create for it.
36
+ skillName: string;
37
+ evals: Array<{
38
+ prompt: string;
39
+ expectedOutput: string;
40
+ files?: string[]; // Optional.
41
+ assertions?: Array<{ // Optional; unlike EvalAssertion these entries carry no `id`.
42
+ description: string;
43
+ type: 'contains' | 'not_contains' | 'matches_regex' | 'custom';
44
+ value: string;
45
+ }>;
46
+ }>;
47
+ }
48
+ export declare class SkillEvalRunner { // Stores eval cases and eval runs for skills and grades outputs against assertions.
49
+ private db;
50
+ constructor(storage: MemoryStorage); // The implementation takes its database from storage.getDatabase() and then runs initializeTables().
51
+ private initializeTables; // Creates the skill_eval_cases and skill_eval_runs tables and their indexes if they do not exist.
52
+ /**
53
+ * Create an eval set for a skill
54
+ */
55
+ createEvalSet(input: EvalSetInput): SkillEvalCase[];
56
+ /**
57
+ * Get all eval cases for a skill
58
+ */
59
+ getEvalCases(skillName: string): SkillEvalCase[];
60
+ /**
61
+ * Get a single eval case by ID
62
+ */
63
+ getEvalCase(evalId: string): SkillEvalCase | null; // NOTE(review): null presumably means no case with that ID — confirm.
64
+ /**
65
+ * Record the start of an eval run
66
+ */
67
+ startRun(evalId: string, skillName: string, variant: 'with_skill' | 'baseline'): string; // NOTE(review): the returned string is presumably the new run's ID — confirm.
68
+ /**
69
+ * Complete an eval run with output and grades
70
+ */
71
+ completeRun(runId: string, output: string, grades: AssertionGrade[], durationMs: number, tokenCount: number): void;
72
+ /**
73
+ * Mark a run as errored
74
+ */
75
+ failRun(runId: string, errorMessage: string): void;
76
+ /**
77
+ * Grade output against assertions
78
+ */
79
+ gradeOutput(output: string, assertions: EvalAssertion[]): AssertionGrade[];
80
+ /**
81
+ * Get all runs for an eval case
82
+ */
83
+ getRunsForEval(evalId: string): EvalRunResult[];
84
+ /**
85
+ * Get all runs for a skill
86
+ */
87
+ getRunsForSkill(skillName: string): EvalRunResult[];
88
+ /**
89
+ * Get latest runs grouped by eval and variant
90
+ */
91
+ getLatestRuns(skillName: string): Map<string, { // NOTE(review): map key is presumably the eval ID — confirm.
92
+ withSkill: EvalRunResult | null;
93
+ baseline: EvalRunResult | null;
94
+ }>;
95
+ /**
96
+ * Delete all eval cases and runs for a skill
97
+ */
98
+ deleteEvalSet(skillName: string): number; // NOTE(review): the returned number is presumably a count of deleted rows — confirm.
99
+ private rowToEvalCase;
100
+ private rowToRunResult;
101
+ }
102
+ //# sourceMappingURL=SkillEvalRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SkillEvalRunner.d.ts","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillEvalRunner.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAI3D,MAAM,MAAM,UAAU,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE/E,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,EAAE,aAAa,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,UAAU,GAAG,cAAc,GAAG,eAAe,GAAG,QAAQ,CAAC;IAC/D,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,YAAY,GAAG,UAAU,CAAC;IACnC,MAAM,EAAE,UAAU,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,cAAc,EAAE,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,KAAK,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,cAAc,EAAE,MAAM,CAAC;QACvB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;QACjB,UAAU,CAAC,EAAE,KAAK,CAAC;YACjB,WAAW,EAAE,MAAM,CAAC;YACpB,IAAI,EAAE,UAAU,GAAG,cAAc,GAAG,eAAe,GAAG,QAAQ,CAAC;YAC/D,KAAK,EAAE,MAAM,CAAC;SACf,CAAC,CAAC;KACJ,CAAC,CAAC;CACJ;AA4BD,qBAAa,eAAe;IAC1B,OAAO,CAAC,EAAE,CAA2C;gBAEzC,OAAO,EAAE,aAAa;IAKlC,OAAO,CAAC,gBAAgB;IAkCxB;;OAEG;IACI,aAAa,CAAC,KAAK,EAAE,YAAY,GAAG,aAAa,EAAE;IA6C1D;;OAEG;IACI,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,aAAa,EAAE;IAOvD;;OAEG;IACI,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI;IAOxD;;OAEG;IACI,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,GAAG,UAAU,GAAG,MAAM;IAY9F;;OAEG;IACI,WAAW,CAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,cAAc,EAAE,EACxB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,GACjB,IAAI;IAWP;;OAEG;IACI,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,IAAI;IAMzD;;OAEG;IACI,WAAW,CAAC,MAAM,EAAE,MA
AM,EAAE,UAAU,EAAE,aAAa,EAAE,GAAG,cAAc,EAAE;IAkDjF;;OAEG;IACI,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,EAAE;IAOtD;;OAEG;IACI,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,aAAa,EAAE;IAO1D;;OAEG;IACI,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE;QAAE,SAAS,EAAE,aAAa,GAAG,IAAI,CAAC;QAAC,QAAQ,EAAE,aAAa,GAAG,IAAI,CAAA;KAAE,CAAC;IAmBzH;;OAEG;IACI,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAa/C,OAAO,CAAC,aAAa;IAWrB,OAAO,CAAC,cAAc;CAcvB"}
@@ -0,0 +1,256 @@
1
+ // Skill Eval Runner - Phase 5: Test, Measure, and Refine
2
+ // Defines and runs evals for skills, tracking pass/fail per assertion
3
+ //
4
+ // Inspired by Anthropic's skill-creator eval framework:
5
+ // - Define eval cases with prompts and expected outputs
6
+ // - Run with-skill vs baseline comparisons
7
+ // - Grade results against assertions
8
+ // - Aggregate into benchmarks
9
+ import { randomUUID } from 'crypto';
10
/**
 * Decode a JSON-array column read from SQLite.
 *
 * The `files`, `assertions` and `grades` columns only declare a DEFAULT, not
 * NOT NULL, so a row may legitimately hold NULL (or an empty string). Those
 * are treated as an empty list instead of leaking `null` to callers or
 * throwing from `JSON.parse('')`. Malformed, non-empty JSON still throws.
 *
 * @param {string | null | undefined} raw - Raw column value.
 * @returns {any[]} Parsed value, or `[]` when the column is empty.
 */
function parseJsonColumn(raw) {
    return raw ? JSON.parse(raw) : [];
}

/**
 * Build the 8-hex-character random suffix used in eval, run and assertion ids.
 *
 * @returns {string} First 8 hex digits of a fresh UUID (dashes removed).
 */
function shortId() {
    return randomUUID().replace(/-/g, '').slice(0, 8);
}

/**
 * Stores skill eval cases and their run results in SQLite, and grades run
 * output against simple assertions.
 *
 * The database handle is obtained from `storage.getDatabase()` and is used
 * through `exec`, `prepare(...).run/get/all` and `transaction`.
 */
export class SkillEvalRunner {
    db;

    /**
     * @param {{ getDatabase: () => any }} storage - Provider of the database handle.
     */
    constructor(storage) {
        this.db = storage.getDatabase();
        this.initializeTables();
    }

    /**
     * Create the eval-case and eval-run tables (and their indexes) if missing.
     */
    initializeTables() {
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS skill_eval_cases (
                id TEXT PRIMARY KEY,
                skillName TEXT NOT NULL,
                prompt TEXT NOT NULL,
                expectedOutput TEXT NOT NULL,
                files TEXT DEFAULT '[]',
                assertions TEXT DEFAULT '[]',
                createdAt TEXT NOT NULL,
                updatedAt TEXT NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_sec_skill ON skill_eval_cases(skillName);

            CREATE TABLE IF NOT EXISTS skill_eval_runs (
                id TEXT PRIMARY KEY,
                evalId TEXT NOT NULL,
                skillName TEXT NOT NULL,
                variant TEXT NOT NULL CHECK(variant IN ('with_skill','baseline')),
                status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending','running','passed','failed','error')),
                output TEXT DEFAULT '',
                grades TEXT DEFAULT '[]',
                durationMs INTEGER DEFAULT 0,
                tokenCount INTEGER DEFAULT 0,
                createdAt TEXT NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_ser_eval ON skill_eval_runs(evalId);
            CREATE INDEX IF NOT EXISTS idx_ser_skill ON skill_eval_runs(skillName);
            CREATE INDEX IF NOT EXISTS idx_ser_variant ON skill_eval_runs(variant);
        `);
    }

    /**
     * Create an eval set for a skill.
     *
     * All cases are inserted inside one transaction. Each case and each
     * assertion receives a freshly generated id.
     *
     * @param {{ skillName: string, evals: Array<{ prompt: string, expectedOutput: string, files?: string[], assertions?: Array<{ description: string, type: string, value: string }> }> }} input
     * @returns {Array<{ id: string, skillName: string, prompt: string, expectedOutput: string, files: string[], assertions: object[] }>} The created cases.
     */
    createEvalSet(input) {
        const cases = [];
        const now = new Date().toISOString();
        const insertStmt = this.db.prepare(`
            INSERT INTO skill_eval_cases (id, skillName, prompt, expectedOutput, files, assertions, createdAt, updatedAt)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        `);
        const insertMany = this.db.transaction((evals) => {
            for (const evalCase of evals) {
                const id = `eval-${Date.now().toString(36)}-${shortId()}`;
                const files = evalCase.files ?? [];
                const assertions = (evalCase.assertions ?? []).map(({ description, type, value }) => ({
                    id: `assert-${shortId()}`,
                    description,
                    type,
                    value,
                }));
                insertStmt.run(
                    id,
                    input.skillName,
                    evalCase.prompt,
                    evalCase.expectedOutput,
                    JSON.stringify(files),
                    JSON.stringify(assertions),
                    now,
                    now,
                );
                cases.push({
                    id,
                    skillName: input.skillName,
                    prompt: evalCase.prompt,
                    expectedOutput: evalCase.expectedOutput,
                    files,
                    assertions,
                });
            }
        });
        insertMany(input.evals);
        return cases;
    }

    /**
     * Get all eval cases for a skill, oldest first.
     *
     * @param {string} skillName
     * @returns {object[]} Eval cases with `files` and `assertions` decoded.
     */
    getEvalCases(skillName) {
        const rows = this.db.prepare(`
            SELECT * FROM skill_eval_cases WHERE skillName = ? ORDER BY createdAt ASC
        `).all(skillName);
        // Arrow wrapper: keeps `this` bound and drops map's index/array arguments.
        return rows.map((row) => this.rowToEvalCase(row));
    }

    /**
     * Get a single eval case by ID.
     *
     * @param {string} evalId
     * @returns {object | null} The eval case, or `null` when no row matches.
     */
    getEvalCase(evalId) {
        const row = this.db.prepare(`
            SELECT * FROM skill_eval_cases WHERE id = ?
        `).get(evalId);
        return row ? this.rowToEvalCase(row) : null;
    }

    /**
     * Record the start of an eval run (status `running`).
     *
     * @param {string} evalId
     * @param {string} skillName
     * @param {'with_skill' | 'baseline'} variant
     * @returns {string} The generated run id.
     */
    startRun(evalId, skillName, variant) {
        const id = `run-${Date.now().toString(36)}-${shortId()}`;
        const now = new Date().toISOString();
        this.db.prepare(`
            INSERT INTO skill_eval_runs (id, evalId, skillName, variant, status, createdAt)
            VALUES (?, ?, ?, ?, 'running', ?)
        `).run(id, evalId, skillName, variant, now);
        return id;
    }

    /**
     * Complete an eval run with output and grades.
     *
     * The run is stored as `passed` when every grade passed and as `failed`
     * otherwise. An empty grade list counts as `passed`.
     *
     * @param {string} runId
     * @param {string} output
     * @param {Array<{ passed: boolean }>} grades
     * @param {number} durationMs
     * @param {number} tokenCount
     * @returns {void}
     */
    completeRun(runId, output, grades, durationMs, tokenCount) {
        // `every` is already true for an empty array, so no separate length check is needed.
        const status = grades.every((grade) => grade.passed) ? 'passed' : 'failed';
        this.db.prepare(`
            UPDATE skill_eval_runs
            SET status = ?, output = ?, grades = ?, durationMs = ?, tokenCount = ?
            WHERE id = ?
        `).run(status, output, JSON.stringify(grades), durationMs, tokenCount, runId);
    }

    /**
     * Mark a run as errored, storing the error message in the `output` column.
     *
     * @param {string} runId
     * @param {string} errorMessage
     * @returns {void}
     */
    failRun(runId, errorMessage) {
        this.db.prepare(`
            UPDATE skill_eval_runs SET status = 'error', output = ? WHERE id = ?
        `).run(errorMessage, runId);
    }

    /**
     * Grade output against assertions.
     *
     * Supported assertion types: `contains`, `not_contains`, `matches_regex`
     * and `custom`. `custom` always fails here because it needs external
     * grading. Any other type fails with an explanatory `evidence` string.
     *
     * @param {string} output - Text produced by the run.
     * @param {Array<{ id: string, description: string, type: string, value: string }>} assertions
     * @returns {Array<{ assertionId: string, description: string, passed: boolean, evidence: string }>}
     */
    gradeOutput(output, assertions) {
        return assertions.map((assertion) => {
            let passed = false;
            let evidence = '';
            switch (assertion.type) {
                case 'contains':
                    passed = output.includes(assertion.value);
                    evidence = passed
                        ? `Output contains "${assertion.value}"`
                        : `Output does not contain "${assertion.value}"`;
                    break;
                case 'not_contains':
                    passed = !output.includes(assertion.value);
                    evidence = passed
                        ? `Output correctly excludes "${assertion.value}"`
                        : `Output unexpectedly contains "${assertion.value}"`;
                    break;
                case 'matches_regex': {
                    // NOTE(review): the pattern comes from stored eval data; a
                    // pathological pattern could be slow (ReDoS) — confirm the
                    // eval definitions are trusted.
                    try {
                        const regex = new RegExp(assertion.value);
                        passed = regex.test(output);
                        evidence = passed
                            ? `Output matches pattern /${assertion.value}/`
                            : `Output does not match pattern /${assertion.value}/`;
                    }
                    catch {
                        // An uncompilable pattern is a failed assertion, not a crash.
                        passed = false;
                        evidence = `Invalid regex pattern: ${assertion.value}`;
                    }
                    break;
                }
                case 'custom':
                    // Custom assertions require external grading (LLM or script)
                    passed = false;
                    evidence = 'Custom assertion requires external grading';
                    break;
                default:
                    // Fail with a diagnostic instead of an empty evidence string.
                    passed = false;
                    evidence = `Unknown assertion type: ${assertion.type}`;
                    break;
            }
            return {
                assertionId: assertion.id,
                description: assertion.description,
                passed,
                evidence,
            };
        });
    }

    /**
     * Get all runs for an eval case, newest first.
     *
     * @param {string} evalId
     * @returns {object[]} Run results with `grades` decoded.
     */
    getRunsForEval(evalId) {
        const rows = this.db.prepare(`
            SELECT * FROM skill_eval_runs WHERE evalId = ? ORDER BY createdAt DESC
        `).all(evalId);
        return rows.map((row) => this.rowToRunResult(row));
    }

    /**
     * Get all runs for a skill, newest first.
     *
     * @param {string} skillName
     * @returns {object[]} Run results with `grades` decoded.
     */
    getRunsForSkill(skillName) {
        const rows = this.db.prepare(`
            SELECT * FROM skill_eval_runs WHERE skillName = ? ORDER BY createdAt DESC
        `).all(skillName);
        return rows.map((row) => this.rowToRunResult(row));
    }

    /**
     * Get latest runs grouped by eval and variant.
     *
     * @param {string} skillName
     * @returns {Map<string, { withSkill: object | null, baseline: object | null }>}
     *   One entry per eval id that has at least one run.
     */
    getLatestRuns(skillName) {
        // Runs arrive newest first, so the first run seen per variant is the latest.
        const runs = this.getRunsForSkill(skillName);
        const grouped = new Map();
        for (const run of runs) {
            let entry = grouped.get(run.evalId);
            if (!entry) {
                entry = { withSkill: null, baseline: null };
                grouped.set(run.evalId, entry);
            }
            if (run.variant === 'with_skill' && !entry.withSkill) {
                entry.withSkill = run;
            }
            else if (run.variant === 'baseline' && !entry.baseline) {
                entry.baseline = run;
            }
        }
        return grouped;
    }

    /**
     * Delete all eval cases and runs for a skill in one transaction.
     *
     * @param {string} skillName
     * @returns {number} The `changes` count reported by the eval-case delete
     *   (runs removed are not included in the count).
     */
    deleteEvalSet(skillName) {
        const deleteRuns = this.db.prepare(`DELETE FROM skill_eval_runs WHERE skillName = ?`);
        const deleteCases = this.db.prepare(`DELETE FROM skill_eval_cases WHERE skillName = ?`);
        const transaction = this.db.transaction(() => {
            deleteRuns.run(skillName);
            const result = deleteCases.run(skillName);
            return result.changes;
        });
        return transaction();
    }

    /**
     * Convert a `skill_eval_cases` row into an eval-case object.
     *
     * @param {object} row - Raw database row.
     * @returns {object} Eval case with `files` and `assertions` decoded to arrays.
     */
    rowToEvalCase(row) {
        return {
            id: row.id,
            skillName: row.skillName,
            prompt: row.prompt,
            expectedOutput: row.expectedOutput,
            files: parseJsonColumn(row.files),
            assertions: parseJsonColumn(row.assertions),
        };
    }

    /**
     * Convert a `skill_eval_runs` row into a run-result object.
     *
     * @param {object} row - Raw database row.
     * @returns {object} Run result; the row's `id` is exposed as `runId` and
     *   `grades` is decoded to an array.
     */
    rowToRunResult(row) {
        return {
            evalId: row.evalId,
            runId: row.id,
            skillName: row.skillName,
            variant: row.variant,
            status: row.status,
            output: row.output,
            grades: parseJsonColumn(row.grades),
            durationMs: row.durationMs,
            tokenCount: row.tokenCount,
            createdAt: row.createdAt,
        };
    }
}
256
+ //# sourceMappingURL=SkillEvalRunner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SkillEvalRunner.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillEvalRunner.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,sEAAsE;AACtE,EAAE;AACF,wDAAwD;AACxD,wDAAwD;AACxD,2CAA2C;AAC3C,qCAAqC;AACrC,8BAA8B;AAE9B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAmFpC,MAAM,OAAO,eAAe;IAClB,EAAE,CAA2C;IAErD,YAAY,OAAsB;QAChC,IAAI,CAAC,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAEO,gBAAgB;QACtB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8BZ,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,KAAmB;QACtC,MAAM,KAAK,GAAoB,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGlC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,KAA4B,EAAE,EAAE;YACtE,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,EAAE,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC3F,MAAM,UAAU,GAAoB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACxE,EAAE,EAAE,UAAU,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE;oBAC1D,WAAW,EAAE,CAAC,CAAC,WAAW;oBAC1B,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK;iBACf,CAAC,CAAC,CAAC;gBAEJ,UAAU,CAAC,GAAG,CACZ,EAAE,EACF,KAAK,CAAC,SAAS,EACf,QAAQ,CAAC,MAAM,EACf,QAAQ,CAAC,cAAc,EACvB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC,EACpC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,EAC1B,GAAG,EACH,GAAG,CACJ,CAAC;gBAEF,KAAK,CAAC,IAAI,CAAC;oBACT,EAAE;oBACF,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,MAAM,EAAE,QAAQ,CAAC,MAAM;oBACvB,cAAc,EAAE,QAAQ,CAAC,cAAc;oBACvC,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;oBAC3B,UAAU;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACI,YAAY,CAAC,SAAiB;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAkB,CAAC;QACnC,OAAO,IAAI,CAAC,G
AAG,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACtC,CAAC;IAED;;OAEG;IACI,WAAW,CAAC,MAAc;QAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,MAAM,CAA4B,CAAC;QAC1C,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,QAAQ,CAAC,MAAc,EAAE,SAAiB,EAAE,OAAkC;QACnF,MAAM,EAAE,GAAG,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC1F,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAE5C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACI,WAAW,CAChB,KAAa,EACb,MAAc,EACd,MAAwB,EACxB,UAAkB,EAClB,UAAkB;QAElB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACrE,MAAM,MAAM,GAAe,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE3D,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAIf,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC;IAChF,CAAC;IAED;;OAEG;IACI,OAAO,CAAC,KAAa,EAAE,YAAoB;QAChD,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAEf,CAAC,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACI,WAAW,CAAC,MAAc,EAAE,UAA2B;QAC5D,OAAO,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE;YAChC,IAAI,MAAM,GAAG,KAAK,CAAC;YACnB,IAAI,QAAQ,GAAG,EAAE,CAAC;YAElB,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;gBACvB,KAAK,UAAU;oBACb,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;oBAC1C,QAAQ,GAAG,MAAM;wBACf,CAAC,CAAC,oBAAoB,SAAS,CAAC,KAAK,GAAG;wBACxC,CAAC,CAAC,4BAA4B,SAAS,CAAC,KAAK,GAAG,CAAC;oBACnD,MAAM;gBAER,KAAK,cAAc;oBACjB,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;oBAC3C,QAAQ,GAAG,MAAM;wBACf,CAAC,CAAC,8BAA8B,SAAS,CAAC,KAAK,GAAG;wBAClD,CAAC,CAAC,iCAAiC,SAAS,CAAC,KAAK,GAAG,CAAC;oBACxD,MAAM;gBAER,KAAK,eAAe,CAAC,CAAC,CAAC;oBACrB,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;wBAC1C,MAAM,GAAG,KAAK,CAAC,IAAI,
CAAC,MAAM,CAAC,CAAC;wBAC5B,QAAQ,GAAG,MAAM;4BACf,CAAC,CAAC,2BAA2B,SAAS,CAAC,KAAK,GAAG;4BAC/C,CAAC,CAAC,kCAAkC,SAAS,CAAC,KAAK,GAAG,CAAC;oBAC3D,CAAC;oBAAC,MAAM,CAAC;wBACP,MAAM,GAAG,KAAK,CAAC;wBACf,QAAQ,GAAG,0BAA0B,SAAS,CAAC,KAAK,EAAE,CAAC;oBACzD,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ;oBACX,6DAA6D;oBAC7D,MAAM,GAAG,KAAK,CAAC;oBACf,QAAQ,GAAG,4CAA4C,CAAC;oBACxD,MAAM;YACV,CAAC;YAED,OAAO;gBACL,WAAW,EAAE,SAAS,CAAC,EAAE;gBACzB,WAAW,EAAE,SAAS,CAAC,WAAW;gBAClC,MAAM;gBACN,QAAQ;aACT,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACI,cAAc,CAAC,MAAc;QAClC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,MAAM,CAAiB,CAAC;QAC/B,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACI,eAAe,CAAC,SAAiB;QACtC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAiB,CAAC;QAClC,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,SAAiB;QACpC,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAA+E,CAAC;QAEvG,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;YACvC,IAAI,GAAG,CAAC,OAAO,KAAK,YAAY,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;gBACrD,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC;YACxB,CAAC;iBAAM,IAAI,GAAG,CAAC,OAAO,KAAK,UAAU,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACzD,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;YACvB,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,SAAiB;QACpC,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iDAAiD,CAAC,CAAC;QACtF,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC;QAExF,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC3C,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1B,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1C,OAAO,MAAM,CAAC,OAAO,CAAC;QACxB,CAAC,CAAC,CAAC;QAEH,OAAO,WAAW,EAAE,CAAC;IACvB,
CAAC;IAEO,aAAa,CAAC,GAAgB;QACpC,OAAO;YACL,EAAE,EAAE,GAAG,CAAC,EAAE;YACV,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC;YAC5B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC;SACvC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,GAAe;QACpC,OAAO;YACL,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAoC;YACjD,MAAM,EAAE,GAAG,CAAC,MAAoB;YAChC,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;YAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;SACzB,CAAC;IACJ,CAAC;CACF"}
@@ -17,9 +17,9 @@ export class SkillGapDetector {
17
17
  const truncated = prompt.slice(0, 200);
18
18
  const normalized = truncated.toLowerCase().replace(/\s+/g, ' ').trim();
19
19
  try {
20
- this.db.prepare(`
21
- INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
22
- VALUES (?, ?, ?, ?, ?)
20
+ this.db.prepare(`
21
+ INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
22
+ VALUES (?, ?, ?, ?, ?)
23
23
  `).run(id, truncated, normalized, sessionId || null, new Date().toISOString());
24
24
  }
25
25
  catch {
@@ -33,13 +33,13 @@ export class SkillGapDetector {
33
33
  const result = { newGaps: [], totalClusters: 0 };
34
34
  try {
35
35
  // Cluster by normalizedPrompt
36
- const clusters = this.db.prepare(`
37
- SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
38
- FROM skill_gaps
39
- GROUP BY normalizedPrompt
40
- HAVING count >= 3
41
- ORDER BY count DESC
42
- LIMIT ?
36
+ const clusters = this.db.prepare(`
37
+ SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
38
+ FROM skill_gaps
39
+ GROUP BY normalizedPrompt
40
+ HAVING count >= 3
41
+ ORDER BY count DESC
42
+ LIMIT ?
43
43
  `).all(limit);
44
44
  result.totalClusters = clusters.length;
45
45
  for (const cluster of clusters) {