@su-record/vibe 2.7.10 → 2.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +37 -37
- package/CLAUDE.md +126 -222
- package/LICENSE +21 -21
- package/README.md +580 -580
- package/agents/architect-low.md +41 -41
- package/agents/architect-medium.md +59 -59
- package/agents/architect.md +80 -80
- package/agents/build-error-resolver.md +115 -115
- package/agents/compounder.md +261 -261
- package/agents/diagrammer.md +178 -178
- package/agents/docs/api-documenter.md +99 -99
- package/agents/docs/changelog-writer.md +93 -93
- package/agents/e2e-tester.md +266 -266
- package/agents/explorer-low.md +42 -42
- package/agents/explorer-medium.md +59 -59
- package/agents/explorer.md +48 -48
- package/agents/implementer-low.md +43 -43
- package/agents/implementer-medium.md +52 -52
- package/agents/implementer.md +54 -54
- package/agents/junior-mentor.md +141 -141
- package/agents/planning/requirements-analyst.md +84 -84
- package/agents/planning/ux-advisor.md +83 -83
- package/agents/qa/acceptance-tester.md +86 -86
- package/agents/qa/edge-case-finder.md +93 -93
- package/agents/refactor-cleaner.md +143 -143
- package/agents/research/best-practices-agent.md +199 -199
- package/agents/research/codebase-patterns-agent.md +157 -157
- package/agents/research/framework-docs-agent.md +188 -188
- package/agents/research/security-advisory-agent.md +213 -213
- package/agents/review/architecture-reviewer.md +107 -107
- package/agents/review/complexity-reviewer.md +116 -116
- package/agents/review/data-integrity-reviewer.md +88 -88
- package/agents/review/git-history-reviewer.md +103 -103
- package/agents/review/performance-reviewer.md +86 -86
- package/agents/review/python-reviewer.md +150 -150
- package/agents/review/rails-reviewer.md +139 -139
- package/agents/review/react-reviewer.md +144 -144
- package/agents/review/security-reviewer.md +80 -80
- package/agents/review/simplicity-reviewer.md +140 -140
- package/agents/review/test-coverage-reviewer.md +116 -116
- package/agents/review/typescript-reviewer.md +127 -127
- package/agents/searcher.md +54 -54
- package/agents/simplifier.md +120 -120
- package/agents/tester.md +49 -49
- package/agents/ui/ui-a11y-auditor.md +93 -93
- package/agents/ui/ui-antipattern-detector.md +94 -94
- package/agents/ui/ui-dataviz-advisor.md +69 -69
- package/agents/ui/ui-design-system-gen.md +57 -57
- package/agents/ui/ui-industry-analyzer.md +49 -49
- package/agents/ui/ui-layout-architect.md +65 -65
- package/agents/ui/ui-stack-implementer.md +68 -68
- package/agents/ui/ux-compliance-reviewer.md +81 -81
- package/agents/ui-previewer.md +260 -260
- package/commands/vibe.run.md +83 -0
- package/commands/vibe.spec.review.md +558 -558
- package/commands/vibe.utils.md +413 -413
- package/commands/vibe.voice.md +79 -79
- package/dist/cli/auth.d.ts +1 -1
- package/dist/cli/auth.d.ts.map +1 -1
- package/dist/cli/auth.js +15 -7
- package/dist/cli/auth.js.map +1 -1
- package/dist/cli/collaborator.js +52 -52
- package/dist/cli/commands/evolution.js +12 -12
- package/dist/cli/commands/index.d.ts +1 -0
- package/dist/cli/commands/index.d.ts.map +1 -1
- package/dist/cli/commands/index.js +1 -0
- package/dist/cli/commands/index.js.map +1 -1
- package/dist/cli/commands/info.d.ts.map +1 -1
- package/dist/cli/commands/info.js +62 -56
- package/dist/cli/commands/info.js.map +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +9 -6
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/remove.js +14 -14
- package/dist/cli/commands/sentinel.js +27 -27
- package/dist/cli/commands/skills.d.ts +13 -0
- package/dist/cli/commands/skills.d.ts.map +1 -0
- package/dist/cli/commands/skills.js +83 -0
- package/dist/cli/commands/skills.js.map +1 -0
- package/dist/cli/commands/slack.js +10 -10
- package/dist/cli/commands/telegram.js +12 -12
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +3 -0
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/detect.js +32 -32
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +64 -47
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/llm/claude-commands.js +16 -16
- package/dist/cli/llm/config.js +18 -18
- package/dist/cli/llm/gemini-commands.js +47 -47
- package/dist/cli/llm/gpt-commands.js +19 -19
- package/dist/cli/llm/help.js +21 -21
- package/dist/cli/postinstall/constants.d.ts +8 -0
- package/dist/cli/postinstall/constants.d.ts.map +1 -1
- package/dist/cli/postinstall/constants.js +33 -0
- package/dist/cli/postinstall/constants.js.map +1 -1
- package/dist/cli/postinstall/cursor-agents.js +32 -32
- package/dist/cli/postinstall/cursor-rules.js +83 -83
- package/dist/cli/postinstall/cursor-skills.js +743 -743
- package/dist/cli/postinstall/index.d.ts +1 -1
- package/dist/cli/postinstall/index.d.ts.map +1 -1
- package/dist/cli/postinstall/index.js +1 -1
- package/dist/cli/postinstall/index.js.map +1 -1
- package/dist/cli/setup/ProjectSetup.d.ts.map +1 -1
- package/dist/cli/setup/ProjectSetup.js +5 -0
- package/dist/cli/setup/ProjectSetup.js.map +1 -1
- package/dist/cli/setup/Provisioner.js +42 -42
- package/dist/cli/types.d.ts +1 -0
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/infra/lib/DeepInit.js +24 -24
- package/dist/infra/lib/IterationTracker.js +11 -11
- package/dist/infra/lib/PythonParser.js +108 -108
- package/dist/infra/lib/ReviewRace.js +96 -96
- package/dist/infra/lib/SkillFrontmatter.js +28 -28
- package/dist/infra/lib/SkillQualityGate.js +9 -9
- package/dist/infra/lib/SkillRepository.js +159 -159
- package/dist/infra/lib/UltraQA.js +99 -99
- package/dist/infra/lib/autonomy/AuditStore.js +41 -41
- package/dist/infra/lib/autonomy/ConfirmationStore.js +30 -30
- package/dist/infra/lib/autonomy/EventOutbox.js +38 -38
- package/dist/infra/lib/autonomy/PolicyEngine.js +18 -18
- package/dist/infra/lib/autonomy/SecuritySentinel.js +1 -1
- package/dist/infra/lib/autonomy/SuggestionStore.js +33 -33
- package/dist/infra/lib/embedding/VectorStore.js +22 -22
- package/dist/infra/lib/evolution/AgentAnalyzer.js +10 -10
- package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts +79 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts.map +1 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.js +259 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.js.map +1 -0
- package/dist/infra/lib/evolution/GenerationRegistry.js +36 -36
- package/dist/infra/lib/evolution/InsightStore.js +90 -90
- package/dist/infra/lib/evolution/RollbackManager.js +5 -5
- package/dist/infra/lib/evolution/SkillBenchmark.d.ts +81 -0
- package/dist/infra/lib/evolution/SkillBenchmark.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillBenchmark.js +233 -0
- package/dist/infra/lib/evolution/SkillBenchmark.js.map +1 -0
- package/dist/infra/lib/evolution/SkillClassifier.d.ts +35 -0
- package/dist/infra/lib/evolution/SkillClassifier.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillClassifier.js +167 -0
- package/dist/infra/lib/evolution/SkillClassifier.js.map +1 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.d.ts +102 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.js +256 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.js.map +1 -0
- package/dist/infra/lib/evolution/SkillGapDetector.js +10 -10
- package/dist/infra/lib/evolution/UsageTracker.js +28 -28
- package/dist/infra/lib/evolution/__tests__/eval.test.d.ts +2 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.d.ts.map +1 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.js +539 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.js.map +1 -0
- package/dist/infra/lib/evolution/index.d.ts +8 -0
- package/dist/infra/lib/evolution/index.d.ts.map +1 -1
- package/dist/infra/lib/evolution/index.js +5 -0
- package/dist/infra/lib/evolution/index.js.map +1 -1
- package/dist/infra/lib/gemini/constants.js +14 -14
- package/dist/infra/lib/gemini/orchestration.js +5 -5
- package/dist/infra/lib/gpt/oauth.js +44 -44
- package/dist/infra/lib/gpt/orchestration.js +4 -4
- package/dist/infra/lib/memory/KnowledgeGraph.js +4 -4
- package/dist/infra/lib/memory/MemorySearch.js +57 -57
- package/dist/infra/lib/memory/MemoryStorage.js +181 -181
- package/dist/infra/lib/memory/ObservationStore.js +28 -28
- package/dist/infra/lib/memory/ReflectionStore.js +30 -30
- package/dist/infra/lib/memory/SessionRAGRetriever.js +7 -7
- package/dist/infra/lib/memory/SessionRAGStore.js +225 -225
- package/dist/infra/lib/memory/SessionSummarizer.js +9 -9
- package/dist/infra/orchestrator/AgentManager.js +12 -12
- package/dist/infra/orchestrator/AgentRegistry.js +65 -65
- package/dist/infra/orchestrator/MultiLlmResearch.js +8 -8
- package/dist/infra/orchestrator/SwarmOrchestrator.test.js +16 -16
- package/dist/infra/orchestrator/parallelResearch.js +24 -24
- package/dist/tools/convention/analyzeComplexity.test.js +115 -115
- package/dist/tools/convention/validateCodeQuality.test.js +104 -104
- package/dist/tools/memory/createMemoryTimeline.js +10 -10
- package/dist/tools/memory/getMemoryGraph.js +12 -12
- package/dist/tools/memory/getSessionContext.js +9 -9
- package/dist/tools/memory/linkMemories.js +14 -14
- package/dist/tools/memory/listMemories.js +4 -4
- package/dist/tools/memory/recallMemory.js +4 -4
- package/dist/tools/memory/saveMemory.js +4 -4
- package/dist/tools/memory/searchMemoriesAdvanced.js +23 -23
- package/dist/tools/semantic/analyzeDependencyGraph.js +12 -12
- package/dist/tools/semantic/astGrep.test.js +6 -6
- package/dist/tools/spec/prdParser.test.js +171 -171
- package/dist/tools/spec/specGenerator.js +169 -169
- package/dist/tools/spec/traceabilityMatrix.js +64 -64
- package/dist/tools/spec/traceabilityMatrix.test.js +28 -28
- package/hooks/gemini-hooks.json +73 -73
- package/hooks/hooks.json +137 -137
- package/hooks/scripts/code-check.js +70 -70
- package/hooks/scripts/context-save.js +212 -212
- package/hooks/scripts/hud-status.js +291 -291
- package/hooks/scripts/keyword-detector.js +214 -214
- package/hooks/scripts/llm-orchestrate.js +646 -646
- package/hooks/scripts/post-edit.js +32 -32
- package/hooks/scripts/pre-tool-guard.js +125 -125
- package/hooks/scripts/prompt-dispatcher.js +185 -185
- package/hooks/scripts/sentinel-guard.js +104 -104
- package/hooks/scripts/session-start.js +106 -106
- package/hooks/scripts/stop-notify.js +209 -209
- package/hooks/scripts/utils.js +100 -100
- package/languages/csharp-unity.md +515 -515
- package/languages/gdscript-godot.md +470 -470
- package/languages/ruby-rails.md +489 -489
- package/languages/typescript-angular.md +433 -433
- package/languages/typescript-astro.md +416 -416
- package/languages/typescript-electron.md +406 -406
- package/languages/typescript-nestjs.md +524 -524
- package/languages/typescript-svelte.md +407 -407
- package/languages/typescript-tauri.md +365 -365
- package/package.json +121 -121
- package/skills/agents-md/SKILL.md +120 -120
- package/skills/arch-guard/SKILL.md +180 -0
- package/skills/brand-assets/SKILL.md +146 -146
- package/skills/capability-loop/SKILL.md +167 -0
- package/skills/characterization-test/SKILL.md +206 -206
- package/skills/commerce-patterns/SKILL.md +59 -59
- package/skills/commit-push-pr/SKILL.md +75 -75
- package/skills/context7-usage/SKILL.md +105 -105
- package/skills/core-capabilities/SKILL.md +48 -48
- package/skills/e2e-commerce/SKILL.md +57 -57
- package/skills/exec-plan/SKILL.md +147 -0
- package/skills/frontend-design/SKILL.md +73 -73
- package/skills/git-worktree/SKILL.md +72 -72
- package/skills/handoff/SKILL.md +109 -109
- package/skills/parallel-research/SKILL.md +87 -87
- package/skills/priority-todos/SKILL.md +63 -63
- package/skills/seo-checklist/SKILL.md +57 -57
- package/skills/techdebt/SKILL.md +122 -122
- package/skills/tool-fallback/SKILL.md +103 -103
- package/skills/typescript-advanced-types/SKILL.md +65 -65
- package/skills/ui-ux-pro-max/SKILL.md +206 -206
- package/skills/vercel-react-best-practices/SKILL.md +59 -59
- package/skills/video-production/SKILL.md +51 -51
- package/vibe/config.json +29 -29
- package/vibe/constitution.md +227 -227
- package/vibe/rules/principles/communication-guide.md +98 -98
- package/vibe/rules/principles/development-philosophy.md +52 -52
- package/vibe/rules/principles/quick-start.md +102 -102
- package/vibe/rules/quality/bdd-contract-testing.md +393 -393
- package/vibe/rules/quality/checklist.md +276 -276
- package/vibe/rules/quality/performance.md +236 -236
- package/vibe/rules/quality/testing-strategy.md +440 -440
- package/vibe/rules/standards/anti-patterns.md +541 -541
- package/vibe/rules/standards/code-structure.md +291 -291
- package/vibe/rules/standards/complexity-metrics.md +313 -313
- package/vibe/rules/standards/git-workflow.md +237 -237
- package/vibe/rules/standards/naming-conventions.md +198 -198
- package/vibe/rules/standards/security.md +305 -305
- package/vibe/rules/writing/document-style.md +74 -74
- package/vibe/setup.sh +31 -31
- package/vibe/templates/constitution-template.md +252 -252
- package/vibe/templates/contract-backend-template.md +526 -526
- package/vibe/templates/contract-frontend-template.md +599 -599
- package/vibe/templates/feature-template.md +96 -96
- package/vibe/templates/spec-template.md +221 -221
- package/vibe/ui-ux-data/charts.csv +26 -26
- package/vibe/ui-ux-data/colors.csv +97 -97
- package/vibe/ui-ux-data/icons.csv +101 -101
- package/vibe/ui-ux-data/landing.csv +31 -31
- package/vibe/ui-ux-data/products.csv +96 -96
- package/vibe/ui-ux-data/react-performance.csv +45 -45
- package/vibe/ui-ux-data/stacks/astro.csv +54 -54
- package/vibe/ui-ux-data/stacks/flutter.csv +53 -53
- package/vibe/ui-ux-data/stacks/html-tailwind.csv +56 -56
- package/vibe/ui-ux-data/stacks/jetpack-compose.csv +53 -53
- package/vibe/ui-ux-data/stacks/nextjs.csv +53 -53
- package/vibe/ui-ux-data/stacks/nuxt-ui.csv +51 -51
- package/vibe/ui-ux-data/stacks/nuxtjs.csv +59 -59
- package/vibe/ui-ux-data/stacks/react-native.csv +52 -52
- package/vibe/ui-ux-data/stacks/react.csv +54 -54
- package/vibe/ui-ux-data/stacks/shadcn.csv +61 -61
- package/vibe/ui-ux-data/stacks/svelte.csv +54 -54
- package/vibe/ui-ux-data/stacks/swiftui.csv +51 -51
- package/vibe/ui-ux-data/stacks/vue.csv +50 -50
- package/vibe/ui-ux-data/styles.csv +68 -68
- package/vibe/ui-ux-data/typography.csv +57 -57
- package/vibe/ui-ux-data/ui-reasoning.csv +101 -101
- package/vibe/ui-ux-data/ux-guidelines.csv +99 -99
- package/vibe/ui-ux-data/version.json +31 -31
- package/vibe/ui-ux-data/web-interface.csv +31 -31
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
// Skill Classifier - Phase 5: Capability Uplift vs Encoded Preference
|
|
2
|
+
//
|
|
3
|
+
// Two types of skills (from Anthropic's taxonomy):
|
|
4
|
+
//
|
|
5
|
+
// 1. Capability Uplift: Compensates for what the model can't do well.
|
|
6
|
+
// - Becomes obsolete as models improve
|
|
7
|
+
// - Eval: if baseline (no skill) starts passing, the skill has served its purpose
|
|
8
|
+
//
|
|
9
|
+
// 2. Encoded Preference: Encodes team-specific workflow, style, or process.
|
|
10
|
+
// - Durable regardless of model improvements
|
|
11
|
+
// - Eval: baseline will never pass because the model can't know your preferences
|
|
12
|
+
import { SkillBenchmark } from './SkillBenchmark.js';
|
|
13
|
+
/**
 * Thresholds for classification.
 *
 * Pass rates are treated as fractions (they are multiplied by 100 for display).
 */
/** Baseline pass rate at or above which the model is considered to cope without the skill. */
const BASELINE_HIGH_THRESHOLD = 0.7;
/** Baseline pass rate at or below which the model is considered to fail without the skill. */
const BASELINE_LOW_THRESHOLD = 0.3;
/**
 * Smallest with-skill minus baseline gap that counts as a real uplift.
 * Half of this value is the tolerance within which a gap change counts as 'stable'.
 */
const CONVERGENCE_THRESHOLD = 0.15;
/** Number of benchmark entries required before a trend is computed. */
const MIN_BENCHMARKS_FOR_TREND = 2;

/**
 * Classifies a skill as 'capability_uplift', 'encoded_preference' or 'unknown'
 * from its with-skill and baseline pass rates and the trend of the gap between them.
 */
export class SkillClassifier {
    benchmark;

    /**
     * @param storage Handed straight to the SkillBenchmark constructor.
     */
    constructor(storage) {
        this.benchmark = new SkillBenchmark(storage);
    }

    /**
     * Classify a skill based on benchmark history.
     *
     * Uses the last history entry for the pass rates and the first/last
     * entries for the trend.
     *
     * @param {string} skillName Skill to look up in the benchmark history.
     * @returns {object} Classification result (skillName, category, confidence,
     *   reasoning, baselinePassRate, withSkillPassRate, trend, recommendation).
     */
    classify(skillName) {
        const history = this.benchmark.getHistory(skillName);
        if (history.length === 0) {
            return {
                skillName,
                category: 'unknown',
                confidence: 0,
                reasoning: 'No benchmark data available. Run evals first.',
                baselinePassRate: 0,
                withSkillPassRate: 0,
                trend: 'insufficient_data',
                recommendation: 'Create eval cases and run benchmarks to classify this skill.',
            };
        }
        const { summary } = history[history.length - 1];
        return this.determineCategory(
            skillName,
            summary.withSkill.passRate,
            summary.baseline.passRate,
            this.computeTrend(history),
            history.length,
        );
    }

    /**
     * Classify based on explicit pass rates (no DB lookup).
     *
     * @param {string} skillName
     * @param {number} withSkillPassRate Pass rate with the skill enabled.
     * @param {number} baselinePassRate Pass rate without the skill.
     * @param {string} [trend='insufficient_data'] One of 'insufficient_data',
     *   'stable', 'converging' or 'diverging'.
     * @returns {object} Classification result, same shape as classify().
     */
    classifyFromRates(skillName, withSkillPassRate, baselinePassRate, trend = 'insufficient_data') {
        // A trend supplied by the caller stands in for benchmark history. Passing a
        // fixed count of 1 here made the low-baseline branch treat every call as
        // "too little data" and silently ignore an explicit 'converging' trend.
        const benchmarkCount = trend === 'insufficient_data' ? 1 : MIN_BENCHMARKS_FOR_TREND;
        return this.determineCategory(skillName, withSkillPassRate, baselinePassRate, trend, benchmarkCount);
    }

    /**
     * Check if a skill is becoming obsolete (capability uplift that model now handles).
     *
     * @param {string} skillName
     * @returns {{obsolete: boolean, reason: string}}
     */
    isBecomingObsolete(skillName) {
        const result = this.classify(skillName);
        if (result.category === 'capability_uplift' && result.trend === 'converging') {
            return {
                obsolete: true,
                reason: `Baseline pass rate (${this.pct(result.baselinePassRate)}) is converging with skill pass rate (${this.pct(result.withSkillPassRate)}). The model may now handle this without the skill.`,
            };
        }
        if (result.baselinePassRate >= BASELINE_HIGH_THRESHOLD) {
            return {
                obsolete: true,
                reason: `Baseline pass rate is ${this.pct(result.baselinePassRate)} — the model handles this well without the skill.`,
            };
        }
        return { obsolete: false, reason: 'Skill still provides significant value.' };
    }

    /**
     * Core decision table shared by classify() and classifyFromRates().
     *
     * @param {string} skillName
     * @param {number} wsRate With-skill pass rate.
     * @param {number} blRate Baseline pass rate.
     * @param {string} trend Trend label for the with-skill/baseline gap.
     * @param {number} benchmarkCount Number of benchmark entries behind the rates;
     *   below MIN_BENCHMARKS_FOR_TREND the trend is not trusted in the low-baseline case.
     * @returns {object} Classification result.
     */
    determineCategory(skillName, wsRate, blRate, trend, benchmarkCount) {
        const gap = wsRate - blRate;
        // Every outcome shares the same envelope; only these four fields vary.
        // Key order matches the original literals so serialized output is unchanged.
        const verdict = (category, confidence, reasoning, recommendation) => ({
            skillName,
            category,
            confidence,
            reasoning,
            baselinePassRate: blRate,
            withSkillPassRate: wsRate,
            trend,
            recommendation,
        });

        // Case 1: Baseline already performs well → capability uplift (possibly obsolete)
        if (blRate >= BASELINE_HIGH_THRESHOLD) {
            return verdict(
                'capability_uplift',
                Math.min(0.9, 0.5 + blRate * 0.4),
                `Baseline pass rate is high (${this.pct(blRate)}), indicating the model can handle this well without the skill. This is a capability uplift skill that may be nearing obsolescence.`,
                gap < CONVERGENCE_THRESHOLD
                    ? 'Consider retiring this skill — the model handles this natively now.'
                    : 'Monitor baseline trend. The skill still adds some value.',
            );
        }

        // Case 2: Baseline performs poorly and skill helps a lot → likely encoded preference
        if (blRate <= BASELINE_LOW_THRESHOLD && gap > CONVERGENCE_THRESHOLD) {
            const isEncoded = trend === 'stable'
                || trend === 'diverging'
                || benchmarkCount < MIN_BENCHMARKS_FOR_TREND;
            if (isEncoded) {
                return verdict(
                    'encoded_preference',
                    Math.min(0.85, 0.4 + gap * 0.5),
                    `Low baseline (${this.pct(blRate)}) with stable gap suggests team-specific preferences the model cannot infer.`,
                    'This skill encodes team preferences. Keep and maintain it.',
                );
            }
            return verdict(
                'capability_uplift',
                0.5,
                `Low baseline (${this.pct(blRate)}) but converging trend suggests model capability gap that is closing.`,
                'Capability uplift skill. Monitor baseline improvements across model updates.',
            );
        }

        // Case 3: Middle ground — need more data or trend analysis
        if (trend === 'converging') {
            return verdict(
                'capability_uplift',
                0.6,
                `Baseline trend is converging toward skill performance, suggesting a capability gap that is closing.`,
                'Likely capability uplift. Re-evaluate after model updates.',
            );
        }
        if (trend === 'stable' || trend === 'diverging') {
            return verdict(
                'encoded_preference',
                0.55,
                `Baseline-to-skill gap is stable/diverging, suggesting persistent team-specific knowledge.`,
                'Likely encoded preference. Maintain and refine.',
            );
        }
        return verdict(
            'unknown',
            0.3,
            `Not enough data to classify confidently. Baseline: ${this.pct(blRate)}, With-skill: ${this.pct(wsRate)}.`,
            'Run more benchmark iterations to gather trend data.',
        );
    }

    /**
     * Derive a trend label from the change in the with-skill/baseline gap
     * between the first and last history entries (entries in between are ignored).
     *
     * NOTE: only the gap is inspected, so a shrinking gap is reported as
     * 'converging' whether the baseline rose or the with-skill rate fell.
     *
     * @param {Array<object>} history Benchmark entries exposing
     *   summary.withSkill.passRate and summary.baseline.passRate.
     * @returns {string} 'insufficient_data', 'stable', 'converging' or 'diverging'.
     */
    computeTrend(history) {
        if (history.length < MIN_BENCHMARKS_FOR_TREND) {
            return 'insufficient_data';
        }
        const gapOf = ({ summary }) => summary.withSkill.passRate - summary.baseline.passRate;
        const gapChange = gapOf(history[history.length - 1]) - gapOf(history[0]);
        if (Math.abs(gapChange) < CONVERGENCE_THRESHOLD / 2) {
            return 'stable';
        }
        // Shrinking gap → converging; growing gap → diverging.
        return gapChange < 0 ? 'converging' : 'diverging';
    }

    /**
     * Format a fractional rate as a percentage with one decimal place.
     *
     * @param {number} value
     * @returns {string} e.g. 0.5 → "50.0%"
     */
    pct(value) {
        return `${(value * 100).toFixed(1)}%`;
    }
}
|
|
167
|
+
//# sourceMappingURL=SkillClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillClassifier.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillClassifier.ts"],"names":[],"mappings":"AAAA,sEAAsE;AACtE,EAAE;AACF,mDAAmD;AACnD,EAAE;AACF,sEAAsE;AACtE,0CAA0C;AAC1C,qFAAqF;AACrF,EAAE;AACF,4EAA4E;AAC5E,gDAAgD;AAChD,oFAAoF;AAGpF,OAAO,EAAE,cAAc,EAAmB,MAAM,qBAAqB,CAAC;AAetE;;GAEG;AACH,MAAM,uBAAuB,GAAG,GAAG,CAAC;AACpC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AACnC,MAAM,qBAAqB,GAAG,IAAI,CAAC;AACnC,MAAM,wBAAwB,GAAG,CAAC,CAAC;AAEnC,MAAM,OAAO,eAAe;IAClB,SAAS,CAAiB;IAElC,YAAY,OAAsB;QAChC,IAAI,CAAC,SAAS,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACI,QAAQ,CAAC,SAAiB;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAErD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,SAAS;gBACnB,UAAU,EAAE,CAAC;gBACb,SAAS,EAAE,+CAA+C;gBAC1D,gBAAgB,EAAE,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,KAAK,EAAE,mBAAmB;gBAC1B,cAAc,EAAE,8DAA8D;aAC/E,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC3C,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC;QAC/C,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAEzC,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,SAAS,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACzG,CAAC;IAED;;OAEG;IACI,iBAAiB,CACtB,SAAiB,EACjB,iBAAyB,EACzB,gBAAwB,EACxB,QAAuC,mBAAmB;QAE1D,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1F,CAAC;IAED;;OAEG;IACI,kBAAkB,CAAC,SAAiB;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAExC,IAAI,MAAM,CAAC,QAAQ,KAAK,mBAAmB,IAAI,MAAM,CAAC,KAAK,KAAK,YAAY,EAAE,CAAC;YAC7E,OAAO;gBACL,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,uBAAuB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC,yCAAyC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,qDAAqD;aACjM,CAAC;QACJ,CAAC;QAED,IAAI,MAAM,CAAC,gBAAgB,IAAI,uBAAuB,EAAE,CAAC;YACvD,OAAO;gBACL,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,yBAAyB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC,mDAAmD;aACtH,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,yCAAy
C,EAAE,CAAC;IAChF,CAAC;IAEO,iBAAiB,CACvB,SAAiB,EACjB,MAAc,EACd,MAAc,EACd,KAAoC,EACpC,cAAsB;QAEtB,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAE5B,iFAAiF;QACjF,IAAI,MAAM,IAAI,uBAAuB,EAAE,CAAC;YACtC,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,mBAAmB;gBAC7B,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,GAAG,GAAG,CAAC;gBAC7C,SAAS,EAAE,+BAA+B,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,qIAAqI;gBAC/L,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,GAAG,GAAG,qBAAqB;oBACzC,CAAC,CAAC,qEAAqE;oBACvE,CAAC,CAAC,0DAA0D;aAC/D,CAAC;QACJ,CAAC;QAED,qFAAqF;QACrF,IAAI,MAAM,IAAI,sBAAsB,IAAI,GAAG,GAAG,qBAAqB,EAAE,CAAC;YACpE,MAAM,SAAS,GAAG,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,WAAW,IAAI,cAAc,GAAG,wBAAwB,CAAC;YAC3G,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,mBAAmB;gBAChE,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG;gBAC7D,SAAS,EAAE,SAAS;oBAClB,CAAC,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,8EAA8E;oBACjH,CAAC,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,uEAAuE;gBAC5G,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,SAAS;oBACvB,CAAC,CAAC,4DAA4D;oBAC9D,CAAC,CAAC,8EAA8E;aACnF,CAAC;QACJ,CAAC;QAED,2DAA2D;QAC3D,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;YAC3B,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,mBAAmB;gBAC7B,UAAU,EAAE,GAAG;gBACf,SAAS,EAAE,qGAAqG;gBAChH,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,4DAA4D;aAC7E,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,WAAW,EAAE,CAAC;YAChD,OAAO;gBACL,SAAS;gBACT,QAAQ,EAAE,oBAAoB;gBAC9B,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,2FAA2F;gBACtG,gBAAgB,EAAE,MAAM;gBACxB,iBAAiB,EAAE,MAAM;gBACzB,KAAK;gBACL,cAAc,EAAE,iDAAiD;aAClE,CAAC;QACJ,CAAC;QAED,OAAO;YACL,SAAS;YACT,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,GAAG;YACf,SAAS,EAAE,sDAAsD,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG;YACrH,gBAAgB,EAAE,MAAM;YACxB,iBAAiB,EAAE,MAAM;YACzB,KAAK;YACL,cAAc,EAAE,qDAAqD;SACtE,CAAC;IACJ,CAAC;IAEO,YAAY,CAClB,OAA0B;QAE1B,IAAI,OAAO,CAAC,MAAM,GAAG,wBAAwB,EAAE,CAAC;YA
C9C,OAAO,mBAAmB,CAAC;QAC7B,CAAC;QAED,gDAAgD;QAChD,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEzC,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACpF,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAEjF,MAAM,SAAS,GAAG,OAAO,GAAG,QAAQ,CAAC;QAErC,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,qBAAqB,GAAG,CAAC,EAAE,CAAC;YACpD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,0DAA0D;QAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,sDAAsD;QACtD,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,GAAG,CAAC,KAAa;QACvB,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACxC,CAAC;CACF"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { MemoryStorage } from '../memory/MemoryStorage.js';
|
|
2
|
+
/** Lifecycle states an eval run can be in. */
export type EvalStatus =
    | 'pending'
    | 'running'
    | 'passed'
    | 'failed'
    | 'error';
|
|
3
|
+
/** One eval case belonging to a skill: a prompt plus what is expected back. */
export interface SkillEvalCase {
    /** Identifier of this eval case. */
    id: string;
    /** Name of the skill the case belongs to. */
    skillName: string;
    /** Prompt text for the case. */
    prompt: string;
    /** Output the case expects. */
    expectedOutput: string;
    /** String entries attached to the case (presumably file paths — confirm against the runner). */
    files: Array<string>;
    /** Assertions the produced output is graded against. */
    assertions: Array<EvalAssertion>;
}
|
|
11
|
+
/** A single check applied to eval output. */
export interface EvalAssertion {
    /** Identifier of this assertion. */
    id: string;
    /** Human-readable statement of what is being checked. */
    description: string;
    /** Kind of check to perform. */
    type:
        | 'contains'
        | 'not_contains'
        | 'matches_regex'
        | 'custom';
    /** Operand for the check; how it is interpreted depends on `type`. */
    value: string;
}
|
|
17
|
+
/** Recorded outcome of running one eval case under one variant. */
export interface EvalRunResult {
    /** Identifier of the eval case that was run. */
    evalId: string;
    /** Identifier of this run. */
    runId: string;
    /** Name of the skill under evaluation. */
    skillName: string;
    /** Whether the run had the skill enabled or was the baseline. */
    variant: 'with_skill' | 'baseline';
    /** Current lifecycle state of the run. */
    status: EvalStatus;
    /** Output captured for the run. */
    output: string;
    /** Per-assertion grading results. */
    grades: Array<AssertionGrade>;
    /** Run duration in milliseconds. */
    durationMs: number;
    /** Token count recorded for the run. */
    tokenCount: number;
    /** Creation timestamp, as a string. */
    createdAt: string;
}
|
|
29
|
+
/** Result of grading output against one assertion. */
export interface AssertionGrade {
    /** Identifier of the assertion that was graded. */
    assertionId: string;
    /** Description carried alongside the grade. */
    description: string;
    /** True when the assertion held. */
    passed: boolean;
    /** Supporting text for the pass/fail decision. */
    evidence: string;
}
|
|
35
|
+
/** Input accepted by SkillEvalRunner.createEvalSet: a skill name plus its eval definitions. */
export interface EvalSetInput {
    /** Skill the eval set is created for. */
    skillName: string;
    /** Eval definitions; `files` and `assertions` may be omitted. */
    evals: {
        prompt: string;
        expectedOutput: string;
        files?: Array<string>;
        assertions?: {
            description: string;
            /** Same set of check kinds as EvalAssertion.type. */
            type: EvalAssertion['type'];
            value: string;
        }[];
    }[];
}
|
|
48
|
+
/** Declaration of the runner that stores eval cases and runs for skills and grades their output. */
export declare class SkillEvalRunner {
    private db;

    constructor(storage: MemoryStorage);

    private initializeTables;

    /** Creates the eval cases described by `input` and returns them. */
    createEvalSet(input: EvalSetInput): Array<SkillEvalCase>;

    /** Returns every eval case registered for `skillName`. */
    getEvalCases(skillName: string): Array<SkillEvalCase>;

    /** Looks up one eval case by ID; `null` when there is none. */
    getEvalCase(evalId: string): SkillEvalCase | null;

    /** Records the start of an eval run and returns a string (presumably the new run's ID). */
    startRun(
        evalId: string,
        skillName: string,
        variant: 'with_skill' | 'baseline',
    ): string;

    /** Completes a run, storing its output, grades, duration and token count. */
    completeRun(
        runId: string,
        output: string,
        grades: Array<AssertionGrade>,
        durationMs: number,
        tokenCount: number,
    ): void;

    /** Marks a run as errored with the given message. */
    failRun(runId: string, errorMessage: string): void;

    /** Grades `output` against the given assertions. */
    gradeOutput(
        output: string,
        assertions: Array<EvalAssertion>,
    ): Array<AssertionGrade>;

    /** Returns all runs recorded for one eval case. */
    getRunsForEval(evalId: string): Array<EvalRunResult>;

    /** Returns all runs recorded for a skill. */
    getRunsForSkill(skillName: string): Array<EvalRunResult>;

    /** Returns the latest runs for a skill, grouped by eval and variant. */
    getLatestRuns(skillName: string): Map<
        string,
        {
            withSkill: EvalRunResult | null;
            baseline: EvalRunResult | null;
        }
    >;

    /** Deletes all eval cases and runs for a skill; returns a number (presumably the count removed). */
    deleteEvalSet(skillName: string): number;

    private rowToEvalCase;

    private rowToRunResult;
}
|
|
102
|
+
//# sourceMappingURL=SkillEvalRunner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillEvalRunner.d.ts","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillEvalRunner.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAI3D,MAAM,MAAM,UAAU,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE/E,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,EAAE,aAAa,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,UAAU,GAAG,cAAc,GAAG,eAAe,GAAG,QAAQ,CAAC;IAC/D,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,YAAY,GAAG,UAAU,CAAC;IACnC,MAAM,EAAE,UAAU,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,cAAc,EAAE,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,KAAK,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,cAAc,EAAE,MAAM,CAAC;QACvB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;QACjB,UAAU,CAAC,EAAE,KAAK,CAAC;YACjB,WAAW,EAAE,MAAM,CAAC;YACpB,IAAI,EAAE,UAAU,GAAG,cAAc,GAAG,eAAe,GAAG,QAAQ,CAAC;YAC/D,KAAK,EAAE,MAAM,CAAC;SACf,CAAC,CAAC;KACJ,CAAC,CAAC;CACJ;AA4BD,qBAAa,eAAe;IAC1B,OAAO,CAAC,EAAE,CAA2C;gBAEzC,OAAO,EAAE,aAAa;IAKlC,OAAO,CAAC,gBAAgB;IAkCxB;;OAEG;IACI,aAAa,CAAC,KAAK,EAAE,YAAY,GAAG,aAAa,EAAE;IA6C1D;;OAEG;IACI,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,aAAa,EAAE;IAOvD;;OAEG;IACI,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI;IAOxD;;OAEG;IACI,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,GAAG,UAAU,GAAG,MAAM;IAY9F;;OAEG;IACI,WAAW,CAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,cAAc,EAAE,EACxB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,GACjB,IAAI;IAWP;;OAEG;IACI,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,IAAI;IAMzD;;OAEG;IACI,WAAW,CAAC,MAAM,EAAE,MAAM
,EAAE,UAAU,EAAE,aAAa,EAAE,GAAG,cAAc,EAAE;IAkDjF;;OAEG;IACI,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,EAAE;IAOtD;;OAEG;IACI,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,aAAa,EAAE;IAO1D;;OAEG;IACI,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE;QAAE,SAAS,EAAE,aAAa,GAAG,IAAI,CAAC;QAAC,QAAQ,EAAE,aAAa,GAAG,IAAI,CAAA;KAAE,CAAC;IAmBzH;;OAEG;IACI,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAa/C,OAAO,CAAC,aAAa;IAWrB,OAAO,CAAC,cAAc;CAcvB"}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
// Skill Eval Runner - Phase 5: Test, Measure, and Refine
|
|
2
|
+
// Defines and runs evals for skills, tracking pass/fail per assertion
|
|
3
|
+
//
|
|
4
|
+
// Inspired by Anthropic's skill-creator eval framework:
|
|
5
|
+
// - Define eval cases with prompts and expected outputs
|
|
6
|
+
// - Run with-skill vs baseline comparisons
|
|
7
|
+
// - Grade results against assertions
|
|
8
|
+
// - Aggregate into benchmarks
|
|
9
|
+
import { randomUUID } from 'crypto';
|
|
10
|
+
/**
 * Persists skill eval cases and eval runs in two SQL tables
 * (`skill_eval_cases`, `skill_eval_runs`) and grades run output against
 * simple text assertions.
 *
 * NOTE(review): the database handle is used through `exec`, `prepare(...).run/get/all`
 * and `transaction(fn)`; presumably a better-sqlite3-style synchronous handle — confirm
 * against the storage implementation.
 */
export class SkillEvalRunner {
    /** Database handle returned by `storage.getDatabase()`. */
    db;

    /**
     * @param {{ getDatabase: () => object }} storage - Object exposing `getDatabase()`.
     *   The tables and indexes are created immediately if they do not exist.
     */
    constructor(storage) {
        this.db = storage.getDatabase();
        this.initializeTables();
    }

    /**
     * Create the eval-case and eval-run tables plus their lookup indexes.
     * Every statement uses IF NOT EXISTS, so calling this repeatedly is harmless.
     */
    initializeTables() {
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS skill_eval_cases (
        id TEXT PRIMARY KEY,
        skillName TEXT NOT NULL,
        prompt TEXT NOT NULL,
        expectedOutput TEXT NOT NULL,
        files TEXT DEFAULT '[]',
        assertions TEXT DEFAULT '[]',
        createdAt TEXT NOT NULL,
        updatedAt TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_sec_skill ON skill_eval_cases(skillName);

      CREATE TABLE IF NOT EXISTS skill_eval_runs (
        id TEXT PRIMARY KEY,
        evalId TEXT NOT NULL,
        skillName TEXT NOT NULL,
        variant TEXT NOT NULL CHECK(variant IN ('with_skill','baseline')),
        status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending','running','passed','failed','error')),
        output TEXT DEFAULT '',
        grades TEXT DEFAULT '[]',
        durationMs INTEGER DEFAULT 0,
        tokenCount INTEGER DEFAULT 0,
        createdAt TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_ser_eval ON skill_eval_runs(evalId);
      CREATE INDEX IF NOT EXISTS idx_ser_skill ON skill_eval_runs(skillName);
      CREATE INDEX IF NOT EXISTS idx_ser_variant ON skill_eval_runs(variant);
    `);
    }

    /**
     * Create an eval set for a skill.
     *
     * All cases are inserted inside a single transaction. Each case and each
     * assertion receives a freshly generated id.
     *
     * @param {{ skillName: string, evals: Array<{ prompt: string, expectedOutput: string, files?: string[], assertions?: Array<{ description: string, type: string, value: string }> }> }} input
     * @returns {Array<object>} The stored eval cases, in input order, including generated ids.
     */
    createEvalSet(input) {
        const cases = [];
        const now = new Date().toISOString();
        const insertStmt = this.db.prepare(`
      INSERT INTO skill_eval_cases (id, skillName, prompt, expectedOutput, files, assertions, createdAt, updatedAt)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
        const insertMany = this.db.transaction((evals) => {
            for (const evalCase of evals) {
                const id = `eval-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
                const files = evalCase.files ?? [];
                // Copy only the known assertion fields and attach a generated id.
                const assertions = (evalCase.assertions ?? []).map((a) => ({
                    id: `assert-${randomUUID().replace(/-/g, '').slice(0, 8)}`,
                    description: a.description,
                    type: a.type,
                    value: a.value,
                }));
                insertStmt.run(id, input.skillName, evalCase.prompt, evalCase.expectedOutput, JSON.stringify(files), JSON.stringify(assertions), now, now);
                cases.push({
                    id,
                    skillName: input.skillName,
                    prompt: evalCase.prompt,
                    expectedOutput: evalCase.expectedOutput,
                    files,
                    assertions,
                });
            }
        });
        insertMany(input.evals);
        return cases;
    }

    /**
     * Get all eval cases for a skill, oldest first.
     *
     * @param {string} skillName
     * @returns {Array<object>}
     */
    getEvalCases(skillName) {
        const rows = this.db.prepare(`
      SELECT * FROM skill_eval_cases WHERE skillName = ? ORDER BY createdAt ASC
    `).all(skillName);
        // Arrow callback keeps `this` bound and avoids leaking map's index/array arguments.
        return rows.map((row) => this.rowToEvalCase(row));
    }

    /**
     * Get a single eval case by ID.
     *
     * @param {string} evalId
     * @returns {object|null} The eval case, or null when no row matches.
     */
    getEvalCase(evalId) {
        const row = this.db.prepare(`
      SELECT * FROM skill_eval_cases WHERE id = ?
    `).get(evalId);
        return row ? this.rowToEvalCase(row) : null;
    }

    /**
     * Record the start of an eval run (inserted with status 'running').
     *
     * @param {string} evalId
     * @param {string} skillName
     * @param {'with_skill'|'baseline'} variant - Must satisfy the table's CHECK constraint.
     * @returns {string} The generated run id.
     */
    startRun(evalId, skillName, variant) {
        const id = `run-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
        const now = new Date().toISOString();
        this.db.prepare(`
      INSERT INTO skill_eval_runs (id, evalId, skillName, variant, status, createdAt)
      VALUES (?, ?, ?, ?, 'running', ?)
    `).run(id, evalId, skillName, variant, now);
        return id;
    }

    /**
     * Complete an eval run with output and grades.
     *
     * The run is marked 'passed' when every grade passed and 'failed' otherwise.
     * A run with no grades counts as passed.
     *
     * @param {string} runId
     * @param {string} output
     * @param {Array<{ passed: boolean }>} grades
     * @param {number} durationMs
     * @param {number} tokenCount
     * @returns {void}
     */
    completeRun(runId, output, grades, durationMs, tokenCount) {
        // `every` is true for an empty array, which covers the "no grades" case.
        const status = grades.every((g) => g.passed) ? 'passed' : 'failed';
        this.db.prepare(`
      UPDATE skill_eval_runs
      SET status = ?, output = ?, grades = ?, durationMs = ?, tokenCount = ?
      WHERE id = ?
    `).run(status, output, JSON.stringify(grades), durationMs, tokenCount, runId);
    }

    /**
     * Mark a run as errored; the error message is stored in the run's output column.
     *
     * @param {string} runId
     * @param {string} errorMessage
     * @returns {void}
     */
    failRun(runId, errorMessage) {
        this.db.prepare(`
      UPDATE skill_eval_runs SET status = 'error', output = ? WHERE id = ?
    `).run(errorMessage, runId);
    }

    /**
     * Grade output against assertions.
     *
     * Supported assertion types: 'contains', 'not_contains', 'matches_regex'
     * and 'custom'. 'custom' always fails here because it needs external
     * grading. An invalid regex or an unrecognised type also fails, with the
     * reason recorded in `evidence`.
     *
     * @param {string} output
     * @param {Array<{ id: string, description: string, type: string, value: string }>} assertions
     * @returns {Array<{ assertionId: string, description: string, passed: boolean, evidence: string }>}
     */
    gradeOutput(output, assertions) {
        return assertions.map((assertion) => {
            let passed = false;
            let evidence = '';
            switch (assertion.type) {
                case 'contains':
                    passed = output.includes(assertion.value);
                    evidence = passed
                        ? `Output contains "${assertion.value}"`
                        : `Output does not contain "${assertion.value}"`;
                    break;
                case 'not_contains':
                    passed = !output.includes(assertion.value);
                    evidence = passed
                        ? `Output correctly excludes "${assertion.value}"`
                        : `Output unexpectedly contains "${assertion.value}"`;
                    break;
                case 'matches_regex': {
                    try {
                        const regex = new RegExp(assertion.value);
                        passed = regex.test(output);
                        evidence = passed
                            ? `Output matches pattern /${assertion.value}/`
                            : `Output does not match pattern /${assertion.value}/`;
                    }
                    catch {
                        // RegExp construction threw: report the bad pattern instead of crashing the grading pass.
                        passed = false;
                        evidence = `Invalid regex pattern: ${assertion.value}`;
                    }
                    break;
                }
                case 'custom':
                    // Custom assertions require external grading (LLM or script)
                    passed = false;
                    evidence = 'Custom assertion requires external grading';
                    break;
                default:
                    // Unrecognised type: still a failure, but say why instead of leaving evidence blank.
                    passed = false;
                    evidence = `Unknown assertion type: ${assertion.type}`;
                    break;
            }
            return {
                assertionId: assertion.id,
                description: assertion.description,
                passed,
                evidence,
            };
        });
    }

    /**
     * Get all runs for an eval case, newest first.
     *
     * @param {string} evalId
     * @returns {Array<object>}
     */
    getRunsForEval(evalId) {
        const rows = this.db.prepare(`
      SELECT * FROM skill_eval_runs WHERE evalId = ? ORDER BY createdAt DESC
    `).all(evalId);
        return rows.map((row) => this.rowToRunResult(row));
    }

    /**
     * Get all runs for a skill, newest first.
     *
     * @param {string} skillName
     * @returns {Array<object>}
     */
    getRunsForSkill(skillName) {
        const rows = this.db.prepare(`
      SELECT * FROM skill_eval_runs WHERE skillName = ? ORDER BY createdAt DESC
    `).all(skillName);
        return rows.map((row) => this.rowToRunResult(row));
    }

    /**
     * Get latest runs grouped by eval and variant.
     *
     * @param {string} skillName
     * @returns {Map<string, { withSkill: object|null, baseline: object|null }>}
     *   Keyed by evalId; a variant slot is null when no run of that variant exists.
     */
    getLatestRuns(skillName) {
        const runs = this.getRunsForSkill(skillName);
        const grouped = new Map();
        // Runs arrive newest first, so the first run seen per (evalId, variant) is the latest.
        for (const run of runs) {
            if (!grouped.has(run.evalId)) {
                grouped.set(run.evalId, { withSkill: null, baseline: null });
            }
            const entry = grouped.get(run.evalId);
            if (run.variant === 'with_skill' && !entry.withSkill) {
                entry.withSkill = run;
            }
            else if (run.variant === 'baseline' && !entry.baseline) {
                entry.baseline = run;
            }
        }
        return grouped;
    }

    /**
     * Delete all eval cases and runs for a skill in one transaction.
     *
     * @param {string} skillName
     * @returns {number} The `changes` count reported by the eval-case delete
     *   (runs removed are not included in the count).
     */
    deleteEvalSet(skillName) {
        const deleteRuns = this.db.prepare(`DELETE FROM skill_eval_runs WHERE skillName = ?`);
        const deleteCases = this.db.prepare(`DELETE FROM skill_eval_cases WHERE skillName = ?`);
        const transaction = this.db.transaction(() => {
            deleteRuns.run(skillName);
            const result = deleteCases.run(skillName);
            return result.changes;
        });
        return transaction();
    }

    /**
     * Convert a `skill_eval_cases` row into an eval-case object.
     *
     * The `files` and `assertions` columns are nullable, so a NULL cell is
     * treated as an empty list. Malformed JSON still throws.
     *
     * @param {object} row
     * @returns {object}
     */
    rowToEvalCase(row) {
        return {
            id: row.id,
            skillName: row.skillName,
            prompt: row.prompt,
            expectedOutput: row.expectedOutput,
            files: JSON.parse(row.files ?? '[]'),
            assertions: JSON.parse(row.assertions ?? '[]'),
        };
    }

    /**
     * Convert a `skill_eval_runs` row into a run-result object.
     *
     * The row's `id` is exposed as `runId`. A NULL `grades` cell is treated as
     * an empty list. Malformed JSON still throws.
     *
     * @param {object} row
     * @returns {object}
     */
    rowToRunResult(row) {
        return {
            evalId: row.evalId,
            runId: row.id,
            skillName: row.skillName,
            variant: row.variant,
            status: row.status,
            output: row.output,
            grades: JSON.parse(row.grades ?? '[]'),
            durationMs: row.durationMs,
            tokenCount: row.tokenCount,
            createdAt: row.createdAt,
        };
    }
}
|
|
256
|
+
//# sourceMappingURL=SkillEvalRunner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillEvalRunner.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillEvalRunner.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,sEAAsE;AACtE,EAAE;AACF,wDAAwD;AACxD,wDAAwD;AACxD,2CAA2C;AAC3C,qCAAqC;AACrC,8BAA8B;AAE9B,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAmFpC,MAAM,OAAO,eAAe;IAClB,EAAE,CAA2C;IAErD,YAAY,OAAsB;QAChC,IAAI,CAAC,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAEO,gBAAgB;QACtB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8BZ,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,KAAmB;QACtC,MAAM,KAAK,GAAoB,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGlC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,KAA4B,EAAE,EAAE;YACtE,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,EAAE,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC3F,MAAM,UAAU,GAAoB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACxE,EAAE,EAAE,UAAU,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE;oBAC1D,WAAW,EAAE,CAAC,CAAC,WAAW;oBAC1B,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,KAAK,EAAE,CAAC,CAAC,KAAK;iBACf,CAAC,CAAC,CAAC;gBAEJ,UAAU,CAAC,GAAG,CACZ,EAAE,EACF,KAAK,CAAC,SAAS,EACf,QAAQ,CAAC,MAAM,EACf,QAAQ,CAAC,cAAc,EACvB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC,EACpC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,EAC1B,GAAG,EACH,GAAG,CACJ,CAAC;gBAEF,KAAK,CAAC,IAAI,CAAC;oBACT,EAAE;oBACF,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,MAAM,EAAE,QAAQ,CAAC,MAAM;oBACvB,cAAc,EAAE,QAAQ,CAAC,cAAc;oBACvC,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;oBAC3B,UAAU;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACI,YAAY,CAAC,SAAiB;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAkB,CAAC;QACnC,OAAO,IAAI,CAAC,GAA
G,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACtC,CAAC;IAED;;OAEG;IACI,WAAW,CAAC,MAAc;QAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,MAAM,CAA4B,CAAC;QAC1C,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,QAAQ,CAAC,MAAc,EAAE,SAAiB,EAAE,OAAkC;QACnF,MAAM,EAAE,GAAG,OAAO,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC1F,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAE5C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACI,WAAW,CAChB,KAAa,EACb,MAAc,EACd,MAAwB,EACxB,UAAkB,EAClB,UAAkB;QAElB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACrE,MAAM,MAAM,GAAe,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE3D,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAIf,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC;IAChF,CAAC;IAED;;OAEG;IACI,OAAO,CAAC,KAAa,EAAE,YAAoB;QAChD,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAEf,CAAC,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACI,WAAW,CAAC,MAAc,EAAE,UAA2B;QAC5D,OAAO,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE;YAChC,IAAI,MAAM,GAAG,KAAK,CAAC;YACnB,IAAI,QAAQ,GAAG,EAAE,CAAC;YAElB,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;gBACvB,KAAK,UAAU;oBACb,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;oBAC1C,QAAQ,GAAG,MAAM;wBACf,CAAC,CAAC,oBAAoB,SAAS,CAAC,KAAK,GAAG;wBACxC,CAAC,CAAC,4BAA4B,SAAS,CAAC,KAAK,GAAG,CAAC;oBACnD,MAAM;gBAER,KAAK,cAAc;oBACjB,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;oBAC3C,QAAQ,GAAG,MAAM;wBACf,CAAC,CAAC,8BAA8B,SAAS,CAAC,KAAK,GAAG;wBAClD,CAAC,CAAC,iCAAiC,SAAS,CAAC,KAAK,GAAG,CAAC;oBACxD,MAAM;gBAER,KAAK,eAAe,CAAC,CAAC,CAAC;oBACrB,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;wBAC1C,MAAM,GAAG,KAAK,CAAC,IAAI,CA
AC,MAAM,CAAC,CAAC;wBAC5B,QAAQ,GAAG,MAAM;4BACf,CAAC,CAAC,2BAA2B,SAAS,CAAC,KAAK,GAAG;4BAC/C,CAAC,CAAC,kCAAkC,SAAS,CAAC,KAAK,GAAG,CAAC;oBAC3D,CAAC;oBAAC,MAAM,CAAC;wBACP,MAAM,GAAG,KAAK,CAAC;wBACf,QAAQ,GAAG,0BAA0B,SAAS,CAAC,KAAK,EAAE,CAAC;oBACzD,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ;oBACX,6DAA6D;oBAC7D,MAAM,GAAG,KAAK,CAAC;oBACf,QAAQ,GAAG,4CAA4C,CAAC;oBACxD,MAAM;YACV,CAAC;YAED,OAAO;gBACL,WAAW,EAAE,SAAS,CAAC,EAAE;gBACzB,WAAW,EAAE,SAAS,CAAC,WAAW;gBAClC,MAAM;gBACN,QAAQ;aACT,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACI,cAAc,CAAC,MAAc;QAClC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,MAAM,CAAiB,CAAC;QAC/B,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACI,eAAe,CAAC,SAAiB;QACtC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAiB,CAAC;QAClC,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,SAAiB;QACpC,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAA+E,CAAC;QAEvG,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;YACvC,IAAI,GAAG,CAAC,OAAO,KAAK,YAAY,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;gBACrD,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC;YACxB,CAAC;iBAAM,IAAI,GAAG,CAAC,OAAO,KAAK,UAAU,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACzD,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;YACvB,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACI,aAAa,CAAC,SAAiB;QACpC,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iDAAiD,CAAC,CAAC;QACtF,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC;QAExF,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC3C,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1B,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1C,OAAO,MAAM,CAAC,OAAO,CAAC;QACxB,CAAC,CAAC,CAAC;QAEH,OAAO,WAAW,EAAE,CAAC;IACvB,CA
AC;IAEO,aAAa,CAAC,GAAgB;QACpC,OAAO;YACL,EAAE,EAAE,GAAG,CAAC,EAAE;YACV,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC;YAC5B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC;SACvC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,GAAe;QACpC,OAAO;YACL,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAoC;YACjD,MAAM,EAAE,GAAG,CAAC,MAAoB;YAChC,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;YAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;SACzB,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -17,9 +17,9 @@ export class SkillGapDetector {
|
|
|
17
17
|
const truncated = prompt.slice(0, 200);
|
|
18
18
|
const normalized = truncated.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
19
19
|
try {
|
|
20
|
-
this.db.prepare(`
|
|
21
|
-
INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
|
|
22
|
-
VALUES (?, ?, ?, ?, ?)
|
|
20
|
+
this.db.prepare(`
|
|
21
|
+
INSERT INTO skill_gaps (id, prompt, normalizedPrompt, sessionId, createdAt)
|
|
22
|
+
VALUES (?, ?, ?, ?, ?)
|
|
23
23
|
`).run(id, truncated, normalized, sessionId || null, new Date().toISOString());
|
|
24
24
|
}
|
|
25
25
|
catch {
|
|
@@ -33,13 +33,13 @@ export class SkillGapDetector {
|
|
|
33
33
|
const result = { newGaps: [], totalClusters: 0 };
|
|
34
34
|
try {
|
|
35
35
|
// Cluster by normalizedPrompt
|
|
36
|
-
const clusters = this.db.prepare(`
|
|
37
|
-
SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
|
|
38
|
-
FROM skill_gaps
|
|
39
|
-
GROUP BY normalizedPrompt
|
|
40
|
-
HAVING count >= 3
|
|
41
|
-
ORDER BY count DESC
|
|
42
|
-
LIMIT ?
|
|
36
|
+
const clusters = this.db.prepare(`
|
|
37
|
+
SELECT normalizedPrompt, COUNT(*) as count, GROUP_CONCAT(prompt, '|||') as prompts
|
|
38
|
+
FROM skill_gaps
|
|
39
|
+
GROUP BY normalizedPrompt
|
|
40
|
+
HAVING count >= 3
|
|
41
|
+
ORDER BY count DESC
|
|
42
|
+
LIMIT ?
|
|
43
43
|
`).all(limit);
|
|
44
44
|
result.totalClusters = clusters.length;
|
|
45
45
|
for (const cluster of clusters) {
|