@su-record/vibe 2.7.10 → 2.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +37 -37
- package/CLAUDE.md +126 -222
- package/LICENSE +21 -21
- package/README.md +580 -580
- package/agents/architect-low.md +41 -41
- package/agents/architect-medium.md +59 -59
- package/agents/architect.md +80 -80
- package/agents/build-error-resolver.md +115 -115
- package/agents/compounder.md +261 -261
- package/agents/diagrammer.md +178 -178
- package/agents/docs/api-documenter.md +99 -99
- package/agents/docs/changelog-writer.md +93 -93
- package/agents/e2e-tester.md +266 -266
- package/agents/explorer-low.md +42 -42
- package/agents/explorer-medium.md +59 -59
- package/agents/explorer.md +48 -48
- package/agents/implementer-low.md +43 -43
- package/agents/implementer-medium.md +52 -52
- package/agents/implementer.md +54 -54
- package/agents/junior-mentor.md +141 -141
- package/agents/planning/requirements-analyst.md +84 -84
- package/agents/planning/ux-advisor.md +83 -83
- package/agents/qa/acceptance-tester.md +86 -86
- package/agents/qa/edge-case-finder.md +93 -93
- package/agents/refactor-cleaner.md +143 -143
- package/agents/research/best-practices-agent.md +199 -199
- package/agents/research/codebase-patterns-agent.md +157 -157
- package/agents/research/framework-docs-agent.md +188 -188
- package/agents/research/security-advisory-agent.md +213 -213
- package/agents/review/architecture-reviewer.md +107 -107
- package/agents/review/complexity-reviewer.md +116 -116
- package/agents/review/data-integrity-reviewer.md +88 -88
- package/agents/review/git-history-reviewer.md +103 -103
- package/agents/review/performance-reviewer.md +86 -86
- package/agents/review/python-reviewer.md +150 -150
- package/agents/review/rails-reviewer.md +139 -139
- package/agents/review/react-reviewer.md +144 -144
- package/agents/review/security-reviewer.md +80 -80
- package/agents/review/simplicity-reviewer.md +140 -140
- package/agents/review/test-coverage-reviewer.md +116 -116
- package/agents/review/typescript-reviewer.md +127 -127
- package/agents/searcher.md +54 -54
- package/agents/simplifier.md +120 -120
- package/agents/tester.md +49 -49
- package/agents/ui/ui-a11y-auditor.md +93 -93
- package/agents/ui/ui-antipattern-detector.md +94 -94
- package/agents/ui/ui-dataviz-advisor.md +69 -69
- package/agents/ui/ui-design-system-gen.md +57 -57
- package/agents/ui/ui-industry-analyzer.md +49 -49
- package/agents/ui/ui-layout-architect.md +65 -65
- package/agents/ui/ui-stack-implementer.md +68 -68
- package/agents/ui/ux-compliance-reviewer.md +81 -81
- package/agents/ui-previewer.md +260 -260
- package/commands/vibe.run.md +83 -0
- package/commands/vibe.spec.review.md +558 -558
- package/commands/vibe.utils.md +413 -413
- package/commands/vibe.voice.md +79 -79
- package/dist/cli/auth.d.ts +1 -1
- package/dist/cli/auth.d.ts.map +1 -1
- package/dist/cli/auth.js +15 -7
- package/dist/cli/auth.js.map +1 -1
- package/dist/cli/collaborator.js +52 -52
- package/dist/cli/commands/evolution.js +12 -12
- package/dist/cli/commands/index.d.ts +1 -0
- package/dist/cli/commands/index.d.ts.map +1 -1
- package/dist/cli/commands/index.js +1 -0
- package/dist/cli/commands/index.js.map +1 -1
- package/dist/cli/commands/info.d.ts.map +1 -1
- package/dist/cli/commands/info.js +62 -56
- package/dist/cli/commands/info.js.map +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +9 -6
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/remove.js +14 -14
- package/dist/cli/commands/sentinel.js +27 -27
- package/dist/cli/commands/skills.d.ts +13 -0
- package/dist/cli/commands/skills.d.ts.map +1 -0
- package/dist/cli/commands/skills.js +83 -0
- package/dist/cli/commands/skills.js.map +1 -0
- package/dist/cli/commands/slack.js +10 -10
- package/dist/cli/commands/telegram.js +12 -12
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +3 -0
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/detect.js +32 -32
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +64 -47
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/llm/claude-commands.js +16 -16
- package/dist/cli/llm/config.js +18 -18
- package/dist/cli/llm/gemini-commands.js +47 -47
- package/dist/cli/llm/gpt-commands.js +19 -19
- package/dist/cli/llm/help.js +21 -21
- package/dist/cli/postinstall/constants.d.ts +8 -0
- package/dist/cli/postinstall/constants.d.ts.map +1 -1
- package/dist/cli/postinstall/constants.js +33 -0
- package/dist/cli/postinstall/constants.js.map +1 -1
- package/dist/cli/postinstall/cursor-agents.js +32 -32
- package/dist/cli/postinstall/cursor-rules.js +83 -83
- package/dist/cli/postinstall/cursor-skills.js +743 -743
- package/dist/cli/postinstall/index.d.ts +1 -1
- package/dist/cli/postinstall/index.d.ts.map +1 -1
- package/dist/cli/postinstall/index.js +1 -1
- package/dist/cli/postinstall/index.js.map +1 -1
- package/dist/cli/setup/ProjectSetup.d.ts.map +1 -1
- package/dist/cli/setup/ProjectSetup.js +5 -0
- package/dist/cli/setup/ProjectSetup.js.map +1 -1
- package/dist/cli/setup/Provisioner.js +42 -42
- package/dist/cli/types.d.ts +1 -0
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/infra/lib/DeepInit.js +24 -24
- package/dist/infra/lib/IterationTracker.js +11 -11
- package/dist/infra/lib/PythonParser.js +108 -108
- package/dist/infra/lib/ReviewRace.js +96 -96
- package/dist/infra/lib/SkillFrontmatter.js +28 -28
- package/dist/infra/lib/SkillQualityGate.js +9 -9
- package/dist/infra/lib/SkillRepository.js +159 -159
- package/dist/infra/lib/UltraQA.js +99 -99
- package/dist/infra/lib/autonomy/AuditStore.js +41 -41
- package/dist/infra/lib/autonomy/ConfirmationStore.js +30 -30
- package/dist/infra/lib/autonomy/EventOutbox.js +38 -38
- package/dist/infra/lib/autonomy/PolicyEngine.js +18 -18
- package/dist/infra/lib/autonomy/SecuritySentinel.js +1 -1
- package/dist/infra/lib/autonomy/SuggestionStore.js +33 -33
- package/dist/infra/lib/embedding/VectorStore.js +22 -22
- package/dist/infra/lib/evolution/AgentAnalyzer.js +10 -10
- package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts +79 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.d.ts.map +1 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.js +259 -0
- package/dist/infra/lib/evolution/DescriptionOptimizer.js.map +1 -0
- package/dist/infra/lib/evolution/GenerationRegistry.js +36 -36
- package/dist/infra/lib/evolution/InsightStore.js +90 -90
- package/dist/infra/lib/evolution/RollbackManager.js +5 -5
- package/dist/infra/lib/evolution/SkillBenchmark.d.ts +81 -0
- package/dist/infra/lib/evolution/SkillBenchmark.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillBenchmark.js +233 -0
- package/dist/infra/lib/evolution/SkillBenchmark.js.map +1 -0
- package/dist/infra/lib/evolution/SkillClassifier.d.ts +35 -0
- package/dist/infra/lib/evolution/SkillClassifier.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillClassifier.js +167 -0
- package/dist/infra/lib/evolution/SkillClassifier.js.map +1 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.d.ts +102 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.d.ts.map +1 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.js +256 -0
- package/dist/infra/lib/evolution/SkillEvalRunner.js.map +1 -0
- package/dist/infra/lib/evolution/SkillGapDetector.js +10 -10
- package/dist/infra/lib/evolution/UsageTracker.js +28 -28
- package/dist/infra/lib/evolution/__tests__/eval.test.d.ts +2 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.d.ts.map +1 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.js +539 -0
- package/dist/infra/lib/evolution/__tests__/eval.test.js.map +1 -0
- package/dist/infra/lib/evolution/index.d.ts +8 -0
- package/dist/infra/lib/evolution/index.d.ts.map +1 -1
- package/dist/infra/lib/evolution/index.js +5 -0
- package/dist/infra/lib/evolution/index.js.map +1 -1
- package/dist/infra/lib/gemini/constants.js +14 -14
- package/dist/infra/lib/gemini/orchestration.js +5 -5
- package/dist/infra/lib/gpt/oauth.js +44 -44
- package/dist/infra/lib/gpt/orchestration.js +4 -4
- package/dist/infra/lib/memory/KnowledgeGraph.js +4 -4
- package/dist/infra/lib/memory/MemorySearch.js +57 -57
- package/dist/infra/lib/memory/MemoryStorage.js +181 -181
- package/dist/infra/lib/memory/ObservationStore.js +28 -28
- package/dist/infra/lib/memory/ReflectionStore.js +30 -30
- package/dist/infra/lib/memory/SessionRAGRetriever.js +7 -7
- package/dist/infra/lib/memory/SessionRAGStore.js +225 -225
- package/dist/infra/lib/memory/SessionSummarizer.js +9 -9
- package/dist/infra/orchestrator/AgentManager.js +12 -12
- package/dist/infra/orchestrator/AgentRegistry.js +65 -65
- package/dist/infra/orchestrator/MultiLlmResearch.js +8 -8
- package/dist/infra/orchestrator/SwarmOrchestrator.test.js +16 -16
- package/dist/infra/orchestrator/parallelResearch.js +24 -24
- package/dist/tools/convention/analyzeComplexity.test.js +115 -115
- package/dist/tools/convention/validateCodeQuality.test.js +104 -104
- package/dist/tools/memory/createMemoryTimeline.js +10 -10
- package/dist/tools/memory/getMemoryGraph.js +12 -12
- package/dist/tools/memory/getSessionContext.js +9 -9
- package/dist/tools/memory/linkMemories.js +14 -14
- package/dist/tools/memory/listMemories.js +4 -4
- package/dist/tools/memory/recallMemory.js +4 -4
- package/dist/tools/memory/saveMemory.js +4 -4
- package/dist/tools/memory/searchMemoriesAdvanced.js +23 -23
- package/dist/tools/semantic/analyzeDependencyGraph.js +12 -12
- package/dist/tools/semantic/astGrep.test.js +6 -6
- package/dist/tools/spec/prdParser.test.js +171 -171
- package/dist/tools/spec/specGenerator.js +169 -169
- package/dist/tools/spec/traceabilityMatrix.js +64 -64
- package/dist/tools/spec/traceabilityMatrix.test.js +28 -28
- package/hooks/gemini-hooks.json +73 -73
- package/hooks/hooks.json +137 -137
- package/hooks/scripts/code-check.js +70 -70
- package/hooks/scripts/context-save.js +212 -212
- package/hooks/scripts/hud-status.js +291 -291
- package/hooks/scripts/keyword-detector.js +214 -214
- package/hooks/scripts/llm-orchestrate.js +646 -646
- package/hooks/scripts/post-edit.js +32 -32
- package/hooks/scripts/pre-tool-guard.js +125 -125
- package/hooks/scripts/prompt-dispatcher.js +185 -185
- package/hooks/scripts/sentinel-guard.js +104 -104
- package/hooks/scripts/session-start.js +106 -106
- package/hooks/scripts/stop-notify.js +209 -209
- package/hooks/scripts/utils.js +100 -100
- package/languages/csharp-unity.md +515 -515
- package/languages/gdscript-godot.md +470 -470
- package/languages/ruby-rails.md +489 -489
- package/languages/typescript-angular.md +433 -433
- package/languages/typescript-astro.md +416 -416
- package/languages/typescript-electron.md +406 -406
- package/languages/typescript-nestjs.md +524 -524
- package/languages/typescript-svelte.md +407 -407
- package/languages/typescript-tauri.md +365 -365
- package/package.json +121 -121
- package/skills/agents-md/SKILL.md +120 -120
- package/skills/arch-guard/SKILL.md +180 -0
- package/skills/brand-assets/SKILL.md +146 -146
- package/skills/capability-loop/SKILL.md +167 -0
- package/skills/characterization-test/SKILL.md +206 -206
- package/skills/commerce-patterns/SKILL.md +59 -59
- package/skills/commit-push-pr/SKILL.md +75 -75
- package/skills/context7-usage/SKILL.md +105 -105
- package/skills/core-capabilities/SKILL.md +48 -48
- package/skills/e2e-commerce/SKILL.md +57 -57
- package/skills/exec-plan/SKILL.md +147 -0
- package/skills/frontend-design/SKILL.md +73 -73
- package/skills/git-worktree/SKILL.md +72 -72
- package/skills/handoff/SKILL.md +109 -109
- package/skills/parallel-research/SKILL.md +87 -87
- package/skills/priority-todos/SKILL.md +63 -63
- package/skills/seo-checklist/SKILL.md +57 -57
- package/skills/techdebt/SKILL.md +122 -122
- package/skills/tool-fallback/SKILL.md +103 -103
- package/skills/typescript-advanced-types/SKILL.md +65 -65
- package/skills/ui-ux-pro-max/SKILL.md +206 -206
- package/skills/vercel-react-best-practices/SKILL.md +59 -59
- package/skills/video-production/SKILL.md +51 -51
- package/vibe/config.json +29 -29
- package/vibe/constitution.md +227 -227
- package/vibe/rules/principles/communication-guide.md +98 -98
- package/vibe/rules/principles/development-philosophy.md +52 -52
- package/vibe/rules/principles/quick-start.md +102 -102
- package/vibe/rules/quality/bdd-contract-testing.md +393 -393
- package/vibe/rules/quality/checklist.md +276 -276
- package/vibe/rules/quality/performance.md +236 -236
- package/vibe/rules/quality/testing-strategy.md +440 -440
- package/vibe/rules/standards/anti-patterns.md +541 -541
- package/vibe/rules/standards/code-structure.md +291 -291
- package/vibe/rules/standards/complexity-metrics.md +313 -313
- package/vibe/rules/standards/git-workflow.md +237 -237
- package/vibe/rules/standards/naming-conventions.md +198 -198
- package/vibe/rules/standards/security.md +305 -305
- package/vibe/rules/writing/document-style.md +74 -74
- package/vibe/setup.sh +31 -31
- package/vibe/templates/constitution-template.md +252 -252
- package/vibe/templates/contract-backend-template.md +526 -526
- package/vibe/templates/contract-frontend-template.md +599 -599
- package/vibe/templates/feature-template.md +96 -96
- package/vibe/templates/spec-template.md +221 -221
- package/vibe/ui-ux-data/charts.csv +26 -26
- package/vibe/ui-ux-data/colors.csv +97 -97
- package/vibe/ui-ux-data/icons.csv +101 -101
- package/vibe/ui-ux-data/landing.csv +31 -31
- package/vibe/ui-ux-data/products.csv +96 -96
- package/vibe/ui-ux-data/react-performance.csv +45 -45
- package/vibe/ui-ux-data/stacks/astro.csv +54 -54
- package/vibe/ui-ux-data/stacks/flutter.csv +53 -53
- package/vibe/ui-ux-data/stacks/html-tailwind.csv +56 -56
- package/vibe/ui-ux-data/stacks/jetpack-compose.csv +53 -53
- package/vibe/ui-ux-data/stacks/nextjs.csv +53 -53
- package/vibe/ui-ux-data/stacks/nuxt-ui.csv +51 -51
- package/vibe/ui-ux-data/stacks/nuxtjs.csv +59 -59
- package/vibe/ui-ux-data/stacks/react-native.csv +52 -52
- package/vibe/ui-ux-data/stacks/react.csv +54 -54
- package/vibe/ui-ux-data/stacks/shadcn.csv +61 -61
- package/vibe/ui-ux-data/stacks/svelte.csv +54 -54
- package/vibe/ui-ux-data/stacks/swiftui.csv +51 -51
- package/vibe/ui-ux-data/stacks/vue.csv +50 -50
- package/vibe/ui-ux-data/styles.csv +68 -68
- package/vibe/ui-ux-data/typography.csv +57 -57
- package/vibe/ui-ux-data/ui-reasoning.csv +101 -101
- package/vibe/ui-ux-data/ux-guidelines.csv +99 -99
- package/vibe/ui-ux-data/version.json +31 -31
- package/vibe/ui-ux-data/web-interface.csv +31 -31
|
@@ -15,62 +15,62 @@ export class InsightStore {
|
|
|
15
15
|
this.initializeTables();
|
|
16
16
|
}
|
|
17
17
|
initializeTables() {
|
|
18
|
-
this.db.exec(`
|
|
19
|
-
CREATE TABLE IF NOT EXISTS insights (
|
|
20
|
-
id TEXT PRIMARY KEY,
|
|
21
|
-
type TEXT NOT NULL CHECK(type IN ('pattern','anti_pattern','preference','skill_gap','optimization')),
|
|
22
|
-
title TEXT NOT NULL,
|
|
23
|
-
description TEXT NOT NULL,
|
|
24
|
-
evidence TEXT,
|
|
25
|
-
confidence REAL DEFAULT 0.5 CHECK(confidence >= 0 AND confidence <= 1),
|
|
26
|
-
occurrences INTEGER DEFAULT 1,
|
|
27
|
-
tags TEXT,
|
|
28
|
-
status TEXT NOT NULL DEFAULT 'draft' CHECK(status IN ('draft','confirmed','applied','deprecated')),
|
|
29
|
-
generatedFrom TEXT NOT NULL CHECK(generatedFrom IN ('reflection','observation','agent_stats','manual')),
|
|
30
|
-
appliedAs TEXT,
|
|
31
|
-
createdAt TEXT NOT NULL,
|
|
32
|
-
updatedAt TEXT NOT NULL
|
|
33
|
-
);
|
|
34
|
-
|
|
35
|
-
CREATE INDEX IF NOT EXISTS idx_ins_type ON insights(type);
|
|
36
|
-
CREATE INDEX IF NOT EXISTS idx_ins_status ON insights(status);
|
|
37
|
-
CREATE INDEX IF NOT EXISTS idx_ins_confidence ON insights(confidence);
|
|
38
|
-
CREATE INDEX IF NOT EXISTS idx_ins_created ON insights(createdAt);
|
|
39
|
-
CREATE INDEX IF NOT EXISTS idx_ins_source ON insights(generatedFrom);
|
|
18
|
+
this.db.exec(`
|
|
19
|
+
CREATE TABLE IF NOT EXISTS insights (
|
|
20
|
+
id TEXT PRIMARY KEY,
|
|
21
|
+
type TEXT NOT NULL CHECK(type IN ('pattern','anti_pattern','preference','skill_gap','optimization')),
|
|
22
|
+
title TEXT NOT NULL,
|
|
23
|
+
description TEXT NOT NULL,
|
|
24
|
+
evidence TEXT,
|
|
25
|
+
confidence REAL DEFAULT 0.5 CHECK(confidence >= 0 AND confidence <= 1),
|
|
26
|
+
occurrences INTEGER DEFAULT 1,
|
|
27
|
+
tags TEXT,
|
|
28
|
+
status TEXT NOT NULL DEFAULT 'draft' CHECK(status IN ('draft','confirmed','applied','deprecated')),
|
|
29
|
+
generatedFrom TEXT NOT NULL CHECK(generatedFrom IN ('reflection','observation','agent_stats','manual')),
|
|
30
|
+
appliedAs TEXT,
|
|
31
|
+
createdAt TEXT NOT NULL,
|
|
32
|
+
updatedAt TEXT NOT NULL
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
CREATE INDEX IF NOT EXISTS idx_ins_type ON insights(type);
|
|
36
|
+
CREATE INDEX IF NOT EXISTS idx_ins_status ON insights(status);
|
|
37
|
+
CREATE INDEX IF NOT EXISTS idx_ins_confidence ON insights(confidence);
|
|
38
|
+
CREATE INDEX IF NOT EXISTS idx_ins_created ON insights(createdAt);
|
|
39
|
+
CREATE INDEX IF NOT EXISTS idx_ins_source ON insights(generatedFrom);
|
|
40
40
|
`);
|
|
41
41
|
// Skill gaps table for prompt-dispatcher miss logging
|
|
42
|
-
this.db.exec(`
|
|
43
|
-
CREATE TABLE IF NOT EXISTS skill_gaps (
|
|
44
|
-
id TEXT PRIMARY KEY,
|
|
45
|
-
prompt TEXT NOT NULL,
|
|
46
|
-
normalizedPrompt TEXT,
|
|
47
|
-
sessionId TEXT,
|
|
48
|
-
createdAt TEXT NOT NULL
|
|
49
|
-
);
|
|
50
|
-
|
|
51
|
-
CREATE INDEX IF NOT EXISTS idx_sg_normalized ON skill_gaps(normalizedPrompt);
|
|
52
|
-
CREATE INDEX IF NOT EXISTS idx_sg_created ON skill_gaps(createdAt);
|
|
42
|
+
this.db.exec(`
|
|
43
|
+
CREATE TABLE IF NOT EXISTS skill_gaps (
|
|
44
|
+
id TEXT PRIMARY KEY,
|
|
45
|
+
prompt TEXT NOT NULL,
|
|
46
|
+
normalizedPrompt TEXT,
|
|
47
|
+
sessionId TEXT,
|
|
48
|
+
createdAt TEXT NOT NULL
|
|
49
|
+
);
|
|
50
|
+
|
|
51
|
+
CREATE INDEX IF NOT EXISTS idx_sg_normalized ON skill_gaps(normalizedPrompt);
|
|
52
|
+
CREATE INDEX IF NOT EXISTS idx_sg_created ON skill_gaps(createdAt);
|
|
53
53
|
`);
|
|
54
54
|
if (this.fts5Available) {
|
|
55
55
|
try {
|
|
56
|
-
this.db.exec(`
|
|
57
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS insights_fts
|
|
58
|
-
USING fts5(title, description, tags, content=insights, content_rowid=rowid);
|
|
59
|
-
|
|
60
|
-
CREATE TRIGGER IF NOT EXISTS insights_ai AFTER INSERT ON insights BEGIN
|
|
61
|
-
INSERT INTO insights_fts(rowid, title, description, tags)
|
|
62
|
-
VALUES (new.rowid, new.title, new.description, new.tags);
|
|
63
|
-
END;
|
|
64
|
-
CREATE TRIGGER IF NOT EXISTS insights_ad AFTER DELETE ON insights BEGIN
|
|
65
|
-
INSERT INTO insights_fts(insights_fts, rowid, title, description, tags)
|
|
66
|
-
VALUES('delete', old.rowid, old.title, old.description, old.tags);
|
|
67
|
-
END;
|
|
68
|
-
CREATE TRIGGER IF NOT EXISTS insights_au AFTER UPDATE ON insights BEGIN
|
|
69
|
-
INSERT INTO insights_fts(insights_fts, rowid, title, description, tags)
|
|
70
|
-
VALUES('delete', old.rowid, old.title, old.description, old.tags);
|
|
71
|
-
INSERT INTO insights_fts(rowid, title, description, tags)
|
|
72
|
-
VALUES (new.rowid, new.title, new.description, new.tags);
|
|
73
|
-
END;
|
|
56
|
+
this.db.exec(`
|
|
57
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS insights_fts
|
|
58
|
+
USING fts5(title, description, tags, content=insights, content_rowid=rowid);
|
|
59
|
+
|
|
60
|
+
CREATE TRIGGER IF NOT EXISTS insights_ai AFTER INSERT ON insights BEGIN
|
|
61
|
+
INSERT INTO insights_fts(rowid, title, description, tags)
|
|
62
|
+
VALUES (new.rowid, new.title, new.description, new.tags);
|
|
63
|
+
END;
|
|
64
|
+
CREATE TRIGGER IF NOT EXISTS insights_ad AFTER DELETE ON insights BEGIN
|
|
65
|
+
INSERT INTO insights_fts(insights_fts, rowid, title, description, tags)
|
|
66
|
+
VALUES('delete', old.rowid, old.title, old.description, old.tags);
|
|
67
|
+
END;
|
|
68
|
+
CREATE TRIGGER IF NOT EXISTS insights_au AFTER UPDATE ON insights BEGIN
|
|
69
|
+
INSERT INTO insights_fts(insights_fts, rowid, title, description, tags)
|
|
70
|
+
VALUES('delete', old.rowid, old.title, old.description, old.tags);
|
|
71
|
+
INSERT INTO insights_fts(rowid, title, description, tags)
|
|
72
|
+
VALUES (new.rowid, new.title, new.description, new.tags);
|
|
73
|
+
END;
|
|
74
74
|
`);
|
|
75
75
|
}
|
|
76
76
|
catch {
|
|
@@ -82,9 +82,9 @@ export class InsightStore {
|
|
|
82
82
|
const id = generateId();
|
|
83
83
|
const now = new Date().toISOString();
|
|
84
84
|
const confidence = Math.max(0, Math.min(1, input.confidence ?? 0.5));
|
|
85
|
-
this.db.prepare(`
|
|
86
|
-
INSERT INTO insights (id, type, title, description, evidence, confidence, occurrences, tags, status, generatedFrom, createdAt, updatedAt)
|
|
87
|
-
VALUES (?, ?, ?, ?, ?, ?, 1, ?, 'draft', ?, ?, ?)
|
|
85
|
+
this.db.prepare(`
|
|
86
|
+
INSERT INTO insights (id, type, title, description, evidence, confidence, occurrences, tags, status, generatedFrom, createdAt, updatedAt)
|
|
87
|
+
VALUES (?, ?, ?, ?, ?, ?, 1, ?, 'draft', ?, ?, ?)
|
|
88
88
|
`).run(id, input.type, input.title, input.description, JSON.stringify(input.evidence ?? []), confidence, JSON.stringify(input.tags ?? []), input.generatedFrom, now, now);
|
|
89
89
|
return id;
|
|
90
90
|
}
|
|
@@ -95,19 +95,19 @@ export class InsightStore {
|
|
|
95
95
|
findAndMergeDuplicate(title, description) {
|
|
96
96
|
if (this.fts5Available) {
|
|
97
97
|
try {
|
|
98
|
-
const row = this.db.prepare(`
|
|
99
|
-
SELECT i.id, i.occurrences, i.confidence
|
|
100
|
-
FROM insights_fts fts
|
|
101
|
-
JOIN insights i ON i.rowid = fts.rowid
|
|
102
|
-
WHERE insights_fts MATCH ?
|
|
103
|
-
ORDER BY bm25(insights_fts)
|
|
104
|
-
LIMIT 1
|
|
98
|
+
const row = this.db.prepare(`
|
|
99
|
+
SELECT i.id, i.occurrences, i.confidence
|
|
100
|
+
FROM insights_fts fts
|
|
101
|
+
JOIN insights i ON i.rowid = fts.rowid
|
|
102
|
+
WHERE insights_fts MATCH ?
|
|
103
|
+
ORDER BY bm25(insights_fts)
|
|
104
|
+
LIMIT 1
|
|
105
105
|
`).get(title);
|
|
106
106
|
if (row) {
|
|
107
107
|
const newOccurrences = row.occurrences + 1;
|
|
108
108
|
const newConfidence = Math.min(1.0, newOccurrences * 0.2 + 0.1);
|
|
109
|
-
this.db.prepare(`
|
|
110
|
-
UPDATE insights SET occurrences = ?, confidence = ?, updatedAt = ? WHERE id = ?
|
|
109
|
+
this.db.prepare(`
|
|
110
|
+
UPDATE insights SET occurrences = ?, confidence = ?, updatedAt = ? WHERE id = ?
|
|
111
111
|
`).run(newOccurrences, newConfidence, new Date().toISOString(), row.id);
|
|
112
112
|
return row.id;
|
|
113
113
|
}
|
|
@@ -121,13 +121,13 @@ export class InsightStore {
|
|
|
121
121
|
search(query, limit = 20) {
|
|
122
122
|
if (this.fts5Available) {
|
|
123
123
|
try {
|
|
124
|
-
const rows = this.db.prepare(`
|
|
125
|
-
SELECT i.*, bm25(insights_fts) as rank
|
|
126
|
-
FROM insights_fts fts
|
|
127
|
-
JOIN insights i ON i.rowid = fts.rowid
|
|
128
|
-
WHERE insights_fts MATCH ?
|
|
129
|
-
ORDER BY i.confidence DESC, rank
|
|
130
|
-
LIMIT ?
|
|
124
|
+
const rows = this.db.prepare(`
|
|
125
|
+
SELECT i.*, bm25(insights_fts) as rank
|
|
126
|
+
FROM insights_fts fts
|
|
127
|
+
JOIN insights i ON i.rowid = fts.rowid
|
|
128
|
+
WHERE insights_fts MATCH ?
|
|
129
|
+
ORDER BY i.confidence DESC, rank
|
|
130
|
+
LIMIT ?
|
|
131
131
|
`).all(query, limit);
|
|
132
132
|
return rows.map(this.rowToInsight);
|
|
133
133
|
}
|
|
@@ -136,11 +136,11 @@ export class InsightStore {
|
|
|
136
136
|
}
|
|
137
137
|
}
|
|
138
138
|
const pattern = `%${query}%`;
|
|
139
|
-
const rows = this.db.prepare(`
|
|
140
|
-
SELECT * FROM insights
|
|
141
|
-
WHERE title LIKE ? OR description LIKE ? OR tags LIKE ?
|
|
142
|
-
ORDER BY confidence DESC, createdAt DESC
|
|
143
|
-
LIMIT ?
|
|
139
|
+
const rows = this.db.prepare(`
|
|
140
|
+
SELECT * FROM insights
|
|
141
|
+
WHERE title LIKE ? OR description LIKE ? OR tags LIKE ?
|
|
142
|
+
ORDER BY confidence DESC, createdAt DESC
|
|
143
|
+
LIMIT ?
|
|
144
144
|
`).all(pattern, pattern, pattern, limit);
|
|
145
145
|
return rows.map(this.rowToInsight);
|
|
146
146
|
}
|
|
@@ -149,42 +149,42 @@ export class InsightStore {
|
|
|
149
149
|
return row ? this.rowToInsight(row) : null;
|
|
150
150
|
}
|
|
151
151
|
getActionable() {
|
|
152
|
-
const rows = this.db.prepare(`
|
|
153
|
-
SELECT * FROM insights
|
|
154
|
-
WHERE status = 'confirmed' AND type IN ('skill_gap', 'pattern', 'anti_pattern')
|
|
155
|
-
ORDER BY confidence DESC, occurrences DESC
|
|
156
|
-
LIMIT 20
|
|
152
|
+
const rows = this.db.prepare(`
|
|
153
|
+
SELECT * FROM insights
|
|
154
|
+
WHERE status = 'confirmed' AND type IN ('skill_gap', 'pattern', 'anti_pattern')
|
|
155
|
+
ORDER BY confidence DESC, occurrences DESC
|
|
156
|
+
LIMIT 20
|
|
157
157
|
`).all();
|
|
158
158
|
return rows.map(this.rowToInsight);
|
|
159
159
|
}
|
|
160
160
|
getByStatus(status, limit = 50) {
|
|
161
|
-
const rows = this.db.prepare(`
|
|
162
|
-
SELECT * FROM insights WHERE status = ? ORDER BY updatedAt DESC LIMIT ?
|
|
161
|
+
const rows = this.db.prepare(`
|
|
162
|
+
SELECT * FROM insights WHERE status = ? ORDER BY updatedAt DESC LIMIT ?
|
|
163
163
|
`).all(status, limit);
|
|
164
164
|
return rows.map(this.rowToInsight);
|
|
165
165
|
}
|
|
166
166
|
getByType(type, limit = 50) {
|
|
167
|
-
const rows = this.db.prepare(`
|
|
168
|
-
SELECT * FROM insights WHERE type = ? ORDER BY confidence DESC LIMIT ?
|
|
167
|
+
const rows = this.db.prepare(`
|
|
168
|
+
SELECT * FROM insights WHERE type = ? ORDER BY confidence DESC LIMIT ?
|
|
169
169
|
`).all(type, limit);
|
|
170
170
|
return rows.map(this.rowToInsight);
|
|
171
171
|
}
|
|
172
172
|
updateStatus(id, status) {
|
|
173
|
-
const result = this.db.prepare(`
|
|
174
|
-
UPDATE insights SET status = ?, updatedAt = ? WHERE id = ?
|
|
173
|
+
const result = this.db.prepare(`
|
|
174
|
+
UPDATE insights SET status = ?, updatedAt = ? WHERE id = ?
|
|
175
175
|
`).run(status, new Date().toISOString(), id);
|
|
176
176
|
return result.changes > 0;
|
|
177
177
|
}
|
|
178
178
|
setAppliedAs(id, generationId) {
|
|
179
|
-
const result = this.db.prepare(`
|
|
180
|
-
UPDATE insights SET appliedAs = ?, status = 'applied', updatedAt = ? WHERE id = ?
|
|
179
|
+
const result = this.db.prepare(`
|
|
180
|
+
UPDATE insights SET appliedAs = ?, status = 'applied', updatedAt = ? WHERE id = ?
|
|
181
181
|
`).run(generationId, new Date().toISOString(), id);
|
|
182
182
|
return result.changes > 0;
|
|
183
183
|
}
|
|
184
184
|
cleanupLowConfidence(maxAge = 7 * 24 * 60 * 60 * 1000) {
|
|
185
185
|
const cutoff = new Date(Date.now() - maxAge).toISOString();
|
|
186
|
-
const result = this.db.prepare(`
|
|
187
|
-
DELETE FROM insights WHERE confidence < 0.3 AND createdAt < ?
|
|
186
|
+
const result = this.db.prepare(`
|
|
187
|
+
DELETE FROM insights WHERE confidence < 0.3 AND createdAt < ?
|
|
188
188
|
`).run(cutoff);
|
|
189
189
|
return result.changes;
|
|
190
190
|
}
|
|
@@ -78,14 +78,14 @@ export class RollbackManager {
|
|
|
78
78
|
let disabled = 0;
|
|
79
79
|
const transaction = this.db.transaction(() => {
|
|
80
80
|
// Update all non-disabled/non-deleted in DB
|
|
81
|
-
const result = this.db.prepare(`
|
|
82
|
-
UPDATE generations SET status = 'disabled', updatedAt = ?
|
|
83
|
-
WHERE status IN ('draft', 'testing', 'active')
|
|
81
|
+
const result = this.db.prepare(`
|
|
82
|
+
UPDATE generations SET status = 'disabled', updatedAt = ?
|
|
83
|
+
WHERE status IN ('draft', 'testing', 'active')
|
|
84
84
|
`).run(new Date().toISOString());
|
|
85
85
|
disabled = result.changes;
|
|
86
86
|
// Rename all active files
|
|
87
|
-
const activeGens = this.db.prepare(`
|
|
88
|
-
SELECT filePath FROM generations WHERE status = 'disabled' AND filePath IS NOT NULL
|
|
87
|
+
const activeGens = this.db.prepare(`
|
|
88
|
+
SELECT filePath FROM generations WHERE status = 'disabled' AND filePath IS NOT NULL
|
|
89
89
|
`).all();
|
|
90
90
|
for (const gen of activeGens) {
|
|
91
91
|
if (gen.filePath && existsSync(gen.filePath) && !gen.filePath.endsWith('.disabled')) {
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { MemoryStorage } from '../memory/MemoryStorage.js';
|
|
2
|
+
export interface BenchmarkResult {
|
|
3
|
+
id: string;
|
|
4
|
+
skillName: string;
|
|
5
|
+
iteration: number;
|
|
6
|
+
timestamp: string;
|
|
7
|
+
summary: BenchmarkSummary;
|
|
8
|
+
evalBreakdowns: EvalBreakdown[];
|
|
9
|
+
}
|
|
10
|
+
export interface BenchmarkSummary {
|
|
11
|
+
totalEvals: number;
|
|
12
|
+
withSkill: VariantStats;
|
|
13
|
+
baseline: VariantStats;
|
|
14
|
+
delta: DeltaStats;
|
|
15
|
+
}
|
|
16
|
+
export interface VariantStats {
|
|
17
|
+
passRate: number;
|
|
18
|
+
meanDurationMs: number;
|
|
19
|
+
stddevDurationMs: number;
|
|
20
|
+
meanTokens: number;
|
|
21
|
+
stddevTokens: number;
|
|
22
|
+
totalRuns: number;
|
|
23
|
+
}
|
|
24
|
+
export interface DeltaStats {
|
|
25
|
+
passRateDelta: number;
|
|
26
|
+
durationDeltaMs: number;
|
|
27
|
+
tokenDelta: number;
|
|
28
|
+
}
|
|
29
|
+
export interface EvalBreakdown {
|
|
30
|
+
evalId: string;
|
|
31
|
+
prompt: string;
|
|
32
|
+
withSkillPassed: boolean;
|
|
33
|
+
baselinePassed: boolean;
|
|
34
|
+
withSkillDurationMs: number;
|
|
35
|
+
baselineDurationMs: number;
|
|
36
|
+
withSkillTokens: number;
|
|
37
|
+
baselineTokens: number;
|
|
38
|
+
assertionResults: Array<{
|
|
39
|
+
description: string;
|
|
40
|
+
withSkillPassed: boolean;
|
|
41
|
+
baselinePassed: boolean;
|
|
42
|
+
}>;
|
|
43
|
+
}
|
|
44
|
+
export declare class SkillBenchmark {
|
|
45
|
+
private db;
|
|
46
|
+
private evalRunner;
|
|
47
|
+
constructor(storage: MemoryStorage);
|
|
48
|
+
private initializeTables;
|
|
49
|
+
/**
|
|
50
|
+
* Aggregate latest eval runs into a benchmark
|
|
51
|
+
*/
|
|
52
|
+
aggregate(skillName: string): BenchmarkResult;
|
|
53
|
+
/**
|
|
54
|
+
* Get benchmark history for a skill
|
|
55
|
+
*/
|
|
56
|
+
getHistory(skillName: string): BenchmarkResult[];
|
|
57
|
+
/**
|
|
58
|
+
* Get the latest benchmark for a skill
|
|
59
|
+
*/
|
|
60
|
+
getLatest(skillName: string): BenchmarkResult | null;
|
|
61
|
+
/**
|
|
62
|
+
* Compare two benchmark iterations
|
|
63
|
+
*/
|
|
64
|
+
compare(skillName: string, iterA: number, iterB: number): {
|
|
65
|
+
iterationA: BenchmarkResult | null;
|
|
66
|
+
iterationB: BenchmarkResult | null;
|
|
67
|
+
improvement: DeltaStats | null;
|
|
68
|
+
};
|
|
69
|
+
/**
|
|
70
|
+
* Format benchmark as markdown report
|
|
71
|
+
*/
|
|
72
|
+
formatReport(benchmark: BenchmarkResult): string;
|
|
73
|
+
private getBenchmarkByIteration;
|
|
74
|
+
private getNextIteration;
|
|
75
|
+
private computeVariantStats;
|
|
76
|
+
private mergeAssertionResults;
|
|
77
|
+
private pct;
|
|
78
|
+
private signedPct;
|
|
79
|
+
private rowToBenchmark;
|
|
80
|
+
}
|
|
81
|
+
//# sourceMappingURL=SkillBenchmark.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillBenchmark.d.ts","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillBenchmark.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG3D,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,gBAAgB,CAAC;IAC1B,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,YAAY,CAAC;IACxB,QAAQ,EAAE,YAAY,CAAC;IACvB,KAAK,EAAE,UAAU,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,OAAO,CAAC;IACzB,cAAc,EAAE,OAAO,CAAC;IACxB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,KAAK,CAAC;QACtB,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,OAAO,CAAC;QACzB,cAAc,EAAE,OAAO,CAAC;KACzB,CAAC,CAAC;CACJ;AAWD,qBAAa,cAAc;IACzB,OAAO,CAAC,EAAE,CAA2C;IACrD,OAAO,CAAC,UAAU,CAAkB;gBAExB,OAAO,EAAE,aAAa;IAMlC,OAAO,CAAC,gBAAgB;IAgBxB;;OAEG;IACI,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,eAAe;IAuDpD;;OAEG;IACI,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,eAAe,EAAE;IAOvD;;OAEG;IACI,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI;IAO3D;;OAEG;IACI,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG;QAC/D,UAAU,EAAE,eAAe,GAAG,IAAI,CAAC;QACnC,UAAU,EAAE,eAAe,GAAG,IAAI,CAAC;QACnC,WAAW,EAAE,UAAU,GAAG,IAAI,CAAC;KAChC;IAmBD;;OAEG;IACI,YAAY,CAAC,SAAS,EAAE,eAAe,GAAG,MAAM;IAyCvD,OAAO,CAAC,uBAAuB;IAO/B,OAAO,CAAC,gBAAgB;IAOxB,OAAO,CAAC,mBAAmB;IAmB3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,GAAG;IAIX,OAAO,CAAC,SAAS;IAKjB,OAAO,CAAC,cAAc;CAUvB"}
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
// Skill Benchmark - Phase 5: Aggregate eval results into benchmarks
|
|
2
|
+
// Tracks pass rate, time, tokens across iterations
|
|
3
|
+
//
|
|
4
|
+
// Produces benchmark reports for comparing:
|
|
5
|
+
// - with-skill vs baseline performance
|
|
6
|
+
// - Current iteration vs previous iterations
|
|
7
|
+
// - Per-eval and per-assertion breakdowns
|
|
8
|
+
import { randomUUID } from 'crypto';
|
|
9
|
+
import { SkillEvalRunner } from './SkillEvalRunner.js';
|
|
10
|
+
/**
 * Aggregates the latest eval runs of a skill into numbered benchmark
 * iterations, persists them in the `skill_benchmarks` table and renders them
 * as markdown reports.
 */
export class SkillBenchmark {
  db;
  evalRunner;

  /**
   * @param storage Object exposing `getDatabase()`; it is also handed to the
   *   SkillEvalRunner that supplies eval cases and their latest runs.
   */
  constructor(storage) {
    this.db = storage.getDatabase();
    this.evalRunner = new SkillEvalRunner(storage);
    this.initializeTables();
  }

  /** Create the benchmark table and its indexes if they do not exist yet. */
  initializeTables() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS skill_benchmarks (
        id TEXT PRIMARY KEY,
        skillName TEXT NOT NULL,
        iteration INTEGER NOT NULL,
        summary TEXT NOT NULL,
        evalBreakdowns TEXT NOT NULL,
        createdAt TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_sb_skill ON skill_benchmarks(skillName);
      CREATE INDEX IF NOT EXISTS idx_sb_iter ON skill_benchmarks(skillName, iteration);
    `);
  }

  /**
   * Aggregate latest eval runs into a benchmark and store it as the next
   * iteration for the skill.
   *
   * Eval cases that have no run for a variant appear in the per-eval
   * breakdown as not passed with 0ms / 0 tokens, but they are NOT counted in
   * that variant's aggregate statistics (only existing runs are).
   *
   * @param skillName Skill whose eval cases and runs are aggregated.
   * @returns The stored benchmark: id, skillName, iteration, timestamp,
   *   summary and evalBreakdowns.
   */
  aggregate(skillName) {
    const latestRuns = this.evalRunner.getLatestRuns(skillName);
    const evalCases = this.evalRunner.getEvalCases(skillName);
    const iteration = this.getNextIteration(skillName);

    const withSkillRuns = [];
    const baselineRuns = [];
    const breakdowns = [];

    for (const evalCase of evalCases) {
      const runPair = latestRuns.get(evalCase.id);
      const ws = runPair?.withSkill ?? null;
      const bl = runPair?.baseline ?? null;

      if (ws) withSkillRuns.push(ws);
      if (bl) baselineRuns.push(bl);

      breakdowns.push({
        evalId: evalCase.id,
        prompt: evalCase.prompt,
        withSkillPassed: ws?.status === 'passed',
        baselinePassed: bl?.status === 'passed',
        withSkillDurationMs: ws?.durationMs ?? 0,
        baselineDurationMs: bl?.durationMs ?? 0,
        withSkillTokens: ws?.tokenCount ?? 0,
        baselineTokens: bl?.tokenCount ?? 0,
        assertionResults: this.mergeAssertionResults(ws, bl),
      });
    }

    const withSkillStats = this.computeVariantStats(withSkillRuns);
    const baselineStats = this.computeVariantStats(baselineRuns);

    const summary = {
      totalEvals: evalCases.length,
      withSkill: withSkillStats,
      baseline: baselineStats,
      // Deltas are always "with skill" minus "baseline".
      delta: {
        passRateDelta: withSkillStats.passRate - baselineStats.passRate,
        durationDeltaMs: withSkillStats.meanDurationMs - baselineStats.meanDurationMs,
        tokenDelta: withSkillStats.meanTokens - baselineStats.meanTokens,
      },
    };

    // Id = base-36 timestamp plus the first 8 hex characters of a UUID.
    const id = `bench-${Date.now().toString(36)}-${randomUUID().replace(/-/g, '').slice(0, 8)}`;
    const now = new Date().toISOString();

    this.db.prepare(`
      INSERT INTO skill_benchmarks (id, skillName, iteration, summary, evalBreakdowns, createdAt)
      VALUES (?, ?, ?, ?, ?, ?)
    `).run(id, skillName, iteration, JSON.stringify(summary), JSON.stringify(breakdowns), now);

    return { id, skillName, iteration, timestamp: now, summary, evalBreakdowns: breakdowns };
  }

  /**
   * Get benchmark history for a skill, ordered by ascending iteration.
   *
   * @param skillName Skill to look up.
   * @returns All stored benchmarks for the skill (empty array when none).
   */
  getHistory(skillName) {
    const rows = this.db.prepare(`
      SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration ASC
    `).all(skillName);
    // Call through `this` rather than passing the bare method reference, so
    // the mapper keeps its receiver and is not handed map's index/array args.
    return rows.map((row) => this.rowToBenchmark(row));
  }

  /**
   * Get the latest (highest-iteration) benchmark for a skill.
   *
   * @param skillName Skill to look up.
   * @returns The benchmark, or null when the skill has none.
   */
  getLatest(skillName) {
    const row = this.db.prepare(`
      SELECT * FROM skill_benchmarks WHERE skillName = ? ORDER BY iteration DESC LIMIT 1
    `).get(skillName);
    return row ? this.rowToBenchmark(row) : null;
  }

  /**
   * Compare two benchmark iterations of the same skill.
   *
   * @param skillName Skill to look up.
   * @param iterA Iteration used as the reference point.
   * @param iterB Iteration compared against the reference.
   * @returns Both benchmarks (null where missing) and `improvement`, the
   *   with-skill statistics of B minus those of A; `improvement` is null when
   *   either iteration is missing.
   */
  compare(skillName, iterA, iterB) {
    const a = this.getBenchmarkByIteration(skillName, iterA);
    const b = this.getBenchmarkByIteration(skillName, iterB);

    if (!a || !b) {
      return { iterationA: a, iterationB: b, improvement: null };
    }

    return {
      iterationA: a,
      iterationB: b,
      improvement: {
        passRateDelta: b.summary.withSkill.passRate - a.summary.withSkill.passRate,
        durationDeltaMs: b.summary.withSkill.meanDurationMs - a.summary.withSkill.meanDurationMs,
        tokenDelta: b.summary.withSkill.meanTokens - a.summary.withSkill.meanTokens,
      },
    };
  }

  /**
   * Format benchmark as markdown report: a summary table followed by one
   * section per eval breakdown. Prompts longer than 80 characters are
   * truncated and suffixed with `...`.
   *
   * @param benchmark Benchmark as returned by `aggregate` or the getters.
   * @returns The report as a single newline-joined string.
   */
  formatReport(benchmark) {
    const { summary } = benchmark;
    const lines = [
      `# Benchmark: ${benchmark.skillName} (Iteration ${benchmark.iteration})`,
      '',
      `**Date**: ${benchmark.timestamp}`,
      '',
      '## Summary',
      '',
      '| Metric | With Skill | Baseline | Delta |',
      '|--------|-----------|----------|-------|',
      `| Pass Rate | ${this.pct(summary.withSkill.passRate)} | ${this.pct(summary.baseline.passRate)} | ${this.signedPct(summary.delta.passRateDelta)} |`,
      `| Mean Duration | ${summary.withSkill.meanDurationMs.toFixed(0)}ms | ${summary.baseline.meanDurationMs.toFixed(0)}ms | ${summary.delta.durationDeltaMs > 0 ? '+' : ''}${summary.delta.durationDeltaMs.toFixed(0)}ms |`,
      `| Mean Tokens | ${summary.withSkill.meanTokens.toFixed(0)} | ${summary.baseline.meanTokens.toFixed(0)} | ${summary.delta.tokenDelta > 0 ? '+' : ''}${summary.delta.tokenDelta.toFixed(0)} |`,
      '',
      '## Per-Eval Breakdown',
      '',
    ];

    for (const bd of benchmark.evalBreakdowns) {
      const wsIcon = bd.withSkillPassed ? 'PASS' : 'FAIL';
      const blIcon = bd.baselinePassed ? 'PASS' : 'FAIL';
      lines.push(`### ${bd.evalId}`);
      lines.push(`- **Prompt**: ${bd.prompt.slice(0, 80)}${bd.prompt.length > 80 ? '...' : ''}`);
      lines.push(`- **With Skill**: ${wsIcon} (${bd.withSkillDurationMs}ms, ${bd.withSkillTokens} tokens)`);
      lines.push(`- **Baseline**: ${blIcon} (${bd.baselineDurationMs}ms, ${bd.baselineTokens} tokens)`);

      if (bd.assertionResults.length > 0) {
        lines.push('- **Assertions**:');
        for (const ar of bd.assertionResults) {
          const wsA = ar.withSkillPassed ? 'PASS' : 'FAIL';
          const blA = ar.baselinePassed ? 'PASS' : 'FAIL';
          lines.push(` - ${ar.description}: skill=${wsA}, baseline=${blA}`);
        }
      }
      // Blank line separating consecutive eval sections.
      lines.push('');
    }

    return lines.join('\n');
  }

  /**
   * Load the benchmark stored for one specific iteration.
   *
   * @returns The benchmark, or null when no such row exists.
   */
  getBenchmarkByIteration(skillName, iteration) {
    const row = this.db.prepare(`
      SELECT * FROM skill_benchmarks WHERE skillName = ? AND iteration = ?
    `).get(skillName, iteration);
    return row ? this.rowToBenchmark(row) : null;
  }

  /**
   * Compute the iteration number for the next benchmark of a skill.
   *
   * @returns The highest stored iteration plus one, or 1 when none is stored.
   */
  getNextIteration(skillName) {
    const row = this.db.prepare(`
      SELECT MAX(iteration) as maxIter FROM skill_benchmarks WHERE skillName = ?
    `).get(skillName);
    // `maxIter` is null when the skill has no rows; also tolerate a driver
    // that returns no row at all instead of throwing on `undefined.maxIter`.
    return (row?.maxIter ?? 0) + 1;
  }

  /**
   * Summarise a list of runs of one variant.
   *
   * @param runs Runs exposing `status`, `durationMs` and `tokenCount`.
   * @returns Pass rate (fraction of runs whose status is 'passed'), mean and
   *   standard deviation of duration and token count, and the run count.
   *   All values are 0 for an empty list.
   */
  computeVariantStats(runs) {
    if (runs.length === 0) {
      return { passRate: 0, meanDurationMs: 0, stddevDurationMs: 0, meanTokens: 0, stddevTokens: 0, totalRuns: 0 };
    }

    const passed = runs.filter((r) => r.status === 'passed').length;
    const durations = runs.map((r) => r.durationMs);
    const tokens = runs.map((r) => r.tokenCount);

    return {
      passRate: passed / runs.length,
      meanDurationMs: mean(durations),
      stddevDurationMs: stddev(durations),
      meanTokens: mean(tokens),
      stddevTokens: stddev(tokens),
      totalRuns: runs.length,
    };
  }

  /**
   * Merge the assertion grades of a with-skill run and a baseline run into
   * one entry per distinct description (with-skill descriptions first, then
   * baseline-only ones).
   *
   * @param ws With-skill run, or null/undefined when there is none.
   * @param bl Baseline run, or null/undefined when there is none.
   * @returns One `{ description, withSkillPassed, baselinePassed }` per
   *   description; a side without a grade for it counts as not passed.
   */
  mergeAssertionResults(ws, bl) {
    const wsGrades = ws?.grades ?? [];
    const blGrades = bl?.grades ?? [];

    // Index each side once instead of scanning the grade list per description.
    // The first grade seen for a description wins, which is what a linear
    // `find` over the list would return.
    const indexByDescription = (grades) => {
      const index = new Map();
      for (const grade of grades) {
        if (!index.has(grade.description)) {
          index.set(grade.description, grade.passed ?? false);
        }
      }
      return index;
    };
    const wsPassed = indexByDescription(wsGrades);
    const blPassed = indexByDescription(blGrades);

    const allDescriptions = new Set([...wsPassed.keys(), ...blPassed.keys()]);

    return Array.from(allDescriptions, (desc) => ({
      description: desc,
      withSkillPassed: wsPassed.get(desc) ?? false,
      baselinePassed: blPassed.get(desc) ?? false,
    }));
  }

  /** Format a fraction as a percentage with one decimal, e.g. 0.5 -> "50.0%". */
  pct(value) {
    return `${(value * 100).toFixed(1)}%`;
  }

  /** Like `pct`, but strictly positive values get an explicit leading "+". */
  signedPct(value) {
    const sign = value > 0 ? '+' : '';
    return `${sign}${(value * 100).toFixed(1)}%`;
  }

  /**
   * Convert a `skill_benchmarks` row into a benchmark object.
   * `summary` and `evalBreakdowns` are stored as JSON text and parsed here;
   * the row's `createdAt` becomes `timestamp`.
   */
  rowToBenchmark(row) {
    return {
      id: row.id,
      skillName: row.skillName,
      iteration: row.iteration,
      timestamp: row.createdAt,
      summary: JSON.parse(row.summary),
      evalBreakdowns: JSON.parse(row.evalBreakdowns),
    };
  }
}
|
|
220
|
+
// --- Utility functions ---
|
|
221
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Numbers to average.
 * @returns The sum divided by the element count, or 0 for an empty list.
 */
function mean(values) {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
|
226
|
+
/**
 * Sample standard deviation (divides by n - 1) of a list of numbers.
 *
 * @param values Numbers to measure.
 * @returns The standard deviation, or 0 when fewer than two values are given.
 */
function stddev(values) {
  const count = values.length;
  if (count < 2) {
    return 0;
  }
  const center = mean(values);
  let squaredDistance = 0;
  values.forEach((value) => {
    squaredDistance += (value - center) ** 2;
  });
  return Math.sqrt(squaredDistance / (count - 1));
}
|
|
233
|
+
//# sourceMappingURL=SkillBenchmark.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillBenchmark.js","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillBenchmark.ts"],"names":[],"mappings":"AAAA,oEAAoE;AACpE,mDAAmD;AACnD,EAAE;AACF,4CAA4C;AAC5C,uCAAuC;AACvC,6CAA6C;AAC7C,0CAA0C;AAE1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,OAAO,EAAE,eAAe,EAAiB,MAAM,sBAAsB,CAAC;AA0DtE,MAAM,OAAO,cAAc;IACjB,EAAE,CAA2C;IAC7C,UAAU,CAAkB;IAEpC,YAAY,OAAsB;QAChC,IAAI,CAAC,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,UAAU,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;QAC/C,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAEO,gBAAgB;QACtB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;KAYZ,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACI,SAAS,CAAC,SAAiB;QAChC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAEnD,MAAM,aAAa,GAAoB,EAAE,CAAC;QAC1C,MAAM,YAAY,GAAoB,EAAE,CAAC;QACzC,MAAM,UAAU,GAAoB,EAAE,CAAC;QAEvC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;YAC5C,MAAM,EAAE,GAAG,OAAO,EAAE,SAAS,IAAI,IAAI,CAAC;YACtC,MAAM,EAAE,GAAG,OAAO,EAAE,QAAQ,IAAI,IAAI,CAAC;YAErC,IAAI,EAAE;gBAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC/B,IAAI,EAAE;gBAAE,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAE9B,UAAU,CAAC,IAAI,CAAC;gBACd,MAAM,EAAE,QAAQ,CAAC,EAAE;gBACnB,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,eAAe,EAAE,EAAE,EAAE,MAAM,KAAK,QAAQ;gBACxC,cAAc,EAAE,EAAE,EAAE,MAAM,KAAK,QAAQ;gBACvC,mBAAmB,EAAE,EAAE,EAAE,UAAU,IAAI,CAAC;gBACxC,kBAAkB,EAAE,EAAE,EAAE,UAAU,IAAI,CAAC;gBACvC,eAAe,EAAE,EAAE,EAAE,UAAU,IAAI,CAAC;gBACpC,cAAc,EAAE,EAAE,EAAE,UAAU,IAAI,CAAC;gBACnC,gBAAgB,EAAE,IAAI,CAAC,qBAAqB,CAAC,EAAE,EAAE,EAAE,CAAC;aACrD,CAAC,CAAC;QACL,CAAC;QAED,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,CAAC;QAC/D,MAAM,aAAa,GAAG,IAAI,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC;QAE7D,MAAM,OAAO,GAAqB;YAChC,UAAU,EAAE,SAAS,CAAC,MAAM;YAC5B,SAAS,EAAE,cAAc;YACzB,QAAQ,EAAE,aAAa;YACvB,KAAK,EAAE;gBACL,aAAa,EAAE,cAAc,CAAC,QAAQ,GAAG,aAAa,CAAC,QAAQ;gBAC/D,eAAe,EAAE,cAAc,C
AAC,cAAc,GAAG,aAAa,CAAC,cAAc;gBAC7E,UAAU,EAAE,cAAc,CAAC,UAAU,GAAG,aAAa,CAAC,UAAU;aACjE;SACF,CAAC;QAEF,MAAM,EAAE,GAAG,SAAS,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC5F,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAErC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC;QAE3F,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,CAAC;IAC3F,CAAC;IAED;;OAEG;IACI,UAAU,CAAC,SAAiB;QACjC,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC,GAAG,CAAC,SAAS,CAAmB,CAAC;QACpC,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACI,SAAS,CAAC,SAAiB;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,SAAS,CAA6B,CAAC;QAC9C,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/C,CAAC;IAED;;OAEG;IACI,OAAO,CAAC,SAAiB,EAAE,KAAa,EAAE,KAAa;QAK5D,MAAM,CAAC,GAAG,IAAI,CAAC,uBAAuB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QACzD,MAAM,CAAC,GAAG,IAAI,CAAC,uBAAuB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAEzD,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YACb,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAC7D,CAAC;QAED,OAAO;YACL,UAAU,EAAE,CAAC;YACb,UAAU,EAAE,CAAC;YACb,WAAW,EAAE;gBACX,aAAa,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ;gBAC1E,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,cAAc;gBACxF,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU;aAC5E;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACI,YAAY,CAAC,SAA0B;QAC5C,MAAM,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC;QAC9B,MAAM,KAAK,GAAa;YACtB,gBAAgB,SAAS,CAAC,SAAS,eAAe,SAAS,CAAC,SAAS,GAAG;YACxE,EAAE;YACF,aAAa,SAAS,CAAC,SAAS,EAAE;YAClC,EAAE;YACF,YAAY;YACZ,EAAE;YACF,4CAA4C;YAC5C,2CAA2C;YAC3C,iBAAiB,IAAI,CAAC,GAAG,CAAC,OAAO,CAA
C,SAAS,CAAC,QAAQ,CAAC,MAAM,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI;YACnJ,qBAAqB,OAAO,CAAC,SAAS,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,OAAO,CAAC,KAAK,CAAC,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;YACvN,mBAAmB,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YAC7L,EAAE;YACF,uBAAuB;YACvB,EAAE;SACH,CAAC;QAEF,KAAK,MAAM,EAAE,IAAI,SAAS,CAAC,cAAc,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACpD,MAAM,MAAM,GAAG,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACnD,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,iBAAiB,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3F,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,KAAK,EAAE,CAAC,mBAAmB,OAAO,EAAE,CAAC,eAAe,UAAU,CAAC,CAAC;YACtG,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,KAAK,EAAE,CAAC,kBAAkB,OAAO,EAAE,CAAC,cAAc,UAAU,CAAC,CAAC;YAElG,IAAI,EAAE,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;gBAChC,KAAK,MAAM,EAAE,IAAI,EAAE,CAAC,gBAAgB,EAAE,CAAC;oBACrC,MAAM,GAAG,GAAG,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;oBACjD,MAAM,GAAG,GAAG,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;oBAChD,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,WAAW,WAAW,GAAG,cAAc,GAAG,EAAE,CAAC,CAAC;gBACrE,CAAC;YACH,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAEO,uBAAuB,CAAC,SAAiB,EAAE,SAAiB;QAClE,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,SAAS,EAAE,SAAS,CAA6
B,CAAC;QACzD,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/C,CAAC;IAEO,gBAAgB,CAAC,SAAiB;QACxC,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE3B,CAAC,CAAC,GAAG,CAAC,SAAS,CAA+B,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAEO,mBAAmB,CAAC,IAAqB;QAC/C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QAC/G,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;QAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QAE3C,OAAO;YACL,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC,MAAM;YAC9B,cAAc,EAAE,IAAI,CAAC,SAAS,CAAC;YAC/B,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC;YACnC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC;YACxB,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC;YAC5B,SAAS,EAAE,IAAI,CAAC,MAAM;SACvB,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAC3B,EAAwB,EACxB,EAAwB;QAExB,MAAM,QAAQ,GAAG,EAAE,EAAE,MAAM,IAAI,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,EAAE,EAAE,MAAM,IAAI,EAAE,CAAC;QAElC,mDAAmD;QACnD,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;YAC9B,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC;YACnC,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC;SACpC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC9C,WAAW,EAAE,IAAI;YACjB,eAAe,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK;YAC5E,cAAc,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK;SAC5E,CAAC,CAAC,CAAC;IACN,CAAC;IAEO,GAAG,CAAC,KAAa;QACvB,OAAO,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACxC,CAAC;IAEO,SAAS,CAAC,KAAa;QAC7B,MAAM,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAClC,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,
CAAC,CAAC,CAAC,GAAG,CAAC;IAC/C,CAAC;IAEO,cAAc,CAAC,GAAiB;QACtC,OAAO;YACL,EAAE,EAAE,GAAG,CAAC,EAAE;YACV,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;YAChC,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC;SAC/C,CAAC;IACJ,CAAC;CACF;AAED,4BAA4B;AAE5B,SAAS,IAAI,CAAC,MAAgB;IAC5B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,MAAM,CAAC,MAAgB;IAC9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACzB,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC1F,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { MemoryStorage } from '../memory/MemoryStorage.js';
|
|
2
|
+
/** Labels a skill can be classified as: capability uplift, encoded preference, or unknown. */
export type SkillCategory = 'capability_uplift' | 'encoded_preference' | 'unknown';
|
|
3
|
+
/** Outcome of classifying a single skill. */
export interface ClassificationResult {
  /** Name of the skill the result refers to. */
  skillName: string;
  /** Category assigned to the skill. */
  category: SkillCategory;
  /** Numeric confidence in `category` (scale not visible in this declaration). */
  confidence: number;
  /** Textual explanation accompanying the classification. */
  reasoning: string;
  /** Pass rate of the baseline variant. */
  baselinePassRate: number;
  /** Pass rate of the with-skill variant. */
  withSkillPassRate: number;
  /** Trend label; `'insufficient_data'` is one of the four allowed values. */
  trend:
    | 'converging'
    | 'stable'
    | 'diverging'
    | 'insufficient_data';
  /** Textual recommendation accompanying the classification. */
  recommendation: string;
}
|
|
13
|
+
/** Classifies skills into a `SkillCategory`; construction takes a MemoryStorage. */
export declare class SkillClassifier {
  private benchmark;

  constructor(storage: MemoryStorage);

  /**
   * Classify a skill using its benchmark history.
   *
   * @param skillName Skill to classify.
   */
  classify(skillName: string): ClassificationResult;

  /**
   * Classify from explicitly supplied pass rates, without a database lookup.
   *
   * @param skillName Skill the rates belong to.
   * @param withSkillPassRate Pass rate of the with-skill variant.
   * @param baselinePassRate Pass rate of the baseline variant.
   * @param trend Optional trend label to use for the result.
   */
  classifyFromRates(
    skillName: string,
    withSkillPassRate: number,
    baselinePassRate: number,
    trend?: ClassificationResult['trend'],
  ): ClassificationResult;

  /**
   * Check whether a skill is becoming obsolete, i.e. a capability uplift
   * that the model now handles on its own.
   *
   * @param skillName Skill to check.
   * @returns The verdict together with a textual reason.
   */
  isBecomingObsolete(skillName: string): {
    obsolete: boolean;
    reason: string;
  };

  private determineCategory;
  private computeTrend;
  private pct;
}
|
|
35
|
+
//# sourceMappingURL=SkillClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SkillClassifier.d.ts","sourceRoot":"","sources":["../../../../src/infra/lib/evolution/SkillClassifier.ts"],"names":[],"mappings":"AAYA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAG3D,MAAM,MAAM,aAAa,GAAG,mBAAmB,GAAG,oBAAoB,GAAG,SAAS,CAAC;AAEnF,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,aAAa,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,KAAK,EAAE,YAAY,GAAG,QAAQ,GAAG,WAAW,GAAG,mBAAmB,CAAC;IACnE,cAAc,EAAE,MAAM,CAAC;CACxB;AAUD,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAAiB;gBAEtB,OAAO,EAAE,aAAa;IAIlC;;OAEG;IACI,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,oBAAoB;IAuBxD;;OAEG;IACI,iBAAiB,CACtB,SAAS,EAAE,MAAM,EACjB,iBAAiB,EAAE,MAAM,EACzB,gBAAgB,EAAE,MAAM,EACxB,KAAK,GAAE,oBAAoB,CAAC,OAAO,CAAuB,GACzD,oBAAoB;IAIvB;;OAEG;IACI,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;IAoBnF,OAAO,CAAC,iBAAiB;IAmFzB,OAAO,CAAC,YAAY;IA6BpB,OAAO,CAAC,GAAG;CAGZ"}
|