@wooojin/forgen 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/README.ko.md +25 -14
- package/README.md +61 -17
- package/agents/analyst.md +48 -4
- package/agents/architect.md +39 -4
- package/agents/code-reviewer.md +107 -77
- package/agents/critic.md +47 -4
- package/agents/debugger.md +46 -4
- package/agents/designer.md +40 -4
- package/agents/executor.md +112 -30
- package/agents/explore.md +45 -5
- package/agents/git-master.md +48 -4
- package/agents/planner.md +121 -18
- package/agents/solution-evolver.md +115 -0
- package/agents/test-engineer.md +58 -4
- package/agents/verifier.md +92 -77
- package/commands/architecture-decision.md +127 -258
- package/commands/calibrate.md +225 -0
- package/commands/code-review.md +163 -178
- package/commands/compound.md +127 -68
- package/commands/deep-interview.md +212 -110
- package/commands/docker.md +68 -178
- package/commands/forge-loop.md +215 -0
- package/commands/learn.md +231 -0
- package/commands/retro.md +215 -0
- package/commands/ship.md +277 -0
- package/dist/cli.js +25 -9
- package/dist/core/auto-compound-runner.js +14 -0
- package/dist/core/config-injector.d.ts +2 -1
- package/dist/core/config-injector.js +2 -1
- package/dist/core/dashboard.d.ts +17 -0
- package/dist/core/dashboard.js +158 -2
- package/dist/core/harness.d.ts +6 -1
- package/dist/core/harness.js +75 -19
- package/dist/core/paths.d.ts +31 -1
- package/dist/core/paths.js +43 -2
- package/dist/core/spawn.d.ts +3 -2
- package/dist/core/spawn.js +27 -8
- package/dist/core/types.d.ts +34 -0
- package/dist/engine/compound-lifecycle.d.ts +4 -3
- package/dist/engine/compound-lifecycle.js +91 -46
- package/dist/engine/learn-cli.d.ts +1 -0
- package/dist/engine/learn-cli.js +182 -0
- package/dist/engine/meta-learning/adaptive-thresholds.d.ts +20 -0
- package/dist/engine/meta-learning/adaptive-thresholds.js +126 -0
- package/dist/engine/meta-learning/extraction-tuner.d.ts +15 -0
- package/dist/engine/meta-learning/extraction-tuner.js +99 -0
- package/dist/engine/meta-learning/matcher-weight-tuner.d.ts +21 -0
- package/dist/engine/meta-learning/matcher-weight-tuner.js +151 -0
- package/dist/engine/meta-learning/runner.d.ts +14 -0
- package/dist/engine/meta-learning/runner.js +90 -0
- package/dist/engine/meta-learning/scope-promoter.d.ts +21 -0
- package/dist/engine/meta-learning/scope-promoter.js +84 -0
- package/dist/engine/meta-learning/session-quality-scorer.d.ts +61 -0
- package/dist/engine/meta-learning/session-quality-scorer.js +166 -0
- package/dist/engine/meta-learning/types.d.ts +114 -0
- package/dist/engine/meta-learning/types.js +43 -0
- package/dist/engine/solution-candidate.d.ts +30 -0
- package/dist/engine/solution-candidate.js +124 -0
- package/dist/engine/solution-fitness.d.ts +52 -0
- package/dist/engine/solution-fitness.js +95 -0
- package/dist/engine/solution-fixup.d.ts +30 -0
- package/dist/engine/solution-fixup.js +116 -0
- package/dist/engine/solution-format.d.ts +10 -2
- package/dist/engine/solution-format.js +287 -57
- package/dist/engine/solution-index.d.ts +1 -1
- package/dist/engine/solution-index.js +10 -0
- package/dist/engine/solution-matcher.d.ts +7 -1
- package/dist/engine/solution-matcher.js +137 -37
- package/dist/engine/solution-outcomes.d.ts +70 -0
- package/dist/engine/solution-outcomes.js +242 -0
- package/dist/engine/solution-quarantine.d.ts +36 -0
- package/dist/engine/solution-quarantine.js +172 -0
- package/dist/engine/solution-weakness.d.ts +45 -0
- package/dist/engine/solution-weakness.js +225 -0
- package/dist/engine/solution-writer.d.ts +5 -0
- package/dist/engine/solution-writer.js +18 -0
- package/dist/fgx.js +12 -8
- package/dist/hooks/context-guard.d.ts +5 -0
- package/dist/hooks/context-guard.js +118 -2
- package/dist/hooks/hooks-generator.d.ts +3 -0
- package/dist/hooks/hooks-generator.js +23 -6
- package/dist/hooks/keyword-detector.js +16 -100
- package/dist/hooks/post-tool-failure.js +7 -0
- package/dist/hooks/skill-injector.d.ts +4 -3
- package/dist/hooks/skill-injector.js +6 -4
- package/dist/hooks/solution-injector.js +20 -0
- package/dist/host/codex-adapter.d.ts +10 -0
- package/dist/host/codex-adapter.js +154 -0
- package/dist/mcp/solution-reader.d.ts +5 -5
- package/dist/mcp/solution-reader.js +34 -24
- package/dist/mcp/tools.js +8 -0
- package/dist/services/session.d.ts +19 -0
- package/dist/services/session.js +62 -0
- package/hooks/hooks.json +2 -2
- package/package.json +2 -1
- package/skills/architecture-decision/SKILL.md +113 -257
- package/skills/calibrate/SKILL.md +207 -0
- package/skills/code-review/SKILL.md +151 -178
- package/skills/compound/SKILL.md +126 -68
- package/skills/deep-interview/SKILL.md +210 -110
- package/skills/docker/SKILL.md +57 -179
- package/skills/forge-loop/SKILL.md +198 -0
- package/skills/learn/SKILL.md +216 -0
- package/skills/retro/SKILL.md +199 -0
- package/skills/ship/SKILL.md +259 -0
- package/agents/code-simplifier.md +0 -197
- package/agents/performance-reviewer.md +0 -172
- package/agents/qa-tester.md +0 -158
- package/agents/refactoring-expert.md +0 -168
- package/agents/scientist.md +0 -144
- package/agents/security-reviewer.md +0 -137
- package/agents/writer.md +0 -184
- package/commands/api-design.md +0 -268
- package/commands/ci-cd.md +0 -270
- package/commands/database.md +0 -263
- package/commands/debug-detective.md +0 -99
- package/commands/documentation.md +0 -276
- package/commands/ecomode.md +0 -51
- package/commands/frontend.md +0 -271
- package/commands/git-master.md +0 -90
- package/commands/incident-response.md +0 -292
- package/commands/migrate.md +0 -101
- package/commands/performance.md +0 -288
- package/commands/refactor.md +0 -105
- package/commands/security-review.md +0 -288
- package/commands/specify.md +0 -128
- package/commands/tdd.md +0 -183
- package/commands/testing-strategy.md +0 -265
- package/skills/api-design/SKILL.md +0 -262
- package/skills/ci-cd/SKILL.md +0 -264
- package/skills/database/SKILL.md +0 -257
- package/skills/debug-detective/SKILL.md +0 -95
- package/skills/documentation/SKILL.md +0 -270
- package/skills/ecomode/SKILL.md +0 -46
- package/skills/frontend/SKILL.md +0 -265
- package/skills/git-master/SKILL.md +0 -86
- package/skills/incident-response/SKILL.md +0 -286
- package/skills/migrate/SKILL.md +0 -96
- package/skills/performance/SKILL.md +0 -282
- package/skills/refactor/SKILL.md +0 -100
- package/skills/security-review/SKILL.md +0 -282
- package/skills/specify/SKILL.md +0 -122
- package/skills/tdd/SKILL.md +0 -178
- package/skills/testing-strategy/SKILL.md +0 -260
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Forgen Meta-Learning — Session Quality Scorer (Feature 1)
|
|
3
|
+
*
|
|
4
|
+
* Joins existing data sources to compute a per-session quality score.
|
|
5
|
+
* This score feeds other meta-learning features (matcher tuning, thresholds).
|
|
6
|
+
*
|
|
7
|
+
* Data sources:
|
|
8
|
+
* - injection-cache-{sessionId}.json → injected solutions
|
|
9
|
+
* - modified-files-{sessionId}.json → drift state
|
|
10
|
+
* - implicit-feedback.jsonl → revert/drift events
|
|
11
|
+
* - me/behavior/*.json → correction evidence
|
|
12
|
+
* - state/sessions/{sessionId}.json → session metadata
|
|
13
|
+
*/
|
|
14
|
+
import * as fs from 'node:fs';
|
|
15
|
+
import * as path from 'node:path';
|
|
16
|
+
import { ME_BEHAVIOR, STATE_DIR } from '../../core/paths.js';
|
|
17
|
+
import { safeReadJSON } from '../../hooks/shared/atomic-write.js';
|
|
18
|
+
/**
 * Make an identifier safe to embed in a file name.
 *
 * Every character outside `[a-zA-Z0-9_-]` is swapped for an underscore.
 */
function sanitizeId(id) {
    const unsafeChars = /[^a-zA-Z0-9_-]/g;
    const safeId = id.replace(unsafeChars, '_');
    return safeId;
}
|
|
21
|
+
/**
 * Read the injection cache stored for a session.
 *
 * @param sessionId Session identifier; sanitized before it is placed in the file name.
 * @returns Whatever `safeReadJSON` yields for `injection-cache-{id}.json`,
 *   with `null` supplied as its fallback value.
 */
export function loadInjectionCache(sessionId) {
    const fileName = `injection-cache-${sanitizeId(sessionId)}.json`;
    return safeReadJSON(path.join(STATE_DIR, fileName), null);
}
|
|
25
|
+
/**
 * Read the solution cache stored for a session.
 *
 * @param sessionId Session identifier; sanitized before it is placed in the file name.
 * @returns Whatever `safeReadJSON` yields for `solution-cache-{id}.json`,
 *   with `null` supplied as its fallback value.
 */
function loadSolutionCache(sessionId) {
    const fileName = `solution-cache-${sanitizeId(sessionId)}.json`;
    return safeReadJSON(path.join(STATE_DIR, fileName), null);
}
|
|
29
|
+
/**
 * Read the `drift` section of a session's `modified-files-{id}.json` state.
 *
 * @param sessionId Session identifier; sanitized before it is placed in the file name.
 * @returns The stored `drift` value, or `null` when the state (or its
 *   `drift` field) is null/undefined.
 */
export function loadDriftState(sessionId) {
    const fileName = `modified-files-${sanitizeId(sessionId)}.json`;
    const state = safeReadJSON(path.join(STATE_DIR, fileName), null);
    if (state == null) {
        return null;
    }
    const drift = state.drift;
    return drift == null ? null : drift;
}
|
|
34
|
+
/**
 * Collect the implicit-feedback log entries that belong to one session.
 *
 * The log is a JSON-lines file (`implicit-feedback.jsonl` under STATE_DIR).
 * Blank lines and lines that fail to parse are skipped; any other failure
 * (for example while reading the file) yields an empty list.
 *
 * @param sessionId Session identifier compared against each entry's `sessionId`.
 * @returns Parsed entries whose `sessionId` strictly equals `sessionId`.
 */
export function loadImplicitFeedback(sessionId) {
    const logPath = path.join(STATE_DIR, 'implicit-feedback.jsonl');
    try {
        if (!fs.existsSync(logPath)) {
            return [];
        }
        const matches = [];
        for (const rawLine of fs.readFileSync(logPath, 'utf-8').split('\n')) {
            if (!rawLine) {
                continue;
            }
            let entry;
            try {
                entry = JSON.parse(rawLine);
                if (entry.sessionId !== sessionId) {
                    continue;
                }
            }
            catch {
                // Malformed line (or a non-object JSON value): ignore it.
                continue;
            }
            matches.push(entry);
        }
        return matches;
    }
    catch {
        return [];
    }
}
|
|
57
|
+
/**
 * Count the explicit corrections recorded for a session.
 *
 * Scans every `.json` file in ME_BEHAVIOR and counts those whose
 * `session_id` equals `sessionId` and whose `type` is `explicit_correction`.
 *
 * @param sessionId Session identifier to match.
 * @returns Number of matching records; 0 when the directory is absent or
 *   anything throws while scanning.
 */
export function loadSessionCorrections(sessionId) {
    const isCorrectionForSession = (record) => record?.session_id === sessionId && record?.type === 'explicit_correction';
    try {
        if (!fs.existsSync(ME_BEHAVIOR)) {
            return 0;
        }
        return fs
            .readdirSync(ME_BEHAVIOR)
            .filter((fileName) => fileName.endsWith('.json'))
            .map((fileName) => safeReadJSON(path.join(ME_BEHAVIOR, fileName), null))
            .filter(isCorrectionForSession)
            .length;
    }
    catch {
        return 0;
    }
}
|
|
76
|
+
/**
 * Read the `toolCallCount` field of a session's `modified-files-{id}.json` state.
 *
 * @param sessionId Session identifier; sanitized before it is placed in the file name.
 * @returns The stored `toolCallCount`, or 0 when the state or the field is null/undefined.
 */
function loadToolCallCount(sessionId) {
    const fileName = `modified-files-${sanitizeId(sessionId)}.json`;
    const state = safeReadJSON(path.join(STATE_DIR, fileName), null);
    const stored = state == null ? undefined : state.toolCallCount;
    return stored == null ? 0 : stored;
}
|
|
81
|
+
// ── Score computation ──
|
|
82
|
+
/**
 * Compute overall session quality score (0-100, higher = better).
 *
 * Formula:
 *   100
 *   - (correctionRate × 15)         // each correction/prompt penalizes 15pts
 *   - (driftScore × 0.3)            // drift 0-100 maps to 0-30 penalty
 *   - (revertCount × 5)             // each revert penalizes 5pts
 *   + (solutionEffectiveness × 20)  // good solution usage boosts 0-20pts
 *
 * The result is rounded to two decimals and clamped to [0, 100].
 *
 * An input that is NaN (or converts to NaN, e.g. `undefined`) is treated as
 * 0 — "no signal" — so a single missing measurement cannot turn the whole
 * score into NaN, which `Math.min`/`Math.max` would pass straight through.
 *
 * @param correctionRate Corrections per prompt.
 * @param driftScore Drift score on a 0-100 scale.
 * @param revertCount Number of reverts.
 * @param solutionEffectiveness Effectiveness ratio on a 0-1 scale.
 * @returns Score in [0, 100].
 */
export function computeOverallScore(correctionRate, driftScore, revertCount, solutionEffectiveness) {
    const orZero = (value) => {
        const numeric = Number(value);
        return Number.isNaN(numeric) ? 0 : numeric;
    };
    const raw = 100
        - orZero(correctionRate) * 15
        - orZero(driftScore) * 0.3
        - orZero(revertCount) * 5
        + orZero(solutionEffectiveness) * 20;
    return Math.max(0, Math.min(100, Math.round(raw * 100) / 100));
}
|
|
96
|
+
// ── Main entry ──
|
|
97
|
+
/**
 * Score a session's quality by joining all available data sources.
 *
 * Sources that are missing fall back to neutral values (no injections, zero
 * drift, zero reverts, zero corrections), so a score object is always
 * returned — there is no code path here that returns null.
 *
 * @param sessionId Session identifier used to locate the per-session state.
 * @returns Object with the rounded `correctionRate`, `driftScore` and
 *   `solutionEffectiveness`, the `revertCount`, the clamped `overallScore`,
 *   the injected solution list and an ISO-8601 `computedAt` timestamp.
 */
export function scoreSession(sessionId) {
    // State files are external JSON: coerce anything missing or non-numeric
    // to 0 so one malformed field cannot turn the whole score into NaN.
    const toNumber = (value) => {
        const parsed = Number(value);
        return Number.isFinite(parsed) ? parsed : 0;
    };
    // Injected solutions — prefer the injection cache, fall back to the solution cache.
    const injectionCache = loadInjectionCache(sessionId);
    const solutionCache = loadSolutionCache(sessionId);
    const injectedSolutions = injectionCache?.injected ?? solutionCache?.injected ?? [];
    // Drift: 65/35 blend of the EWMA edit and revert rates, kept inside 0-100.
    const drift = loadDriftState(sessionId);
    const driftScore = drift
        ? Math.max(0, Math.min(100, toNumber(drift.ewmaEditRate) * 65 + toNumber(drift.ewmaRevertRate) * 35))
        : 0;
    const revertCount = toNumber(drift?.totalReverts);
    // Corrections per prompt. The prompt count is not recorded, so it is
    // estimated as one prompt per three tool calls (minimum 1).
    const corrections = loadSessionCorrections(sessionId);
    const toolCallCount = toNumber(loadToolCallCount(sessionId));
    const promptEstimate = Math.max(1, Math.ceil(toolCallCount / 3));
    const correctionRate = corrections / promptEstimate;
    // Solution effectiveness is measured at session level only: revert events
    // in the implicit-feedback log are the negative signal.
    const implicitFeedback = loadImplicitFeedback(sessionId);
    const revertEvents = implicitFeedback.filter((e) => e.type === 'revert_detected').length;
    // Effectiveness: 1 - (negative signals / total injections), clamped to [0, 1];
    // 0 when nothing was injected.
    const solutionEffectiveness = injectedSolutions.length > 0
        ? Math.max(0, Math.min(1, 1 - revertEvents / injectedSolutions.length))
        : 0;
    const overallScore = computeOverallScore(correctionRate, driftScore, revertCount, solutionEffectiveness);
    return {
        sessionId,
        correctionRate: Math.round(correctionRate * 1000) / 1000,
        driftScore: Math.round(driftScore * 100) / 100,
        revertCount,
        solutionEffectiveness: Math.round(solutionEffectiveness * 1000) / 1000,
        overallScore,
        injectedSolutions,
        computedAt: new Date().toISOString(),
    };
}
|
|
137
|
+
// ── Persistence ──
|
|
138
|
+
/**
 * Persist a session quality score as pretty-printed JSON.
 *
 * @param score Score object; its `sessionId` (sanitized) becomes the file name.
 * @param baseDir Optional target directory; defaults to `session-quality` under STATE_DIR.
 *   The directory is created (recursively) before writing.
 */
export function saveSessionQuality(score, baseDir) {
    const targetDir = baseDir ?? path.join(STATE_DIR, 'session-quality');
    fs.mkdirSync(targetDir, { recursive: true });
    const targetFile = path.join(targetDir, `${sanitizeId(score.sessionId)}.json`);
    const serialized = JSON.stringify(score, null, 2);
    fs.writeFileSync(targetFile, serialized);
}
|
|
144
|
+
/**
 * Load the stored quality score of one session.
 *
 * @param sessionId Session identifier; sanitized before it is placed in the file name.
 * @param baseDir Optional directory to read from; defaults to `session-quality` under STATE_DIR.
 * @returns Whatever `safeReadJSON` yields for the file, with `null` supplied as its fallback.
 */
export function loadSessionQuality(sessionId, baseDir) {
    const sourceDir = baseDir ?? path.join(STATE_DIR, 'session-quality');
    const fileName = `${sanitizeId(sessionId)}.json`;
    return safeReadJSON(path.join(sourceDir, fileName), null);
}
|
|
149
|
+
/**
 * Load the most recently computed session quality scores.
 *
 * Reads every `.json` file in the score directory, keeps the entries that
 * carry a string `computedAt`, and returns them newest first.
 *
 * Entries without a string `computedAt` are skipped: sorting on a missing
 * timestamp would throw, and the surrounding catch would then discard every
 * valid score along with the broken one.
 *
 * Timestamps are compared by code unit, which orders same-format ISO-8601
 * strings (as written by `Date.prototype.toISOString`) chronologically and
 * independently of the process locale.
 *
 * @param limit Maximum number of scores to return (default 10).
 * @param baseDir Optional directory to read from; defaults to `session-quality` under STATE_DIR.
 * @returns Up to `limit` scores, newest first; `[]` when the directory is
 *   absent or cannot be listed.
 */
export function loadRecentQualityScores(limit = 10, baseDir) {
    const dir = baseDir ?? path.join(STATE_DIR, 'session-quality');
    try {
        if (!fs.existsSync(dir))
            return [];
        const scores = [];
        for (const file of fs.readdirSync(dir)) {
            if (!file.endsWith('.json'))
                continue;
            const score = safeReadJSON(path.join(dir, file), null);
            if (score && typeof score.computedAt === 'string')
                scores.push(score);
        }
        scores.sort((a, b) => {
            if (a.computedAt === b.computedAt)
                return 0;
            return a.computedAt < b.computedAt ? 1 : -1;
        });
        return scores.slice(0, limit);
    }
    catch {
        return [];
    }
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Forgen Meta-Learning — Shared Types
|
|
3
|
+
*
|
|
4
|
+
* HyperAgents-inspired self-tuning layer above the compound system.
|
|
5
|
+
* All types consumed by the meta-learning runner and its sub-modules.
|
|
6
|
+
*/
|
|
7
|
+
export interface SessionQualityScore {
|
|
8
|
+
sessionId: string;
|
|
9
|
+
/** corrections per prompt in this session */
|
|
10
|
+
correctionRate: number;
|
|
11
|
+
/** final EWMA drift score (0-100) */
|
|
12
|
+
driftScore: number;
|
|
13
|
+
/** total reverts detected */
|
|
14
|
+
revertCount: number;
|
|
15
|
+
/** 1 − (revert events / injected solutions), clamped to 0-1; 0 when nothing was injected */
|
|
16
|
+
solutionEffectiveness: number;
|
|
17
|
+
/** composite score 0-100 (higher = better) */
|
|
18
|
+
overallScore: number;
|
|
19
|
+
/** which solutions were injected this session */
|
|
20
|
+
injectedSolutions: string[];
|
|
21
|
+
computedAt: string;
|
|
22
|
+
}
|
|
23
|
+
export interface MatcherWeights {
|
|
24
|
+
tfidf: number;
|
|
25
|
+
bm25: number;
|
|
26
|
+
bigram: number;
|
|
27
|
+
updatedAt: string;
|
|
28
|
+
/** how many solutions informed this tuning */
|
|
29
|
+
sampleSize: number;
|
|
30
|
+
/** monotonic version for rollback detection */
|
|
31
|
+
version: number;
|
|
32
|
+
/** original defaults for fallback */
|
|
33
|
+
defaults: {
|
|
34
|
+
tfidf: number;
|
|
35
|
+
bm25: number;
|
|
36
|
+
bigram: number;
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
export interface PromotionThresholds {
|
|
40
|
+
reflected: number;
|
|
41
|
+
sessions: number;
|
|
42
|
+
reExtracted: number;
|
|
43
|
+
}
|
|
44
|
+
export interface AdaptiveLifecycleThresholds {
|
|
45
|
+
experiment: PromotionThresholds;
|
|
46
|
+
candidate: PromotionThresholds;
|
|
47
|
+
verified: PromotionThresholds & {
|
|
48
|
+
negative: number;
|
|
49
|
+
};
|
|
50
|
+
/** solutions per week */
|
|
51
|
+
learningVelocity: number;
|
|
52
|
+
updatedAt: string;
|
|
53
|
+
sampleSize: number;
|
|
54
|
+
defaults: {
|
|
55
|
+
experiment: PromotionThresholds;
|
|
56
|
+
candidate: PromotionThresholds;
|
|
57
|
+
verified: PromotionThresholds & {
|
|
58
|
+
negative: number;
|
|
59
|
+
};
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
export interface ExtractionBias {
|
|
63
|
+
typeWeights: Record<string, number>;
|
|
64
|
+
updatedAt: string;
|
|
65
|
+
sampleSize: number;
|
|
66
|
+
}
|
|
67
|
+
export interface ProjectUsageEntry {
|
|
68
|
+
projects: string[];
|
|
69
|
+
updatedAt: string;
|
|
70
|
+
}
|
|
71
|
+
export interface ProjectUsageMap {
|
|
72
|
+
solutions: Record<string, ProjectUsageEntry>;
|
|
73
|
+
}
|
|
74
|
+
export interface MetaLearningFeatures {
|
|
75
|
+
sessionQualityScorer: boolean;
|
|
76
|
+
matcherWeightTuning: boolean;
|
|
77
|
+
scopeAutoPromotion: boolean;
|
|
78
|
+
adaptiveThresholds: boolean;
|
|
79
|
+
extractionTuning: boolean;
|
|
80
|
+
}
|
|
81
|
+
export interface ColdStartConfig {
|
|
82
|
+
minSessionsForQuality: number;
|
|
83
|
+
minSolutionsForMatcher: number;
|
|
84
|
+
minSolutionsForThresholds: number;
|
|
85
|
+
minSolutionsForExtraction: number;
|
|
86
|
+
minProjectsForScope: number;
|
|
87
|
+
}
|
|
88
|
+
export interface GuardrailConfig {
|
|
89
|
+
weightFloor: number;
|
|
90
|
+
weightCeiling: number;
|
|
91
|
+
maxWeightDelta: number;
|
|
92
|
+
thresholdFloor: number;
|
|
93
|
+
thresholdCeiling: number;
|
|
94
|
+
maxThresholdDelta: number;
|
|
95
|
+
degradationThreshold: number;
|
|
96
|
+
}
|
|
97
|
+
export interface MetaLearningConfig {
|
|
98
|
+
enabled: boolean;
|
|
99
|
+
features: MetaLearningFeatures;
|
|
100
|
+
coldStart: ColdStartConfig;
|
|
101
|
+
guardrails: GuardrailConfig;
|
|
102
|
+
}
|
|
103
|
+
export interface MetaLearningResult {
|
|
104
|
+
skipped?: boolean;
|
|
105
|
+
reason?: string;
|
|
106
|
+
qualityScore?: SessionQualityScore | null;
|
|
107
|
+
matcherWeights?: MatcherWeights | null;
|
|
108
|
+
scopePromotions?: string[];
|
|
109
|
+
thresholds?: AdaptiveLifecycleThresholds | null;
|
|
110
|
+
extractionBias?: ExtractionBias | null;
|
|
111
|
+
}
|
|
112
|
+
export declare const DEFAULT_CONFIG: MetaLearningConfig;
|
|
113
|
+
export declare const DEFAULT_MATCHER_WEIGHTS: MatcherWeights['defaults'];
|
|
114
|
+
export declare const DEFAULT_PROMOTION_THRESHOLDS: AdaptiveLifecycleThresholds['defaults'];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Forgen Meta-Learning — Shared Types
|
|
3
|
+
*
|
|
4
|
+
* HyperAgents-inspired self-tuning layer above the compound system.
|
|
5
|
+
* All types consumed by the meta-learning runner and its sub-modules.
|
|
6
|
+
*/
|
|
7
|
+
// ── Defaults ──
|
|
8
|
+
// Per-feature switches; every feature defaults to on.
const defaultFeatures = {
    sessionQualityScorer: true,
    matcherWeightTuning: true,
    scopeAutoPromotion: true,
    adaptiveThresholds: true,
    extractionTuning: true,
};
// Minimum sample sizes, one per feature.
const defaultColdStart = {
    minSessionsForQuality: 1,
    minSolutionsForMatcher: 10,
    minSolutionsForThresholds: 15,
    minSolutionsForExtraction: 20,
    minProjectsForScope: 3,
};
// Floors, ceilings and per-step deltas for tuned weights and thresholds.
const defaultGuardrails = {
    weightFloor: 0.1,
    weightCeiling: 0.7,
    maxWeightDelta: 0.05,
    thresholdFloor: 2,
    thresholdCeiling: 15,
    maxThresholdDelta: 1,
    degradationThreshold: 0.3,
};
/** Default meta-learning configuration; the top-level `enabled` flag is off. */
export const DEFAULT_CONFIG = {
    enabled: false,
    features: defaultFeatures,
    coldStart: defaultColdStart,
    guardrails: defaultGuardrails,
};
/** Default matcher weights for the tfidf, bm25 and bigram components. */
export const DEFAULT_MATCHER_WEIGHTS = { tfidf: 0.5, bm25: 0.3, bigram: 0.2 };
// Builds one { reflected, sessions, reExtracted } threshold triple.
const promotionGate = (reflected, sessions, reExtracted) => ({ reflected, sessions, reExtracted });
/** Default promotion thresholds per lifecycle stage. */
export const DEFAULT_PROMOTION_THRESHOLDS = {
    experiment: promotionGate(3, 3, 2),
    candidate: promotionGate(4, 3, 2),
    verified: { ...promotionGate(8, 5, 2), negative: 1 },
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export interface PromoteResult {
|
|
2
|
+
ok: boolean;
|
|
3
|
+
source?: string;
|
|
4
|
+
dest?: string;
|
|
5
|
+
reason?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface RollbackResult {
|
|
8
|
+
archived: string[];
|
|
9
|
+
archive_dir: string;
|
|
10
|
+
errors: string[];
|
|
11
|
+
}
|
|
12
|
+
export declare function listCandidates(): string[];
|
|
13
|
+
/**
|
|
14
|
+
* Move one candidate file from lab/candidates/ to me/solutions/ after
|
|
15
|
+
* schema + ownership checks. Refuses to overwrite an existing solution.
|
|
16
|
+
* Returns `{ok:false, reason}` for any precondition failure so the CLI
|
|
17
|
+
* can report exactly why promotion was rejected.
|
|
18
|
+
*/
|
|
19
|
+
export declare function promoteCandidate(nameOrPath: string): PromoteResult;
|
|
20
|
+
/**
|
|
21
|
+
* Archive evolved-* solutions created at-or-after the given epoch ms.
|
|
22
|
+
* Looks in ME_SOLUTIONS first (live, promoted candidates) then in
|
|
23
|
+
* CANDIDATES_DIR (unpromoted). Archive is a timestamp-suffixed
|
|
24
|
+
* directory so concurrent rollbacks don't clobber each other.
|
|
25
|
+
*
|
|
26
|
+
* "evolved" is identified by `source: evolved` in frontmatter; we
|
|
27
|
+
* deliberately do NOT use filename prefix so a manually-renamed
|
|
28
|
+
* evolved solution can still be rolled back.
|
|
29
|
+
*/
|
|
30
|
+
export declare function rollbackSince(epochMs: number): RollbackResult;
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { ARCHIVED_DIR, CANDIDATES_DIR, ME_SOLUTIONS } from '../core/paths.js';
|
|
4
|
+
import { parseFrontmatterOnly } from './solution-format.js';
|
|
5
|
+
import { diagnoseFromRawContent } from './solution-quarantine.js';
|
|
6
|
+
import { createLogger } from '../core/logger.js';
|
|
7
|
+
const log = createLogger('solution-candidate');
|
|
8
|
+
/**
 * List the candidate files currently waiting in CANDIDATES_DIR.
 *
 * @returns Paths (CANDIDATES_DIR joined with the entry name) of every entry
 *   ending in `.md`; `[]` when the directory does not exist.
 */
export function listCandidates() {
    if (!fs.existsSync(CANDIDATES_DIR)) {
        return [];
    }
    const found = [];
    for (const entry of fs.readdirSync(CANDIDATES_DIR)) {
        if (entry.endsWith('.md')) {
            found.push(path.join(CANDIDATES_DIR, entry));
        }
    }
    return found;
}
|
|
16
|
+
/**
 * Move one candidate file from lab/candidates/ to me/solutions/ after
 * schema + ownership checks. Refuses to overwrite an existing solution.
 * Returns `{ok:false, reason}` for any precondition failure so the CLI
 * can report exactly why promotion was rejected — this includes I/O
 * failures while reading or moving the file, which are reported instead
 * of thrown.
 *
 * Checks, in order:
 *   1. the candidate resolves to an existing path and can be read;
 *   2. `diagnoseFromRawContent` reports no errors;
 *   3. frontmatter `status` is `candidate` and `extractedBy` is `auto`;
 *   4. frontmatter `name` maps to a file directly inside ME_SOLUTIONS
 *      (a name containing path segments is rejected);
 *   5. no solution with that name exists yet.
 *
 * @param nameOrPath Candidate name (with or without `.md`) or a path to the file.
 * @returns `{ok:true, source, dest}` on success, otherwise `{ok:false, reason}`
 *   (plus `source` once it has been resolved).
 */
export function promoteCandidate(nameOrPath) {
    const source = resolveCandidatePath(nameOrPath);
    if (!source)
        return { ok: false, reason: `candidate not found: ${nameOrPath}` };
    let content;
    try {
        content = fs.readFileSync(source, 'utf-8');
    }
    catch (e) {
        return { ok: false, source, reason: `read failed: ${errMsg(e)}` };
    }
    const errors = diagnoseFromRawContent(content);
    if (errors.length > 0) {
        return { ok: false, source, reason: `schema errors: ${errors.join('; ')}` };
    }
    const fm = parseFrontmatterOnly(content);
    if (!fm)
        return { ok: false, source, reason: 'frontmatter parse failed post-diagnose (unexpected)' };
    if (fm.status !== 'candidate') {
        return { ok: false, source, reason: `status must be 'candidate', got '${fm.status}'` };
    }
    if (fm.extractedBy !== 'auto') {
        return { ok: false, source, reason: `extractedBy must be 'auto' (evolved proposals)` };
    }
    const dest = path.join(ME_SOLUTIONS, `${fm.name}.md`);
    // The name comes from file content; make sure it cannot steer the
    // destination outside ME_SOLUTIONS (e.g. `../x` or `sub/x`).
    if (path.dirname(path.resolve(dest)) !== path.resolve(ME_SOLUTIONS)) {
        return { ok: false, source, reason: `invalid solution name: ${fm.name}` };
    }
    if (fs.existsSync(dest)) {
        return { ok: false, source, reason: `name collision: ${fm.name} already exists in me/solutions` };
    }
    try {
        fs.mkdirSync(ME_SOLUTIONS, { recursive: true });
        try {
            fs.renameSync(source, dest);
        }
        catch {
            // renameSync fails across filesystems — fall back to copy+unlink.
            // COPYFILE_EXCL keeps the no-overwrite guarantee if `dest` appeared
            // after the existsSync check above.
            fs.copyFileSync(source, dest, fs.constants.COPYFILE_EXCL);
            try {
                fs.unlinkSync(source);
            }
            catch { /* ignore: the copy succeeded, a leftover candidate is harmless */ }
        }
    }
    catch (e) {
        return { ok: false, source, reason: `move failed: ${errMsg(e)}` };
    }
    log.debug(`promoted: ${fm.name}`);
    return { ok: true, source, dest };
}
|
|
59
|
+
/**
 * Archive evolved-* solutions created at-or-after the given epoch ms.
 * Looks in ME_SOLUTIONS first (live, promoted candidates) then in
 * CANDIDATES_DIR (unpromoted). Archive is a timestamp-suffixed
 * directory so concurrent rollbacks don't clobber each other.
 *
 * "evolved" is identified by `source: evolved` in frontmatter; we
 * deliberately do NOT use filename prefix so a manually-renamed
 * evolved solution can still be rolled back.
 *
 * A file is left in place when its `created` value cannot be parsed as a
 * date or is older than `epochMs`; only files with a parseable date
 * at-or-after the cut-off are moved.
 *
 * @param epochMs Cut-off in epoch milliseconds.
 * @returns `archived` (original paths that were moved), `archive_dir`
 *   (created only if something was archived) and `errors` (one message per
 *   directory or file that could not be read or moved).
 */
export function rollbackSince(epochMs) {
    const archiveDir = path.join(ARCHIVED_DIR, `rollback-${Date.now()}`);
    const archived = [];
    const errors = [];
    const dirs = [ME_SOLUTIONS, CANDIDATES_DIR];
    for (const dir of dirs) {
        if (!fs.existsSync(dir))
            continue;
        let files;
        try {
            files = fs.readdirSync(dir);
        }
        catch (e) {
            // Report and move on so one unreadable directory does not abort the rollback.
            errors.push(`list ${dir}: ${errMsg(e)}`);
            continue;
        }
        for (const file of files) {
            if (!file.endsWith('.md'))
                continue;
            const filePath = path.join(dir, file);
            let content;
            try {
                content = fs.readFileSync(filePath, 'utf-8');
            }
            catch (e) {
                errors.push(`read ${filePath}: ${errMsg(e)}`);
                continue;
            }
            const fm = parseFrontmatterOnly(content);
            if (!fm)
                continue;
            // `source` is an optional free-form field written by the evolver.
            const source = fm.source;
            if (source !== 'evolved')
                continue;
            // `created` is YAML-formatted date string. If parsing fails or the
            // created date is older than epochMs, leave the file in place.
            // Date.parse yields NaN for unparseable input, hence the explicit
            // finiteness check: an unknown date must never trigger an archive.
            const createdMs = Date.parse(fm.created);
            if (!Number.isFinite(createdMs) || createdMs < epochMs)
                continue;
            try {
                fs.mkdirSync(archiveDir, { recursive: true });
                // Prefix with the source directory name so files with the same
                // name from both directories do not collide in the archive.
                const destName = path.basename(dir) + '__' + file;
                fs.renameSync(filePath, path.join(archiveDir, destName));
                archived.push(filePath);
            }
            catch (e) {
                errors.push(`archive ${filePath}: ${errMsg(e)}`);
            }
        }
    }
    return { archived, archive_dir: archiveDir, errors };
}
|
|
114
|
+
/**
 * Turn a candidate name or path into an existing file path.
 *
 * The argument is tried as given first; otherwise it is looked up inside
 * CANDIDATES_DIR, with `.md` appended when it is not already present.
 *
 * @returns The first path that exists, or `null` when neither does.
 */
function resolveCandidatePath(nameOrPath) {
    if (fs.existsSync(nameOrPath)) {
        return nameOrPath;
    }
    const fileName = nameOrPath.endsWith('.md') ? nameOrPath : `${nameOrPath}.md`;
    const inCandidates = path.join(CANDIDATES_DIR, fileName);
    return fs.existsSync(inCandidates) ? inCandidates : null;
}
|
|
122
|
+
/**
 * Render a caught value as text: the `message` of an Error, otherwise
 * the value's `String(...)` conversion.
 */
function errMsg(e) {
    if (e instanceof Error) {
        return e.message;
    }
    return String(e);
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { type OutcomeEvent } from './solution-outcomes.js';
|
|
2
|
+
export type FitnessState = 'draft' | 'active' | 'champion' | 'underperform';
|
|
3
|
+
export interface FitnessRecord {
|
|
4
|
+
solution: string;
|
|
5
|
+
injected: number;
|
|
6
|
+
accepted: number;
|
|
7
|
+
corrected: number;
|
|
8
|
+
errored: number;
|
|
9
|
+
unknown: number;
|
|
10
|
+
/** Laplace-smoothed acceptance ratio × log(1+injected). */
|
|
11
|
+
fitness: number;
|
|
12
|
+
state: FitnessState;
|
|
13
|
+
/** ms since last injection event. Infinity if never injected. */
|
|
14
|
+
last_injected_ago_ms: number;
|
|
15
|
+
}
|
|
16
|
+
export interface FitnessOptions {
|
|
17
|
+
/**
|
|
18
|
+
* Minimum injections required before a solution is evaluated against the
|
|
19
|
+
* underperform threshold. Below this, state stays at `draft`.
|
|
20
|
+
*/
|
|
21
|
+
minEvalInjections?: number;
|
|
22
|
+
/**
|
|
23
|
+
* Injections required to qualify as champion (in addition to fitness cut).
|
|
24
|
+
*/
|
|
25
|
+
minChampionInjections?: number;
|
|
26
|
+
/**
|
|
27
|
+
* Champion cut: fitness must be at least this fraction of the max fitness in
|
|
28
|
+
* the current population. Default 0.7 → fitness within 30% of the population max.
|
|
29
|
+
*/
|
|
30
|
+
championFraction?: number;
|
|
31
|
+
/**
|
|
32
|
+
* Underperform cut: fitness must fall below this fraction of the median.
|
|
33
|
+
*/
|
|
34
|
+
underperformFraction?: number;
|
|
35
|
+
/** Pre-loaded events (for tests). Defaults to `readAllOutcomes()`. */
|
|
36
|
+
events?: OutcomeEvent[];
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Compute fitness scores for every solution with at least one recorded
|
|
40
|
+
* outcome event.
|
|
41
|
+
*
|
|
42
|
+
* Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)`
|
|
43
|
+
* - `accept` = positive (silence = consent)
|
|
44
|
+
* - `correct` = negative (explicit user correction within window)
|
|
45
|
+
* - `error` = weak negative (tool failed while solution was pending)
|
|
46
|
+
* - `unknown` = ignored (session ended mid-pending; we can't tell)
|
|
47
|
+
*
|
|
48
|
+
* Epsilon smoothing (+1) means a cold solution with 1 injection and 1
|
|
49
|
+
* accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
|
|
50
|
+
* `∞`. Log confidence penalizes small-sample champions.
|
|
51
|
+
*/
|
|
52
|
+
export declare function computeFitness(opts?: FitnessOptions): FitnessRecord[];
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { readAllOutcomes } from './solution-outcomes.js';
|
|
2
|
+
// Classification thresholds used when the caller does not override them.
const DEFAULT_OPTS = {
    minEvalInjections: 5,
    minChampionInjections: 10,
    championFraction: 0.7,
    underperformFraction: 0.3,
};
/**
 * Compute fitness scores for every solution with at least one recorded
 * outcome event.
 *
 * Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)`
 *   - `accept`   = positive (silence = consent)
 *   - `correct`  = negative (explicit user correction within window)
 *   - `error`    = weak negative (tool failed while solution was pending)
 *   - `unknown`  = ignored (session ended mid-pending; we can't tell)
 *
 * Epsilon smoothing (+1) means a cold solution with 1 injection and 1
 * accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
 * `∞`. Log confidence penalizes small-sample champions.
 *
 * @param opts Optional threshold overrides and pre-loaded events. An option
 *   that is absent or explicitly `undefined` keeps its default, so the
 *   classification gates cannot be disabled by accident.
 * @returns One record per solution, sorted champion → active →
 *   underperform → draft, and by descending fitness within a state.
 */
export function computeFitness(opts = {}) {
    // Apply only overrides that are actually set: a plain object spread would
    // let `{ minEvalInjections: undefined }` erase the default and make every
    // threshold comparison false.
    const config = { ...DEFAULT_OPTS };
    for (const key of Object.keys(DEFAULT_OPTS)) {
        if (opts[key] !== undefined)
            config[key] = opts[key];
    }
    const events = opts.events ?? readAllOutcomes();
    const now = Date.now();
    const byName = new Map();
    for (const ev of events) {
        const b = byName.get(ev.solution) ?? { accept: 0, correct: 0, error: 0, unknown: 0, last_inject_ts: 0 };
        if (ev.outcome === 'accept')
            b.accept++;
        else if (ev.outcome === 'correct')
            b.correct++;
        else if (ev.outcome === 'error')
            b.error++;
        else
            b.unknown++;
        // Every event is a proxy for an injection (each outcome represents one
        // inject that resolved). `last_inject_ts` tracks the most recent event
        // timestamp which is also the latest decision time.
        if (ev.ts > b.last_inject_ts)
            b.last_inject_ts = ev.ts;
        byName.set(ev.solution, b);
    }
    // First pass: raw fitness
    const records = [];
    for (const [solution, b] of byName) {
        const injected = b.accept + b.correct + b.error + b.unknown;
        const decided = b.accept + b.correct + b.error; // unknown excluded from ratio
        const ratio = (b.accept + 1) / (decided + 1);
        const confidence = Math.log(1 + injected);
        const fitness = ratio * confidence;
        records.push({
            solution,
            injected,
            accepted: b.accept,
            corrected: b.correct,
            errored: b.error,
            unknown: b.unknown,
            fitness,
            state: 'draft',
            last_injected_ago_ms: b.last_inject_ts === 0 ? Infinity : now - b.last_inject_ts,
        });
    }
    // Population stats for state classification (only solutions past the
    // eval threshold contribute — draft solutions distort max/median).
    const evalPool = records.filter((r) => r.injected >= config.minEvalInjections).map((r) => r.fitness);
    // Folded rather than spread into Math.max so the pool size is not bounded
    // by the engine's argument-count limit.
    const maxFit = evalPool.length ? evalPool.reduce((best, f) => Math.max(best, f), -Infinity) : 0;
    const medianFit = evalPool.length ? median(evalPool) : 0;
    for (const r of records) {
        r.state = classifyState(r, { maxFit, medianFit, config });
    }
    // Sort: champions first, then active by fitness desc, then underperform,
    // then draft (cold solutions) at the bottom.
    const order = { champion: 0, active: 1, underperform: 2, draft: 3 };
    records.sort((a, b) => order[a.state] - order[b.state] || b.fitness - a.fitness);
    return records;
}
|
|
78
|
+
/**
 * Assign a fitness state to one record.
 *
 * - `draft`: fewer injections than `config.minEvalInjections`.
 * - `champion`: at least `config.minChampionInjections` injections and
 *   fitness at or above `config.championFraction × maxFit`.
 * - `underperform`: fitness below `config.underperformFraction × medianFit`.
 * - `active`: everything else.
 *
 * @param r Record providing `injected` and `fitness`.
 * @param ctx Population context: `{ config, maxFit, medianFit }`.
 */
function classifyState(r, ctx) {
    const thresholds = ctx.config;
    const tooFewSamples = r.injected < thresholds.minEvalInjections;
    if (tooFewSamples) {
        return 'draft';
    }
    const championCut = thresholds.championFraction * ctx.maxFit;
    const hasChampionVolume = r.injected >= thresholds.minChampionInjections;
    if (hasChampionVolume && r.fitness >= championCut) {
        return 'champion';
    }
    const underperformCut = thresholds.underperformFraction * ctx.medianFit;
    return r.fitness < underperformCut ? 'underperform' : 'active';
}
|
|
89
|
+
/**
 * Median of a list of numbers; 0 for an empty list.
 *
 * Works on a sorted copy, so the caller's array is left untouched. For an
 * even count the two middle values are averaged.
 */
function median(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const ascending = values.slice().sort((x, y) => x - y);
    const isOdd = count % 2 === 1;
    const upperMid = (count - (count % 2)) / 2;
    if (isOdd) {
        return ascending[upperMid];
    }
    return (ascending[upperMid - 1] + ascending[upperMid]) / 2;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export interface FixupReport {
|
|
2
|
+
path: string;
|
|
3
|
+
changed: boolean;
|
|
4
|
+
added: string[];
|
|
5
|
+
remaining_errors: string[];
|
|
6
|
+
}
|
|
7
|
+
export interface FixupResult {
|
|
8
|
+
scanned: number;
|
|
9
|
+
fixed: number;
|
|
10
|
+
untouched: number;
|
|
11
|
+
unfixable: number;
|
|
12
|
+
reports: FixupReport[];
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Attempt to repair known-safe frontmatter defects.
|
|
16
|
+
*
|
|
17
|
+
* Handled defects (pre-0.3.1 schema drift, observed on 5 auto-extracted
|
|
18
|
+
* solutions from 2026-04-10):
|
|
19
|
+
* - `extractedBy` missing → add `extractedBy: auto`
|
|
20
|
+
* - `evidence` block missing → add `DEFAULT_EVIDENCE`
|
|
21
|
+
*
|
|
22
|
+
* All other validation errors (bad scope, non-numeric confidence, etc.)
|
|
23
|
+
* are surfaced in `remaining_errors` and the file is left untouched —
|
|
24
|
+
* those require human judgement, not a mechanical default.
|
|
25
|
+
*
|
|
26
|
+
* `dryRun: true` (default) reports what would change without writing.
|
|
27
|
+
*/
|
|
28
|
+
export declare function fixupSolutions(solutionsDir: string, opts?: {
|
|
29
|
+
dryRun?: boolean;
|
|
30
|
+
}): FixupResult;
|