@wooojin/forgen 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/agents/solution-evolver.md +115 -0
- package/dist/cli.js +8 -0
- package/dist/core/dashboard.js +46 -0
- package/dist/core/paths.d.ts +25 -0
- package/dist/core/paths.js +25 -0
- package/dist/engine/learn-cli.d.ts +1 -0
- package/dist/engine/learn-cli.js +182 -0
- package/dist/engine/solution-candidate.d.ts +30 -0
- package/dist/engine/solution-candidate.js +124 -0
- package/dist/engine/solution-fitness.d.ts +52 -0
- package/dist/engine/solution-fitness.js +95 -0
- package/dist/engine/solution-fixup.d.ts +30 -0
- package/dist/engine/solution-fixup.js +116 -0
- package/dist/engine/solution-format.d.ts +8 -0
- package/dist/engine/solution-format.js +38 -23
- package/dist/engine/solution-index.js +10 -0
- package/dist/engine/solution-matcher.js +24 -1
- package/dist/engine/solution-outcomes.d.ts +70 -0
- package/dist/engine/solution-outcomes.js +242 -0
- package/dist/engine/solution-quarantine.d.ts +36 -0
- package/dist/engine/solution-quarantine.js +172 -0
- package/dist/engine/solution-weakness.d.ts +45 -0
- package/dist/engine/solution-weakness.js +225 -0
- package/dist/engine/solution-writer.d.ts +5 -0
- package/dist/engine/solution-writer.js +18 -0
- package/dist/hooks/post-tool-failure.js +7 -0
- package/dist/hooks/solution-injector.js +20 -0
- package/dist/mcp/tools.js +8 -0
- package/package.json +1 -1
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { type OutcomeEvent } from './solution-outcomes.js';
|
|
2
|
+
/**
 * Lifecycle state assigned to a solution by `computeFitness`:
 * - `draft`        — fewer injections than `minEvalInjections`; not judged yet.
 * - `champion`     — at least `minChampionInjections` injections and fitness at
 *                    or above `championFraction` × the best evaluated fitness.
 * - `underperform` — evaluated, and fitness below `underperformFraction` × the
 *                    median evaluated fitness.
 * - `active`       — evaluated, but neither champion nor underperform.
 */
export type FitnessState = 'draft' | 'active' | 'champion' | 'underperform';
/** Aggregated outcome counts and the derived fitness score for one solution. */
export interface FitnessRecord {
    /** Solution name, taken from the `solution` field of its outcome events. */
    solution: string;
    /** Total outcome events seen: accepted + corrected + errored + unknown. */
    injected: number;
    /** Number of `accept` outcomes. */
    accepted: number;
    /** Number of `correct` outcomes (explicit user corrections). */
    corrected: number;
    /** Number of `error` outcomes. */
    errored: number;
    /** Number of outcomes that were none of accept/correct/error. */
    unknown: number;
    /** Laplace-smoothed acceptance ratio × log(1+injected). */
    fitness: number;
    state: FitnessState;
    /**
     * Milliseconds since the most recent outcome event for this solution (the
     * event timestamp is used as a proxy for the last injection). `Infinity`
     * when no event carried a positive timestamp.
     */
    last_injected_ago_ms: number;
}
|
|
16
|
+
/** Tuning knobs for `computeFitness`. Every field is optional. */
export interface FitnessOptions {
    /**
     * Minimum injections required before a solution is evaluated against the
     * champion/underperform thresholds. Below this, state stays at `draft`.
     * Default 5.
     */
    minEvalInjections?: number;
    /**
     * Injections required to qualify as champion (in addition to fitness cut).
     * Default 10.
     */
    minChampionInjections?: number;
    /**
     * Champion cut: fitness must be at least this fraction of the max fitness
     * among evaluated (non-draft) solutions. Default 0.7.
     */
    championFraction?: number;
    /**
     * Underperform cut: fitness must fall below this fraction of the median
     * fitness among evaluated (non-draft) solutions. Default 0.3.
     */
    underperformFraction?: number;
    /** Pre-loaded events (for tests). Defaults to `readAllOutcomes()`. */
    events?: OutcomeEvent[];
}
|
|
38
|
+
/**
 * Compute fitness scores for every solution with at least one recorded
 * outcome event.
 *
 * Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)`
 *   - `accept`  = positive (silence = consent)
 *   - `correct` = negative (explicit user correction within window)
 *   - `error`   = weak negative (tool failed while solution was pending)
 *   - `unknown` = ignored in the ratio (session ended mid-pending; we can't
 *                 tell), but still counted in `injected`
 *
 * Epsilon smoothing (+1) means a cold solution with 1 injection and 1
 * accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
 * `∞`. Log confidence penalizes small-sample champions.
 *
 * @param opts Optional thresholds and pre-loaded events.
 * @returns One record per solution, ordered champion → active →
 *   underperform → draft, and by descending fitness within each state.
 */
export declare function computeFitness(opts?: FitnessOptions): FitnessRecord[];
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { readAllOutcomes } from './solution-outcomes.js';
|
|
2
|
+
const DEFAULT_OPTS = {
|
|
3
|
+
minEvalInjections: 5,
|
|
4
|
+
minChampionInjections: 10,
|
|
5
|
+
championFraction: 0.7,
|
|
6
|
+
underperformFraction: 0.3,
|
|
7
|
+
};
|
|
8
|
+
/**
|
|
9
|
+
* Compute fitness scores for every solution with at least one recorded
|
|
10
|
+
* outcome event.
|
|
11
|
+
*
|
|
12
|
+
* Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)`
|
|
13
|
+
* - `accept` = positive (silence = consent)
|
|
14
|
+
* - `correct` = negative (explicit user correction within window)
|
|
15
|
+
* - `error` = weak negative (tool failed while solution was pending)
|
|
16
|
+
* - `unknown` = ignored (session ended mid-pending; we can't tell)
|
|
17
|
+
*
|
|
18
|
+
* Epsilon smoothing (+1) means a cold solution with 1 injection and 1
|
|
19
|
+
* accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
|
|
20
|
+
* `∞`. Log confidence penalizes small-sample champions.
|
|
21
|
+
*/
|
|
22
|
+
export function computeFitness(opts = {}) {
|
|
23
|
+
const config = { ...DEFAULT_OPTS, ...opts };
|
|
24
|
+
const events = opts.events ?? readAllOutcomes();
|
|
25
|
+
const now = Date.now();
|
|
26
|
+
const byName = new Map();
|
|
27
|
+
for (const ev of events) {
|
|
28
|
+
const b = byName.get(ev.solution) ?? { accept: 0, correct: 0, error: 0, unknown: 0, last_inject_ts: 0 };
|
|
29
|
+
if (ev.outcome === 'accept')
|
|
30
|
+
b.accept++;
|
|
31
|
+
else if (ev.outcome === 'correct')
|
|
32
|
+
b.correct++;
|
|
33
|
+
else if (ev.outcome === 'error')
|
|
34
|
+
b.error++;
|
|
35
|
+
else
|
|
36
|
+
b.unknown++;
|
|
37
|
+
// Every event is a proxy for an injection (each outcome represents one
|
|
38
|
+
// inject that resolved). `last_inject_ts` tracks the most recent event
|
|
39
|
+
// timestamp which is also the latest decision time.
|
|
40
|
+
if (ev.ts > b.last_inject_ts)
|
|
41
|
+
b.last_inject_ts = ev.ts;
|
|
42
|
+
byName.set(ev.solution, b);
|
|
43
|
+
}
|
|
44
|
+
// First pass: raw fitness
|
|
45
|
+
const records = [];
|
|
46
|
+
for (const [solution, b] of byName) {
|
|
47
|
+
const injected = b.accept + b.correct + b.error + b.unknown;
|
|
48
|
+
const decided = b.accept + b.correct + b.error; // unknown excluded from ratio
|
|
49
|
+
const ratio = (b.accept + 1) / (decided + 1);
|
|
50
|
+
const confidence = Math.log(1 + injected);
|
|
51
|
+
const fitness = ratio * confidence;
|
|
52
|
+
records.push({
|
|
53
|
+
solution,
|
|
54
|
+
injected,
|
|
55
|
+
accepted: b.accept,
|
|
56
|
+
corrected: b.correct,
|
|
57
|
+
errored: b.error,
|
|
58
|
+
unknown: b.unknown,
|
|
59
|
+
fitness,
|
|
60
|
+
state: 'draft',
|
|
61
|
+
last_injected_ago_ms: b.last_inject_ts === 0 ? Infinity : now - b.last_inject_ts,
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
// Population stats for state classification (only solutions past the
|
|
65
|
+
// eval threshold contribute — draft solutions distort max/median).
|
|
66
|
+
const evalPool = records.filter((r) => r.injected >= config.minEvalInjections).map((r) => r.fitness);
|
|
67
|
+
const maxFit = evalPool.length ? Math.max(...evalPool) : 0;
|
|
68
|
+
const medianFit = evalPool.length ? median(evalPool) : 0;
|
|
69
|
+
for (const r of records) {
|
|
70
|
+
r.state = classifyState(r, { maxFit, medianFit, config });
|
|
71
|
+
}
|
|
72
|
+
// Sort: champions first, then active by fitness desc, then underperform,
|
|
73
|
+
// then draft (cold solutions) at the bottom.
|
|
74
|
+
const order = { champion: 0, active: 1, underperform: 2, draft: 3 };
|
|
75
|
+
records.sort((a, b) => order[a.state] - order[b.state] || b.fitness - a.fitness);
|
|
76
|
+
return records;
|
|
77
|
+
}
|
|
78
|
+
function classifyState(r, ctx) {
|
|
79
|
+
const { config, maxFit, medianFit } = ctx;
|
|
80
|
+
if (r.injected < config.minEvalInjections)
|
|
81
|
+
return 'draft';
|
|
82
|
+
if (r.injected >= config.minChampionInjections && r.fitness >= config.championFraction * maxFit) {
|
|
83
|
+
return 'champion';
|
|
84
|
+
}
|
|
85
|
+
if (r.fitness < config.underperformFraction * medianFit)
|
|
86
|
+
return 'underperform';
|
|
87
|
+
return 'active';
|
|
88
|
+
}
|
|
89
|
+
function median(values) {
|
|
90
|
+
if (values.length === 0)
|
|
91
|
+
return 0;
|
|
92
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
93
|
+
const mid = Math.floor(sorted.length / 2);
|
|
94
|
+
return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
|
|
95
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Per-file outcome of a fixup attempt on a solution that had validation errors. */
export interface FixupReport {
    /** Path of the solution file (solutions directory joined with the file name). */
    path: string;
    /** True when the fixer added at least one frontmatter field to its rebuilt content. */
    changed: boolean;
    /** Human-readable labels of the fields that were filled in, e.g. `extractedBy: auto`. */
    added: string[];
    /** Validation errors still present after the attempted fix. */
    remaining_errors: string[];
}
/** Summary counters for one `fixupSolutions` run. */
export interface FixupResult {
    /** Number of `.md` files found in the solutions directory. */
    scanned: number;
    /** Files repaired with no errors left (in dry-run: files that would be repaired). */
    fixed: number;
    /** Files that had no validation errors to begin with. */
    untouched: number;
    /** Files that could not be read or written, or that still have errors after the fix. */
    unfixable: number;
    /** One report per readable file that had validation errors. */
    reports: FixupReport[];
}
|
|
14
|
+
/**
 * Attempt to repair known-safe frontmatter defects.
 *
 * Handled defects (pre-0.3.1 schema drift, observed on 5 auto-extracted
 * solutions from 2026-04-10):
 *   - `extractedBy` missing or not `auto`/`manual` → set `extractedBy: auto`
 *   - `evidence` block missing or not an object → set `DEFAULT_EVIDENCE`
 *   - `supersedes` key absent → set `supersedes: null`
 *
 * All other validation errors (bad scope, non-numeric confidence, etc.)
 * are surfaced in `remaining_errors` and the file is left untouched —
 * those require human judgement, not a mechanical default.
 *
 * `dryRun: true` (default) reports what would change without writing.
 */
export declare function fixupSolutions(solutionsDir: string, opts?: {
    dryRun?: boolean;
}): FixupResult;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import yaml from 'js-yaml';
|
|
4
|
+
import { DEFAULT_EVIDENCE } from './solution-format.js';
|
|
5
|
+
import { diagnoseFromRawContent } from './solution-quarantine.js';
|
|
6
|
+
import { createLogger } from '../core/logger.js';
|
|
7
|
+
const log = createLogger('solution-fixup');
|
|
8
|
+
/**
 * Attempt to repair known-safe frontmatter defects.
 *
 * Handled defects (pre-0.3.1 schema drift, observed on 5 auto-extracted
 * solutions from 2026-04-10):
 *   - `extractedBy` missing or not `auto`/`manual` → set `extractedBy: auto`
 *   - `evidence` block missing or not an object → set `DEFAULT_EVIDENCE`
 *   - `supersedes` key absent → set `supersedes: null`
 *
 * All other validation errors (bad scope, non-numeric confidence, etc.)
 * are surfaced in `remaining_errors` and the file is left untouched —
 * those require human judgement, not a mechanical default.
 *
 * `dryRun: true` (default) reports what would change without writing.
 *
 * @param solutionsDir Directory whose `.md` files are scanned (non-recursive).
 * @param opts `dryRun` — only an explicit `false` enables writing.
 * @returns Counters plus one report per readable file that had validation
 *   errors. A missing directory yields an all-zero result.
 */
export function fixupSolutions(solutionsDir, opts = {}) {
    // Anything other than an explicit `false` keeps the run read-only.
    const dryRun = opts.dryRun !== false;
    const result = { scanned: 0, fixed: 0, untouched: 0, unfixable: 0, reports: [] };
    if (!fs.existsSync(solutionsDir))
        return result;
    const files = fs.readdirSync(solutionsDir).filter((f) => f.endsWith('.md'));
    for (const file of files) {
        const filePath = path.join(solutionsDir, file);
        result.scanned++;
        let content;
        try {
            content = fs.readFileSync(filePath, 'utf-8');
        }
        catch (e) {
            // Unreadable files are counted as unfixable; log so they are not invisible.
            log.debug(`read failed: ${filePath}: ${e instanceof Error ? e.message : String(e)}`);
            result.unfixable++;
            continue;
        }
        const errors = diagnoseFromRawContent(content);
        if (errors.length === 0) {
            result.untouched++;
            continue;
        }
        const fix = tryFix(content, errors);
        result.reports.push({
            path: filePath,
            changed: fix.changed,
            added: fix.added,
            remaining_errors: fix.remaining,
        });
        // Only a fix that clears every validation error is eligible to be written.
        if (fix.changed && fix.remaining.length === 0) {
            if (!dryRun) {
                try {
                    fs.writeFileSync(filePath, fix.content);
                    log.debug(`fixed: ${filePath} (${fix.added.join(', ')})`);
                }
                catch (e) {
                    log.debug(`write failed: ${filePath}: ${e instanceof Error ? e.message : String(e)}`);
                    result.unfixable++;
                    continue;
                }
            }
            // In dry-run this counts files that would be fixed.
            result.fixed++;
        }
        else {
            result.unfixable++;
        }
    }
    return result;
}
|
|
71
|
+
/**
 * Try to fill in the mechanically-defaultable frontmatter fields of one file.
 *
 * Fills `extractedBy` (→ `auto`) when it is not `auto`/`manual`, `evidence`
 * (→ copy of `DEFAULT_EVIDENCE`) when it is missing or not an object, and
 * `supersedes` (→ `null`) when the key is absent. The frontmatter is then
 * re-serialised and the rebuilt file is re-diagnosed.
 *
 * @param content Raw file content.
 * @param initialErrors Validation errors already computed for `content`;
 *   returned unchanged whenever no fix is attempted.
 * @returns `{ changed, added, remaining, content }` — `content` is the rebuilt
 *   file when something was added, otherwise the original input.
 */
function tryFix(content, initialErrors) {
    const added = [];
    // Shared "nothing we can do" result: the original content, the original errors.
    const giveUp = () => ({ changed: false, added, remaining: initialErrors, content });
    const trimmed = content.trimStart();
    if (!trimmed.startsWith('---'))
        return giveUp();
    // NOTE(review): this finds the first `---` anywhere after the opener, so a
    // `---` inside a YAML value would end the frontmatter early. Presumably this
    // mirrors the project's other frontmatter parsers — confirm before tightening.
    const endIdx = trimmed.indexOf('---', 3);
    if (endIdx === -1)
        return giveUp();
    const leadingWs = content.slice(0, content.length - trimmed.length);
    const fmRaw = trimmed.slice(3, endIdx);
    const body = trimmed.slice(endIdx + 3);
    let fm;
    try {
        const parsed = yaml.load(fmRaw, { schema: yaml.JSON_SCHEMA });
        // A YAML sequence is also `typeof 'object'`, but it is not a frontmatter
        // mapping and properties set on it would be lost on dump.
        if (parsed == null || typeof parsed !== 'object' || Array.isArray(parsed))
            return giveUp();
        fm = parsed;
    }
    catch {
        return giveUp();
    }
    if (fm.extractedBy !== 'auto' && fm.extractedBy !== 'manual') {
        fm.extractedBy = 'auto';
        added.push('extractedBy: auto');
    }
    if (fm.evidence == null || typeof fm.evidence !== 'object') {
        fm.evidence = { ...DEFAULT_EVIDENCE };
        added.push('evidence: default');
    }
    if (fm.supersedes === undefined) {
        fm.supersedes = null;
        added.push('supersedes: null');
    }
    // Nothing defaultable was missing: skip the re-serialise/re-diagnose round
    // trip and report the errors of the file as it actually exists on disk.
    if (added.length === 0)
        return giveUp();
    const newFmRaw = yaml.dump(fm, { lineWidth: 120, noRefs: true, sortKeys: false });
    // `yaml.dump` output ends with a newline, so the closing fence lands on its own line.
    const rebuilt = `${leadingWs}---\n${newFmRaw}---${body}`;
    const remaining = diagnoseFromRawContent(rebuilt);
    return {
        changed: true,
        added,
        remaining,
        content: rebuilt,
    };
}
|
|
@@ -60,6 +60,14 @@ export declare const DEFAULT_EVIDENCE: SolutionEvidence;
|
|
|
60
60
|
export declare function slugify(text: string): string;
|
|
61
61
|
/** Runtime type guard for SolutionFrontmatter */
|
|
62
62
|
export declare function validateFrontmatter(fm: unknown): fm is SolutionFrontmatter;
|
|
63
|
+
/**
 * Return a list of validation errors for a parsed frontmatter object.
 *
 * Empty array = valid. Non-empty = each entry describes one missing/wrong
 * field. Callers that only need a boolean should use `validateFrontmatter`.
 * Slow path (quarantine logging) uses this to produce actionable diagnostics.
 */
export declare function diagnoseFrontmatter(fm: unknown): string[];
|
|
63
71
|
/** Parse YAML frontmatter from solution file content */
|
|
64
72
|
export declare function parseFrontmatterOnly(content: string): SolutionFrontmatter | null;
|
|
65
73
|
/** Parse a full V3 solution file into its components */
|
|
@@ -35,43 +35,58 @@ export function slugify(text) {
|
|
|
35
35
|
// ── Validation ──
|
|
36
36
|
/** Runtime type guard for SolutionFrontmatter */
|
|
37
37
|
export function validateFrontmatter(fm) {
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
return diagnoseFrontmatter(fm).length === 0;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Return a list of validation errors for a parsed frontmatter object.
|
|
42
|
+
*
|
|
43
|
+
* Empty array = valid. Non-empty = each entry describes one missing/wrong
|
|
44
|
+
* field. Callers that only need a boolean should use `validateFrontmatter`.
|
|
45
|
+
* Slow path (quarantine logging) uses this to produce actionable diagnostics.
|
|
46
|
+
*/
|
|
47
|
+
export function diagnoseFrontmatter(fm) {
|
|
48
|
+
const errors = [];
|
|
49
|
+
if (fm == null || typeof fm !== 'object') {
|
|
50
|
+
errors.push('frontmatter is not an object');
|
|
51
|
+
return errors;
|
|
52
|
+
}
|
|
40
53
|
const o = fm;
|
|
41
54
|
if (typeof o.name !== 'string')
|
|
42
|
-
|
|
55
|
+
errors.push('name: must be string');
|
|
43
56
|
if (typeof o.version !== 'number' || o.version <= 0)
|
|
44
|
-
|
|
57
|
+
errors.push('version: must be positive number');
|
|
45
58
|
if (typeof o.status !== 'string' || !VALID_STATUSES.includes(o.status))
|
|
46
|
-
|
|
59
|
+
errors.push(`status: must be one of ${VALID_STATUSES.join('|')}`);
|
|
47
60
|
if (typeof o.confidence !== 'number' || o.confidence < 0 || o.confidence > 1)
|
|
48
|
-
|
|
61
|
+
errors.push('confidence: must be number in [0,1]');
|
|
49
62
|
if (typeof o.type !== 'string' || !VALID_TYPES.includes(o.type))
|
|
50
|
-
|
|
63
|
+
errors.push(`type: must be one of ${VALID_TYPES.join('|')}`);
|
|
51
64
|
if (o.scope !== 'me' && o.scope !== 'team' && o.scope !== 'project' && o.scope !== 'universal')
|
|
52
|
-
|
|
65
|
+
errors.push('scope: must be me|team|project|universal');
|
|
53
66
|
if (!Array.isArray(o.tags) || !o.tags.every((t) => typeof t === 'string'))
|
|
54
|
-
|
|
67
|
+
errors.push('tags: must be string[]');
|
|
55
68
|
if (!Array.isArray(o.identifiers) || !o.identifiers.every((t) => typeof t === 'string'))
|
|
56
|
-
|
|
69
|
+
errors.push('identifiers: must be string[]');
|
|
57
70
|
if (typeof o.created !== 'string')
|
|
58
|
-
|
|
71
|
+
errors.push('created: must be string');
|
|
59
72
|
if (typeof o.updated !== 'string')
|
|
60
|
-
|
|
73
|
+
errors.push('updated: must be string');
|
|
61
74
|
if (o.supersedes !== null && typeof o.supersedes !== 'string')
|
|
62
|
-
|
|
75
|
+
errors.push('supersedes: must be string or null');
|
|
63
76
|
if (o.extractedBy !== 'auto' && o.extractedBy !== 'manual')
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
77
|
+
errors.push('extractedBy: missing or not auto|manual');
|
|
78
|
+
if (o.evidence == null || typeof o.evidence !== 'object') {
|
|
79
|
+
errors.push('evidence: block missing');
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
const ev = o.evidence;
|
|
83
|
+
const evFields = ['injected', 'reflected', 'negative', 'sessions', 'reExtracted'];
|
|
84
|
+
for (const f of evFields) {
|
|
85
|
+
if (typeof ev[f] !== 'number')
|
|
86
|
+
errors.push(`evidence.${f}: must be number`);
|
|
87
|
+
}
|
|
73
88
|
}
|
|
74
|
-
return
|
|
89
|
+
return errors;
|
|
75
90
|
}
|
|
76
91
|
// ── Parsing ──
|
|
77
92
|
/** Parse YAML frontmatter from solution file content */
|
|
@@ -5,6 +5,7 @@ import { defaultNormalizer } from './term-normalizer.js';
|
|
|
5
5
|
import { withFileLockSync } from '../hooks/shared/file-lock.js';
|
|
6
6
|
import { atomicWriteText } from '../hooks/shared/atomic-write.js';
|
|
7
7
|
import { createLogger } from '../core/logger.js';
|
|
8
|
+
import { recordQuarantine, diagnoseFromRawContent } from './solution-quarantine.js';
|
|
8
9
|
const log = createLogger('solution-index');
|
|
9
10
|
/**
|
|
10
11
|
* Cache keyed by an order-preserving directory signature.
|
|
@@ -155,6 +156,15 @@ function buildIndex(dirs) {
|
|
|
155
156
|
const fm = parseFrontmatterOnly(content);
|
|
156
157
|
if (!fm) {
|
|
157
158
|
droppedMalformed++;
|
|
159
|
+
// Slow-path diagnosis: re-parse YAML to produce actionable errors,
|
|
160
|
+
// then persist to ~/.forgen/state/solution-quarantine.jsonl so the
|
|
161
|
+
// file is visible to `forgen doctor` instead of silently dead.
|
|
162
|
+
// Best-effort: quarantine writes must never throw.
|
|
163
|
+
try {
|
|
164
|
+
const errors = diagnoseFromRawContent(content);
|
|
165
|
+
recordQuarantine(filePath, errors);
|
|
166
|
+
}
|
|
167
|
+
catch { /* ignore */ }
|
|
158
168
|
log.debug(`dropped (malformed frontmatter): ${filePath}`);
|
|
159
169
|
continue;
|
|
160
170
|
}
|
|
@@ -810,6 +810,29 @@ function loadTunedMatcherWeights() {
|
|
|
810
810
|
_weightsCacheTime = now;
|
|
811
811
|
return undefined;
|
|
812
812
|
}
|
|
813
|
+
/**
|
|
814
|
+
* Cold-start exploration bonus for candidate solutions.
|
|
815
|
+
*
|
|
816
|
+
* Phase 4 evolution: newly proposed solutions enter at `status: candidate`.
|
|
817
|
+
* Without a nudge they compete head-to-head with mature verified/champion
|
|
818
|
+
* entries and almost always lose the first few rounds — not because
|
|
819
|
+
* they're worse, but because matchers favor solutions with richer tag
|
|
820
|
+
* histories. A small confidence multiplier lets candidates surface often
|
|
821
|
+
* enough to accumulate outcome data, after which the fitness loop
|
|
822
|
+
* decides their fate.
|
|
823
|
+
*
|
|
824
|
+
* The 1.3× factor is a starting point (Q1 in docs/design-solution-evolution.md).
|
|
825
|
+
* Automatic deactivation after 5 accumulated injections is handled by a
|
|
826
|
+
* separate promoter that flips `status` to `verified`.
|
|
827
|
+
*/
|
|
828
|
+
const CANDIDATE_EXPLORATION_MULTIPLIER = 1.3;
|
|
829
|
+
function applyCandidateExplorationBonus(entries) {
|
|
830
|
+
return entries.map((e) => {
|
|
831
|
+
if (e.status !== 'candidate')
|
|
832
|
+
return e;
|
|
833
|
+
return { ...e, confidence: Math.min(1, e.confidence * CANDIDATE_EXPLORATION_MULTIPLIER) };
|
|
834
|
+
});
|
|
835
|
+
}
|
|
813
836
|
export function matchSolutions(prompt, scope, cwd) {
|
|
814
837
|
// Build solution dirs for index cache
|
|
815
838
|
const dirs = [{ dir: ME_SOLUTIONS, scope: 'me' }];
|
|
@@ -819,7 +842,7 @@ export function matchSolutions(prompt, scope, cwd) {
|
|
|
819
842
|
dirs.push({ dir: path.join(cwd, '.compound', 'solutions'), scope: 'project' });
|
|
820
843
|
// Use cached index (rebuilt only when dirs change)
|
|
821
844
|
const index = getOrBuildIndex(dirs);
|
|
822
|
-
const allSolutions = index.entries.map((e) => ({ ...e }));
|
|
845
|
+
const allSolutions = applyCandidateExplorationBonus(index.entries.map((e) => ({ ...e })));
|
|
823
846
|
const promptTags = extractTags(prompt);
|
|
824
847
|
const promptLower = prompt.toLowerCase();
|
|
825
848
|
// Meta-learning: load tuned weights if available
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
 * How an injection resolved: `accept` (silence = consent), `correct`
 * (explicit user correction), `error` (tool failed while the solution was
 * pending) or `unknown` (session ended before a decision).
 */
export type Outcome = 'accept' | 'correct' | 'error' | 'unknown';
/** Label describing how an outcome was attributed to the injection. */
export type Attribution = 'explicit' | 'window' | 'session_end' | 'default';
/**
 * One inject → outcome event. Written append-only to
 * ~/.forgen/state/outcomes/{session_id}.jsonl. The pending state (inject
 * happened, outcome not yet decided) is stored separately in
 * ~/.forgen/state/outcome-pending-{session_id}.json.
 */
export interface OutcomeEvent {
    /** Event timestamp; presumably epoch milliseconds (fitness compares it with `Date.now()`). */
    ts: number;
    session_id: string;
    /** Name of the injected solution. */
    solution: string;
    match_score: number;
    injected_chars: number;
    outcome: Outcome;
    outcome_lag_ms: number;
    attribution: Attribution;
}
|
|
19
|
+
/**
 * Record that solutions were injected. Called from solution-injector right
 * after `approveWithContext` is emitted. Fails silently — outcome tracking
 * must never block the user's workflow.
 */
export declare function appendPending(sessionId: string, injections: Array<{
    solution: string;
    match_score: number;
    injected_chars: number;
}>): void;
/**
 * Flush pending injections as `accept` events. Called when a new user
 * prompt arrives without any intervening correction/error, signaling that
 * the previous injections were silently accepted. "Silence = consent."
 *
 * If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
 * because an earlier step already attributed them as `correct` or `error`).
 */
export declare function flushAccept(sessionId: string, excludeSolutions?: Set<string>): number;
/**
 * Attribute a correction to the most recent pending injection(s). Called
 * from the correction-record MCP tool. Removes attributed entries from
 * pending so subsequent `flushAccept` does not double-count them.
 *
 * Strategy: all currently-pending solutions in this session are marked as
 * `correct`. This is conservative (the correction may target only one of
 * them), but without semantic attribution we err on the side of the user's
 * feedback signal being louder than acceptance.
 */
export declare function attributeCorrection(sessionId: string): string[];
/**
 * Attribute a tool error to pending solutions in this session. Called from
 * post-tool-failure hook. Unlike corrections, errors do not clear pending
 * — an error is a weaker signal and the next user prompt can still produce
 * a correct/accept decision.
 *
 * To avoid flooding the log with duplicate errors for the same pending
 * batch, we cap at one `error` event per (session, solution) pair per
 * pending-cycle by tracking an `error_flagged` set in the pending state.
 */
export declare function attributeError(sessionId: string): string[];
/**
 * At session end, any still-pending entries are logged as `unknown` (we
 * can't tell if the user was happy or just stopped). Pending file is
 * removed.
 */
export declare function finalizeSession(sessionId: string): number;
/**
 * Read all outcome events across all sessions. Used by fitness
 * calculation. Returns events sorted by timestamp ascending.
 */
export declare function readAllOutcomes(): OutcomeEvent[];
|