@wooojin/forgen 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ import { type OutcomeEvent } from './solution-outcomes.js';
2
+ export type FitnessState = 'draft' | 'active' | 'champion' | 'underperform'; // lifecycle bucket that computeFitness assigns to each record
3
+ export interface FitnessRecord {
4
+ solution: string; // solution key, taken from OutcomeEvent.solution
5
+ injected: number; // total events counted for the solution: accepted + corrected + errored + unknown
6
+ accepted: number; // events whose outcome is 'accept'
7
+ corrected: number; // events whose outcome is 'correct'
8
+ errored: number; // events whose outcome is 'error'
9
+ unknown: number; // all remaining events; counted in `injected` but left out of the acceptance ratio
10
+ /** Smoothed acceptance ratio × confidence: (accepted + 1) / (accepted + corrected + errored + 1) × ln(1 + injected). */
11
+ fitness: number;
12
+ state: FitnessState; // bucket chosen from the injection count and the population's max/median fitness
13
+ /** Milliseconds from the moment of computation back to the solution's newest event timestamp. Infinity when no event has a timestamp above 0. */
14
+ last_injected_ago_ms: number;
15
+ }
16
+ export interface FitnessOptions {
17
+ /**
18
+ * Minimum injections required before a solution is evaluated against the
19
+ * underperform threshold. Below this, state stays at `draft` and the solution is left out of the max/median statistics. Default 5.
20
+ */
21
+ minEvalInjections?: number;
22
+ /**
23
+ * Injections required to qualify as champion (in addition to fitness cut). Default 10.
24
+ */
25
+ minChampionInjections?: number;
26
+ /**
27
+ * Champion cut: fitness must be at least this fraction of the max fitness among
28
+ * the solutions that reached `minEvalInjections`. Default 0.7, i.e. within 30% of the best score.
29
+ */
30
+ championFraction?: number;
31
+ /**
32
+ * Underperform cut: fitness must fall below this fraction of the median fitness among the solutions that reached `minEvalInjections`. Default 0.3.
33
+ */
34
+ underperformFraction?: number;
35
+ /** Pre-loaded events (for tests). Defaults to `readAllOutcomes()`. */
36
+ events?: OutcomeEvent[];
37
+ }
38
+ /**
39
+ * Compute fitness scores for every solution with at least one recorded
40
+ * outcome event.
41
+ *
42
+ * Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)` (natural log)
43
+ * - `accept` = positive (silence = consent)
44
+ * - `correct` = negative (explicit user correction within window)
45
+ * - `error` = weak negative (tool failed while solution was pending); note the formula weights it the same as `correct`
46
+ * - `unknown` = ignored in the ratio (session ended mid-pending; we can't tell) but still counted in `injected`
47
+ *
48
+ * Epsilon smoothing (+1) means a cold solution with 1 injection and 1
49
+ * accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
50
+ * `∞`. Log confidence penalizes small-sample champions. Records come back sorted champion → active → underperform → draft, then by fitness descending.
51
+ */
52
+ export declare function computeFitness(opts?: FitnessOptions): FitnessRecord[];
@@ -0,0 +1,95 @@
1
+ import { readAllOutcomes } from './solution-outcomes.js';
2
/** Built-in thresholds used for every option the caller leaves unset. */
const DEFAULT_OPTS = {
    minEvalInjections: 5,
    minChampionInjections: 10,
    championFraction: 0.7,
    underperformFraction: 0.3,
};
/**
 * Resolve the effective thresholds for one `computeFitness` call.
 *
 * Each key is resolved with `??` instead of an object spread: a spread lets an
 * option passed explicitly as `undefined` overwrite its default, after which
 * every threshold comparison is made against `undefined` and all solutions
 * end up classified as `active`.
 */
function resolveFitnessConfig(opts) {
    return {
        minEvalInjections: opts.minEvalInjections ?? DEFAULT_OPTS.minEvalInjections,
        minChampionInjections: opts.minChampionInjections ?? DEFAULT_OPTS.minChampionInjections,
        championFraction: opts.championFraction ?? DEFAULT_OPTS.championFraction,
        underperformFraction: opts.underperformFraction ?? DEFAULT_OPTS.underperformFraction,
    };
}
/**
 * Compute fitness scores for every solution with at least one recorded
 * outcome event.
 *
 * Formula: `fitness = (accept + 1) / (accept + correct + error + 1) × log(1 + injected)`
 * - `accept`  = positive (silence = consent)
 * - `correct` = negative (explicit user correction within window)
 * - `error`   = weak negative (tool failed while solution was pending);
 *               the formula currently weights it the same as `correct`
 * - `unknown` = ignored in the ratio (session ended mid-pending; we can't
 *               tell) but still counted in `injected`
 *
 * Epsilon smoothing (+1) means a cold solution with 1 injection and 1
 * accept produces `2/2 × log(2) ≈ 0.69`, not a meaningless `1.0 × 0` or
 * `∞`. Log confidence penalizes small-sample champions.
 *
 * @param opts Thresholds and optional pre-loaded events. Unset, `undefined`
 *   or `null` thresholds fall back to `DEFAULT_OPTS`; without `events` the
 *   log is read through `readAllOutcomes()`.
 * @returns One record per solution, sorted champion → active → underperform
 *   → draft and, within a state, by fitness descending.
 */
export function computeFitness(opts = {}) {
    const options = opts ?? {};
    const config = resolveFitnessConfig(options);
    const events = options.events ?? readAllOutcomes();
    const now = Date.now();
    const byName = new Map();
    for (const ev of events) {
        let bucket = byName.get(ev.solution);
        if (bucket === undefined) {
            bucket = { accept: 0, correct: 0, error: 0, unknown: 0, last_inject_ts: 0 };
            byName.set(ev.solution, bucket);
        }
        if (ev.outcome === 'accept')
            bucket.accept++;
        else if (ev.outcome === 'correct')
            bucket.correct++;
        else if (ev.outcome === 'error')
            bucket.error++;
        else
            bucket.unknown++; // includes any outcome value this function does not recognise
        // Every event is a proxy for an injection (each outcome represents one
        // inject that resolved). `last_inject_ts` tracks the most recent event
        // timestamp which is also the latest decision time.
        if (ev.ts > bucket.last_inject_ts)
            bucket.last_inject_ts = ev.ts;
    }
    // First pass: raw fitness, every record starts as `draft`.
    const records = [];
    for (const [solution, b] of byName) {
        const injected = b.accept + b.correct + b.error + b.unknown;
        const decided = b.accept + b.correct + b.error; // unknown excluded from ratio
        const ratio = (b.accept + 1) / (decided + 1);
        const confidence = Math.log(1 + injected);
        records.push({
            solution,
            injected,
            accepted: b.accept,
            corrected: b.correct,
            errored: b.error,
            unknown: b.unknown,
            fitness: ratio * confidence,
            state: 'draft',
            last_injected_ago_ms: b.last_inject_ts === 0 ? Infinity : now - b.last_inject_ts,
        });
    }
    // Population stats for state classification (only solutions past the
    // eval threshold contribute — draft solutions distort max/median).
    const evalPool = records
        .filter((r) => r.injected >= config.minEvalInjections)
        .map((r) => r.fitness);
    // Folded instead of `Math.max(...evalPool)` so a very large pool cannot hit
    // the engine's argument-count limit. Fitness is always > 0 for a recorded
    // solution, so starting from 0 matches the "empty pool → 0" convention.
    const maxFit = evalPool.reduce((best, f) => (f > best ? f : best), 0);
    const medianFit = median(evalPool);
    for (const r of records) {
        r.state = classifyState(r, { maxFit, medianFit, config });
    }
    // Sort: champions first, then active by fitness desc, then underperform,
    // then draft (cold solutions) at the bottom.
    const order = { champion: 0, active: 1, underperform: 2, draft: 3 };
    records.sort((a, b) => order[a.state] - order[b.state] || b.fitness - a.fitness);
    return records;
}
/**
 * Pick the state for one record.
 *
 * Checks run in priority order: too few injections → `draft`; enough
 * injections and fitness at least `championFraction × maxFit` → `champion`;
 * fitness below `underperformFraction × medianFit` → `underperform`;
 * otherwise `active`.
 */
function classifyState(r, ctx) {
    const { config, maxFit, medianFit } = ctx;
    if (r.injected < config.minEvalInjections)
        return 'draft';
    const enoughForChampion = r.injected >= config.minChampionInjections;
    if (enoughForChampion && r.fitness >= config.championFraction * maxFit)
        return 'champion';
    if (r.fitness < config.underperformFraction * medianFit)
        return 'underperform';
    return 'active';
}
/** Median of a numeric list (mean of the two middle values for even length); 0 for an empty list. The input is not mutated. */
function median(values) {
    if (values.length === 0)
        return 0;
    const sorted = [...values].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
}
@@ -0,0 +1,30 @@
1
+ export interface FixupReport {
2
+ path: string; // solutionsDir joined with the file name of the inspected .md file
3
+ changed: boolean; // true when at least one default was added (also reported in dry-run, where nothing is written)
4
+ added: string[]; // labels of the defaults applied, e.g. 'extractedBy: auto'
5
+ remaining_errors: string[]; // diagnostics still present after the attempted fix
6
+ }
7
+ export interface FixupResult {
8
+ scanned: number; // number of *.md files visited in the directory
9
+ fixed: number; // files whose defects were all repaired (in dry-run: would be repaired)
10
+ untouched: number; // files that had no diagnostics to begin with
11
+ unfixable: number; // files that could not be read, still had errors after the fix attempt, or failed to write
12
+ reports: FixupReport[]; // one entry per file that was read and had diagnostics
13
+ }
14
+ /**
15
+ * Attempt to repair known-safe frontmatter defects in the `*.md` files directly inside `solutionsDir`.
16
+ *
17
+ * Handled defects (pre-0.3.1 schema drift, observed on 5 auto-extracted
18
+ * solutions from 2026-04-10):
19
+ * - `extractedBy` missing → add `extractedBy: auto`
20
+ * - `evidence` block missing → add `DEFAULT_EVIDENCE` (the implementation also adds `supersedes: null` when that key is absent)
21
+ *
22
+ * All other validation errors (bad scope, non-numeric confidence, etc.)
23
+ * are surfaced in `remaining_errors` and the file is left untouched —
24
+ * those require human judgement, not a mechanical default.
25
+ *
26
+ * `dryRun: true` (default) reports what would change without writing. A `solutionsDir` that does not exist yields an all-zero result.
27
+ */
28
+ export declare function fixupSolutions(solutionsDir: string, opts?: {
29
+ dryRun?: boolean;
30
+ }): FixupResult;
@@ -0,0 +1,116 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import yaml from 'js-yaml';
4
+ import { DEFAULT_EVIDENCE } from './solution-format.js';
5
+ import { diagnoseFromRawContent } from './solution-quarantine.js';
6
+ import { createLogger } from '../core/logger.js';
7
+ const log = createLogger('solution-fixup');
8
/**
 * Attempt to repair known-safe frontmatter defects in the `*.md` files
 * directly inside `solutionsDir`.
 *
 * Handled defects (pre-0.3.1 schema drift, observed on 5 auto-extracted
 * solutions from 2026-04-10):
 *   - `extractedBy` missing → add `extractedBy: auto`
 *   - `evidence` block missing → add `DEFAULT_EVIDENCE`
 *
 * All other validation errors (bad scope, non-numeric confidence, etc.)
 * are surfaced in `remaining_errors` and the file is left untouched —
 * those require human judgement, not a mechanical default.
 *
 * `dryRun: true` (default) reports what would change without writing.
 *
 * @param solutionsDir Directory to scan. If it is missing, is not a directory
 *   or cannot be listed, an all-zero result is returned instead of throwing.
 * @param opts `dryRun: false` enables writing; any other value keeps dry-run.
 * @returns Counters plus one report per file that had diagnostics or could
 *   not be read. Read and write failures are counted as `unfixable` and
 *   explained in the report's `remaining_errors`.
 */
export function fixupSolutions(solutionsDir, opts = {}) {
    const dryRun = opts?.dryRun !== false;
    const result = { scanned: 0, fixed: 0, untouched: 0, unfixable: 0, reports: [] };
    // List directly instead of existsSync + readdirSync: one call covers
    // "missing", "not a directory" and "unreadable" without a check/use race.
    let entries;
    try {
        entries = fs.readdirSync(solutionsDir);
    }
    catch {
        return result;
    }
    for (const file of entries) {
        if (!file.endsWith('.md'))
            continue;
        const filePath = path.join(solutionsDir, file);
        result.scanned++;
        let content;
        try {
            content = fs.readFileSync(filePath, 'utf-8');
        }
        catch (e) {
            // Keep the failure visible: an unfixable count with no report
            // gives the caller nothing to act on.
            result.unfixable++;
            result.reports.push({
                path: filePath,
                changed: false,
                added: [],
                remaining_errors: [`read failed: ${e instanceof Error ? e.message : String(e)}`],
            });
            continue;
        }
        const errors = diagnoseFromRawContent(content);
        if (errors.length === 0) {
            result.untouched++;
            continue;
        }
        const fix = tryFix(content, errors);
        const report = {
            path: filePath,
            changed: fix.changed,
            added: fix.added,
            remaining_errors: [...fix.remaining],
        };
        result.reports.push(report);
        // Only write when every diagnostic is gone; a partial fix is never persisted.
        if (!fix.changed || fix.remaining.length > 0) {
            result.unfixable++;
            continue;
        }
        if (!dryRun) {
            try {
                fs.writeFileSync(filePath, fix.content);
                log.debug(`fixed: ${filePath} (${fix.added.join(', ')})`);
            }
            catch (e) {
                const reason = e instanceof Error ? e.message : String(e);
                log.debug(`write failed: ${filePath}: ${reason}`);
                report.remaining_errors.push(`write failed: ${reason}`);
                result.unfixable++;
                continue;
            }
        }
        result.fixed++;
    }
    return result;
}
71
/**
 * Locate the closing `---` of a frontmatter block that opens at index 0.
 *
 * Only a `---` that starts a line counts. A bare `indexOf('---', 3)` also
 * matches dashes inside a value (`title: a --- b`), which would cut the
 * frontmatter short and push the rest of it into the body.
 *
 * @returns Index of the closing delimiter, or -1 when there is none.
 */
function findClosingDelimiter(text) {
    let from = 3;
    for (;;) {
        const idx = text.indexOf('---', from);
        if (idx === -1)
            return -1;
        const prev = text[idx - 1];
        if (prev === '\n' || prev === '\r')
            return idx;
        from = idx + 1;
    }
}
/**
 * Apply the mechanical frontmatter defaults to one file's raw content.
 *
 * Only keys that are missing are filled in: `extractedBy` → `'auto'`,
 * `evidence` → a copy of `DEFAULT_EVIDENCE`, `supersedes` → `null`. A key
 * that is present with an invalid value is left alone so it shows up in
 * `remaining` — choosing a replacement for a wrong value needs human
 * judgement, not a default.
 *
 * @param content Raw file text (leading whitespace is preserved on rebuild).
 * @param initialErrors Diagnostics already computed for `content`; returned
 *   as `remaining` whenever nothing could be changed.
 * @returns `{ changed, added, remaining, content }` — `content` is the
 *   rebuilt text when something was added, otherwise the input unchanged.
 */
function tryFix(content, initialErrors) {
    const added = [];
    const unchanged = { changed: false, added, remaining: initialErrors, content };
    const trimmed = content.trimStart();
    if (!trimmed.startsWith('---'))
        return unchanged;
    const endIdx = findClosingDelimiter(trimmed);
    if (endIdx === -1)
        return unchanged;
    const leadingWs = content.slice(0, content.length - trimmed.length);
    const fmRaw = trimmed.slice(3, endIdx);
    const body = trimmed.slice(endIdx + 3);
    let fm;
    try {
        // JSON_SCHEMA restricts parsing to plain JSON-compatible types.
        const parsed = yaml.load(fmRaw, { schema: yaml.JSON_SCHEMA });
        // A YAML sequence is an object too, but cannot carry frontmatter keys.
        if (parsed == null || typeof parsed !== 'object' || Array.isArray(parsed))
            return unchanged;
        fm = parsed;
    }
    catch {
        return unchanged;
    }
    if (fm.extractedBy == null) {
        fm.extractedBy = 'auto';
        added.push('extractedBy: auto');
    }
    if (fm.evidence == null) {
        fm.evidence = { ...DEFAULT_EVIDENCE };
        added.push('evidence: default');
    }
    if (fm.supersedes === undefined) {
        fm.supersedes = null;
        added.push('supersedes: null');
    }
    // Nothing mechanical to add: report the original diagnostics rather than
    // ones derived from a re-serialised copy that will never be written.
    if (added.length === 0)
        return unchanged;
    const newFmRaw = yaml.dump(fm, { lineWidth: 120, noRefs: true, sortKeys: false });
    const rebuilt = `${leadingWs}---\n${newFmRaw}---${body}`;
    return {
        changed: true,
        added,
        remaining: diagnoseFromRawContent(rebuilt),
        content: rebuilt,
    };
}
@@ -60,6 +60,14 @@ export declare const DEFAULT_EVIDENCE: SolutionEvidence;
60
60
  export declare function slugify(text: string): string;
61
61
  /** Runtime type guard for SolutionFrontmatter */
62
62
  export declare function validateFrontmatter(fm: unknown): fm is SolutionFrontmatter;
63
+ /**
64
+ * Return a list of validation errors for a parsed frontmatter object.
65
+ *
66
+ * Empty array = valid. Non-empty = each entry describes one missing/wrong
67
+ * field. Callers that only need a boolean should use `validateFrontmatter`.
68
+ * Slow path (quarantine logging) uses this to produce actionable diagnostics. Every field is checked, so several errors can be returned at once; a non-object input yields the single entry 'frontmatter is not an object'.
69
+ */
70
+ export declare function diagnoseFrontmatter(fm: unknown): string[];
63
71
  /** Parse YAML frontmatter from solution file content */
64
72
  export declare function parseFrontmatterOnly(content: string): SolutionFrontmatter | null;
65
73
  /** Parse a full V3 solution file into its components */
@@ -35,43 +35,58 @@ export function slugify(text) {
35
35
  // ── Validation ──
36
36
  /** Runtime type guard for SolutionFrontmatter */
37
37
  export function validateFrontmatter(fm) {
38
- if (fm == null || typeof fm !== 'object')
39
- return false;
38
+ return diagnoseFrontmatter(fm).length === 0;
39
+ }
40
+ /**
41
+ * Return a list of validation errors for a parsed frontmatter object.
42
+ *
43
+ * Empty array = valid. Non-empty = each entry describes one missing/wrong
44
+ * field. Callers that only need a boolean should use `validateFrontmatter`.
45
+ * Slow path (quarantine logging) uses this to produce actionable diagnostics.
46
+ */
47
+ export function diagnoseFrontmatter(fm) {
48
+ const errors = [];
49
+ if (fm == null || typeof fm !== 'object') {
50
+ errors.push('frontmatter is not an object');
51
+ return errors;
52
+ }
40
53
  const o = fm;
41
54
  if (typeof o.name !== 'string')
42
- return false;
55
+ errors.push('name: must be string');
43
56
  if (typeof o.version !== 'number' || o.version <= 0)
44
- return false;
57
+ errors.push('version: must be positive number');
45
58
  if (typeof o.status !== 'string' || !VALID_STATUSES.includes(o.status))
46
- return false;
59
+ errors.push(`status: must be one of ${VALID_STATUSES.join('|')}`);
47
60
  if (typeof o.confidence !== 'number' || o.confidence < 0 || o.confidence > 1)
48
- return false;
61
+ errors.push('confidence: must be number in [0,1]');
49
62
  if (typeof o.type !== 'string' || !VALID_TYPES.includes(o.type))
50
- return false;
63
+ errors.push(`type: must be one of ${VALID_TYPES.join('|')}`);
51
64
  if (o.scope !== 'me' && o.scope !== 'team' && o.scope !== 'project' && o.scope !== 'universal')
52
- return false;
65
+ errors.push('scope: must be me|team|project|universal');
53
66
  if (!Array.isArray(o.tags) || !o.tags.every((t) => typeof t === 'string'))
54
- return false;
67
+ errors.push('tags: must be string[]');
55
68
  if (!Array.isArray(o.identifiers) || !o.identifiers.every((t) => typeof t === 'string'))
56
- return false;
69
+ errors.push('identifiers: must be string[]');
57
70
  if (typeof o.created !== 'string')
58
- return false;
71
+ errors.push('created: must be string');
59
72
  if (typeof o.updated !== 'string')
60
- return false;
73
+ errors.push('updated: must be string');
61
74
  if (o.supersedes !== null && typeof o.supersedes !== 'string')
62
- return false;
75
+ errors.push('supersedes: must be string or null');
63
76
  if (o.extractedBy !== 'auto' && o.extractedBy !== 'manual')
64
- return false;
65
- // evidence
66
- if (o.evidence == null || typeof o.evidence !== 'object')
67
- return false;
68
- const ev = o.evidence;
69
- const evFields = ['injected', 'reflected', 'negative', 'sessions', 'reExtracted'];
70
- for (const f of evFields) {
71
- if (typeof ev[f] !== 'number')
72
- return false;
77
+ errors.push('extractedBy: missing or not auto|manual');
78
+ if (o.evidence == null || typeof o.evidence !== 'object') {
79
+ errors.push('evidence: block missing');
80
+ }
81
+ else {
82
+ const ev = o.evidence;
83
+ const evFields = ['injected', 'reflected', 'negative', 'sessions', 'reExtracted'];
84
+ for (const f of evFields) {
85
+ if (typeof ev[f] !== 'number')
86
+ errors.push(`evidence.${f}: must be number`);
87
+ }
73
88
  }
74
- return true;
89
+ return errors;
75
90
  }
76
91
  // ── Parsing ──
77
92
  /** Parse YAML frontmatter from solution file content */
@@ -5,6 +5,7 @@ import { defaultNormalizer } from './term-normalizer.js';
5
5
  import { withFileLockSync } from '../hooks/shared/file-lock.js';
6
6
  import { atomicWriteText } from '../hooks/shared/atomic-write.js';
7
7
  import { createLogger } from '../core/logger.js';
8
+ import { recordQuarantine, diagnoseFromRawContent } from './solution-quarantine.js';
8
9
  const log = createLogger('solution-index');
9
10
  /**
10
11
  * Cache keyed by an order-preserving directory signature.
@@ -155,6 +156,15 @@ function buildIndex(dirs) {
155
156
  const fm = parseFrontmatterOnly(content);
156
157
  if (!fm) {
157
158
  droppedMalformed++;
159
+ // Slow-path diagnosis: re-parse YAML to produce actionable errors,
160
+ // then persist to ~/.forgen/state/solution-quarantine.jsonl so the
161
+ // file is visible to `forgen doctor` instead of silently dead.
162
+ // Best-effort: quarantine writes must never throw.
163
+ try {
164
+ const errors = diagnoseFromRawContent(content);
165
+ recordQuarantine(filePath, errors);
166
+ }
167
+ catch { /* ignore */ }
158
168
  log.debug(`dropped (malformed frontmatter): ${filePath}`);
159
169
  continue;
160
170
  }
@@ -810,6 +810,29 @@ function loadTunedMatcherWeights() {
810
810
  _weightsCacheTime = now;
811
811
  return undefined;
812
812
  }
813
/**
 * Cold-start exploration bonus for candidate solutions.
 *
 * Phase 4 evolution: newly proposed solutions enter at `status: candidate`.
 * Without a nudge they compete head-to-head with mature verified/champion
 * entries and almost always lose the first few rounds — not because
 * they're worse, but because matchers favor solutions with richer tag
 * histories. A small confidence multiplier lets candidates surface often
 * enough to accumulate outcome data, after which the fitness loop
 * decides their fate.
 *
 * The 1.3× factor is a starting point (Q1 in docs/design-solution-evolution.md).
 * Automatic deactivation after 5 accumulated injections is handled by a
 * separate promoter that flips `status` to `verified`.
 */
const CANDIDATE_EXPLORATION_MULTIPLIER = 1.3;
/**
 * Return a list in which every `candidate` entry has its confidence scaled
 * by `multiplier` and capped at 1.
 *
 * Entries with any other status are passed through as the same object;
 * boosted entries are shallow copies, so the input entries are not mutated.
 *
 * @param entries Index entries carrying `status` and `confidence`.
 * @param multiplier Boost factor; defaults to `CANDIDATE_EXPLORATION_MULTIPLIER`
 *   so existing callers are unaffected, while tuning experiments can pass
 *   another value.
 */
function applyCandidateExplorationBonus(entries, multiplier = CANDIDATE_EXPLORATION_MULTIPLIER) {
    return entries.map((entry) => entry.status === 'candidate'
        ? { ...entry, confidence: Math.min(1, entry.confidence * multiplier) }
        : entry);
}
813
836
  export function matchSolutions(prompt, scope, cwd) {
814
837
  // Build solution dirs for index cache
815
838
  const dirs = [{ dir: ME_SOLUTIONS, scope: 'me' }];
@@ -819,7 +842,7 @@ export function matchSolutions(prompt, scope, cwd) {
819
842
  dirs.push({ dir: path.join(cwd, '.compound', 'solutions'), scope: 'project' });
820
843
  // Use cached index (rebuilt only when dirs change)
821
844
  const index = getOrBuildIndex(dirs);
822
- const allSolutions = index.entries.map((e) => ({ ...e }));
845
+ const allSolutions = applyCandidateExplorationBonus(index.entries.map((e) => ({ ...e })));
823
846
  const promptTags = extractTags(prompt);
824
847
  const promptLower = prompt.toLowerCase();
825
848
  // Meta-learning: load tuned weights if available
@@ -0,0 +1,70 @@
1
+ export type Outcome = 'accept' | 'correct' | 'error' | 'unknown'; // how one injection resolved; computeFitness tallies accept/correct/error and counts anything else as unknown
2
+ export type Attribution = 'explicit' | 'window' | 'session_end' | 'default'; // NOTE(review): presumably the mechanism that decided the outcome — confirm against the implementation
3
+ /**
4
+ * One inject → outcome event. Written append-only to
5
+ * ~/.forgen/state/outcomes/{session_id}.jsonl. The pending state (inject
6
+ * happened, outcome not yet decided) is stored separately in
7
+ * ~/.forgen/state/outcome-pending-{session_id}.json.
8
+ */
9
+ export interface OutcomeEvent {
10
+ ts: number; // event timestamp; computeFitness subtracts it from Date.now(), i.e. it is treated as epoch milliseconds
11
+ session_id: string; // NOTE(review): presumably the session whose outcomes file holds this event — confirm
12
+ solution: string; // solution key; computeFitness groups events by this value
13
+ match_score: number; // supplied per injection through appendPending
14
+ injected_chars: number; // supplied per injection through appendPending
15
+ outcome: Outcome;
16
+ outcome_lag_ms: number; // NOTE(review): presumably milliseconds between the inject and the outcome decision — confirm
17
+ attribution: Attribution;
18
+ }
19
+ /**
20
+ * Record that solutions were injected. Called from solution-injector right
21
+ * after `approveWithContext` is emitted. Fails silently — outcome tracking
22
+ * must never block the user's workflow. Takes one `injections` item per injected solution and returns nothing.
23
+ */
24
+ export declare function appendPending(sessionId: string, injections: Array<{
25
+ solution: string;
26
+ match_score: number;
27
+ injected_chars: number;
28
+ }>): void;
29
+ /**
30
+ * Flush pending injections as `accept` events. Called when a new user
31
+ * prompt arrives without any intervening correction/error, signaling that
32
+ * the previous injections were silently accepted. "Silence = consent."
33
+ *
34
+ * If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
35
+ * because an earlier step already attributed them as `correct` or `error`). NOTE(review): the numeric return value is presumably the count of events flushed — confirm.
36
+ */
37
+ export declare function flushAccept(sessionId: string, excludeSolutions?: Set<string>): number;
38
+ /**
39
+ * Attribute a correction to the most recent pending injection(s). Called
40
+ * from the correction-record MCP tool. Removes attributed entries from
41
+ * pending so subsequent `flushAccept` does not double-count them.
42
+ *
43
+ * Strategy: all currently-pending solutions in this session are marked as
44
+ * `correct`. This is conservative (the correction may target only one of
45
+ * them), but without semantic attribution we err on the side of the user's
46
+ * feedback signal being louder than acceptance. NOTE(review): the returned string[] is presumably the names of the solutions marked — confirm.
47
+ */
48
+ export declare function attributeCorrection(sessionId: string): string[];
49
+ /**
50
+ * Attribute a tool error to pending solutions in this session. Called from
51
+ * post-tool-failure hook. Unlike corrections, errors do not clear pending
52
+ * — an error is a weaker signal and the next user prompt can still produce
53
+ * a correct/accept decision.
54
+ *
55
+ * To avoid flooding the log with duplicate errors for the same pending
56
+ * batch, we cap at one `error` event per (session, solution) pair per
57
+ * pending-cycle by tracking an `error_flagged` set in the pending state. NOTE(review): the returned string[] is presumably the names of the solutions flagged — confirm.
58
+ */
59
+ export declare function attributeError(sessionId: string): string[];
60
+ /**
61
+ * At session end, any still-pending entries are logged as `unknown` (we
62
+ * can't tell if the user was happy or just stopped). Pending file is
63
+ * removed. NOTE(review): the numeric return value is presumably the count of entries finalized — confirm.
64
+ */
65
+ export declare function finalizeSession(sessionId: string): number;
66
+ /**
67
+ * Read all outcome events across all sessions. Used by fitness
68
+ * calculation (computeFitness calls it when no `events` option is supplied). Returns events sorted by timestamp ascending.
69
+ */
70
+ export declare function readAllOutcomes(): OutcomeEvent[];