npm - @wooojin/forgen - Versions diffs - 0.2.1 → 0.3.1 - Mend

@wooojin/forgen 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

package/CHANGELOG.md +76 -0
package/README.ko.md +25 -14
package/README.md +61 -17
package/agents/analyst.md +48 -4
package/agents/architect.md +39 -4
package/agents/code-reviewer.md +107 -77
package/agents/critic.md +47 -4
package/agents/debugger.md +46 -4
package/agents/designer.md +40 -4
package/agents/executor.md +112 -30
package/agents/explore.md +45 -5
package/agents/git-master.md +48 -4
package/agents/planner.md +121 -18
package/agents/solution-evolver.md +115 -0
package/agents/test-engineer.md +58 -4
package/agents/verifier.md +92 -77
package/commands/architecture-decision.md +127 -258
package/commands/calibrate.md +225 -0
package/commands/code-review.md +163 -178
package/commands/compound.md +127 -68
package/commands/deep-interview.md +212 -110
package/commands/docker.md +68 -178
package/commands/forge-loop.md +215 -0
package/commands/learn.md +231 -0
package/commands/retro.md +215 -0
package/commands/ship.md +277 -0
package/dist/cli.js +25 -9
package/dist/core/auto-compound-runner.js +14 -0
package/dist/core/config-injector.d.ts +2 -1
package/dist/core/config-injector.js +2 -1
package/dist/core/dashboard.d.ts +17 -0
package/dist/core/dashboard.js +158 -2
package/dist/core/harness.d.ts +6 -1
package/dist/core/harness.js +75 -19
package/dist/core/paths.d.ts +31 -1
package/dist/core/paths.js +43 -2
package/dist/core/spawn.d.ts +3 -2
package/dist/core/spawn.js +27 -8
package/dist/core/types.d.ts +34 -0
package/dist/engine/compound-lifecycle.d.ts +4 -3
package/dist/engine/compound-lifecycle.js +91 -46
package/dist/engine/learn-cli.d.ts +1 -0
package/dist/engine/learn-cli.js +182 -0
package/dist/engine/meta-learning/adaptive-thresholds.d.ts +20 -0
package/dist/engine/meta-learning/adaptive-thresholds.js +126 -0
package/dist/engine/meta-learning/extraction-tuner.d.ts +15 -0
package/dist/engine/meta-learning/extraction-tuner.js +99 -0
package/dist/engine/meta-learning/matcher-weight-tuner.d.ts +21 -0
package/dist/engine/meta-learning/matcher-weight-tuner.js +151 -0
package/dist/engine/meta-learning/runner.d.ts +14 -0
package/dist/engine/meta-learning/runner.js +90 -0
package/dist/engine/meta-learning/scope-promoter.d.ts +21 -0
package/dist/engine/meta-learning/scope-promoter.js +84 -0
package/dist/engine/meta-learning/session-quality-scorer.d.ts +61 -0
package/dist/engine/meta-learning/session-quality-scorer.js +166 -0
package/dist/engine/meta-learning/types.d.ts +114 -0
package/dist/engine/meta-learning/types.js +43 -0
package/dist/engine/solution-candidate.d.ts +30 -0
package/dist/engine/solution-candidate.js +124 -0
package/dist/engine/solution-fitness.d.ts +52 -0
package/dist/engine/solution-fitness.js +95 -0
package/dist/engine/solution-fixup.d.ts +30 -0
package/dist/engine/solution-fixup.js +116 -0
package/dist/engine/solution-format.d.ts +10 -2
package/dist/engine/solution-format.js +287 -57
package/dist/engine/solution-index.d.ts +1 -1
package/dist/engine/solution-index.js +10 -0
package/dist/engine/solution-matcher.d.ts +7 -1
package/dist/engine/solution-matcher.js +137 -37
package/dist/engine/solution-outcomes.d.ts +70 -0
package/dist/engine/solution-outcomes.js +242 -0
package/dist/engine/solution-quarantine.d.ts +36 -0
package/dist/engine/solution-quarantine.js +172 -0
package/dist/engine/solution-weakness.d.ts +45 -0
package/dist/engine/solution-weakness.js +225 -0
package/dist/engine/solution-writer.d.ts +5 -0
package/dist/engine/solution-writer.js +18 -0
package/dist/fgx.js +12 -8
package/dist/hooks/context-guard.d.ts +5 -0
package/dist/hooks/context-guard.js +118 -2
package/dist/hooks/hooks-generator.d.ts +3 -0
package/dist/hooks/hooks-generator.js +23 -6
package/dist/hooks/keyword-detector.js +16 -100
package/dist/hooks/post-tool-failure.js +7 -0
package/dist/hooks/skill-injector.d.ts +4 -3
package/dist/hooks/skill-injector.js +6 -4
package/dist/hooks/solution-injector.js +20 -0
package/dist/host/codex-adapter.d.ts +10 -0
package/dist/host/codex-adapter.js +154 -0
package/dist/mcp/solution-reader.d.ts +5 -5
package/dist/mcp/solution-reader.js +34 -24
package/dist/mcp/tools.js +8 -0
package/dist/services/session.d.ts +19 -0
package/dist/services/session.js +62 -0
package/hooks/hooks.json +2 -2
package/package.json +2 -1
package/skills/architecture-decision/SKILL.md +113 -257
package/skills/calibrate/SKILL.md +207 -0
package/skills/code-review/SKILL.md +151 -178
package/skills/compound/SKILL.md +126 -68
package/skills/deep-interview/SKILL.md +210 -110
package/skills/docker/SKILL.md +57 -179
package/skills/forge-loop/SKILL.md +198 -0
package/skills/learn/SKILL.md +216 -0
package/skills/retro/SKILL.md +199 -0
package/skills/ship/SKILL.md +259 -0
package/agents/code-simplifier.md +0 -197
package/agents/performance-reviewer.md +0 -172
package/agents/qa-tester.md +0 -158
package/agents/refactoring-expert.md +0 -168
package/agents/scientist.md +0 -144
package/agents/security-reviewer.md +0 -137
package/agents/writer.md +0 -184
package/commands/api-design.md +0 -268
package/commands/ci-cd.md +0 -270
package/commands/database.md +0 -263
package/commands/debug-detective.md +0 -99
package/commands/documentation.md +0 -276
package/commands/ecomode.md +0 -51
package/commands/frontend.md +0 -271
package/commands/git-master.md +0 -90
package/commands/incident-response.md +0 -292
package/commands/migrate.md +0 -101
package/commands/performance.md +0 -288
package/commands/refactor.md +0 -105
package/commands/security-review.md +0 -288
package/commands/specify.md +0 -128
package/commands/tdd.md +0 -183
package/commands/testing-strategy.md +0 -265
package/skills/api-design/SKILL.md +0 -262
package/skills/ci-cd/SKILL.md +0 -264
package/skills/database/SKILL.md +0 -257
package/skills/debug-detective/SKILL.md +0 -95
package/skills/documentation/SKILL.md +0 -270
package/skills/ecomode/SKILL.md +0 -46
package/skills/frontend/SKILL.md +0 -265
package/skills/git-master/SKILL.md +0 -86
package/skills/incident-response/SKILL.md +0 -286
package/skills/migrate/SKILL.md +0 -96
package/skills/performance/SKILL.md +0 -282
package/skills/refactor/SKILL.md +0 -100
package/skills/security-review/SKILL.md +0 -282
package/skills/specify/SKILL.md +0 -122
package/skills/tdd/SKILL.md +0 -178
package/skills/testing-strategy/SKILL.md +0 -260

package/dist/engine/solution-matcher.js CHANGED Viewed

@@ -1,9 +1,10 @@
+import * as fs from 'node:fs';
 import * as path from 'node:path';
-import { ME_SOLUTIONS, PACKS_DIR } from '../core/paths.js';
-import { extractTags, expandCompoundTags, expandQueryBigrams } from './solution-format.js';
+import { ME_SOLUTIONS, META_LEARNING_DIR, PACKS_DIR } from '../core/paths.js';
+import { maskBlockedTokens } from './phrase-blocklist.js';
+import { expandCompoundTags, expandQueryBigrams, extractTags } from './solution-format.js';
 import { getOrBuildIndex } from './solution-index.js';
 import { defaultNormalizer } from './term-normalizer.js';
-import { maskBlockedTokens } from './phrase-blocklist.js';
 // ── Synonym expansion (delegates to term-normalizer) ──
 //
 // The old `SYNONYM_MAP` + `expandTagsWithSynonyms` pair had two problems:
@@ -87,7 +88,7 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
     let score = 0;
     for (const qt of queryTags) {
         // Term frequency in document
-        const tf = docTags.filter(dt => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
+        const tf = docTags.filter((dt) => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
         if (tf === 0)
             continue;
         // BM25 TF saturation
@@ -100,10 +101,37 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
 }
 /** High-frequency tags that should be weighted lower */
 const COMMON_TAGS = new Set([
-    'typescript', 'ts', 'javascript', 'js', 'fix', 'update', 'add', 'change',
-    'file', 'code', 'function', 'import', 'export', 'error', 'type', 'string',
-    'number', 'object', 'array', 'return', 'const', 'class', 'module',
-    '코드', '파일', '함수', '수정', '추가', '변경', '에러', '타입',
+    'typescript',
+    'ts',
+    'javascript',
+    'js',
+    'fix',
+    'update',
+    'add',
+    'change',
+    'file',
+    'code',
+    'function',
+    'import',
+    'export',
+    'error',
+    'type',
+    'string',
+    'number',
+    'object',
+    'array',
+    'return',
+    'const',
+    'class',
+    'module',
+    '코드',
+    '파일',
+    '함수',
+    '수정',
+    '추가',
+    '변경',
+    '에러',
+    '타입',
 ]);
 /** Apply IDF-like weight: common tags get reduced weight */
 export function tagWeight(tag) {
@@ -114,7 +142,7 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
         // Legacy mode: substring matching for backwards compatibility.
         // Not a hot path — only hit by the (old) solution-matcher.test.ts cases.
         const promptTags = extractTags(promptOrTags);
-        const intersection = keywordsOrTags.filter(kw => promptTags.some(pt => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
+        const intersection = keywordsOrTags.filter((kw) => promptTags.some((pt) => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
         return Math.min(1, intersection.length / Math.max(promptTags.length * 0.5, 1));
     }
     // v3 mode: tag matching with synonym expansion + TF-IDF weighting.
@@ -124,21 +152,21 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
     // the hot path pre-compute the expansion once per query and pass it via
     // `options.normalizedPromptTags`, so this function no longer repeats the
     // work per solution.
-    const expandedPromptTags = options?.normalizedPromptTags
-        ?? defaultNormalizer.normalizeTerms(promptOrTags);
+    const expandedPromptTags = options?.normalizedPromptTags ?? defaultNormalizer.normalizeTerms(promptOrTags);
     // R4-T1: when the caller supplies a compound-expanded solution tag set,
     // intersection and partial matching run against the expanded set (so
     // `api-key` matches `api`/`key` queries via the split parts), but the
     // Jaccard union denominator below still uses the RAW `keywordsOrTags`
     // for normalization stability.
     const matchTags = options?.solutionTagsExpanded ?? keywordsOrTags;
-    const intersection = matchTags.filter(t => expandedPromptTags.includes(t));
+    const intersection = matchTags.filter((t) => expandedPromptTags.includes(t));
     // partial/substring matches for longer tags (>3 chars)
-    const partialMatches = matchTags.filter(t => t.length > 3 && !intersection.includes(t)
-        && expandedPromptTags.some(pt => pt.length > 3 && (pt.includes(t) || t.includes(pt))));
+    const partialMatches = matchTags.filter((t) => t.length > 3 &&
+        !intersection.includes(t) &&
+        expandedPromptTags.some((pt) => pt.length > 3 && (pt.includes(t) || t.includes(pt))));
     // Apply TF-IDF weighting: common tags count less
-    const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0)
-        + partialMatches.reduce((sum, t) => sum + tagWeight(t) * 0.5, 0);
+    const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0) +
+        partialMatches.reduce((sum, t) => sum + tagWeight(t) * 0.5, 0);
     // ── Bigram similarity boost for borderline cases ──
     //
     // When the TF-IDF intersection score is below the match threshold (0.5),
@@ -176,7 +204,11 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
             const blendedScore = tfidfScore * 0.8 + bestBigramScore * 0.2;
             return {
                 relevance: blendedScore * (confidence ?? 1),
-                matchedTags: [...intersection, ...partialMatches, ...bigramMatchedTags.filter(t => !intersection.includes(t) && !partialMatches.includes(t))],
+                matchedTags: [
+                    ...intersection,
+                    ...partialMatches,
+                    ...bigramMatchedTags.filter((t) => !intersection.includes(t) && !partialMatches.includes(t)),
+                ],
             };
         }
         return { relevance: 0, matchedTags: [] };
@@ -196,7 +228,8 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
                 bigramBoost = sim;
         }
     }
-    const ensembleScore = tfidfScore * 0.5 + bm25 * 0.3 + bigramBoost * 0.2;
+    const w = options?.ensembleWeights ?? { tfidf: 0.5, bm25: 0.3, bigram: 0.2 };
+    const ensembleScore = tfidfScore * w.tfidf + bm25 * w.bm25 + bigramBoost * w.bigram;
     return {
         relevance: ensembleScore * (confidence ?? 1),
         matchedTags: [...intersection, ...partialMatches],
@@ -274,8 +307,8 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
     // Rule B
     if (matchedTags.length === 1) {
         const tag = matchedTags[0];
-        const literalHit = promptTags.includes(tag)
-            || promptTags.some(pt => {
+        const literalHit = promptTags.includes(tag) ||
+            promptTags.some((pt) => {
                 if (pt.length <= 3 || tag.length <= 3)
                     return false;
                 if (pt.includes(tag) || tag.includes(pt))
@@ -310,7 +343,7 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
  *     `matchSolutions` behaviour (both scopes could rank). Callers that want
  *     first-wins scope precedence must dedupe on their side.
  */
-function rankCandidates(promptTags, promptLower, solutions) {
+function rankCandidates(promptTags, promptLower, solutions, ensembleWeights) {
     // T2: normalize prompt tags ONCE per query (not once per solution).
     // Pre-T2 this expansion happened inside calculateRelevance and was
     // repeated N times for N solutions — the plan's primary hot-path win.
@@ -345,7 +378,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
     const promptTagsWithBigrams = expandQueryBigrams(maskedPromptTags);
     const normalizedPromptTags = defaultNormalizer.normalizeTerms(promptTagsWithBigrams);
     return solutions
-        .map(sol => {
+        .map((sol) => {
         // R4-T1: solution-side compound-tag expansion. `api-key` becomes
         // {api-key, api, key} so a query token `api` (from "api keys") hits
         // it directly. Computed per solution because each sol.tags is
@@ -358,7 +391,11 @@ function rankCandidates(promptTags, promptLower, solutions) {
         // step (intersection/partialMatches) already uses the masked set
         // via `normalizedPromptTags` — the union must match for score
         // semantics to stay consistent.
-        const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, { normalizedPromptTags, solutionTagsExpanded: solTagsExpanded });
+        const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, {
+            normalizedPromptTags,
+            solutionTagsExpanded: solTagsExpanded,
+            ensembleWeights,
+        });
         // Compute identifier boost FIRST — independent of tag scoring so
         // R4-T3's tag-evidence precision rules below cannot silently drop
         // a candidate that has strong identifier-level evidence.
@@ -385,9 +422,9 @@ function rankCandidates(promptTags, promptLower, solutions) {
         // the `matchedTags.length + matchedIdentifiers.length >= 1` filter.
         let tagRelevance = result.relevance;
         let tagMatches = result.matchedTags;
-        if (matchedIdentifiers.length === 0
-            && tagMatches.length > 0
-            && shouldRejectByR4T3Rules(maskedPromptTags, tagMatches)) {
+        if (matchedIdentifiers.length === 0 &&
+            tagMatches.length > 0 &&
+            shouldRejectByR4T3Rules(maskedPromptTags, tagMatches)) {
             tagRelevance = 0;
             tagMatches = [];
         }
@@ -398,7 +435,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
             matchedIdentifiers,
         };
     })
-        .filter(c => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
+        .filter((c) => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
         .sort((a, b) => b.relevance - a.relevance)
         .slice(0, 5);
 }
@@ -675,7 +712,7 @@ function computeBucketMetrics(queries, solutions) {
  */
 export function evaluateQuery(query, solutions) {
     const promptTags = extractTags(query);
-    return rankCandidates(promptTags, query.toLowerCase(), solutions).map(c => ({
+    return rankCandidates(promptTags, query.toLowerCase(), solutions).map((c) => ({
         name: c.solution.name,
         relevance: c.relevance,
         matchedTags: c.matchedTags,
@@ -701,13 +738,16 @@ export function evaluateSolutionMatcher(fixture) {
     // doesn't drown a small paraphrase bucket but also a single-query bucket
     // doesn't dominate.
     const recallAt5 = combinedTotal > 0
-        ? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) / combinedTotal
+        ? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) /
+            combinedTotal
         : 0;
     const mrrAt5 = combinedTotal > 0
-        ? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) / combinedTotal
+        ? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) /
+            combinedTotal
         : 0;
     const noResultRate = combinedTotal > 0
-        ? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) / combinedTotal
+        ? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) /
+            combinedTotal
         : 0;
     let negAnyResult = 0;
     for (const q of fixture.negative) {
@@ -733,27 +773,87 @@ export function evaluateSolutionMatcher(fixture) {
         },
     };
 }
+// ── Meta-learning: dynamic ensemble weights ──
+let _cachedWeights;
+let _weightsCacheTime = 0;
+const WEIGHTS_CACHE_TTL = 60_000; // 1 minute cache
+/**
+ * Resolve the ensemble weights produced by meta-learning, if any.
+ *
+ * Reads `matcher-weights.json` under META_LEARNING_DIR and accepts it only
+ * when `tfidf`, `bm25` and `bigram` are all numbers. Every other outcome
+ * (file absent, unreadable, malformed, wrong shape) yields `undefined`, so
+ * the scorer falls back to its built-in default weights.
+ *
+ * The result is memoized for WEIGHTS_CACHE_TTL ms; a miss is remembered as
+ * `null` so the file is not probed again on every matchSolutions call.
+ */
+function loadTunedMatcherWeights() {
+    const now = Date.now();
+    const cacheIsFresh = _cachedWeights !== undefined && now - _weightsCacheTime < WEIGHTS_CACHE_TTL;
+    if (cacheIsFresh) {
+        return _cachedWeights === null ? undefined : _cachedWeights;
+    }
+    // `null` means "looked, found nothing usable".
+    let tuned = null;
+    try {
+        const weightsPath = path.join(META_LEARNING_DIR, 'matcher-weights.json');
+        if (fs.existsSync(weightsPath)) {
+            const { tfidf, bm25, bigram } = JSON.parse(fs.readFileSync(weightsPath, 'utf-8'));
+            const allNumeric = [tfidf, bm25, bigram].every((v) => typeof v === 'number');
+            if (allNumeric) {
+                tuned = { tfidf, bm25, bigram };
+            }
+        }
+    }
+    catch {
+        /* fail-open: use defaults */
+    }
+    _cachedWeights = tuned;
+    _weightsCacheTime = now;
+    return tuned === null ? undefined : tuned;
+}
+/**
+ * Exploration bonus for solutions still in the `candidate` state.
+ *
+ * Phase 4 evolution introduces new solutions at `status: candidate`. Left
+ * alone, they are ranked against mature verified/champion entries and tend
+ * to lose the early rounds — not on merit, but because matchers reward
+ * solutions with richer tag histories. Scaling their confidence up slightly
+ * lets them surface often enough to collect outcome data; from there the
+ * fitness loop decides whether they stay.
+ *
+ * 1.3× is an initial value (Q1 in docs/design-solution-evolution.md).
+ * Turning the bonus off after 5 accumulated injections is not done here: a
+ * separate promoter flips `status` to `verified`.
+ */
+const CANDIDATE_EXPLORATION_MULTIPLIER = 1.3;
+function applyCandidateExplorationBonus(entries) {
+    // Boosted confidence is capped at 1; the input entry is never mutated.
+    const boost = (entry) => ({
+        ...entry,
+        confidence: Math.min(1, entry.confidence * CANDIDATE_EXPLORATION_MULTIPLIER),
+    });
+    // Non-candidates are passed through by reference.
+    return entries.map((entry) => (entry.status === 'candidate' ? boost(entry) : entry));
+}
 export function matchSolutions(prompt, scope, cwd) {
     // Build solution dirs for index cache
-    const dirs = [
-        { dir: ME_SOLUTIONS, scope: 'me' },
-    ];
+    const dirs = [{ dir: ME_SOLUTIONS, scope: 'me' }];
     if (scope.team) {
         dirs.push({ dir: path.join(PACKS_DIR, scope.team.name, 'solutions'), scope: 'team' });
     }
     dirs.push({ dir: path.join(cwd, '.compound', 'solutions'), scope: 'project' });
     // Use cached index (rebuilt only when dirs change)
     const index = getOrBuildIndex(dirs);
-    const allSolutions = index.entries.map(e => ({ ...e }));
+    const allSolutions = applyCandidateExplorationBonus(index.entries.map((e) => ({ ...e })));
     const promptTags = extractTags(prompt);
     const promptLower = prompt.toLowerCase();
+    // Meta-learning: load tuned weights if available
+    const tunedWeights = loadTunedMatcherWeights();
     // Delegate to shared ranking core. `rankCandidates` is generic so each
     // ranked candidate carries the original `LoadedSolution` reference — no
     // name-based re-lookup, so two scopes sharing a name (e.g. me/foo and
     // project/foo) can both appear in the result without a Map last-wins
     // scope-precedence bug.
-    const ranked = rankCandidates(promptTags, promptLower, allSolutions);
-    return ranked.map(c => ({
+    const ranked = rankCandidates(promptTags, promptLower, allSolutions, tunedWeights);
+    return ranked.map((c) => ({
         name: c.solution.name,
         path: c.solution.filePath,
         scope: c.solution.scope,

package/dist/engine/solution-outcomes.d.ts ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * How an injection was finally classified:
+ * `accept` (written by flushAccept), `correct` (written by
+ * attributeCorrection, i.e. the user recorded a correction), `error`
+ * (written by attributeError) or `unknown` (written by finalizeSession).
+ */
+export type Outcome = 'accept' | 'correct' | 'error' | 'unknown';
+/**
+ * Which mechanism produced the outcome. In the writer module `explicit`
+ * accompanies `correct`, `window` accompanies `error`, `session_end`
+ * accompanies `unknown` and `default` accompanies `accept`.
+ */
+export type Attribution = 'explicit' | 'window' | 'session_end' | 'default';
+/**
+ * One inject → outcome event. Written append-only to
+ * ~/.forgen/state/outcomes/{session_id}.jsonl. The pending state (inject
+ * happened, outcome not yet decided) is stored separately in
+ * ~/.forgen/state/outcome-pending-{session_id}.json.
+ */
+export interface OutcomeEvent {
+    /** `Date.now()` at the moment the outcome was decided (not the injection time). */
+    ts: number;
+    /** Session the injection belongs to. */
+    session_id: string;
+    /** Solution identifier, copied from the pending injection record. */
+    solution: string;
+    /** Match score reported at injection time, copied unchanged. */
+    match_score: number;
+    /** Injected character count reported at injection time, copied unchanged. */
+    injected_chars: number;
+    outcome: Outcome;
+    /** Milliseconds between recording the injection and deciding the outcome. */
+    outcome_lag_ms: number;
+    attribution: Attribution;
+}
+/**
+ * Record that solutions were injected. Called from solution-injector right
+ * after `approveWithContext` is emitted. Fails silently — outcome tracking
+ * must never block the user's workflow.
+ *
+ * A no-op when `sessionId` is empty or `injections` is empty. Every entry
+ * of one call is stamped with the same injection timestamp.
+ */
+export declare function appendPending(sessionId: string, injections: Array<{
+    solution: string;
+    match_score: number;
+    injected_chars: number;
+}>): void;
+/**
+ * Flush pending injections as `accept` events. Called when a new user
+ * prompt arrives without any intervening correction/error, signaling that
+ * the previous injections were silently accepted. "Silence = consent."
+ *
+ * If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
+ * because an earlier step already attributed them as `correct` or `error`).
+ * Note that the implementation empties the pending list either way, so
+ * excluded entries are dropped rather than carried over.
+ *
+ * Returns the number of `accept` events written (0 when nothing was
+ * pending, when `sessionId` is empty, or when the flush failed).
+ */
+export declare function flushAccept(sessionId: string, excludeSolutions?: Set<string>): number;
+/**
+ * Attribute a correction to the most recent pending injection(s). Called
+ * from the correction-record MCP tool. Removes attributed entries from
+ * pending so subsequent `flushAccept` does not double-count them.
+ *
+ * Strategy: all currently-pending solutions in this session are marked as
+ * `correct`. This is conservative (the correction may target only one of
+ * them), but without semantic attribution we err on the side of the user's
+ * feedback signal being louder than acceptance.
+ *
+ * Returns the solution identifiers that were attributed, in pending order
+ * (empty when nothing was pending or on failure).
+ */
+export declare function attributeCorrection(sessionId: string): string[];
+/**
+ * Attribute a tool error to pending solutions in this session. Called from
+ * post-tool-failure hook. Unlike corrections, errors do not clear pending
+ * — an error is a weaker signal and the next user prompt can still produce
+ * a correct/accept decision.
+ *
+ * To avoid flooding the log with duplicate errors for the same pending
+ * batch, we cap at one `error` event per (session, solution) pair per
+ * pending-cycle by tracking an `__error_flagged` list in the pending state.
+ *
+ * Returns only the solutions newly flagged by this call; solutions already
+ * flagged earlier in the cycle are not repeated.
+ */
+export declare function attributeError(sessionId: string): string[];
+/**
+ * At session end, any still-pending entries are logged as `unknown` (we
+ * can't tell if the user was happy or just stopped). Pending file is
+ * removed.
+ *
+ * Returns the number of `unknown` events written (0 on failure or when
+ * `sessionId` is empty).
+ */
+export declare function finalizeSession(sessionId: string): number;
+/**
+ * Read all outcome events across all sessions. Used by fitness
+ * calculation. Returns events sorted by timestamp ascending.
+ *
+ * Only `.jsonl` files in the outcomes directory are read; unreadable files
+ * and unparsable lines are skipped. Parsed lines are not validated against
+ * the OutcomeEvent shape.
+ */
+export declare function readAllOutcomes(): OutcomeEvent[];

package/dist/engine/solution-outcomes.js ADDED Viewed

@@ -0,0 +1,242 @@
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { OUTCOMES_DIR, STATE_DIR } from '../core/paths.js';
+import { sanitizeId } from '../hooks/shared/sanitize-id.js';
+import { createLogger } from '../core/logger.js';
+const log = createLogger('solution-outcomes');
+/** Location of the per-session pending-injection state file under STATE_DIR. */
+function pendingPath(sessionId) {
+    const safeId = sanitizeId(sessionId);
+    const fileName = `outcome-pending-${safeId}.json`;
+    return path.join(STATE_DIR, fileName);
+}
+/** Location of the per-session append-only outcome log under OUTCOMES_DIR. */
+function outcomesPath(sessionId) {
+    const safeId = sanitizeId(sessionId);
+    const fileName = `${safeId}.jsonl`;
+    return path.join(OUTCOMES_DIR, fileName);
+}
+/**
+ * Load the pending-injection state of a session.
+ *
+ * Returns the parsed state object as stored (extra bookkeeping keys such as
+ * `__error_flagged` are preserved). Falls back to an empty state when the
+ * file is missing, unreadable, not valid JSON, or valid JSON that does not
+ * carry a `pending` array. Without the shape check a file containing e.g.
+ * `{}` or `null` would make every caller throw on `state.pending` and the
+ * session could never record outcomes again.
+ */
+function readPending(sessionId) {
+    const p = pendingPath(sessionId);
+    if (!fs.existsSync(p))
+        return { pending: [], last_prompt_ts: 0 };
+    try {
+        const parsed = JSON.parse(fs.readFileSync(p, 'utf-8'));
+        if (parsed !== null && typeof parsed === 'object' && Array.isArray(parsed.pending))
+            return parsed;
+    }
+    catch {
+        /* unreadable or malformed file: fall through to the empty state */
+    }
+    return { pending: [], last_prompt_ts: 0 };
+}
+/** Persist the pending state of a session, creating STATE_DIR when needed. */
+function writePending(sessionId, state) {
+    const target = pendingPath(sessionId);
+    fs.mkdirSync(STATE_DIR, { recursive: true });
+    const serialized = JSON.stringify(state);
+    fs.writeFileSync(target, serialized);
+}
+/** Append one event as a JSON line to its session's outcome log. */
+function appendOutcome(event) {
+    fs.mkdirSync(OUTCOMES_DIR, { recursive: true });
+    const logFile = outcomesPath(event.session_id);
+    const line = `${JSON.stringify(event)}\n`;
+    fs.appendFileSync(logFile, line);
+}
+/**
+ * Remember that a batch of solutions was just injected.
+ *
+ * Invoked by solution-injector immediately after `approveWithContext` is
+ * emitted. Each injection is stored in the session's pending state with a
+ * shared timestamp. Any failure is only logged at debug level: outcome
+ * tracking must never get in the way of the user's workflow.
+ */
+export function appendPending(sessionId, injections) {
+    const nothingToRecord = !sessionId || injections.length === 0;
+    if (nothingToRecord)
+        return;
+    try {
+        const state = readPending(sessionId);
+        const injectedAt = Date.now();
+        const stamped = injections.map((inj) => ({ ...inj, ts: injectedAt }));
+        state.pending.push(...stamped);
+        writePending(sessionId, state);
+    }
+    catch (e) {
+        const reason = e instanceof Error ? e.message : String(e);
+        log.debug(`appendPending failed: ${reason}`);
+    }
+}
+/**
+ * Flush pending injections as `accept` events. Called when a new user
+ * prompt arrives without any intervening correction/error, signaling that
+ * the previous injections were silently accepted. "Silence = consent."
+ *
+ * If `excludeSolutions` is provided, those solutions are NOT logged as
+ * `accept` (e.g. because an earlier step already attributed them as
+ * `correct` or `error`).
+ *
+ * After the flush the pending state is rewritten with an empty list and a
+ * fresh `last_prompt_ts`. Excluded entries and any other bookkeeping keys
+ * in the old state (such as `__error_flagged`) are therefore dropped too.
+ *
+ * @param sessionId session whose pending injections are flushed
+ * @param excludeSolutions solution identifiers to skip when logging
+ * @returns number of `accept` events written; 0 when `sessionId` is empty,
+ *          nothing is pending, or the flush failed
+ */
+export function flushAccept(sessionId, excludeSolutions = new Set()) {
+    if (!sessionId)
+        return 0;
+    try {
+        const state = readPending(sessionId);
+        if (state.pending.length === 0)
+            return 0;
+        const now = Date.now();
+        let flushed = 0;
+        for (const p of state.pending) {
+            if (excludeSolutions.has(p.solution))
+                continue;
+            appendOutcome({
+                ts: now,
+                session_id: sessionId,
+                solution: p.solution,
+                match_score: p.match_score,
+                injected_chars: p.injected_chars,
+                outcome: 'accept',
+                outcome_lag_ms: now - p.ts,
+                attribution: 'default',
+            });
+            flushed++;
+        }
+        // The pending cycle ends here for every entry, excluded ones included.
+        // NOTE(review): an earlier revision declared a `kept` array that was
+        // never filled, so it always wrote an empty list. Confirm that dropping
+        // excluded entries (instead of retaining them) is the intended design.
+        writePending(sessionId, { pending: [], last_prompt_ts: now });
+        return flushed;
+    }
+    catch (e) {
+        log.debug(`flushAccept failed: ${e instanceof Error ? e.message : String(e)}`);
+        return 0;
+    }
+}
+/**
+ * Blame a user correction on whatever is currently pending.
+ *
+ * Invoked by the correction-record MCP tool. Every pending injection of the
+ * session is logged as `correct` with `explicit` attribution, then the
+ * pending list is emptied so a later `flushAccept` cannot count the same
+ * injections again. `last_prompt_ts` is carried over unchanged.
+ *
+ * Marking all pending solutions is deliberately conservative: the
+ * correction may concern only one of them, but lacking semantic
+ * attribution the user's feedback is allowed to outweigh silent acceptance.
+ *
+ * Returns the attributed solution identifiers (empty on failure).
+ */
+export function attributeCorrection(sessionId) {
+    if (!sessionId)
+        return [];
+    try {
+        const state = readPending(sessionId);
+        if (state.pending.length === 0)
+            return [];
+        const decidedAt = Date.now();
+        const attributed = [];
+        for (const entry of state.pending) {
+            const event = {
+                ts: decidedAt,
+                session_id: sessionId,
+                solution: entry.solution,
+                match_score: entry.match_score,
+                injected_chars: entry.injected_chars,
+                outcome: 'correct',
+                outcome_lag_ms: decidedAt - entry.ts,
+                attribution: 'explicit',
+            };
+            appendOutcome(event);
+            attributed.push(entry.solution);
+        }
+        writePending(sessionId, { pending: [], last_prompt_ts: state.last_prompt_ts });
+        return attributed;
+    }
+    catch (e) {
+        const reason = e instanceof Error ? e.message : String(e);
+        log.debug(`attributeCorrection failed: ${reason}`);
+        return [];
+    }
+}
+/**
+ * Blame a tool failure on the solutions currently pending in a session.
+ *
+ * Invoked by the post-tool-failure hook. An error is a weaker signal than a
+ * correction, so pending entries are left in place: the next user prompt
+ * may still settle them as correct/accept.
+ *
+ * To keep repeated failures from flooding the log, each solution gets at
+ * most one `error` event per pending cycle. The solutions already reported
+ * are remembered in the pending state under the `__error_flagged` key.
+ *
+ * Returns the solutions newly flagged by this call (empty on failure).
+ */
+export function attributeError(sessionId) {
+    if (!sessionId)
+        return [];
+    try {
+        const state = readPending(sessionId);
+        if (state.pending.length === 0)
+            return [];
+        const FLAG_KEY = '__error_flagged';
+        const stored = state[FLAG_KEY];
+        const alreadyFlagged = new Set(Array.isArray(stored) ? stored : []);
+        const decidedAt = Date.now();
+        const newlyFlagged = [];
+        for (const entry of state.pending) {
+            const name = entry.solution;
+            if (!alreadyFlagged.has(name)) {
+                appendOutcome({
+                    ts: decidedAt,
+                    session_id: sessionId,
+                    solution: name,
+                    match_score: entry.match_score,
+                    injected_chars: entry.injected_chars,
+                    outcome: 'error',
+                    outcome_lag_ms: decidedAt - entry.ts,
+                    attribution: 'window',
+                });
+                // Added right away so a duplicate pending entry is skipped too.
+                alreadyFlagged.add(name);
+                newlyFlagged.push(name);
+            }
+        }
+        state[FLAG_KEY] = [...alreadyFlagged];
+        writePending(sessionId, state);
+        return newlyFlagged;
+    }
+    catch (e) {
+        const reason = e instanceof Error ? e.message : String(e);
+        log.debug(`attributeError failed: ${reason}`);
+        return [];
+    }
+}
+/**
+ * Close out a session's outcome tracking.
+ *
+ * Whatever is still pending is logged as `unknown` with `session_end`
+ * attribution — the user may have been satisfied or may simply have
+ * stopped. The pending state file is then deleted if it exists.
+ *
+ * Returns the number of `unknown` events written (0 on failure).
+ */
+export function finalizeSession(sessionId) {
+    if (!sessionId)
+        return 0;
+    try {
+        const state = readPending(sessionId);
+        const endedAt = Date.now();
+        let finalized = 0;
+        for (const entry of state.pending) {
+            const event = {
+                ts: endedAt,
+                session_id: sessionId,
+                solution: entry.solution,
+                match_score: entry.match_score,
+                injected_chars: entry.injected_chars,
+                outcome: 'unknown',
+                outcome_lag_ms: endedAt - entry.ts,
+                attribution: 'session_end',
+            };
+            appendOutcome(event);
+            finalized += 1;
+        }
+        const stateFile = pendingPath(sessionId);
+        if (fs.existsSync(stateFile)) {
+            fs.unlinkSync(stateFile);
+        }
+        return finalized;
+    }
+    catch (e) {
+        const reason = e instanceof Error ? e.message : String(e);
+        log.debug(`finalizeSession failed: ${reason}`);
+        return 0;
+    }
+}
+/**
+ * Collect the outcome events of every session.
+ *
+ * Scans OUTCOMES_DIR for `.jsonl` files and parses them line by line.
+ * Files that cannot be read and lines that are not valid JSON are skipped.
+ * Used by the fitness calculation. The combined list is returned sorted by
+ * `ts`, oldest first.
+ */
+export function readAllOutcomes() {
+    if (!fs.existsSync(OUTCOMES_DIR))
+        return [];
+    const events = [];
+    const logFiles = fs.readdirSync(OUTCOMES_DIR).filter((name) => name.endsWith('.jsonl'));
+    for (const name of logFiles) {
+        let text;
+        try {
+            text = fs.readFileSync(path.join(OUTCOMES_DIR, name), 'utf-8');
+        }
+        catch {
+            /* skip */
+            continue;
+        }
+        for (const line of text.split('\n')) {
+            if (line === '')
+                continue;
+            try {
+                events.push(JSON.parse(line));
+            }
+            catch { /* skip bad line */ }
+        }
+    }
+    events.sort((earlier, later) => earlier.ts - later.ts);
+    return events;
+}

package/dist/engine/solution-quarantine.d.ts ADDED Viewed

@@ -0,0 +1,36 @@
+/**
+ * One quarantine record for a solution file that was dropped from the
+ * index. Not exported; it only types the return value of listQuarantined.
+ */
+interface QuarantineEntry {
+    /** Path of the quarantined solution file. */
+    path: string;
+    /** NOTE(review): presumably a timestamp string for when the entry was recorded — confirm the format. */
+    at: string;
+    /** Human-readable diagnostics explaining why the file was rejected. */
+    errors: string[];
+}
+/**
+ * Produce actionable frontmatter diagnostics directly from file content.
+ *
+ * This duplicates the YAML parse that `parseFrontmatterOnly` already does,
+ * but it runs only on the rare failure path (solution dropped from index),
+ * so the overhead is acceptable in exchange for a human-readable error list.
+ *
+ * Takes the raw text of a solution file and returns the list of diagnostic
+ * messages. NOTE(review): whether a healthy file yields an empty list is
+ * not visible from this declaration — confirm against the implementation.
+ */
+export declare function diagnoseFromRawContent(content: string): string[];
+/**
+ * Append one quarantine entry for `filePath`. Deduped by path within the
+ * current file: if the latest entry for this path already matches the
+ * current errors, skip the append.
+ *
+ * Storage: one JSONL line per quarantine event. Readers use only the
+ * latest line per path.
+ *
+ * `errors` is the diagnostic list to store with the entry (for example the
+ * output of diagnoseFromRawContent).
+ */
+export declare function recordQuarantine(filePath: string, errors: string[]): void;
+/**
+ * Read the latest quarantine state: one entry per path, keyed to the most
+ * recent append. Entries whose file no longer exists are dropped.
+ *
+ * NOTE(review): the ordering of the returned array is not specified by
+ * this declaration — do not rely on it without checking the implementation.
+ */
+export declare function listQuarantined(): QuarantineEntry[];
+/**
+ * Clear quarantine entries for files that now parse correctly or no longer
+ * exist. Intended to be called after `forgen learn fix-up` or a manual edit.
+ *
+ * Returns two counters. NOTE(review): presumably `removed` is the number of
+ * entries cleared and `kept` the number still quarantined — confirm.
+ */
+export declare function pruneQuarantine(): {
+    removed: number;
+    kept: number;
+};
+export {};