@wooojin/forgen 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/README.ko.md +25 -14
- package/README.md +61 -17
- package/agents/analyst.md +48 -4
- package/agents/architect.md +39 -4
- package/agents/code-reviewer.md +107 -77
- package/agents/critic.md +47 -4
- package/agents/debugger.md +46 -4
- package/agents/designer.md +40 -4
- package/agents/executor.md +112 -30
- package/agents/explore.md +45 -5
- package/agents/git-master.md +48 -4
- package/agents/planner.md +121 -18
- package/agents/solution-evolver.md +115 -0
- package/agents/test-engineer.md +58 -4
- package/agents/verifier.md +92 -77
- package/commands/architecture-decision.md +127 -258
- package/commands/calibrate.md +225 -0
- package/commands/code-review.md +163 -178
- package/commands/compound.md +127 -68
- package/commands/deep-interview.md +212 -110
- package/commands/docker.md +68 -178
- package/commands/forge-loop.md +215 -0
- package/commands/learn.md +231 -0
- package/commands/retro.md +215 -0
- package/commands/ship.md +277 -0
- package/dist/cli.js +25 -9
- package/dist/core/auto-compound-runner.js +14 -0
- package/dist/core/config-injector.d.ts +2 -1
- package/dist/core/config-injector.js +2 -1
- package/dist/core/dashboard.d.ts +17 -0
- package/dist/core/dashboard.js +158 -2
- package/dist/core/harness.d.ts +6 -1
- package/dist/core/harness.js +75 -19
- package/dist/core/paths.d.ts +31 -1
- package/dist/core/paths.js +43 -2
- package/dist/core/spawn.d.ts +3 -2
- package/dist/core/spawn.js +27 -8
- package/dist/core/types.d.ts +34 -0
- package/dist/engine/compound-lifecycle.d.ts +4 -3
- package/dist/engine/compound-lifecycle.js +91 -46
- package/dist/engine/learn-cli.d.ts +1 -0
- package/dist/engine/learn-cli.js +182 -0
- package/dist/engine/meta-learning/adaptive-thresholds.d.ts +20 -0
- package/dist/engine/meta-learning/adaptive-thresholds.js +126 -0
- package/dist/engine/meta-learning/extraction-tuner.d.ts +15 -0
- package/dist/engine/meta-learning/extraction-tuner.js +99 -0
- package/dist/engine/meta-learning/matcher-weight-tuner.d.ts +21 -0
- package/dist/engine/meta-learning/matcher-weight-tuner.js +151 -0
- package/dist/engine/meta-learning/runner.d.ts +14 -0
- package/dist/engine/meta-learning/runner.js +90 -0
- package/dist/engine/meta-learning/scope-promoter.d.ts +21 -0
- package/dist/engine/meta-learning/scope-promoter.js +84 -0
- package/dist/engine/meta-learning/session-quality-scorer.d.ts +61 -0
- package/dist/engine/meta-learning/session-quality-scorer.js +166 -0
- package/dist/engine/meta-learning/types.d.ts +114 -0
- package/dist/engine/meta-learning/types.js +43 -0
- package/dist/engine/solution-candidate.d.ts +30 -0
- package/dist/engine/solution-candidate.js +124 -0
- package/dist/engine/solution-fitness.d.ts +52 -0
- package/dist/engine/solution-fitness.js +95 -0
- package/dist/engine/solution-fixup.d.ts +30 -0
- package/dist/engine/solution-fixup.js +116 -0
- package/dist/engine/solution-format.d.ts +10 -2
- package/dist/engine/solution-format.js +287 -57
- package/dist/engine/solution-index.d.ts +1 -1
- package/dist/engine/solution-index.js +10 -0
- package/dist/engine/solution-matcher.d.ts +7 -1
- package/dist/engine/solution-matcher.js +137 -37
- package/dist/engine/solution-outcomes.d.ts +70 -0
- package/dist/engine/solution-outcomes.js +242 -0
- package/dist/engine/solution-quarantine.d.ts +36 -0
- package/dist/engine/solution-quarantine.js +172 -0
- package/dist/engine/solution-weakness.d.ts +45 -0
- package/dist/engine/solution-weakness.js +225 -0
- package/dist/engine/solution-writer.d.ts +5 -0
- package/dist/engine/solution-writer.js +18 -0
- package/dist/fgx.js +12 -8
- package/dist/hooks/context-guard.d.ts +5 -0
- package/dist/hooks/context-guard.js +118 -2
- package/dist/hooks/hooks-generator.d.ts +3 -0
- package/dist/hooks/hooks-generator.js +23 -6
- package/dist/hooks/keyword-detector.js +16 -100
- package/dist/hooks/post-tool-failure.js +7 -0
- package/dist/hooks/skill-injector.d.ts +4 -3
- package/dist/hooks/skill-injector.js +6 -4
- package/dist/hooks/solution-injector.js +20 -0
- package/dist/host/codex-adapter.d.ts +10 -0
- package/dist/host/codex-adapter.js +154 -0
- package/dist/mcp/solution-reader.d.ts +5 -5
- package/dist/mcp/solution-reader.js +34 -24
- package/dist/mcp/tools.js +8 -0
- package/dist/services/session.d.ts +19 -0
- package/dist/services/session.js +62 -0
- package/hooks/hooks.json +2 -2
- package/package.json +2 -1
- package/skills/architecture-decision/SKILL.md +113 -257
- package/skills/calibrate/SKILL.md +207 -0
- package/skills/code-review/SKILL.md +151 -178
- package/skills/compound/SKILL.md +126 -68
- package/skills/deep-interview/SKILL.md +210 -110
- package/skills/docker/SKILL.md +57 -179
- package/skills/forge-loop/SKILL.md +198 -0
- package/skills/learn/SKILL.md +216 -0
- package/skills/retro/SKILL.md +199 -0
- package/skills/ship/SKILL.md +259 -0
- package/agents/code-simplifier.md +0 -197
- package/agents/performance-reviewer.md +0 -172
- package/agents/qa-tester.md +0 -158
- package/agents/refactoring-expert.md +0 -168
- package/agents/scientist.md +0 -144
- package/agents/security-reviewer.md +0 -137
- package/agents/writer.md +0 -184
- package/commands/api-design.md +0 -268
- package/commands/ci-cd.md +0 -270
- package/commands/database.md +0 -263
- package/commands/debug-detective.md +0 -99
- package/commands/documentation.md +0 -276
- package/commands/ecomode.md +0 -51
- package/commands/frontend.md +0 -271
- package/commands/git-master.md +0 -90
- package/commands/incident-response.md +0 -292
- package/commands/migrate.md +0 -101
- package/commands/performance.md +0 -288
- package/commands/refactor.md +0 -105
- package/commands/security-review.md +0 -288
- package/commands/specify.md +0 -128
- package/commands/tdd.md +0 -183
- package/commands/testing-strategy.md +0 -265
- package/skills/api-design/SKILL.md +0 -262
- package/skills/ci-cd/SKILL.md +0 -264
- package/skills/database/SKILL.md +0 -257
- package/skills/debug-detective/SKILL.md +0 -95
- package/skills/documentation/SKILL.md +0 -270
- package/skills/ecomode/SKILL.md +0 -46
- package/skills/frontend/SKILL.md +0 -265
- package/skills/git-master/SKILL.md +0 -86
- package/skills/incident-response/SKILL.md +0 -286
- package/skills/migrate/SKILL.md +0 -96
- package/skills/performance/SKILL.md +0 -282
- package/skills/refactor/SKILL.md +0 -100
- package/skills/security-review/SKILL.md +0 -282
- package/skills/specify/SKILL.md +0 -122
- package/skills/tdd/SKILL.md +0 -178
- package/skills/testing-strategy/SKILL.md +0 -260
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
1
2
|
import * as path from 'node:path';
|
|
2
|
-
import { ME_SOLUTIONS, PACKS_DIR } from '../core/paths.js';
|
|
3
|
-
import {
|
|
3
|
+
import { ME_SOLUTIONS, META_LEARNING_DIR, PACKS_DIR } from '../core/paths.js';
|
|
4
|
+
import { maskBlockedTokens } from './phrase-blocklist.js';
|
|
5
|
+
import { expandCompoundTags, expandQueryBigrams, extractTags } from './solution-format.js';
|
|
4
6
|
import { getOrBuildIndex } from './solution-index.js';
|
|
5
7
|
import { defaultNormalizer } from './term-normalizer.js';
|
|
6
|
-
import { maskBlockedTokens } from './phrase-blocklist.js';
|
|
7
8
|
// ── Synonym expansion (delegates to term-normalizer) ──
|
|
8
9
|
//
|
|
9
10
|
// The old `SYNONYM_MAP` + `expandTagsWithSynonyms` pair had two problems:
|
|
@@ -87,7 +88,7 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
|
|
|
87
88
|
let score = 0;
|
|
88
89
|
for (const qt of queryTags) {
|
|
89
90
|
// Term frequency in document
|
|
90
|
-
const tf = docTags.filter(dt => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
|
|
91
|
+
const tf = docTags.filter((dt) => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
|
|
91
92
|
if (tf === 0)
|
|
92
93
|
continue;
|
|
93
94
|
// BM25 TF saturation
|
|
@@ -100,10 +101,37 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
|
|
|
100
101
|
}
|
|
101
102
|
/** High-frequency tags that should be weighted lower */
|
|
102
103
|
const COMMON_TAGS = new Set([
|
|
103
|
-
'typescript',
|
|
104
|
-
'
|
|
105
|
-
'
|
|
106
|
-
'
|
|
104
|
+
'typescript',
|
|
105
|
+
'ts',
|
|
106
|
+
'javascript',
|
|
107
|
+
'js',
|
|
108
|
+
'fix',
|
|
109
|
+
'update',
|
|
110
|
+
'add',
|
|
111
|
+
'change',
|
|
112
|
+
'file',
|
|
113
|
+
'code',
|
|
114
|
+
'function',
|
|
115
|
+
'import',
|
|
116
|
+
'export',
|
|
117
|
+
'error',
|
|
118
|
+
'type',
|
|
119
|
+
'string',
|
|
120
|
+
'number',
|
|
121
|
+
'object',
|
|
122
|
+
'array',
|
|
123
|
+
'return',
|
|
124
|
+
'const',
|
|
125
|
+
'class',
|
|
126
|
+
'module',
|
|
127
|
+
'코드',
|
|
128
|
+
'파일',
|
|
129
|
+
'함수',
|
|
130
|
+
'수정',
|
|
131
|
+
'추가',
|
|
132
|
+
'변경',
|
|
133
|
+
'에러',
|
|
134
|
+
'타입',
|
|
107
135
|
]);
|
|
108
136
|
/** Apply IDF-like weight: common tags get reduced weight */
|
|
109
137
|
export function tagWeight(tag) {
|
|
@@ -114,7 +142,7 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
|
|
|
114
142
|
// Legacy mode: substring matching for backwards compatibility.
|
|
115
143
|
// Not a hot path — only hit by the (old) solution-matcher.test.ts cases.
|
|
116
144
|
const promptTags = extractTags(promptOrTags);
|
|
117
|
-
const intersection = keywordsOrTags.filter(kw => promptTags.some(pt => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
|
|
145
|
+
const intersection = keywordsOrTags.filter((kw) => promptTags.some((pt) => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
|
|
118
146
|
return Math.min(1, intersection.length / Math.max(promptTags.length * 0.5, 1));
|
|
119
147
|
}
|
|
120
148
|
// v3 mode: tag matching with synonym expansion + TF-IDF weighting.
|
|
@@ -124,21 +152,21 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
|
|
|
124
152
|
// the hot path pre-compute the expansion once per query and pass it via
|
|
125
153
|
// `options.normalizedPromptTags`, so this function no longer repeats the
|
|
126
154
|
// work per solution.
|
|
127
|
-
const expandedPromptTags = options?.normalizedPromptTags
|
|
128
|
-
?? defaultNormalizer.normalizeTerms(promptOrTags);
|
|
155
|
+
const expandedPromptTags = options?.normalizedPromptTags ?? defaultNormalizer.normalizeTerms(promptOrTags);
|
|
129
156
|
// R4-T1: when the caller supplies a compound-expanded solution tag set,
|
|
130
157
|
// intersection and partial matching run against the expanded set (so
|
|
131
158
|
// `api-key` matches `api`/`key` queries via the split parts), but the
|
|
132
159
|
// Jaccard union denominator below still uses the RAW `keywordsOrTags`
|
|
133
160
|
// for normalization stability.
|
|
134
161
|
const matchTags = options?.solutionTagsExpanded ?? keywordsOrTags;
|
|
135
|
-
const intersection = matchTags.filter(t => expandedPromptTags.includes(t));
|
|
162
|
+
const intersection = matchTags.filter((t) => expandedPromptTags.includes(t));
|
|
136
163
|
// partial/substring matches for longer tags (>3 chars)
|
|
137
|
-
const partialMatches = matchTags.filter(t => t.length > 3 &&
|
|
138
|
-
|
|
164
|
+
const partialMatches = matchTags.filter((t) => t.length > 3 &&
|
|
165
|
+
!intersection.includes(t) &&
|
|
166
|
+
expandedPromptTags.some((pt) => pt.length > 3 && (pt.includes(t) || t.includes(pt))));
|
|
139
167
|
// Apply TF-IDF weighting: common tags count less
|
|
140
|
-
const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0)
|
|
141
|
-
|
|
168
|
+
const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0) +
|
|
169
|
+
partialMatches.reduce((sum, t) => sum + tagWeight(t) * 0.5, 0);
|
|
142
170
|
// ── Bigram similarity boost for borderline cases ──
|
|
143
171
|
//
|
|
144
172
|
// When the TF-IDF intersection score is below the match threshold (0.5),
|
|
@@ -176,7 +204,11 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
|
|
|
176
204
|
const blendedScore = tfidfScore * 0.8 + bestBigramScore * 0.2;
|
|
177
205
|
return {
|
|
178
206
|
relevance: blendedScore * (confidence ?? 1),
|
|
179
|
-
matchedTags: [
|
|
207
|
+
matchedTags: [
|
|
208
|
+
...intersection,
|
|
209
|
+
...partialMatches,
|
|
210
|
+
...bigramMatchedTags.filter((t) => !intersection.includes(t) && !partialMatches.includes(t)),
|
|
211
|
+
],
|
|
180
212
|
};
|
|
181
213
|
}
|
|
182
214
|
return { relevance: 0, matchedTags: [] };
|
|
@@ -196,7 +228,8 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
|
|
|
196
228
|
bigramBoost = sim;
|
|
197
229
|
}
|
|
198
230
|
}
|
|
199
|
-
const
|
|
231
|
+
const w = options?.ensembleWeights ?? { tfidf: 0.5, bm25: 0.3, bigram: 0.2 };
|
|
232
|
+
const ensembleScore = tfidfScore * w.tfidf + bm25 * w.bm25 + bigramBoost * w.bigram;
|
|
200
233
|
return {
|
|
201
234
|
relevance: ensembleScore * (confidence ?? 1),
|
|
202
235
|
matchedTags: [...intersection, ...partialMatches],
|
|
@@ -274,8 +307,8 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
|
|
|
274
307
|
// Rule B
|
|
275
308
|
if (matchedTags.length === 1) {
|
|
276
309
|
const tag = matchedTags[0];
|
|
277
|
-
const literalHit = promptTags.includes(tag)
|
|
278
|
-
|
|
310
|
+
const literalHit = promptTags.includes(tag) ||
|
|
311
|
+
promptTags.some((pt) => {
|
|
279
312
|
if (pt.length <= 3 || tag.length <= 3)
|
|
280
313
|
return false;
|
|
281
314
|
if (pt.includes(tag) || tag.includes(pt))
|
|
@@ -310,7 +343,7 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
|
|
|
310
343
|
* `matchSolutions` behaviour (both scopes could rank). Callers that want
|
|
311
344
|
* first-wins scope precedence must dedupe on their side.
|
|
312
345
|
*/
|
|
313
|
-
function rankCandidates(promptTags, promptLower, solutions) {
|
|
346
|
+
function rankCandidates(promptTags, promptLower, solutions, ensembleWeights) {
|
|
314
347
|
// T2: normalize prompt tags ONCE per query (not once per solution).
|
|
315
348
|
// Pre-T2 this expansion happened inside calculateRelevance and was
|
|
316
349
|
// repeated N times for N solutions — the plan's primary hot-path win.
|
|
@@ -345,7 +378,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
|
|
|
345
378
|
const promptTagsWithBigrams = expandQueryBigrams(maskedPromptTags);
|
|
346
379
|
const normalizedPromptTags = defaultNormalizer.normalizeTerms(promptTagsWithBigrams);
|
|
347
380
|
return solutions
|
|
348
|
-
.map(sol => {
|
|
381
|
+
.map((sol) => {
|
|
349
382
|
// R4-T1: solution-side compound-tag expansion. `api-key` becomes
|
|
350
383
|
// {api-key, api, key} so a query token `api` (from "api keys") hits
|
|
351
384
|
// it directly. Computed per solution because each sol.tags is
|
|
@@ -358,7 +391,11 @@ function rankCandidates(promptTags, promptLower, solutions) {
|
|
|
358
391
|
// step (intersection/partialMatches) already uses the masked set
|
|
359
392
|
// via `normalizedPromptTags` — the union must match for score
|
|
360
393
|
// semantics to stay consistent.
|
|
361
|
-
const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, {
|
|
394
|
+
const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, {
|
|
395
|
+
normalizedPromptTags,
|
|
396
|
+
solutionTagsExpanded: solTagsExpanded,
|
|
397
|
+
ensembleWeights,
|
|
398
|
+
});
|
|
362
399
|
// Compute identifier boost FIRST — independent of tag scoring so
|
|
363
400
|
// R4-T3's tag-evidence precision rules below cannot silently drop
|
|
364
401
|
// a candidate that has strong identifier-level evidence.
|
|
@@ -385,9 +422,9 @@ function rankCandidates(promptTags, promptLower, solutions) {
|
|
|
385
422
|
// the `matchedTags.length + matchedIdentifiers.length >= 1` filter.
|
|
386
423
|
let tagRelevance = result.relevance;
|
|
387
424
|
let tagMatches = result.matchedTags;
|
|
388
|
-
if (matchedIdentifiers.length === 0
|
|
389
|
-
|
|
390
|
-
|
|
425
|
+
if (matchedIdentifiers.length === 0 &&
|
|
426
|
+
tagMatches.length > 0 &&
|
|
427
|
+
shouldRejectByR4T3Rules(maskedPromptTags, tagMatches)) {
|
|
391
428
|
tagRelevance = 0;
|
|
392
429
|
tagMatches = [];
|
|
393
430
|
}
|
|
@@ -398,7 +435,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
|
|
|
398
435
|
matchedIdentifiers,
|
|
399
436
|
};
|
|
400
437
|
})
|
|
401
|
-
.filter(c => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
|
|
438
|
+
.filter((c) => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
|
|
402
439
|
.sort((a, b) => b.relevance - a.relevance)
|
|
403
440
|
.slice(0, 5);
|
|
404
441
|
}
|
|
@@ -675,7 +712,7 @@ function computeBucketMetrics(queries, solutions) {
|
|
|
675
712
|
*/
|
|
676
713
|
export function evaluateQuery(query, solutions) {
|
|
677
714
|
const promptTags = extractTags(query);
|
|
678
|
-
return rankCandidates(promptTags, query.toLowerCase(), solutions).map(c => ({
|
|
715
|
+
return rankCandidates(promptTags, query.toLowerCase(), solutions).map((c) => ({
|
|
679
716
|
name: c.solution.name,
|
|
680
717
|
relevance: c.relevance,
|
|
681
718
|
matchedTags: c.matchedTags,
|
|
@@ -701,13 +738,16 @@ export function evaluateSolutionMatcher(fixture) {
|
|
|
701
738
|
// doesn't drown a small paraphrase bucket but also a single-query bucket
|
|
702
739
|
// doesn't dominate.
|
|
703
740
|
const recallAt5 = combinedTotal > 0
|
|
704
|
-
? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) /
|
|
741
|
+
? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) /
|
|
742
|
+
combinedTotal
|
|
705
743
|
: 0;
|
|
706
744
|
const mrrAt5 = combinedTotal > 0
|
|
707
|
-
? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) /
|
|
745
|
+
? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) /
|
|
746
|
+
combinedTotal
|
|
708
747
|
: 0;
|
|
709
748
|
const noResultRate = combinedTotal > 0
|
|
710
|
-
? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) /
|
|
749
|
+
? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) /
|
|
750
|
+
combinedTotal
|
|
711
751
|
: 0;
|
|
712
752
|
let negAnyResult = 0;
|
|
713
753
|
for (const q of fixture.negative) {
|
|
@@ -733,27 +773,87 @@ export function evaluateSolutionMatcher(fixture) {
|
|
|
733
773
|
},
|
|
734
774
|
};
|
|
735
775
|
}
|
|
776
|
+
// ── Meta-learning: dynamic ensemble weights ──
|
|
777
|
+
let _cachedWeights;
|
|
778
|
+
let _weightsCacheTime = 0;
|
|
779
|
+
const WEIGHTS_CACHE_TTL = 60_000; // 1 minute cache
|
|
780
|
+
/**
 * Read the tuned ensemble weights written by meta-learning, if any.
 *
 * Looks for `matcher-weights.json` under META_LEARNING_DIR and accepts it
 * only when `tfidf`, `bm25` and `bigram` are all numbers. Any other outcome
 * (missing file, unreadable file, bad JSON, wrong shape) yields `undefined`
 * so the caller falls back to the built-in default weights.
 *
 * The result — including a negative result, stored as `null` — is memoized
 * for WEIGHTS_CACHE_TTL milliseconds so repeated calls do not hit the disk.
 *
 * @returns `{ tfidf, bm25, bigram }` or `undefined` when no usable weights exist.
 */
function loadTunedMatcherWeights() {
    const now = Date.now();
    // `undefined` means "never looked"; `null` means "looked, nothing usable".
    const cacheIsFresh = _cachedWeights !== undefined && now - _weightsCacheTime < WEIGHTS_CACHE_TTL;
    if (cacheIsFresh) {
        return _cachedWeights === null ? undefined : _cachedWeights;
    }
    let loaded = null;
    try {
        const weightsFile = path.join(META_LEARNING_DIR, 'matcher-weights.json');
        if (fs.existsSync(weightsFile)) {
            const { tfidf, bm25, bigram } = JSON.parse(fs.readFileSync(weightsFile, 'utf-8'));
            if ([tfidf, bm25, bigram].every((v) => typeof v === 'number')) {
                loaded = { tfidf, bm25, bigram };
            }
        }
    }
    catch {
        /* fail-open: use defaults */
    }
    // Single exit: remember whatever we found (or did not find) and when.
    _cachedWeights = loaded;
    _weightsCacheTime = now;
    return loaded === null ? undefined : loaded;
}
|
|
813
|
+
/**
 * Cold-start exploration bonus for solutions still in `status: 'candidate'`.
 *
 * Candidate entries get their `confidence` multiplied by
 * CANDIDATE_EXPLORATION_MULTIPLIER (capped at 1) so that freshly proposed
 * solutions are ranked slightly higher and can surface often enough to
 * collect outcome data. Entries with any other status are passed through
 * untouched (same object reference); boosted entries are shallow copies, so
 * the input objects are never mutated.
 *
 * The 1.3× factor is a starting point (Q1 in docs/design-solution-evolution.md).
 * Turning the bonus off again is not done here: it stops applying as soon as
 * something else changes the entry's `status` away from `candidate`.
 */
const CANDIDATE_EXPLORATION_MULTIPLIER = 1.3;
function applyCandidateExplorationBonus(entries) {
    return entries.map((entry) => entry.status === 'candidate'
        ? {
            ...entry,
            confidence: Math.min(1, entry.confidence * CANDIDATE_EXPLORATION_MULTIPLIER),
        }
        : entry);
}
|
|
736
836
|
export function matchSolutions(prompt, scope, cwd) {
|
|
737
837
|
// Build solution dirs for index cache
|
|
738
|
-
const dirs = [
|
|
739
|
-
{ dir: ME_SOLUTIONS, scope: 'me' },
|
|
740
|
-
];
|
|
838
|
+
const dirs = [{ dir: ME_SOLUTIONS, scope: 'me' }];
|
|
741
839
|
if (scope.team) {
|
|
742
840
|
dirs.push({ dir: path.join(PACKS_DIR, scope.team.name, 'solutions'), scope: 'team' });
|
|
743
841
|
}
|
|
744
842
|
dirs.push({ dir: path.join(cwd, '.compound', 'solutions'), scope: 'project' });
|
|
745
843
|
// Use cached index (rebuilt only when dirs change)
|
|
746
844
|
const index = getOrBuildIndex(dirs);
|
|
747
|
-
const allSolutions = index.entries.map(e => ({ ...e }));
|
|
845
|
+
const allSolutions = applyCandidateExplorationBonus(index.entries.map((e) => ({ ...e })));
|
|
748
846
|
const promptTags = extractTags(prompt);
|
|
749
847
|
const promptLower = prompt.toLowerCase();
|
|
848
|
+
// Meta-learning: load tuned weights if available
|
|
849
|
+
const tunedWeights = loadTunedMatcherWeights();
|
|
750
850
|
// Delegate to shared ranking core. `rankCandidates` is generic so each
|
|
751
851
|
// ranked candidate carries the original `LoadedSolution` reference — no
|
|
752
852
|
// name-based re-lookup, so two scopes sharing a name (e.g. me/foo and
|
|
753
853
|
// project/foo) can both appear in the result without a Map last-wins
|
|
754
854
|
// scope-precedence bug.
|
|
755
|
-
const ranked = rankCandidates(promptTags, promptLower, allSolutions);
|
|
756
|
-
return ranked.map(c => ({
|
|
855
|
+
const ranked = rankCandidates(promptTags, promptLower, allSolutions, tunedWeights);
|
|
856
|
+
return ranked.map((c) => ({
|
|
757
857
|
name: c.solution.name,
|
|
758
858
|
path: c.solution.filePath,
|
|
759
859
|
scope: c.solution.scope,
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/** Verdict recorded for one injected solution. */
export type Outcome = 'accept' | 'correct' | 'error' | 'unknown';
/** How the verdict was attached to the injection. */
export type Attribution = 'explicit' | 'window' | 'session_end' | 'default';
/**
 * One inject → outcome event. Written append-only to
 * ~/.forgen/state/outcomes/{session_id}.jsonl. The pending state (inject
 * happened, outcome not yet decided) is stored separately in
 * ~/.forgen/state/outcome-pending-{session_id}.json.
 */
export interface OutcomeEvent {
    /** Epoch milliseconds at which the outcome was recorded. */
    ts: number;
    /** Session the injection belonged to. */
    session_id: string;
    /** Name of the injected solution. */
    solution: string;
    /** Match score carried over from the pending injection record. */
    match_score: number;
    /** Injected character count carried over from the pending injection record. */
    injected_chars: number;
    /** The verdict itself. */
    outcome: Outcome;
    /** Milliseconds between the injection being recorded and this outcome. */
    outcome_lag_ms: number;
    /** How the verdict was attributed to this injection. */
    attribution: Attribution;
}
|
|
19
|
+
/**
|
|
20
|
+
* Record that solutions were injected. Called from solution-injector right
|
|
21
|
+
* after `approveWithContext` is emitted. Fails silently — outcome tracking
|
|
22
|
+
* must never block the user's workflow.
|
|
23
|
+
*/
|
|
24
|
+
export declare function appendPending(sessionId: string, injections: Array<{
|
|
25
|
+
solution: string;
|
|
26
|
+
match_score: number;
|
|
27
|
+
injected_chars: number;
|
|
28
|
+
}>): void;
|
|
29
|
+
/**
|
|
30
|
+
* Flush pending injections as `accept` events. Called when a new user
|
|
31
|
+
* prompt arrives without any intervening correction/error, signaling that
|
|
32
|
+
* the previous injections were silently accepted. "Silence = consent."
|
|
33
|
+
*
|
|
34
|
+
* If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
|
|
35
|
+
* because an earlier step already attributed them as `correct` or `error`).
|
|
36
|
+
*/
|
|
37
|
+
export declare function flushAccept(sessionId: string, excludeSolutions?: Set<string>): number;
|
|
38
|
+
/**
|
|
39
|
+
* Attribute a correction to the most recent pending injection(s). Called
|
|
40
|
+
* from the correction-record MCP tool. Removes attributed entries from
|
|
41
|
+
* pending so subsequent `flushAccept` does not double-count them.
|
|
42
|
+
*
|
|
43
|
+
* Strategy: all currently-pending solutions in this session are marked as
|
|
44
|
+
* `correct`. This is conservative (the correction may target only one of
|
|
45
|
+
* them), but without semantic attribution we err on the side of the user's
|
|
46
|
+
* feedback signal being louder than acceptance.
|
|
47
|
+
*/
|
|
48
|
+
export declare function attributeCorrection(sessionId: string): string[];
|
|
49
|
+
/**
|
|
50
|
+
* Attribute a tool error to pending solutions in this session. Called from
|
|
51
|
+
* post-tool-failure hook. Unlike corrections, errors do not clear pending
|
|
52
|
+
* — an error is a weaker signal and the next user prompt can still produce
|
|
53
|
+
* a correct/accept decision.
|
|
54
|
+
*
|
|
55
|
+
* To avoid flooding the log with duplicate errors for the same pending
|
|
56
|
+
* batch, we cap at one `error` event per (session, solution) pair per
|
|
57
|
+
* pending-cycle by tracking an `__error_flagged` list in the pending state.
|
|
58
|
+
*/
|
|
59
|
+
export declare function attributeError(sessionId: string): string[];
|
|
60
|
+
/**
|
|
61
|
+
* At session end, any still-pending entries are logged as `unknown` (we
|
|
62
|
+
* can't tell if the user was happy or just stopped). Pending file is
|
|
63
|
+
* removed.
|
|
64
|
+
*/
|
|
65
|
+
export declare function finalizeSession(sessionId: string): number;
|
|
66
|
+
/**
|
|
67
|
+
* Read all outcome events across all sessions. Used by fitness
|
|
68
|
+
* calculation. Returns events sorted by timestamp ascending.
|
|
69
|
+
*/
|
|
70
|
+
export declare function readAllOutcomes(): OutcomeEvent[];
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { OUTCOMES_DIR, STATE_DIR } from '../core/paths.js';
|
|
4
|
+
import { sanitizeId } from '../hooks/shared/sanitize-id.js';
|
|
5
|
+
import { createLogger } from '../core/logger.js';
|
|
6
|
+
const log = createLogger('solution-outcomes');
|
|
7
|
+
/** Path of the pending-injection state file for a session, inside STATE_DIR. */
function pendingPath(sessionId) {
    const fileName = `outcome-pending-${sanitizeId(sessionId)}.json`;
    return path.join(STATE_DIR, fileName);
}
|
|
10
|
+
/** Path of the append-only outcome log (`.jsonl`) for a session, inside OUTCOMES_DIR. */
function outcomesPath(sessionId) {
    const fileName = `${sanitizeId(sessionId)}.jsonl`;
    return path.join(OUTCOMES_DIR, fileName);
}
|
|
13
|
+
/**
 * Load the pending-injection state for a session.
 *
 * Never throws: a missing file, an unreadable file, invalid JSON, or JSON of
 * the wrong shape all yield a fresh empty state. The shape check matters
 * because every caller immediately reads `state.pending.length`; without it
 * a well-formed but wrong file (e.g. `null` or `{}`) would make each caller
 * throw, and since they swallow errors the file would never be rewritten.
 *
 * Any extra keys present in a valid file are returned unchanged.
 *
 * @param sessionId Session whose pending state should be read.
 * @returns An object with at least `pending` (array) and `last_prompt_ts` (number).
 */
function readPending(sessionId) {
    const emptyState = () => ({ pending: [], last_prompt_ts: 0 });
    const p = pendingPath(sessionId);
    if (!fs.existsSync(p))
        return emptyState();
    try {
        const parsed = JSON.parse(fs.readFileSync(p, 'utf-8'));
        const shapeOk = parsed !== null && typeof parsed === 'object' && Array.isArray(parsed.pending);
        if (!shapeOk)
            return emptyState();
        if (typeof parsed.last_prompt_ts !== 'number') {
            // Keep the pending entries but give callers a usable timestamp.
            return { ...parsed, last_prompt_ts: 0 };
        }
        return parsed;
    }
    catch {
        // Unreadable or malformed file: start over rather than fail the hook.
        return emptyState();
    }
}
|
|
24
|
+
/**
 * Persist the pending-injection state for a session as JSON, creating
 * STATE_DIR first if it does not exist. Overwrites any previous state file.
 */
function writePending(sessionId, state) {
    const target = pendingPath(sessionId);
    fs.mkdirSync(STATE_DIR, { recursive: true });
    const serialized = JSON.stringify(state);
    fs.writeFileSync(target, serialized);
}
|
|
29
|
+
/**
 * Append one outcome event as a single JSON line to the log of the session
 * named in `event.session_id`, creating OUTCOMES_DIR first if needed.
 */
function appendOutcome(event) {
    fs.mkdirSync(OUTCOMES_DIR, { recursive: true });
    const logFile = outcomesPath(event.session_id);
    const line = `${JSON.stringify(event)}\n`;
    fs.appendFileSync(logFile, line);
}
|
|
33
|
+
/**
 * Remember that a batch of solutions was just injected into the session.
 *
 * Each injection is added to the session's pending list, stamped with the
 * current time, and the list is written back. Does nothing for an empty
 * session id or an empty batch. Read/write failures are logged at debug
 * level and otherwise ignored — outcome tracking must never block the
 * user's workflow.
 *
 * @param sessionId  Session the injections belong to.
 * @param injections Entries of `{ solution, match_score, injected_chars }`.
 */
export function appendPending(sessionId, injections) {
    const nothingToRecord = !sessionId || injections.length === 0;
    if (nothingToRecord) {
        return;
    }
    try {
        const state = readPending(sessionId);
        // One timestamp for the whole batch.
        const ts = Date.now();
        for (let i = 0; i < injections.length; i++) {
            state.pending.push({ ...injections[i], ts });
        }
        writePending(sessionId, state);
    }
    catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        log.debug(`appendPending failed: ${reason}`);
    }
}
|
|
53
|
+
/**
 * Flush pending injections as `accept` events. Called when a new user
 * prompt arrives without any intervening correction/error, signaling that
 * the previous injections were silently accepted. "Silence = consent."
 *
 * Every pending entry whose solution is not in `excludeSolutions` is logged
 * as an `accept` outcome with `attribution: 'default'`. Afterwards the
 * pending state is replaced by an empty list and `last_prompt_ts` is set to
 * the current time; any other keys in the previous state are not carried
 * over.
 *
 * Entries named in `excludeSolutions` get no `accept` event (e.g. because an
 * earlier step already attributed them as `correct` or `error`). Note that
 * they are nevertheless removed from pending together with everything else.
 *
 * NOTE(review): the previous implementation declared a `kept` array that was
 * never filled, which suggests excluded entries may have been meant to stay
 * pending. Behavior is left as it was; confirm with the callers before
 * changing it.
 *
 * Failures are logged at debug level and reported as 0 flushed.
 *
 * @param sessionId        Session whose pending injections should be flushed.
 * @param excludeSolutions Solution names that must not be logged as accepted.
 * @returns Number of `accept` events written.
 */
export function flushAccept(sessionId, excludeSolutions = new Set()) {
    if (!sessionId)
        return 0;
    try {
        const state = readPending(sessionId);
        if (state.pending.length === 0)
            return 0;
        const now = Date.now();
        let flushed = 0;
        for (const p of state.pending) {
            if (excludeSolutions.has(p.solution))
                continue;
            appendOutcome({
                ts: now,
                session_id: sessionId,
                solution: p.solution,
                match_score: p.match_score,
                injected_chars: p.injected_chars,
                outcome: 'accept',
                outcome_lag_ms: now - p.ts,
                attribution: 'default',
            });
            flushed++;
        }
        // Start a new pending cycle: nothing pending, prompt time updated.
        writePending(sessionId, { pending: [], last_prompt_ts: now });
        return flushed;
    }
    catch (e) {
        log.debug(`flushAccept failed: ${e instanceof Error ? e.message : String(e)}`);
        return 0;
    }
}
|
|
94
|
+
/**
 * Record a user correction against everything currently pending.
 *
 * Every pending injection of the session is logged as a `correct` outcome
 * with `attribution: 'explicit'`, and the pending list is then emptied so a
 * later `flushAccept` cannot count the same injections again. The stored
 * `last_prompt_ts` is kept as it was.
 *
 * Marking all pending solutions is deliberately broad: the correction may
 * concern only one of them, but without semantic attribution the user's
 * feedback is allowed to outweigh silent acceptance.
 *
 * Failures are logged at debug level and reported as an empty result.
 *
 * @param sessionId Session in which the correction was made.
 * @returns Names of the solutions that were attributed, in pending order.
 */
export function attributeCorrection(sessionId) {
    if (!sessionId) {
        return [];
    }
    try {
        const state = readPending(sessionId);
        const entries = state.pending;
        if (entries.length === 0) {
            return [];
        }
        const now = Date.now();
        const attributed = [];
        for (let i = 0; i < entries.length; i++) {
            const entry = entries[i];
            appendOutcome({
                ts: now,
                session_id: sessionId,
                solution: entry.solution,
                match_score: entry.match_score,
                injected_chars: entry.injected_chars,
                outcome: 'correct',
                outcome_lag_ms: now - entry.ts,
                attribution: 'explicit',
            });
            attributed.push(entry.solution);
        }
        writePending(sessionId, { pending: [], last_prompt_ts: state.last_prompt_ts });
        return attributed;
    }
    catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        log.debug(`attributeCorrection failed: ${reason}`);
        return [];
    }
}
|
|
134
|
+
/**
 * Record a tool error against the solutions pending in this session.
 *
 * Pending entries are left in place: an error is treated as a weaker signal
 * than a correction, so the next user prompt can still resolve them as
 * correct/accept.
 *
 * To keep the log from filling with repeats, each solution gets at most one
 * `error` event per pending cycle. The solutions already reported are
 * remembered in the `__error_flagged` array stored alongside the pending
 * state.
 *
 * @param sessionId Session whose pending entries are attributed; a falsy
 *   value yields an empty result.
 * @returns The solutions newly flagged by this call. Empty when nothing was
 *   pending, everything was already flagged, or an error occurred (the
 *   error is logged at debug level).
 */
export function attributeError(sessionId) {
    if (!sessionId) {
        return [];
    }
    try {
        const state = readPending(sessionId);
        if (state.pending.length === 0) {
            return [];
        }
        // Tolerate a missing or malformed marker by starting from an empty set.
        const previouslyFlagged = state.__error_flagged;
        const seen = new Set(Array.isArray(previouslyFlagged) ? previouslyFlagged : []);
        const erroredAt = Date.now();
        const newlyFlagged = [];
        for (const { solution, match_score, injected_chars, ts } of state.pending) {
            if (seen.has(solution)) {
                continue;
            }
            appendOutcome({
                ts: erroredAt,
                session_id: sessionId,
                solution,
                match_score,
                injected_chars,
                outcome: 'error',
                outcome_lag_ms: erroredAt - ts,
                attribution: 'window',
            });
            seen.add(solution);
            newlyFlagged.push(solution);
        }
        // Persist the updated marker together with the untouched pending list.
        state.__error_flagged = [...seen];
        writePending(sessionId, state);
        return newlyFlagged;
    }
    catch (e) {
        log.debug(`attributeError failed: ${e instanceof Error ? e.message : String(e)}`);
        return [];
    }
}
|
|
181
|
+
/**
 * Close out a session: every entry still pending is logged with outcome
 * `unknown` (there is no way to tell whether the user was satisfied or
 * simply stopped), and the session's pending file is then removed.
 *
 * @param sessionId Session to finalize; a falsy value is a no-op.
 * @returns Number of `unknown` outcome events written, or 0 when the
 *   session id is falsy or an unexpected error occurred (the error is
 *   logged at debug level).
 */
export function finalizeSession(sessionId) {
    if (!sessionId)
        return 0;
    try {
        const state = readPending(sessionId);
        const now = Date.now();
        let finalized = 0;
        for (const entry of state.pending) {
            appendOutcome({
                ts: now,
                session_id: sessionId,
                solution: entry.solution,
                match_score: entry.match_score,
                injected_chars: entry.injected_chars,
                outcome: 'unknown',
                outcome_lag_ms: now - entry.ts,
                attribution: 'session_end',
            });
            finalized++;
        }
        const pendingFile = pendingPath(sessionId);
        try {
            // Remove directly instead of existsSync + unlinkSync: the check-then-act
            // pair can race with another process deleting the file, which would
            // throw here and make the function report 0 after events were written.
            fs.unlinkSync(pendingFile);
        }
        catch (err) {
            // A file that is already gone is the desired end state; anything else
            // is handled by the outer catch exactly as before.
            if (!err || err.code !== 'ENOENT')
                throw err;
        }
        return finalized;
    }
    catch (e) {
        log.debug(`finalizeSession failed: ${e instanceof Error ? e.message : String(e)}`);
        return 0;
    }
}
|
|
216
|
+
/**
|
|
217
|
+
* Read all outcome events across all sessions. Used by fitness
|
|
218
|
+
* calculation. Returns events sorted by timestamp ascending.
|
|
219
|
+
*/
|
|
220
|
+
export function readAllOutcomes() {
|
|
221
|
+
if (!fs.existsSync(OUTCOMES_DIR))
|
|
222
|
+
return [];
|
|
223
|
+
const events = [];
|
|
224
|
+
for (const file of fs.readdirSync(OUTCOMES_DIR)) {
|
|
225
|
+
if (!file.endsWith('.jsonl'))
|
|
226
|
+
continue;
|
|
227
|
+
try {
|
|
228
|
+
const text = fs.readFileSync(path.join(OUTCOMES_DIR, file), 'utf-8');
|
|
229
|
+
for (const line of text.split('\n')) {
|
|
230
|
+
if (!line)
|
|
231
|
+
continue;
|
|
232
|
+
try {
|
|
233
|
+
events.push(JSON.parse(line));
|
|
234
|
+
}
|
|
235
|
+
catch { /* skip bad line */ }
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
catch { /* skip */ }
|
|
239
|
+
}
|
|
240
|
+
events.sort((a, b) => a.ts - b.ts);
|
|
241
|
+
return events;
|
|
242
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/** One quarantine record, as returned by `listQuarantined`. */
interface QuarantineEntry {
    /** Path of the quarantined file. */
    path: string;
    /** NOTE(review): presumably the time the entry was recorded, as a string — confirm the format. */
    at: string;
    /** Error messages recorded for the file. */
    errors: string[];
}
|
|
6
|
+
/**
 * Build a human-readable list of frontmatter problems straight from a
 * file's raw content.
 *
 * The YAML is parsed a second time here even though `parseFrontmatterOnly`
 * has already parsed it. That cost is accepted because this function only
 * runs on the rare failure path, when a solution has been dropped from the
 * index, and the result is an actionable error list.
 *
 * @param content Raw text of the file to diagnose.
 * @returns The diagnostic messages.
 */
export declare function diagnoseFromRawContent(content: string): string[];
|
|
14
|
+
/**
 * Append a quarantine entry for `filePath`.
 *
 * Appends are deduplicated per path within the current file: when the most
 * recent entry for this path already carries the same errors, nothing is
 * written.
 *
 * Storage format: JSONL, one line per quarantine event. Readers only
 * consider the latest line for each path.
 *
 * @param filePath Path of the file being quarantined.
 * @param errors Error messages to record for that file.
 */
export declare function recordQuarantine(filePath: string, errors: string[]): void;
|
|
23
|
+
/**
 * Return the current quarantine state.
 *
 * The result holds one entry per path, taken from the most recent append
 * for that path. Entries whose file no longer exists are left out.
 *
 * @returns The latest quarantine entry for each path whose file still exists.
 */
export declare function listQuarantined(): QuarantineEntry[];
|
|
28
|
+
/**
 * Drop quarantine entries for files that now parse correctly or that no
 * longer exist. Meant to be run after `forgen learn fix-up` or after a
 * manual edit.
 *
 * @returns Counts of the entries removed and the entries kept.
 */
export declare function pruneQuarantine(): {
    removed: number;
    kept: number;
};
|
|
36
|
+
export {};
|