@wooojin/forgen 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/agents/solution-evolver.md +115 -0
- package/dist/cli.js +8 -0
- package/dist/core/dashboard.js +46 -0
- package/dist/core/paths.d.ts +25 -0
- package/dist/core/paths.js +25 -0
- package/dist/engine/learn-cli.d.ts +1 -0
- package/dist/engine/learn-cli.js +182 -0
- package/dist/engine/solution-candidate.d.ts +30 -0
- package/dist/engine/solution-candidate.js +124 -0
- package/dist/engine/solution-fitness.d.ts +52 -0
- package/dist/engine/solution-fitness.js +95 -0
- package/dist/engine/solution-fixup.d.ts +30 -0
- package/dist/engine/solution-fixup.js +116 -0
- package/dist/engine/solution-format.d.ts +8 -0
- package/dist/engine/solution-format.js +38 -23
- package/dist/engine/solution-index.js +10 -0
- package/dist/engine/solution-matcher.js +24 -1
- package/dist/engine/solution-outcomes.d.ts +70 -0
- package/dist/engine/solution-outcomes.js +242 -0
- package/dist/engine/solution-quarantine.d.ts +36 -0
- package/dist/engine/solution-quarantine.js +172 -0
- package/dist/engine/solution-weakness.d.ts +45 -0
- package/dist/engine/solution-weakness.js +225 -0
- package/dist/engine/solution-writer.d.ts +5 -0
- package/dist/engine/solution-writer.js +18 -0
- package/dist/hooks/post-tool-failure.js +7 -0
- package/dist/hooks/solution-injector.js +20 -0
- package/dist/mcp/tools.js +8 -0
- package/package.json +1 -1
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { ME_SOLUTIONS, STATE_DIR } from '../core/paths.js';
|
|
4
|
+
import { parseFrontmatterOnly } from './solution-format.js';
|
|
5
|
+
import { computeFitness } from './solution-fitness.js';
|
|
6
|
+
import { readAllOutcomes } from './solution-outcomes.js';
|
|
7
|
+
import { createLogger } from '../core/logger.js';
|
|
8
|
+
const log = createLogger('solution-weakness');
|
|
9
|
+
/**
 * Load the name and tags of every solution stored as a Markdown file.
 *
 * Only `*.md` entries directly inside `solutionsDir` are considered. Each file
 * is parsed with `parseFrontmatterOnly`; files that cannot be read or parsed,
 * or whose frontmatter has no string `name`, are skipped.
 *
 * @param {string} solutionsDir Directory containing the solution files.
 * @returns {{ name: string, tags: string[] }[]} One row per usable solution.
 *   `tags` is always an array of strings (empty when the frontmatter has none),
 *   so downstream `row.tags.includes(...)` / `row.tags.filter(...)` never throw.
 */
function loadSolutionRows(solutionsDir) {
    if (!fs.existsSync(solutionsDir))
        return [];
    const rows = [];
    for (const file of fs.readdirSync(solutionsDir)) {
        if (!file.endsWith('.md'))
            continue;
        try {
            const content = fs.readFileSync(path.join(solutionsDir, file), 'utf-8');
            const fm = parseFrontmatterOnly(content);
            // A row without a usable name cannot be joined against fitness data.
            if (!fm || typeof fm.name !== 'string')
                continue;
            // Normalise here so one malformed solution cannot abort the whole report.
            const tags = Array.isArray(fm.tags)
                ? fm.tags.filter((t) => typeof t === 'string')
                : [];
            rows.push({ name: fm.name, tags });
        }
        catch (e) {
            // Best-effort: one unreadable file must not hide the others.
            log.debug(`skip ${file}: ${e instanceof Error ? e.message : String(e)}`);
        }
    }
    return rows;
}
|
|
27
|
+
/**
 * Tally how often each tag or keyword shows up in correction evidence files.
 *
 * Every `*.json` file in `behaviorDir` contributes:
 *  - +1 per distinct tag found via `collectTags` on `raw_payload` (or
 *    `payload`) and on `axis_refs`;
 *  - +0.3 per occurrence of each lower-cased `summary` word that is 5-20
 *    characters long (a weaker keyword fallback).
 * Files that cannot be read or parsed are skipped.
 *
 * @param {string} behaviorDir Directory holding the evidence JSON files.
 * @returns {Map<string, number>} Weighted mention count per tag.
 */
function readCorrectionTagCounts(behaviorDir) {
    const counts = new Map();
    if (!fs.existsSync(behaviorDir))
        return counts;
    const bump = (key, weight) => counts.set(key, (counts.get(key) ?? 0) + weight);
    for (const file of fs.readdirSync(behaviorDir)) {
        if (!file.endsWith('.json'))
            continue;
        try {
            const data = JSON.parse(fs.readFileSync(path.join(behaviorDir, file), 'utf-8'));
            const payload = data.raw_payload ?? data.payload ?? {};
            const tags = collectTags(payload).concat(collectTags(data.axis_refs ?? []));
            const summary = typeof data.summary === 'string' ? data.summary.toLowerCase() : '';
            // A tag counts once per evidence file, however often it repeats inside it.
            for (const tag of new Set(tags))
                bump(tag, 1);
            // Summary keywords fallback: split on whitespace, drop very short/long tokens.
            for (const word of summary.split(/\s+/)) {
                if (word.length >= 5 && word.length <= 20)
                    bump(word, 0.3);
            }
        }
        catch { /* skip bad json: evidence reading is deliberately best-effort */ }
    }
    return counts;
}
/**
 * Find tags that corrections keep mentioning but no champion solution covers.
 *
 * Evidence is read from the `behavior` directory next to `ME_SOLUTIONS`.
 * A tag is reported when its weighted mention count is at least 2 and the
 * best-fitness solution carrying that tag is missing or is not in the
 * `champion` state.
 *
 * @param {{ name: string, tags: string[] }[]} rows Solution rows.
 * @param {Map<string, { fitness: number, state: string }>} fitnessByName
 *   Fitness records keyed by solution name.
 * @returns {{ tag: string, correction_mentions: number,
 *   best_matching_champion: null, best_fitness: number }[]}
 *   At most 10 entries, most-mentioned first.
 */
function findUnderServedTags(rows, fitnessByName) {
    const behaviorDir = path.join(ME_SOLUTIONS, '..', 'behavior');
    const correctionTags = readCorrectionTagCounts(behaviorDir);
    const result = [];
    for (const [tag, count] of correctionTags) {
        if (count < 2)
            continue; // noise cutoff
        let bestName = null;
        let bestFitness = 0;
        for (const row of rows) {
            // Rows without a tag list simply cannot match; do not let them throw.
            if (!Array.isArray(row.tags) || !row.tags.includes(tag))
                continue;
            const fit = fitnessByName.get(row.name)?.fitness ?? 0;
            // The first candidate needs fitness >= 0; later ones must be strictly
            // better. A row with negative fitness is therefore never selected.
            if (fit > bestFitness || (bestName === null && fit >= 0)) {
                bestFitness = fit;
                bestName = row.name;
            }
        }
        // Under-served: no matching solution, or best match is not a champion.
        const isChampion = bestName !== null && fitnessByName.get(bestName)?.state === 'champion';
        if (isChampion)
            continue;
        result.push({
            tag,
            correction_mentions: Math.round(count),
            // Only non-champion matches reach this point, so there is never a
            // champion to name. NOTE(review): confirm whether consumers would
            // rather see the best non-champion match here.
            best_matching_champion: null,
            best_fitness: bestFitness,
        });
    }
    result.sort((a, b) => b.correction_mentions - a.correction_mentions);
    return result.slice(0, 10);
}
|
|
86
|
+
/**
 * Extract candidate tag strings from a loosely-typed evidence value.
 *
 * - Array: its string elements.
 * - Object: its direct string values plus the string elements of any array
 *   values, so a payload such as `{ tags: ['a', 'b'] }` contributes `a` and
 *   `b` (previously such arrays were dropped). Deeper nesting is ignored.
 * - Anything else (null, undefined, primitives): no tags.
 *
 * @param {unknown} v Value to inspect.
 * @returns {string[]} Collected strings, in encounter order (may repeat).
 */
function collectTags(v) {
    const isString = (x) => typeof x === 'string';
    if (Array.isArray(v))
        return v.filter(isString);
    if (v && typeof v === 'object') {
        return Object.values(v).flatMap((value) => {
            if (isString(value))
                return [value];
            return Array.isArray(value) ? value.filter(isString) : [];
        });
    }
    return [];
}
|
|
95
|
+
/**
 * Pair every champion with every underperformer that covers the same ground.
 *
 * Two solutions form a cluster when they share at least two DISTINCT tags.
 * Tags are de-duplicated first, so a tag listed twice on one solution cannot
 * satisfy the two-tag threshold on its own. Rows without a tag array are
 * treated as having no tags.
 *
 * @param {{ name: string, tags: string[] }[]} rows Solution rows.
 * @param {Map<string, { fitness: number, state: string }>} fitnessByName
 *   Fitness records keyed by solution name.
 * @returns {{ shared_tags: string[],
 *   champion: { name: string, fitness: number },
 *   underperform: { name: string, fitness: number } }[]}
 *   At most 5 clusters, largest tag overlap first.
 */
function findConflictClusters(rows, fitnessByName) {
    const stateOf = (row) => fitnessByName.get(row.name)?.state;
    const tagSetOf = (row) => new Set(Array.isArray(row.tags) ? row.tags : []);
    const champions = rows.filter((r) => stateOf(r) === 'champion');
    // Build each underperformer's tag set once instead of rescanning per pair.
    const underperformers = rows
        .filter((r) => stateOf(r) === 'underperform')
        .map((row) => ({ row, tags: tagSetOf(row) }));
    const clusters = [];
    for (const ch of champions) {
        const championTags = [...tagSetOf(ch)];
        for (const up of underperformers) {
            const shared = championTags.filter((t) => up.tags.has(t));
            if (shared.length < 2)
                continue;
            clusters.push({
                shared_tags: shared,
                // Both lookups are safe: the rows were selected by their fitness state.
                champion: { name: ch.name, fitness: fitnessByName.get(ch.name).fitness },
                underperform: { name: up.row.name, fitness: fitnessByName.get(up.row.name).fitness },
            });
        }
    }
    clusters.sort((a, b) => b.shared_tags.length - a.shared_tags.length);
    return clusters.slice(0, 5);
}
|
|
114
|
+
/**
 * List solutions that were never injected yet carry tags nothing else serves.
 *
 * A solution is "dead" when its fitness record is missing or reports no
 * injections. Its `unique_tags` are the tags that no injected solution
 * carries; other dead solutions may list the same tag. Dead solutions with no
 * such tags are left out.
 *
 * @param {{ name: string, tags: string[] }[]} rows Solution rows.
 * @param {Map<string, { injected?: number }>} fitnessByName
 *   Fitness records keyed by solution name.
 * @returns {{ solution: string, unique_tags: string[], injected: number }[]}
 *   At most 10 entries, most unique tags first.
 */
function findDeadCorners(rows, fitnessByName) {
    const injectionsOf = (row) => fitnessByName.get(row.name)?.injected ?? 0;
    // Every tag that at least one injected solution already covers.
    const coveredTags = new Set();
    rows
        .filter((row) => injectionsOf(row) > 0)
        .forEach((row) => {
            for (const tag of row.tags)
                coveredTags.add(tag);
        });
    const corners = rows
        .filter((row) => !(injectionsOf(row) > 0))
        .map((row) => ({
            solution: row.name,
            unique_tags: row.tags.filter((tag) => !coveredTags.has(tag)),
            injected: injectionsOf(row),
        }))
        .filter((corner) => corner.unique_tags.length !== 0);
    corners.sort((left, right) => right.unique_tags.length - left.unique_tags.length);
    return corners.slice(0, 10);
}
|
|
134
|
+
/**
 * Find solutions whose accept rate shifted sharply between two time windows.
 *
 * All outcome events are split at the median timestamp: window A holds events
 * strictly before it, window B the rest. Only `accept`, `correct` and `error`
 * outcomes count towards a window. A solution is reported when both windows
 * hold at least 3 counted events and the accept-rate change is not below 0.3
 * in absolute value.
 *
 * The median is taken from the sorted timestamps rather than from the middle
 * array position, so the split is right even when `readAllOutcomes()` does
 * not return events in chronological order.
 *
 * @param {unknown} _fitnessByName Unused; kept for a uniform call signature.
 * @returns {{ solution: string, accept_rate_window_a: number,
 *   accept_rate_window_b: number, delta: number }[]}
 *   At most 5 entries, largest absolute delta first; rates rounded to 3 places.
 */
function findVolatile(_fitnessByName) {
    const events = readAllOutcomes();
    if (events.length === 0)
        return [];
    // Explicit comparator: the default sort would order numeric timestamps as strings.
    const orderedTs = events
        .map((ev) => ev.ts)
        .sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
    const mid = orderedTs[Math.floor(orderedTs.length / 2)];
    const by = new Map();
    for (const ev of events) {
        const c = by.get(ev.solution) ?? { a_accept: 0, a_total: 0, b_accept: 0, b_total: 0 };
        if (ev.outcome === 'accept' || ev.outcome === 'correct' || ev.outcome === 'error') {
            const accepted = ev.outcome === 'accept';
            if (ev.ts < mid) {
                c.a_total++;
                if (accepted)
                    c.a_accept++;
            }
            else {
                c.b_total++;
                if (accepted)
                    c.b_accept++;
            }
        }
        by.set(ev.solution, c);
    }
    const result = [];
    for (const [name, c] of by) {
        // Too few samples in either window makes the rate meaningless.
        if (c.a_total < 3 || c.b_total < 3)
            continue;
        const rateA = c.a_accept / c.a_total;
        const rateB = c.b_accept / c.b_total;
        const delta = rateB - rateA;
        if (Math.abs(delta) < 0.3)
            continue;
        result.push({
            solution: name,
            accept_rate_window_a: Number(rateA.toFixed(3)),
            accept_rate_window_b: Number(rateB.toFixed(3)),
            delta: Number(delta.toFixed(3)),
        });
    }
    result.sort((a, b) => Math.abs(b.delta) - Math.abs(a.delta));
    return result.slice(0, 5);
}
|
|
178
|
+
/**
 * Assemble the full weakness report for the solution population.
 *
 * Loads the solution rows from `solutionsDir`, obtains fitness records from
 * `computeFitness()`, counts them per lifecycle state and runs the four
 * weakness detectors.
 *
 * @param {string} [solutionsDir=ME_SOLUTIONS] Directory of solution files.
 * @returns {{ generated_at: string,
 *   population: { total: number, champion: number, active: number,
 *     underperform: number, draft: number },
 *   under_served_tags: unknown[], conflict_clusters: unknown[],
 *   dead_corners: unknown[], volatile: unknown[] }}
 *   `generated_at` is an ISO-8601 timestamp; `total` counts every fitness
 *   record, including states that have no bucket of their own.
 */
export function buildWeaknessReport(solutionsDir = ME_SOLUTIONS) {
    const rows = loadSolutionRows(solutionsDir);
    const fitnessList = computeFitness();
    const fitnessByName = new Map();
    const perState = new Map([
        ['champion', 0],
        ['active', 0],
        ['underperform', 0],
        ['draft', 0],
    ]);
    // Single pass: index by name and count the tracked states together.
    for (const record of fitnessList) {
        fitnessByName.set(record.solution, record);
        if (perState.has(record.state))
            perState.set(record.state, perState.get(record.state) + 1);
    }
    const generatedAt = new Date().toISOString();
    const underServed = findUnderServedTags(rows, fitnessByName);
    const conflicts = findConflictClusters(rows, fitnessByName);
    const deadCorners = findDeadCorners(rows, fitnessByName);
    const volatileSolutions = findVolatile(fitnessByName);
    return {
        generated_at: generatedAt,
        population: {
            total: fitnessList.length,
            champion: perState.get('champion'),
            active: perState.get('active'),
            underperform: perState.get('underperform'),
            draft: perState.get('draft'),
        },
        under_served_tags: underServed,
        conflict_clusters: conflicts,
        dead_corners: deadCorners,
        volatile: volatileSolutions,
    };
}
|
|
198
|
+
/**
 * Persist a weakness report as pretty-printed JSON under `STATE_DIR`.
 *
 * The state directory is created if needed (a failure there propagates). The
 * file is named `weakness-report-<epoch-ms>.json`. A failure while
 * serialising or writing is only logged at debug level, so the returned path
 * is not proof that the file exists.
 *
 * @param {unknown} report Report object to serialise.
 * @returns {string} Path the report was (or would have been) written to.
 */
export function saveWeaknessReport(report) {
    fs.mkdirSync(STATE_DIR, { recursive: true });
    const target = path.join(STATE_DIR, `weakness-report-${Date.now()}.json`);
    try {
        const serialized = JSON.stringify(report, null, 2);
        fs.writeFileSync(target, serialized);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.debug(`save failed: ${reason}`);
    }
    return target;
}
|
|
210
|
+
/**
 * Load the most recent weakness report stored in `STATE_DIR`.
 *
 * Candidates are files named `weakness-report-*.json`; the one whose name
 * compares greatest as a string is taken as the newest.
 *
 * @returns {unknown | null} The parsed report, or `null` when the directory
 *   does not exist, holds no report, or the newest report cannot be read or
 *   parsed (older reports are not tried).
 */
export function latestWeaknessReport() {
    if (!fs.existsSync(STATE_DIR))
        return null;
    let newest = null;
    for (const entry of fs.readdirSync(STATE_DIR)) {
        const isReport = entry.startsWith('weakness-report-') && entry.endsWith('.json');
        if (!isReport)
            continue;
        // Plain string comparison, the same ordering a default sort would use.
        if (newest === null || entry > newest)
            newest = entry;
    }
    if (newest === null)
        return null;
    try {
        const raw = fs.readFileSync(path.join(STATE_DIR, newest), 'utf-8');
        return JSON.parse(raw);
    }
    catch {
        return null;
    }
}
|
|
@@ -72,5 +72,10 @@ export declare function mutateSolutionByName(name: string, mutator: SolutionMuta
|
|
|
72
72
|
/**
|
|
73
73
|
* Evidence 카운터 단일 증가 helper.
|
|
74
74
|
* mutateSolutionByName + 카운터 증가 패턴을 한 줄로.
|
|
75
|
+
*
|
|
76
|
+
* Also graduates Phase 4 candidates: when a `status: candidate` solution's
|
|
77
|
+
* injected count reaches `CANDIDATE_PROMOTION_INJECTIONS`, its status flips
|
|
78
|
+
* to `verified` in the same write. This keeps the exploration bonus from
|
|
79
|
+
* clinging to a solution that has had enough trials.
|
|
75
80
|
*/
|
|
76
81
|
export declare function incrementEvidence(solutionName: string, field: 'reflected' | 'negative' | 'injected' | 'sessions' | 'reExtracted'): boolean;
|
|
@@ -142,9 +142,22 @@ export function mutateSolutionByName(name, mutator, options) {
|
|
|
142
142
|
}
|
|
143
143
|
return false;
|
|
144
144
|
}
|
|
145
|
+
/**
|
|
146
|
+
* Phase 4 candidate promotion threshold: a `status: candidate` solution
|
|
147
|
+
* automatically graduates to `status: verified` once its injected count
|
|
148
|
+
* crosses this cutoff. At that point the cold-start exploration bonus
|
|
149
|
+
* (solution-matcher.ts) disappears naturally, since the bonus keys off
|
|
150
|
+
* `candidate` status.
|
|
151
|
+
*/
|
|
152
|
+
const CANDIDATE_PROMOTION_INJECTIONS = 5;
|
|
145
153
|
/**
|
|
146
154
|
* Evidence 카운터 단일 증가 helper.
|
|
147
155
|
* mutateSolutionByName + 카운터 증가 패턴을 한 줄로.
|
|
156
|
+
*
|
|
157
|
+
* Also graduates Phase 4 candidates: when a `status: candidate` solution's
|
|
158
|
+
* injected count reaches `CANDIDATE_PROMOTION_INJECTIONS`, its status flips
|
|
159
|
+
* to `verified` in the same write. This keeps the exploration bonus from
|
|
160
|
+
* clinging to a solution that has had enough trials.
|
|
148
161
|
*/
|
|
149
162
|
export function incrementEvidence(solutionName, field) {
|
|
150
163
|
return mutateSolutionByName(solutionName, sol => {
|
|
@@ -152,6 +165,11 @@ export function incrementEvidence(solutionName, field) {
|
|
|
152
165
|
if (!(field in ev))
|
|
153
166
|
return false;
|
|
154
167
|
ev[field] = (ev[field] ?? 0) + 1;
|
|
168
|
+
if (field === 'injected' &&
|
|
169
|
+
sol.frontmatter.status === 'candidate' &&
|
|
170
|
+
ev.injected >= CANDIDATE_PROMOTION_INJECTIONS) {
|
|
171
|
+
sol.frontmatter.status = 'verified';
|
|
172
|
+
}
|
|
155
173
|
return true;
|
|
156
174
|
});
|
|
157
175
|
}
|
|
@@ -105,6 +105,13 @@ async function main() {
|
|
|
105
105
|
saveFailureState(state);
|
|
106
106
|
// 컨텍스트 신호 업데이트
|
|
107
107
|
incrementFailureSignal(sessionId);
|
|
108
|
+
// Outcome tracking (Phase 1): attribute this tool failure to pending
|
|
109
|
+
// solution injections in the same session. Fail-open.
|
|
110
|
+
try {
|
|
111
|
+
const { attributeError } = await import('../engine/solution-outcomes.js');
|
|
112
|
+
attributeError(sessionId);
|
|
113
|
+
}
|
|
114
|
+
catch { /* ignore */ }
|
|
108
115
|
const failCount = state.failures[toolName].count;
|
|
109
116
|
const suggestion = getRecoverySuggestion(error, toolName);
|
|
110
117
|
// 3회 이상 반복 실패 시 강화된 경고
|
|
@@ -28,6 +28,7 @@ import { writeSignal } from './shared/plugin-signal.js';
|
|
|
28
28
|
import { approve, approveWithContext, failOpenWithTracking } from './shared/hook-response.js';
|
|
29
29
|
import { STATE_DIR } from '../core/paths.js';
|
|
30
30
|
import { recordHookTiming } from './shared/hook-timing.js';
|
|
31
|
+
import { appendPending, flushAccept } from '../engine/solution-outcomes.js';
|
|
31
32
|
const MAX_SOLUTIONS_PER_SESSION = 10;
|
|
32
33
|
/** 세션별 이미 주입된 솔루션 추적 (중복 방지) */
|
|
33
34
|
function getSessionCachePath(sessionId) {
|
|
@@ -451,6 +452,25 @@ async function main() {
|
|
|
451
452
|
catch (e) {
|
|
452
453
|
log.debug('plugin signal 기록 실패', e);
|
|
453
454
|
}
|
|
455
|
+
// Outcome tracking (Phase 1): flush previous pending as `accept` (silence
|
|
456
|
+
// = consent), then record this round's injections as new pending. Both
|
|
457
|
+
// calls are fail-open — a tracking crash must not block injection.
|
|
458
|
+
try {
|
|
459
|
+
flushAccept(sessionId);
|
|
460
|
+
}
|
|
461
|
+
catch (e) {
|
|
462
|
+
log.debug('outcome flushAccept 실패', e);
|
|
463
|
+
}
|
|
464
|
+
try {
|
|
465
|
+
appendPending(sessionId, effectiveToInject.map((sol) => ({
|
|
466
|
+
solution: sol.name,
|
|
467
|
+
match_score: sol.relevance,
|
|
468
|
+
injected_chars: (summaries.get(sol.name) ?? sol.name).length,
|
|
469
|
+
})));
|
|
470
|
+
}
|
|
471
|
+
catch (e) {
|
|
472
|
+
log.debug('outcome appendPending 실패', e);
|
|
473
|
+
}
|
|
454
474
|
console.log(approveWithContext(fullInjection, 'UserPromptSubmit'));
|
|
455
475
|
}
|
|
456
476
|
finally {
|
package/dist/mcp/tools.js
CHANGED
|
@@ -273,6 +273,14 @@ export function registerTools(server) {
|
|
|
273
273
|
target,
|
|
274
274
|
axis_hint: axis_hint,
|
|
275
275
|
});
|
|
276
|
+
// Outcome tracking (Phase 1): attribute this correction to any
|
|
277
|
+
// pending injections in the session. Fail-open — attribution is a
|
|
278
|
+
// best-effort signal, never block the correction record itself.
|
|
279
|
+
try {
|
|
280
|
+
const { attributeCorrection } = await import('../engine/solution-outcomes.js');
|
|
281
|
+
attributeCorrection(effectiveSessionId);
|
|
282
|
+
}
|
|
283
|
+
catch { /* ignore */ }
|
|
276
284
|
const lines = [
|
|
277
285
|
`Evidence recorded: ${result.evidence_event_id}`,
|
|
278
286
|
];
|