@wooojin/forgen 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import { ME_SOLUTIONS, STATE_DIR } from '../core/paths.js';
4
+ import { parseFrontmatterOnly } from './solution-format.js';
5
+ import { computeFitness } from './solution-fitness.js';
6
+ import { readAllOutcomes } from './solution-outcomes.js';
7
+ import { createLogger } from '../core/logger.js';
8
+ const log = createLogger('solution-weakness');
9
/**
 * Reads the Markdown solution files in a directory and keeps the two
 * frontmatter fields the weakness analysis works with.
 *
 * Files that cannot be read or parsed, and files for which
 * `parseFrontmatterOnly` returns a falsy value, are left out.
 *
 * @param {string} solutionsDir - Directory containing `*.md` solution files.
 * @returns {Array<{name: *, tags: *}>} One row per usable file, in directory
 *   listing order; an empty array when the directory does not exist.
 */
function loadSolutionRows(solutionsDir) {
    const collected = [];
    if (fs.existsSync(solutionsDir)) {
        const markdownFiles = fs
            .readdirSync(solutionsDir)
            .filter((entry) => entry.endsWith('.md'));
        for (const entry of markdownFiles) {
            try {
                const text = fs.readFileSync(path.join(solutionsDir, entry), 'utf-8');
                const frontmatter = parseFrontmatterOnly(text);
                if (frontmatter) {
                    collected.push({ name: frontmatter.name, tags: frontmatter.tags });
                }
            }
            catch {
                // Best effort: one broken file must not abort the whole scan.
            }
        }
    }
    return collected;
}
27
/**
 * Finds tags that keep showing up in correction evidence but that no
 * champion solution covers.
 *
 * Evidence is read from the `behavior` directory that sits next to
 * `ME_SOLUTIONS` (one JSON file per entry). Each readable file contributes:
 *   - 1 mention for every distinct string collected from `raw_payload`
 *     (falling back to `payload`) and from `axis_refs`;
 *   - 0.3 of a mention for every whitespace-separated word of the
 *     lower-cased `summary` whose length is between 5 and 20 characters.
 * Tags with fewer than 2 accumulated mentions are treated as noise.
 *
 * A tag is reported when no solution row carries it, or when the
 * best-fitness row carrying it is not in the `champion` state.
 *
 * @param {Array<{name: string, tags: string[]}>} rows - Solution rows; rows
 *   whose `tags` is not an array are ignored when matching.
 * @param {Map<string, {fitness: number, state: string}>} fitnessByName -
 *   Fitness record per solution name (only `fitness` and `state` are read).
 * @returns {Array<{tag: string, correction_mentions: number,
 *   best_matching_champion: null, best_fitness: number}>} At most 10 entries,
 *   most-mentioned first. `best_matching_champion` is always `null` because
 *   only tags without a champion match are reported.
 */
function findUnderServedTags(rows, fitnessByName) {
    // The schema of behavior entries has drifted historically, so tag
    // extraction is deliberately tolerant about where the strings live.
    const behaviorDir = path.join(ME_SOLUTIONS, '..', 'behavior');
    const correctionTags = new Map();
    const addMention = (key, weight) => {
        correctionTags.set(key, (correctionTags.get(key) ?? 0) + weight);
    };
    if (fs.existsSync(behaviorDir)) {
        for (const file of fs.readdirSync(behaviorDir)) {
            if (!file.endsWith('.json'))
                continue;
            try {
                const data = JSON.parse(fs.readFileSync(path.join(behaviorDir, file), 'utf-8'));
                const payload = data.raw_payload ?? data.payload ?? {};
                const tags = collectTags(payload).concat(collectTags(data.axis_refs ?? []));
                const summary = typeof data.summary === 'string' ? data.summary.toLowerCase() : '';
                // A tag counts once per evidence file, however often it repeats.
                for (const tag of new Set(tags)) {
                    addMention(tag, 1);
                }
                // Summary keywords are a weak fallback signal; the length
                // window drops short filler words and overly long tokens.
                for (const word of summary.split(/\s+/)) {
                    if (word.length >= 5 && word.length <= 20) {
                        addMention(word, 0.3);
                    }
                }
            }
            catch { /* skip bad json */ }
        }
    }
    const result = [];
    for (const [tag, count] of correctionTags) {
        if (count < 2)
            continue; // noise cutoff
        let bestName = null;
        let bestFitness = 0;
        for (const row of rows) {
            // Guard against rows without a tag list: calling includes() on
            // undefined would throw, and on a string would substring-match.
            if (!Array.isArray(row.tags) || !row.tags.includes(tag))
                continue;
            const fit = fitnessByName.get(row.name)?.fitness ?? 0;
            if (fit > bestFitness || (bestName === null && fit >= 0)) {
                bestFitness = fit;
                bestName = row.name;
            }
        }
        // Under-served: no matching solution, or best match is not a champion.
        const bestFit = bestName ? fitnessByName.get(bestName) : null;
        if (bestFit?.state === 'champion')
            continue;
        result.push({
            tag,
            correction_mentions: Math.round(count),
            // Reported tags never have a champion match, so this is always null.
            best_matching_champion: null,
            best_fitness: bestFitness,
        });
    }
    result.sort((a, b) => b.correction_mentions - a.correction_mentions);
    return result.slice(0, 10);
}
86
/**
 * Extracts candidate tag strings from a loosely-typed evidence value.
 *
 * - Array: its string elements, in order.
 * - Object: for each own value in `Object.values` order, the value itself
 *   when it is a string, or its string elements when it is an array (so a
 *   payload such as `{ tags: ['a', 'b'] }` yields `['a', 'b']`). Deeper
 *   nesting is not traversed.
 * - Anything else (including `null`, `undefined`, a bare string): `[]`.
 *
 * @param {*} v - Value to inspect.
 * @returns {string[]} The collected strings; duplicates are not removed.
 */
function collectTags(v) {
    const stringsOf = (items) => items.filter((item) => typeof item === 'string');
    if (Array.isArray(v))
        return stringsOf(v);
    if (v && typeof v === 'object') {
        return Object.values(v).flatMap((field) => {
            if (typeof field === 'string')
                return [field];
            // Tag lists are usually stored as arrays under some field name.
            return Array.isArray(field) ? stringsOf(field) : [];
        });
    }
    return [];
}
95
/**
 * Pairs every champion solution with every underperforming solution that
 * shares at least two tags with it.
 *
 * @param {Array<{name: string, tags: string[]}>} rows - Solution rows.
 * @param {Map<string, {state: string, fitness: number}>} fitnessByName -
 *   Fitness record per solution name; `state` selects the two groups and
 *   `fitness` is copied into the output.
 * @returns {Array<{shared_tags: string[],
 *   champion: {name: string, fitness: number},
 *   underperform: {name: string, fitness: number}}>} At most 5 pairs, the
 *   ones with the most shared tags first.
 */
function findConflictClusters(rows, fitnessByName) {
    const champions = [];
    const underperformers = [];
    for (const row of rows) {
        const state = fitnessByName.get(row.name)?.state;
        if (state === 'champion') {
            champions.push(row);
        }
        else if (state === 'underperform') {
            underperformers.push(row);
        }
    }
    // Champion-major, underperformer-minor order; the sort below only
    // reorders pairs whose overlap sizes differ.
    const clusters = champions.flatMap((strong) => underperformers
        .map((weak) => ({
            weak,
            overlap: strong.tags.filter((tag) => weak.tags.includes(tag)),
        }))
        .filter(({ overlap }) => overlap.length >= 2)
        .map(({ weak, overlap }) => ({
            shared_tags: overlap,
            champion: { name: strong.name, fitness: fitnessByName.get(strong.name).fitness },
            underperform: { name: weak.name, fitness: fitnessByName.get(weak.name).fitness },
        })));
    clusters.sort((left, right) => right.shared_tags.length - left.shared_tags.length);
    return clusters.slice(0, 5);
}
114
/**
 * Lists never-injected solutions that carry tags no injected solution has.
 *
 * A solution counts as injected when its fitness record has `injected > 0`;
 * a missing record or missing field counts as 0.
 *
 * @param {Array<{name: string, tags: string[]}>} rows - Solution rows.
 * @param {Map<string, {injected?: number}>} fitnessByName - Fitness record
 *   per solution name (only `injected` is read).
 * @returns {Array<{solution: string, unique_tags: string[], injected: number}>}
 *   At most 10 entries, those with the most unique tags first.
 */
function findDeadCorners(rows, fitnessByName) {
    const injectionCount = (row) => fitnessByName.get(row.name)?.injected ?? 0;
    // Every tag that at least one injected solution already covers.
    const coveredTags = new Set();
    rows.forEach((row) => {
        if (injectionCount(row) > 0) {
            for (const tag of row.tags) {
                coveredTags.add(tag);
            }
        }
    });
    const deadRows = rows
        .map((row) => ({ row, injected: injectionCount(row) }))
        .filter(({ injected }) => !(injected > 0))
        .map(({ row, injected }) => ({
            solution: row.name,
            unique_tags: row.tags.filter((tag) => !coveredTags.has(tag)),
            injected,
        }))
        .filter((entry) => entry.unique_tags.length !== 0);
    deadRows.sort((left, right) => right.unique_tags.length - left.unique_tags.length);
    return deadRows.slice(0, 10);
}
134
/**
 * Finds solutions whose accept rate moved sharply between the earlier and
 * the later half of the recorded outcome events.
 *
 * Events are split at the median timestamp: events with `ts` strictly below
 * it form window A, the rest window B. Only `accept`, `correct` and `error`
 * outcomes are counted, and only `accept` raises the accept count. A
 * solution is reported when both windows hold at least 3 counted events and
 * the absolute rate change is at least 0.3.
 *
 * The median is taken from a sorted copy of the timestamps, so the split
 * stays correct even if `readAllOutcomes()` does not return events in
 * chronological order.
 * NOTE(review): assumes every `ts` is mutually comparable with `<` (all
 * numbers or all ISO strings) — confirm against `readAllOutcomes`.
 *
 * @param {*} _fitnessByName - Unused; kept for signature parity with the
 *   other finders.
 * @returns {Array<{solution: string, accept_rate_window_a: number,
 *   accept_rate_window_b: number, delta: number}>} At most 5 entries,
 *   largest absolute delta first; rates and delta are rounded to 3 decimals.
 */
function findVolatile(_fitnessByName) {
    const events = readAllOutcomes();
    if (events.length === 0)
        return [];
    const orderedTs = events
        .map((ev) => ev.ts)
        .sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
    const mid = orderedTs[Math.floor(orderedTs.length / 2)];
    const by = new Map();
    for (const ev of events) {
        let c = by.get(ev.solution);
        if (!c) {
            c = { a_accept: 0, a_total: 0, b_accept: 0, b_total: 0 };
            by.set(ev.solution, c);
        }
        const counted = ev.outcome === 'accept' || ev.outcome === 'correct' || ev.outcome === 'error';
        if (!counted)
            continue;
        const accepted = ev.outcome === 'accept';
        if (ev.ts < mid) {
            c.a_total++;
            if (accepted)
                c.a_accept++;
        }
        else {
            c.b_total++;
            if (accepted)
                c.b_accept++;
        }
    }
    const result = [];
    for (const [name, c] of by) {
        // Too few samples in either window makes the rates meaningless.
        if (c.a_total < 3 || c.b_total < 3)
            continue;
        const rateA = c.a_accept / c.a_total;
        const rateB = c.b_accept / c.b_total;
        const delta = rateB - rateA;
        if (Math.abs(delta) < 0.3)
            continue;
        result.push({
            solution: name,
            accept_rate_window_a: Number(rateA.toFixed(3)),
            accept_rate_window_b: Number(rateB.toFixed(3)),
            delta: Number(delta.toFixed(3)),
        });
    }
    result.sort((a, b) => Math.abs(b.delta) - Math.abs(a.delta));
    return result.slice(0, 5);
}
178
/**
 * Assembles the full weakness report: population counts per fitness state
 * plus the four weakness analyses.
 *
 * @param {string} [solutionsDir=ME_SOLUTIONS] - Directory to load solution
 *   rows from.
 * @returns {{generated_at: string,
 *   population: {total: number, champion: number, active: number,
 *     underperform: number, draft: number},
 *   under_served_tags: Array, conflict_clusters: Array,
 *   dead_corners: Array, volatile: Array}} The report; `generated_at` is an
 *   ISO-8601 timestamp taken when the report is built.
 */
export function buildWeaknessReport(solutionsDir = ME_SOLUTIONS) {
    const rows = loadSolutionRows(solutionsDir);
    const fitnessList = computeFitness();
    const fitnessByName = new Map();
    const population = {
        total: fitnessList.length,
        champion: 0,
        active: 0,
        underperform: 0,
        draft: 0,
    };
    // One pass builds the lookup and tallies the states; records in any
    // other state only contribute to `total`.
    for (const record of fitnessList) {
        fitnessByName.set(record.solution, record);
        switch (record.state) {
            case 'champion':
                population.champion += 1;
                break;
            case 'active':
                population.active += 1;
                break;
            case 'underperform':
                population.underperform += 1;
                break;
            case 'draft':
                population.draft += 1;
                break;
            default:
                break;
        }
    }
    const generated_at = new Date().toISOString();
    const under_served_tags = findUnderServedTags(rows, fitnessByName);
    const conflict_clusters = findConflictClusters(rows, fitnessByName);
    const dead_corners = findDeadCorners(rows, fitnessByName);
    const volatile = findVolatile(fitnessByName);
    return {
        generated_at,
        population,
        under_served_tags,
        conflict_clusters,
        dead_corners,
        volatile,
    };
}
198
/**
 * Writes a report as pretty-printed JSON into `STATE_DIR`, under a file
 * name that embeds the current epoch-millisecond timestamp.
 *
 * The directory is created if needed. A serialization or write failure is
 * only logged at debug level, so the returned path may point to a file that
 * was not written.
 *
 * @param {*} report - JSON-serializable report object.
 * @returns {string} Path of the target file.
 */
export function saveWeaknessReport(report) {
    fs.mkdirSync(STATE_DIR, { recursive: true });
    const target = path.join(STATE_DIR, `weakness-report-${Date.now()}.json`);
    try {
        const serialized = JSON.stringify(report, null, 2);
        fs.writeFileSync(target, serialized);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.debug(`save failed: ${reason}`);
    }
    return target;
}
210
/**
 * Loads the saved weakness report whose file name sorts last.
 *
 * Candidates are the entries of `STATE_DIR` named
 * `weakness-report-*.json`; the lexicographically greatest name wins, which
 * is the most recent one as long as the embedded timestamps have the same
 * number of digits.
 *
 * @returns {*} The parsed report, or `null` when the directory is missing,
 *   holds no report file, or the chosen file cannot be read or parsed.
 */
export function latestWeaknessReport() {
    if (!fs.existsSync(STATE_DIR)) {
        return null;
    }
    let newest = null;
    for (const entry of fs.readdirSync(STATE_DIR)) {
        const isReport = entry.startsWith('weakness-report-') && entry.endsWith('.json');
        if (isReport && (newest === null || entry > newest)) {
            newest = entry;
        }
    }
    if (newest === null) {
        return null;
    }
    try {
        const raw = fs.readFileSync(path.join(STATE_DIR, newest), 'utf-8');
        return JSON.parse(raw);
    }
    catch {
        return null;
    }
}
@@ -72,5 +72,10 @@ export declare function mutateSolutionByName(name: string, mutator: SolutionMuta
72
72
  /**
73
73
  * Evidence 카운터 단일 증가 helper.
74
74
  * mutateSolutionByName + 카운터 증가 패턴을 한 줄로.
75
+ *
76
+ * Also graduates Phase 4 candidates: when a `status: candidate` solution's
77
+ * injected count reaches `CANDIDATE_PROMOTION_INJECTIONS`, its status flips
78
+ * to `verified` in the same write. This keeps the exploration bonus from
79
+ * clinging to a solution that has had enough trials.
75
80
  */
76
81
  export declare function incrementEvidence(solutionName: string, field: 'reflected' | 'negative' | 'injected' | 'sessions' | 'reExtracted'): boolean;
@@ -142,9 +142,22 @@ export function mutateSolutionByName(name, mutator, options) {
142
142
  }
143
143
  return false;
144
144
  }
145
+ /**
146
+ * Phase 4 candidate promotion threshold: a `status: candidate` solution
147
+ * automatically graduates to `status: verified` once its injected count
148
+ * crosses this cutoff. At that point the cold-start exploration bonus
149
+ * (solution-matcher.ts) disappears naturally, since the bonus keys off
150
+ * `candidate` status.
151
+ */
152
+ const CANDIDATE_PROMOTION_INJECTIONS = 5;
145
153
  /**
146
154
  * Evidence 카운터 단일 증가 helper.
147
155
  * mutateSolutionByName + 카운터 증가 패턴을 한 줄로.
156
+ *
157
+ * Also graduates Phase 4 candidates: when a `status: candidate` solution's
158
+ * injected count reaches `CANDIDATE_PROMOTION_INJECTIONS`, its status flips
159
+ * to `verified` in the same write. This keeps the exploration bonus from
160
+ * clinging to a solution that has had enough trials.
148
161
  */
149
162
  export function incrementEvidence(solutionName, field) {
150
163
  return mutateSolutionByName(solutionName, sol => {
@@ -152,6 +165,11 @@ export function incrementEvidence(solutionName, field) {
152
165
  if (!(field in ev))
153
166
  return false;
154
167
  ev[field] = (ev[field] ?? 0) + 1;
168
+ if (field === 'injected' &&
169
+ sol.frontmatter.status === 'candidate' &&
170
+ ev.injected >= CANDIDATE_PROMOTION_INJECTIONS) {
171
+ sol.frontmatter.status = 'verified';
172
+ }
155
173
  return true;
156
174
  });
157
175
  }
@@ -105,6 +105,13 @@ async function main() {
105
105
  saveFailureState(state);
106
106
  // 컨텍스트 신호 업데이트
107
107
  incrementFailureSignal(sessionId);
108
+ // Outcome tracking (Phase 1): attribute this tool failure to pending
109
+ // solution injections in the same session. Fail-open.
110
+ try {
111
+ const { attributeError } = await import('../engine/solution-outcomes.js');
112
+ attributeError(sessionId);
113
+ }
114
+ catch { /* ignore */ }
108
115
  const failCount = state.failures[toolName].count;
109
116
  const suggestion = getRecoverySuggestion(error, toolName);
110
117
  // 3회 이상 반복 실패 시 강화된 경고
@@ -28,6 +28,7 @@ import { writeSignal } from './shared/plugin-signal.js';
28
28
  import { approve, approveWithContext, failOpenWithTracking } from './shared/hook-response.js';
29
29
  import { STATE_DIR } from '../core/paths.js';
30
30
  import { recordHookTiming } from './shared/hook-timing.js';
31
+ import { appendPending, flushAccept } from '../engine/solution-outcomes.js';
31
32
  const MAX_SOLUTIONS_PER_SESSION = 10;
32
33
  /** 세션별 이미 주입된 솔루션 추적 (중복 방지) */
33
34
  function getSessionCachePath(sessionId) {
@@ -451,6 +452,25 @@ async function main() {
451
452
  catch (e) {
452
453
  log.debug('plugin signal 기록 실패', e);
453
454
  }
455
+ // Outcome tracking (Phase 1): flush previous pending as `accept` (silence
456
+ // = consent), then record this round's injections as new pending. Both
457
+ // calls are fail-open — a tracking crash must not block injection.
458
+ try {
459
+ flushAccept(sessionId);
460
+ }
461
+ catch (e) {
462
+ log.debug('outcome flushAccept 실패', e);
463
+ }
464
+ try {
465
+ appendPending(sessionId, effectiveToInject.map((sol) => ({
466
+ solution: sol.name,
467
+ match_score: sol.relevance,
468
+ injected_chars: (summaries.get(sol.name) ?? sol.name).length,
469
+ })));
470
+ }
471
+ catch (e) {
472
+ log.debug('outcome appendPending 실패', e);
473
+ }
454
474
  console.log(approveWithContext(fullInjection, 'UserPromptSubmit'));
455
475
  }
456
476
  finally {
package/dist/mcp/tools.js CHANGED
@@ -273,6 +273,14 @@ export function registerTools(server) {
273
273
  target,
274
274
  axis_hint: axis_hint,
275
275
  });
276
+ // Outcome tracking (Phase 1): attribute this correction to any
277
+ // pending injections in the session. Fail-open — attribution is a
278
+ // best-effort signal, never block the correction record itself.
279
+ try {
280
+ const { attributeCorrection } = await import('../engine/solution-outcomes.js');
281
+ attributeCorrection(effectiveSessionId);
282
+ }
283
+ catch { /* ignore */ }
276
284
  const lines = [
277
285
  `Evidence recorded: ${result.evidence_event_id}`,
278
286
  ];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wooojin/forgen",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "preferGlobal": true,
5
5
  "main": "dist/lib.js",
6
6
  "types": "./dist/lib.d.ts",