@wooojin/forgen 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/CHANGELOG.md +76 -0
  2. package/README.ko.md +25 -14
  3. package/README.md +61 -17
  4. package/agents/analyst.md +48 -4
  5. package/agents/architect.md +39 -4
  6. package/agents/code-reviewer.md +107 -77
  7. package/agents/critic.md +47 -4
  8. package/agents/debugger.md +46 -4
  9. package/agents/designer.md +40 -4
  10. package/agents/executor.md +112 -30
  11. package/agents/explore.md +45 -5
  12. package/agents/git-master.md +48 -4
  13. package/agents/planner.md +121 -18
  14. package/agents/solution-evolver.md +115 -0
  15. package/agents/test-engineer.md +58 -4
  16. package/agents/verifier.md +92 -77
  17. package/commands/architecture-decision.md +127 -258
  18. package/commands/calibrate.md +225 -0
  19. package/commands/code-review.md +163 -178
  20. package/commands/compound.md +127 -68
  21. package/commands/deep-interview.md +212 -110
  22. package/commands/docker.md +68 -178
  23. package/commands/forge-loop.md +215 -0
  24. package/commands/learn.md +231 -0
  25. package/commands/retro.md +215 -0
  26. package/commands/ship.md +277 -0
  27. package/dist/cli.js +25 -9
  28. package/dist/core/auto-compound-runner.js +14 -0
  29. package/dist/core/config-injector.d.ts +2 -1
  30. package/dist/core/config-injector.js +2 -1
  31. package/dist/core/dashboard.d.ts +17 -0
  32. package/dist/core/dashboard.js +158 -2
  33. package/dist/core/harness.d.ts +6 -1
  34. package/dist/core/harness.js +75 -19
  35. package/dist/core/paths.d.ts +31 -1
  36. package/dist/core/paths.js +43 -2
  37. package/dist/core/spawn.d.ts +3 -2
  38. package/dist/core/spawn.js +27 -8
  39. package/dist/core/types.d.ts +34 -0
  40. package/dist/engine/compound-lifecycle.d.ts +4 -3
  41. package/dist/engine/compound-lifecycle.js +91 -46
  42. package/dist/engine/learn-cli.d.ts +1 -0
  43. package/dist/engine/learn-cli.js +182 -0
  44. package/dist/engine/meta-learning/adaptive-thresholds.d.ts +20 -0
  45. package/dist/engine/meta-learning/adaptive-thresholds.js +126 -0
  46. package/dist/engine/meta-learning/extraction-tuner.d.ts +15 -0
  47. package/dist/engine/meta-learning/extraction-tuner.js +99 -0
  48. package/dist/engine/meta-learning/matcher-weight-tuner.d.ts +21 -0
  49. package/dist/engine/meta-learning/matcher-weight-tuner.js +151 -0
  50. package/dist/engine/meta-learning/runner.d.ts +14 -0
  51. package/dist/engine/meta-learning/runner.js +90 -0
  52. package/dist/engine/meta-learning/scope-promoter.d.ts +21 -0
  53. package/dist/engine/meta-learning/scope-promoter.js +84 -0
  54. package/dist/engine/meta-learning/session-quality-scorer.d.ts +61 -0
  55. package/dist/engine/meta-learning/session-quality-scorer.js +166 -0
  56. package/dist/engine/meta-learning/types.d.ts +114 -0
  57. package/dist/engine/meta-learning/types.js +43 -0
  58. package/dist/engine/solution-candidate.d.ts +30 -0
  59. package/dist/engine/solution-candidate.js +124 -0
  60. package/dist/engine/solution-fitness.d.ts +52 -0
  61. package/dist/engine/solution-fitness.js +95 -0
  62. package/dist/engine/solution-fixup.d.ts +30 -0
  63. package/dist/engine/solution-fixup.js +116 -0
  64. package/dist/engine/solution-format.d.ts +10 -2
  65. package/dist/engine/solution-format.js +287 -57
  66. package/dist/engine/solution-index.d.ts +1 -1
  67. package/dist/engine/solution-index.js +10 -0
  68. package/dist/engine/solution-matcher.d.ts +7 -1
  69. package/dist/engine/solution-matcher.js +137 -37
  70. package/dist/engine/solution-outcomes.d.ts +70 -0
  71. package/dist/engine/solution-outcomes.js +242 -0
  72. package/dist/engine/solution-quarantine.d.ts +36 -0
  73. package/dist/engine/solution-quarantine.js +172 -0
  74. package/dist/engine/solution-weakness.d.ts +45 -0
  75. package/dist/engine/solution-weakness.js +225 -0
  76. package/dist/engine/solution-writer.d.ts +5 -0
  77. package/dist/engine/solution-writer.js +18 -0
  78. package/dist/fgx.js +12 -8
  79. package/dist/hooks/context-guard.d.ts +5 -0
  80. package/dist/hooks/context-guard.js +118 -2
  81. package/dist/hooks/hooks-generator.d.ts +3 -0
  82. package/dist/hooks/hooks-generator.js +23 -6
  83. package/dist/hooks/keyword-detector.js +16 -100
  84. package/dist/hooks/post-tool-failure.js +7 -0
  85. package/dist/hooks/skill-injector.d.ts +4 -3
  86. package/dist/hooks/skill-injector.js +6 -4
  87. package/dist/hooks/solution-injector.js +20 -0
  88. package/dist/host/codex-adapter.d.ts +10 -0
  89. package/dist/host/codex-adapter.js +154 -0
  90. package/dist/mcp/solution-reader.d.ts +5 -5
  91. package/dist/mcp/solution-reader.js +34 -24
  92. package/dist/mcp/tools.js +8 -0
  93. package/dist/services/session.d.ts +19 -0
  94. package/dist/services/session.js +62 -0
  95. package/hooks/hooks.json +2 -2
  96. package/package.json +2 -1
  97. package/skills/architecture-decision/SKILL.md +113 -257
  98. package/skills/calibrate/SKILL.md +207 -0
  99. package/skills/code-review/SKILL.md +151 -178
  100. package/skills/compound/SKILL.md +126 -68
  101. package/skills/deep-interview/SKILL.md +210 -110
  102. package/skills/docker/SKILL.md +57 -179
  103. package/skills/forge-loop/SKILL.md +198 -0
  104. package/skills/learn/SKILL.md +216 -0
  105. package/skills/retro/SKILL.md +199 -0
  106. package/skills/ship/SKILL.md +259 -0
  107. package/agents/code-simplifier.md +0 -197
  108. package/agents/performance-reviewer.md +0 -172
  109. package/agents/qa-tester.md +0 -158
  110. package/agents/refactoring-expert.md +0 -168
  111. package/agents/scientist.md +0 -144
  112. package/agents/security-reviewer.md +0 -137
  113. package/agents/writer.md +0 -184
  114. package/commands/api-design.md +0 -268
  115. package/commands/ci-cd.md +0 -270
  116. package/commands/database.md +0 -263
  117. package/commands/debug-detective.md +0 -99
  118. package/commands/documentation.md +0 -276
  119. package/commands/ecomode.md +0 -51
  120. package/commands/frontend.md +0 -271
  121. package/commands/git-master.md +0 -90
  122. package/commands/incident-response.md +0 -292
  123. package/commands/migrate.md +0 -101
  124. package/commands/performance.md +0 -288
  125. package/commands/refactor.md +0 -105
  126. package/commands/security-review.md +0 -288
  127. package/commands/specify.md +0 -128
  128. package/commands/tdd.md +0 -183
  129. package/commands/testing-strategy.md +0 -265
  130. package/skills/api-design/SKILL.md +0 -262
  131. package/skills/ci-cd/SKILL.md +0 -264
  132. package/skills/database/SKILL.md +0 -257
  133. package/skills/debug-detective/SKILL.md +0 -95
  134. package/skills/documentation/SKILL.md +0 -270
  135. package/skills/ecomode/SKILL.md +0 -46
  136. package/skills/frontend/SKILL.md +0 -265
  137. package/skills/git-master/SKILL.md +0 -86
  138. package/skills/incident-response/SKILL.md +0 -286
  139. package/skills/migrate/SKILL.md +0 -96
  140. package/skills/performance/SKILL.md +0 -282
  141. package/skills/refactor/SKILL.md +0 -100
  142. package/skills/security-review/SKILL.md +0 -282
  143. package/skills/specify/SKILL.md +0 -122
  144. package/skills/tdd/SKILL.md +0 -178
  145. package/skills/testing-strategy/SKILL.md +0 -260
@@ -1,9 +1,10 @@
1
+ import * as fs from 'node:fs';
1
2
  import * as path from 'node:path';
2
- import { ME_SOLUTIONS, PACKS_DIR } from '../core/paths.js';
3
- import { extractTags, expandCompoundTags, expandQueryBigrams } from './solution-format.js';
3
+ import { ME_SOLUTIONS, META_LEARNING_DIR, PACKS_DIR } from '../core/paths.js';
4
+ import { maskBlockedTokens } from './phrase-blocklist.js';
5
+ import { expandCompoundTags, expandQueryBigrams, extractTags } from './solution-format.js';
4
6
  import { getOrBuildIndex } from './solution-index.js';
5
7
  import { defaultNormalizer } from './term-normalizer.js';
6
- import { maskBlockedTokens } from './phrase-blocklist.js';
7
8
  // ── Synonym expansion (delegates to term-normalizer) ──
8
9
  //
9
10
  // The old `SYNONYM_MAP` + `expandTagsWithSynonyms` pair had two problems:
@@ -87,7 +88,7 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
87
88
  let score = 0;
88
89
  for (const qt of queryTags) {
89
90
  // Term frequency in document
90
- const tf = docTags.filter(dt => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
91
+ const tf = docTags.filter((dt) => dt === qt || (dt.length > 3 && qt.length > 3 && (dt.includes(qt) || qt.includes(dt)))).length;
91
92
  if (tf === 0)
92
93
  continue;
93
94
  // BM25 TF saturation
@@ -100,10 +101,37 @@ export function bm25Score(queryTags, docTags, avgDocLength) {
100
101
  }
101
102
  /** High-frequency tags that should be weighted lower */
102
103
  const COMMON_TAGS = new Set([
103
- 'typescript', 'ts', 'javascript', 'js', 'fix', 'update', 'add', 'change',
104
- 'file', 'code', 'function', 'import', 'export', 'error', 'type', 'string',
105
- 'number', 'object', 'array', 'return', 'const', 'class', 'module',
106
- '코드', '파일', '함수', '수정', '추가', '변경', '에러', '타입',
104
+ 'typescript',
105
+ 'ts',
106
+ 'javascript',
107
+ 'js',
108
+ 'fix',
109
+ 'update',
110
+ 'add',
111
+ 'change',
112
+ 'file',
113
+ 'code',
114
+ 'function',
115
+ 'import',
116
+ 'export',
117
+ 'error',
118
+ 'type',
119
+ 'string',
120
+ 'number',
121
+ 'object',
122
+ 'array',
123
+ 'return',
124
+ 'const',
125
+ 'class',
126
+ 'module',
127
+ '코드',
128
+ '파일',
129
+ '함수',
130
+ '수정',
131
+ '추가',
132
+ '변경',
133
+ '에러',
134
+ '타입',
107
135
  ]);
108
136
  /** Apply IDF-like weight: common tags get reduced weight */
109
137
  export function tagWeight(tag) {
@@ -114,7 +142,7 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
114
142
  // Legacy mode: substring matching for backwards compatibility.
115
143
  // Not a hot path — only hit by the (old) solution-matcher.test.ts cases.
116
144
  const promptTags = extractTags(promptOrTags);
117
- const intersection = keywordsOrTags.filter(kw => promptTags.some(pt => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
145
+ const intersection = keywordsOrTags.filter((kw) => promptTags.some((pt) => pt === kw || (pt.length > 3 && kw.length > 3 && (pt.startsWith(kw) || kw.startsWith(pt)))));
118
146
  return Math.min(1, intersection.length / Math.max(promptTags.length * 0.5, 1));
119
147
  }
120
148
  // v3 mode: tag matching with synonym expansion + TF-IDF weighting.
@@ -124,21 +152,21 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
124
152
  // the hot path pre-compute the expansion once per query and pass it via
125
153
  // `options.normalizedPromptTags`, so this function no longer repeats the
126
154
  // work per solution.
127
- const expandedPromptTags = options?.normalizedPromptTags
128
- ?? defaultNormalizer.normalizeTerms(promptOrTags);
155
+ const expandedPromptTags = options?.normalizedPromptTags ?? defaultNormalizer.normalizeTerms(promptOrTags);
129
156
  // R4-T1: when the caller supplies a compound-expanded solution tag set,
130
157
  // intersection and partial matching run against the expanded set (so
131
158
  // `api-key` matches `api`/`key` queries via the split parts), but the
132
159
  // Jaccard union denominator below still uses the RAW `keywordsOrTags`
133
160
  // for normalization stability.
134
161
  const matchTags = options?.solutionTagsExpanded ?? keywordsOrTags;
135
- const intersection = matchTags.filter(t => expandedPromptTags.includes(t));
162
+ const intersection = matchTags.filter((t) => expandedPromptTags.includes(t));
136
163
  // partial/substring matches for longer tags (>3 chars)
137
- const partialMatches = matchTags.filter(t => t.length > 3 && !intersection.includes(t)
138
- && expandedPromptTags.some(pt => pt.length > 3 && (pt.includes(t) || t.includes(pt))));
164
+ const partialMatches = matchTags.filter((t) => t.length > 3 &&
165
+ !intersection.includes(t) &&
166
+ expandedPromptTags.some((pt) => pt.length > 3 && (pt.includes(t) || t.includes(pt))));
139
167
  // Apply TF-IDF weighting: common tags count less
140
- const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0)
141
- + partialMatches.reduce((sum, t) => sum + tagWeight(t) * 0.5, 0);
168
+ const weightedMatched = intersection.reduce((sum, t) => sum + tagWeight(t), 0) +
169
+ partialMatches.reduce((sum, t) => sum + tagWeight(t) * 0.5, 0);
142
170
  // ── Bigram similarity boost for borderline cases ──
143
171
  //
144
172
  // When the TF-IDF intersection score is below the match threshold (0.5),
@@ -176,7 +204,11 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
176
204
  const blendedScore = tfidfScore * 0.8 + bestBigramScore * 0.2;
177
205
  return {
178
206
  relevance: blendedScore * (confidence ?? 1),
179
- matchedTags: [...intersection, ...partialMatches, ...bigramMatchedTags.filter(t => !intersection.includes(t) && !partialMatches.includes(t))],
207
+ matchedTags: [
208
+ ...intersection,
209
+ ...partialMatches,
210
+ ...bigramMatchedTags.filter((t) => !intersection.includes(t) && !partialMatches.includes(t)),
211
+ ],
180
212
  };
181
213
  }
182
214
  return { relevance: 0, matchedTags: [] };
@@ -196,7 +228,8 @@ export function calculateRelevance(promptOrTags, keywordsOrTags, confidence, opt
196
228
  bigramBoost = sim;
197
229
  }
198
230
  }
199
- const ensembleScore = tfidfScore * 0.5 + bm25 * 0.3 + bigramBoost * 0.2;
231
+ const w = options?.ensembleWeights ?? { tfidf: 0.5, bm25: 0.3, bigram: 0.2 };
232
+ const ensembleScore = tfidfScore * w.tfidf + bm25 * w.bm25 + bigramBoost * w.bigram;
200
233
  return {
201
234
  relevance: ensembleScore * (confidence ?? 1),
202
235
  matchedTags: [...intersection, ...partialMatches],
@@ -274,8 +307,8 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
274
307
  // Rule B
275
308
  if (matchedTags.length === 1) {
276
309
  const tag = matchedTags[0];
277
- const literalHit = promptTags.includes(tag)
278
- || promptTags.some(pt => {
310
+ const literalHit = promptTags.includes(tag) ||
311
+ promptTags.some((pt) => {
279
312
  if (pt.length <= 3 || tag.length <= 3)
280
313
  return false;
281
314
  if (pt.includes(tag) || tag.includes(pt))
@@ -310,7 +343,7 @@ export function shouldRejectByR4T3Rules(promptTags, matchedTags) {
310
343
  * `matchSolutions` behaviour (both scopes could rank). Callers that want
311
344
  * first-wins scope precedence must dedupe on their side.
312
345
  */
313
- function rankCandidates(promptTags, promptLower, solutions) {
346
+ function rankCandidates(promptTags, promptLower, solutions, ensembleWeights) {
314
347
  // T2: normalize prompt tags ONCE per query (not once per solution).
315
348
  // Pre-T2 this expansion happened inside calculateRelevance and was
316
349
  // repeated N times for N solutions — the plan's primary hot-path win.
@@ -345,7 +378,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
345
378
  const promptTagsWithBigrams = expandQueryBigrams(maskedPromptTags);
346
379
  const normalizedPromptTags = defaultNormalizer.normalizeTerms(promptTagsWithBigrams);
347
380
  return solutions
348
- .map(sol => {
381
+ .map((sol) => {
349
382
  // R4-T1: solution-side compound-tag expansion. `api-key` becomes
350
383
  // {api-key, api, key} so a query token `api` (from "api keys") hits
351
384
  // it directly. Computed per solution because each sol.tags is
@@ -358,7 +391,11 @@ function rankCandidates(promptTags, promptLower, solutions) {
358
391
  // step (intersection/partialMatches) already uses the masked set
359
392
  // via `normalizedPromptTags` — the union must match for score
360
393
  // semantics to stay consistent.
361
- const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, { normalizedPromptTags, solutionTagsExpanded: solTagsExpanded });
394
+ const result = calculateRelevance(maskedPromptTags, sol.tags, sol.confidence, {
395
+ normalizedPromptTags,
396
+ solutionTagsExpanded: solTagsExpanded,
397
+ ensembleWeights,
398
+ });
362
399
  // Compute identifier boost FIRST — independent of tag scoring so
363
400
  // R4-T3's tag-evidence precision rules below cannot silently drop
364
401
  // a candidate that has strong identifier-level evidence.
@@ -385,9 +422,9 @@ function rankCandidates(promptTags, promptLower, solutions) {
385
422
  // the `matchedTags.length + matchedIdentifiers.length >= 1` filter.
386
423
  let tagRelevance = result.relevance;
387
424
  let tagMatches = result.matchedTags;
388
- if (matchedIdentifiers.length === 0
389
- && tagMatches.length > 0
390
- && shouldRejectByR4T3Rules(maskedPromptTags, tagMatches)) {
425
+ if (matchedIdentifiers.length === 0 &&
426
+ tagMatches.length > 0 &&
427
+ shouldRejectByR4T3Rules(maskedPromptTags, tagMatches)) {
391
428
  tagRelevance = 0;
392
429
  tagMatches = [];
393
430
  }
@@ -398,7 +435,7 @@ function rankCandidates(promptTags, promptLower, solutions) {
398
435
  matchedIdentifiers,
399
436
  };
400
437
  })
401
- .filter(c => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
438
+ .filter((c) => c.matchedTags.length + c.matchedIdentifiers.length >= 1)
402
439
  .sort((a, b) => b.relevance - a.relevance)
403
440
  .slice(0, 5);
404
441
  }
@@ -675,7 +712,7 @@ function computeBucketMetrics(queries, solutions) {
675
712
  */
676
713
  export function evaluateQuery(query, solutions) {
677
714
  const promptTags = extractTags(query);
678
- return rankCandidates(promptTags, query.toLowerCase(), solutions).map(c => ({
715
+ return rankCandidates(promptTags, query.toLowerCase(), solutions).map((c) => ({
679
716
  name: c.solution.name,
680
717
  relevance: c.relevance,
681
718
  matchedTags: c.matchedTags,
@@ -701,13 +738,16 @@ export function evaluateSolutionMatcher(fixture) {
701
738
  // doesn't drown a small paraphrase bucket but also a single-query bucket
702
739
  // doesn't dominate.
703
740
  const recallAt5 = combinedTotal > 0
704
- ? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) / combinedTotal
741
+ ? (positiveM.recallAt5 * positiveM.total + paraphraseM.recallAt5 * paraphraseM.total) /
742
+ combinedTotal
705
743
  : 0;
706
744
  const mrrAt5 = combinedTotal > 0
707
- ? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) / combinedTotal
745
+ ? (positiveM.mrrAt5 * positiveM.total + paraphraseM.mrrAt5 * paraphraseM.total) /
746
+ combinedTotal
708
747
  : 0;
709
748
  const noResultRate = combinedTotal > 0
710
- ? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) / combinedTotal
749
+ ? (positiveM.noResultRate * positiveM.total + paraphraseM.noResultRate * paraphraseM.total) /
750
+ combinedTotal
711
751
  : 0;
712
752
  let negAnyResult = 0;
713
753
  for (const q of fixture.negative) {
@@ -733,27 +773,87 @@ export function evaluateSolutionMatcher(fixture) {
733
773
  },
734
774
  };
735
775
  }
776
+ // ── Meta-learning: dynamic ensemble weights ──
777
+ let _cachedWeights;
778
+ let _weightsCacheTime = 0;
779
+ const WEIGHTS_CACHE_TTL = 60_000; // 1 minute cache
780
+ /**
781
+ * Load tuned matcher weights from meta-learning state.
782
+ * Returns undefined (use defaults) if no tuned weights exist.
783
+ * Cached for 1 minute to avoid re-reading per matchSolutions call.
784
+ */
785
+ function loadTunedMatcherWeights() {
786
+ const now = Date.now();
787
+ if (_cachedWeights !== undefined && now - _weightsCacheTime < WEIGHTS_CACHE_TTL) {
788
+ return _cachedWeights ?? undefined;
789
+ }
790
+ try {
791
+ const weightsPath = path.join(META_LEARNING_DIR, 'matcher-weights.json');
792
+ if (!fs.existsSync(weightsPath)) {
793
+ _cachedWeights = null;
794
+ _weightsCacheTime = now;
795
+ return undefined;
796
+ }
797
+ const data = JSON.parse(fs.readFileSync(weightsPath, 'utf-8'));
798
+ if (typeof data.tfidf === 'number' &&
799
+ typeof data.bm25 === 'number' &&
800
+ typeof data.bigram === 'number') {
801
+ _cachedWeights = { tfidf: data.tfidf, bm25: data.bm25, bigram: data.bigram };
802
+ _weightsCacheTime = now;
803
+ return _cachedWeights;
804
+ }
805
+ }
806
+ catch {
807
+ /* fail-open: use defaults */
808
+ }
809
+ _cachedWeights = null;
810
+ _weightsCacheTime = now;
811
+ return undefined;
812
+ }
813
+ /**
814
+ * Cold-start exploration bonus for candidate solutions.
815
+ *
816
+ * Phase 4 evolution: newly proposed solutions enter at `status: candidate`.
817
+ * Without a nudge they compete head-to-head with mature verified/champion
818
+ * entries and almost always lose the first few rounds — not because
819
+ * they're worse, but because matchers favor solutions with richer tag
820
+ * histories. A small confidence multiplier lets candidates surface often
821
+ * enough to accumulate outcome data, after which the fitness loop
822
+ * decides their fate.
823
+ *
824
+ * The 1.3× factor is a starting point (Q1 in docs/design-solution-evolution.md).
825
+ * Automatic deactivation after 5 accumulated injections is handled by a
826
+ * separate promoter that flips `status` to `verified`.
827
+ */
828
+ const CANDIDATE_EXPLORATION_MULTIPLIER = 1.3;
829
+ function applyCandidateExplorationBonus(entries) {
830
+ return entries.map((e) => {
831
+ if (e.status !== 'candidate')
832
+ return e;
833
+ return { ...e, confidence: Math.min(1, e.confidence * CANDIDATE_EXPLORATION_MULTIPLIER) };
834
+ });
835
+ }
736
836
  export function matchSolutions(prompt, scope, cwd) {
737
837
  // Build solution dirs for index cache
738
- const dirs = [
739
- { dir: ME_SOLUTIONS, scope: 'me' },
740
- ];
838
+ const dirs = [{ dir: ME_SOLUTIONS, scope: 'me' }];
741
839
  if (scope.team) {
742
840
  dirs.push({ dir: path.join(PACKS_DIR, scope.team.name, 'solutions'), scope: 'team' });
743
841
  }
744
842
  dirs.push({ dir: path.join(cwd, '.compound', 'solutions'), scope: 'project' });
745
843
  // Use cached index (rebuilt only when dirs change)
746
844
  const index = getOrBuildIndex(dirs);
747
- const allSolutions = index.entries.map(e => ({ ...e }));
845
+ const allSolutions = applyCandidateExplorationBonus(index.entries.map((e) => ({ ...e })));
748
846
  const promptTags = extractTags(prompt);
749
847
  const promptLower = prompt.toLowerCase();
848
+ // Meta-learning: load tuned weights if available
849
+ const tunedWeights = loadTunedMatcherWeights();
750
850
  // Delegate to shared ranking core. `rankCandidates` is generic so each
751
851
  // ranked candidate carries the original `LoadedSolution` reference — no
752
852
  // name-based re-lookup, so two scopes sharing a name (e.g. me/foo and
753
853
  // project/foo) can both appear in the result without a Map last-wins
754
854
  // scope-precedence bug.
755
- const ranked = rankCandidates(promptTags, promptLower, allSolutions);
756
- return ranked.map(c => ({
855
+ const ranked = rankCandidates(promptTags, promptLower, allSolutions, tunedWeights);
856
+ return ranked.map((c) => ({
757
857
  name: c.solution.name,
758
858
  path: c.solution.filePath,
759
859
  scope: c.solution.scope,
@@ -0,0 +1,70 @@
1
+ export type Outcome = 'accept' | 'correct' | 'error' | 'unknown';
2
+ export type Attribution = 'explicit' | 'window' | 'session_end' | 'default';
3
+ /**
4
+ * One inject → outcome event. Written append-only to
5
+ * ~/.forgen/state/outcomes/{session_id}.jsonl. The pending state (inject
6
+ * happened, outcome not yet decided) is stored separately in
7
+ * ~/.forgen/state/outcome-pending-{session_id}.json.
8
+ */
9
+ export interface OutcomeEvent {
10
+ ts: number;
11
+ session_id: string;
12
+ solution: string;
13
+ match_score: number;
14
+ injected_chars: number;
15
+ outcome: Outcome;
16
+ outcome_lag_ms: number;
17
+ attribution: Attribution;
18
+ }
19
+ /**
20
+ * Record that solutions were injected. Called from solution-injector right
21
+ * after `approveWithContext` is emitted. Fails silently — outcome tracking
22
+ * must never block the user's workflow.
23
+ */
24
+ export declare function appendPending(sessionId: string, injections: Array<{
25
+ solution: string;
26
+ match_score: number;
27
+ injected_chars: number;
28
+ }>): void;
29
+ /**
30
+ * Flush pending injections as `accept` events. Called when a new user
31
+ * prompt arrives without any intervening correction/error, signaling that
32
+ * the previous injections were silently accepted. "Silence = consent."
33
+ *
34
+ * If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
35
+ * because an earlier step already attributed them as `correct` or `error`).
36
+ */
37
+ export declare function flushAccept(sessionId: string, excludeSolutions?: Set<string>): number;
38
+ /**
39
+ * Attribute a correction to the most recent pending injection(s). Called
40
+ * from the correction-record MCP tool. Removes attributed entries from
41
+ * pending so subsequent `flushAccept` does not double-count them.
42
+ *
43
+ * Strategy: all currently-pending solutions in this session are marked as
44
+ * `correct`. This is conservative (the correction may target only one of
45
+ * them), but without semantic attribution we err on the side of the user's
46
+ * feedback signal being louder than acceptance.
47
+ */
48
+ export declare function attributeCorrection(sessionId: string): string[];
49
+ /**
50
+ * Attribute a tool error to pending solutions in this session. Called from
51
+ * post-tool-failure hook. Unlike corrections, errors do not clear pending
52
+ * — an error is a weaker signal and the next user prompt can still produce
53
+ * a correct/accept decision.
54
+ *
55
+ * To avoid flooding the log with duplicate errors for the same pending
56
+ * batch, we cap at one `error` event per (session, solution) pair per
57
+ * pending-cycle by tracking a `error_flagged` set in the pending state.
58
+ */
59
+ export declare function attributeError(sessionId: string): string[];
60
+ /**
61
+ * At session end, any still-pending entries are logged as `unknown` (we
62
+ * can't tell if the user was happy or just stopped). Pending file is
63
+ * removed.
64
+ */
65
+ export declare function finalizeSession(sessionId: string): number;
66
+ /**
67
+ * Read all outcome events across all sessions. Used by fitness
68
+ * calculation. Returns events sorted by timestamp ascending.
69
+ */
70
+ export declare function readAllOutcomes(): OutcomeEvent[];
@@ -0,0 +1,242 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import { OUTCOMES_DIR, STATE_DIR } from '../core/paths.js';
4
+ import { sanitizeId } from '../hooks/shared/sanitize-id.js';
5
+ import { createLogger } from '../core/logger.js';
6
+ const log = createLogger('solution-outcomes');
7
+ function pendingPath(sessionId) {
8
+ return path.join(STATE_DIR, `outcome-pending-${sanitizeId(sessionId)}.json`);
9
+ }
10
+ function outcomesPath(sessionId) {
11
+ return path.join(OUTCOMES_DIR, `${sanitizeId(sessionId)}.jsonl`);
12
+ }
13
+ function readPending(sessionId) {
14
+ const p = pendingPath(sessionId);
15
+ if (!fs.existsSync(p))
16
+ return { pending: [], last_prompt_ts: 0 };
17
+ try {
18
+ return JSON.parse(fs.readFileSync(p, 'utf-8'));
19
+ }
20
+ catch {
21
+ return { pending: [], last_prompt_ts: 0 };
22
+ }
23
+ }
24
+ function writePending(sessionId, state) {
25
+ const p = pendingPath(sessionId);
26
+ fs.mkdirSync(STATE_DIR, { recursive: true });
27
+ fs.writeFileSync(p, JSON.stringify(state));
28
+ }
29
+ function appendOutcome(event) {
30
+ fs.mkdirSync(OUTCOMES_DIR, { recursive: true });
31
+ fs.appendFileSync(outcomesPath(event.session_id), JSON.stringify(event) + '\n');
32
+ }
33
+ /**
34
+ * Record that solutions were injected. Called from solution-injector right
35
+ * after `approveWithContext` is emitted. Fails silently — outcome tracking
36
+ * must never block the user's workflow.
37
+ */
38
+ export function appendPending(sessionId, injections) {
39
+ if (!sessionId || injections.length === 0)
40
+ return;
41
+ try {
42
+ const state = readPending(sessionId);
43
+ const ts = Date.now();
44
+ for (const inj of injections) {
45
+ state.pending.push({ ...inj, ts });
46
+ }
47
+ writePending(sessionId, state);
48
+ }
49
+ catch (e) {
50
+ log.debug(`appendPending failed: ${e instanceof Error ? e.message : String(e)}`);
51
+ }
52
+ }
53
+ /**
54
+ * Flush pending injections as `accept` events. Called when a new user
55
+ * prompt arrives without any intervening correction/error, signaling that
56
+ * the previous injections were silently accepted. "Silence = consent."
57
+ *
58
+ * If `excludeSolutions` is provided, those solutions are NOT flushed (e.g.
59
+ * because an earlier step already attributed them as `correct` or `error`).
60
+ */
61
+ export function flushAccept(sessionId, excludeSolutions = new Set()) {
62
+ if (!sessionId)
63
+ return 0;
64
+ try {
65
+ const state = readPending(sessionId);
66
+ if (state.pending.length === 0)
67
+ return 0;
68
+ const now = Date.now();
69
+ const kept = [];
70
+ let flushed = 0;
71
+ for (const p of state.pending) {
72
+ if (excludeSolutions.has(p.solution))
73
+ continue;
74
+ appendOutcome({
75
+ ts: now,
76
+ session_id: sessionId,
77
+ solution: p.solution,
78
+ match_score: p.match_score,
79
+ injected_chars: p.injected_chars,
80
+ outcome: 'accept',
81
+ outcome_lag_ms: now - p.ts,
82
+ attribution: 'default',
83
+ });
84
+ flushed++;
85
+ }
86
+ writePending(sessionId, { pending: kept, last_prompt_ts: now });
87
+ return flushed;
88
+ }
89
+ catch (e) {
90
+ log.debug(`flushAccept failed: ${e instanceof Error ? e.message : String(e)}`);
91
+ return 0;
92
+ }
93
+ }
94
+ /**
95
+ * Attribute a correction to the most recent pending injection(s). Called
96
+ * from the correction-record MCP tool. Removes attributed entries from
97
+ * pending so subsequent `flushAccept` does not double-count them.
98
+ *
99
+ * Strategy: all currently-pending solutions in this session are marked as
100
+ * `correct`. This is conservative (the correction may target only one of
101
+ * them), but without semantic attribution we err on the side of the user's
102
+ * feedback signal being louder than acceptance.
103
+ */
104
+ export function attributeCorrection(sessionId) {
105
+ if (!sessionId)
106
+ return [];
107
+ try {
108
+ const state = readPending(sessionId);
109
+ if (state.pending.length === 0)
110
+ return [];
111
+ const now = Date.now();
112
+ const attributed = [];
113
+ for (const p of state.pending) {
114
+ appendOutcome({
115
+ ts: now,
116
+ session_id: sessionId,
117
+ solution: p.solution,
118
+ match_score: p.match_score,
119
+ injected_chars: p.injected_chars,
120
+ outcome: 'correct',
121
+ outcome_lag_ms: now - p.ts,
122
+ attribution: 'explicit',
123
+ });
124
+ attributed.push(p.solution);
125
+ }
126
+ writePending(sessionId, { pending: [], last_prompt_ts: state.last_prompt_ts });
127
+ return attributed;
128
+ }
129
+ catch (e) {
130
+ log.debug(`attributeCorrection failed: ${e instanceof Error ? e.message : String(e)}`);
131
+ return [];
132
+ }
133
+ }
134
+ /**
135
+ * Attribute a tool error to pending solutions in this session. Called from
136
+ * post-tool-failure hook. Unlike corrections, errors do not clear pending
137
+ * — an error is a weaker signal and the next user prompt can still produce
138
+ * a correct/accept decision.
139
+ *
140
+ * To avoid flooding the log with duplicate errors for the same pending
141
+ * batch, we cap at one `error` event per (session, solution) pair per
142
+ * pending-cycle by tracking a `error_flagged` set in the pending state.
143
+ */
144
+ export function attributeError(sessionId) {
145
+ if (!sessionId)
146
+ return [];
147
+ try {
148
+ const state = readPending(sessionId);
149
+ if (state.pending.length === 0)
150
+ return [];
151
+ const flaggedKey = `__error_flagged`;
152
+ const existing = state[flaggedKey];
153
+ const flagged = new Set(Array.isArray(existing) ? existing : []);
154
+ const now = Date.now();
155
+ const flaggedThisCall = [];
156
+ for (const p of state.pending) {
157
+ if (flagged.has(p.solution))
158
+ continue;
159
+ appendOutcome({
160
+ ts: now,
161
+ session_id: sessionId,
162
+ solution: p.solution,
163
+ match_score: p.match_score,
164
+ injected_chars: p.injected_chars,
165
+ outcome: 'error',
166
+ outcome_lag_ms: now - p.ts,
167
+ attribution: 'window',
168
+ });
169
+ flagged.add(p.solution);
170
+ flaggedThisCall.push(p.solution);
171
+ }
172
+ state[flaggedKey] = Array.from(flagged);
173
+ writePending(sessionId, state);
174
+ return flaggedThisCall;
175
+ }
176
+ catch (e) {
177
+ log.debug(`attributeError failed: ${e instanceof Error ? e.message : String(e)}`);
178
+ return [];
179
+ }
180
+ }
181
+ /**
182
+ * At session end, any still-pending entries are logged as `unknown` (we
183
+ * can't tell if the user was happy or just stopped). Pending file is
184
+ * removed.
185
+ */
186
+ export function finalizeSession(sessionId) {
187
+ if (!sessionId)
188
+ return 0;
189
+ try {
190
+ const state = readPending(sessionId);
191
+ const now = Date.now();
192
+ let finalized = 0;
193
+ for (const p of state.pending) {
194
+ appendOutcome({
195
+ ts: now,
196
+ session_id: sessionId,
197
+ solution: p.solution,
198
+ match_score: p.match_score,
199
+ injected_chars: p.injected_chars,
200
+ outcome: 'unknown',
201
+ outcome_lag_ms: now - p.ts,
202
+ attribution: 'session_end',
203
+ });
204
+ finalized++;
205
+ }
206
+ const p = pendingPath(sessionId);
207
+ if (fs.existsSync(p))
208
+ fs.unlinkSync(p);
209
+ return finalized;
210
+ }
211
+ catch (e) {
212
+ log.debug(`finalizeSession failed: ${e instanceof Error ? e.message : String(e)}`);
213
+ return 0;
214
+ }
215
+ }
216
+ /**
217
+ * Read all outcome events across all sessions. Used by fitness
218
+ * calculation. Returns events sorted by timestamp ascending.
219
+ */
220
+ export function readAllOutcomes() {
221
+ if (!fs.existsSync(OUTCOMES_DIR))
222
+ return [];
223
+ const events = [];
224
+ for (const file of fs.readdirSync(OUTCOMES_DIR)) {
225
+ if (!file.endsWith('.jsonl'))
226
+ continue;
227
+ try {
228
+ const text = fs.readFileSync(path.join(OUTCOMES_DIR, file), 'utf-8');
229
+ for (const line of text.split('\n')) {
230
+ if (!line)
231
+ continue;
232
+ try {
233
+ events.push(JSON.parse(line));
234
+ }
235
+ catch { /* skip bad line */ }
236
+ }
237
+ }
238
+ catch { /* skip */ }
239
+ }
240
+ events.sort((a, b) => a.ts - b.ts);
241
+ return events;
242
+ }
@@ -0,0 +1,36 @@
1
+ interface QuarantineEntry {
2
+ path: string;
3
+ at: string;
4
+ errors: string[];
5
+ }
6
+ /**
7
+ * Produce actionable frontmatter diagnostics directly from file content.
8
+ *
9
+ * This duplicates the YAML parse that `parseFrontmatterOnly` already does,
10
+ * but it runs only on the rare failure path (solution dropped from index),
11
+ * so the overhead is acceptable in exchange for a human-readable error list.
12
+ */
13
+ export declare function diagnoseFromRawContent(content: string): string[];
14
+ /**
15
+ * Append one quarantine entry for `filePath`. Deduped by path within the
16
+ * current file: if the latest entry for this path already matches the
17
+ * current errors, skip the append.
18
+ *
19
+ * Storage: one JSONL line per quarantine event. Readers use only the
20
+ * latest line per path.
21
+ */
22
+ export declare function recordQuarantine(filePath: string, errors: string[]): void;
23
+ /**
24
+ * Read the latest quarantine state: one entry per path, keyed to the most
25
+ * recent append. Entries whose file no longer exists are dropped.
26
+ */
27
+ export declare function listQuarantined(): QuarantineEntry[];
28
+ /**
29
+ * Clear quarantine entries for files that now parse correctly or no longer
30
+ * exist. Intended to be called after `forgen learn fix-up` or a manual edit.
31
+ */
32
+ export declare function pruneQuarantine(): {
33
+ removed: number;
34
+ kept: number;
35
+ };
36
+ export {};