@wooojin/forgen 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. package/.claude-plugin/plugin.json +20 -0
  2. package/CHANGELOG.md +353 -0
  3. package/CONTRIBUTING.md +98 -0
  4. package/LICENSE +21 -0
  5. package/README.ja.md +469 -0
  6. package/README.ko.md +469 -0
  7. package/README.md +483 -0
  8. package/README.zh.md +469 -0
  9. package/agents/analyst.md +98 -0
  10. package/agents/architect.md +62 -0
  11. package/agents/code-reviewer.md +120 -0
  12. package/agents/code-simplifier.md +197 -0
  13. package/agents/critic.md +70 -0
  14. package/agents/debugger.md +117 -0
  15. package/agents/designer.md +131 -0
  16. package/agents/executor.md +54 -0
  17. package/agents/explore.md +145 -0
  18. package/agents/git-master.md +212 -0
  19. package/agents/performance-reviewer.md +172 -0
  20. package/agents/planner.md +29 -0
  21. package/agents/qa-tester.md +158 -0
  22. package/agents/refactoring-expert.md +168 -0
  23. package/agents/scientist.md +144 -0
  24. package/agents/security-reviewer.md +137 -0
  25. package/agents/test-engineer.md +153 -0
  26. package/agents/verifier.md +133 -0
  27. package/agents/writer.md +184 -0
  28. package/commands/api-design.md +268 -0
  29. package/commands/architecture-decision.md +314 -0
  30. package/commands/ci-cd.md +270 -0
  31. package/commands/code-review.md +233 -0
  32. package/commands/compound.md +117 -0
  33. package/commands/database.md +263 -0
  34. package/commands/debug-detective.md +99 -0
  35. package/commands/docker.md +274 -0
  36. package/commands/documentation.md +276 -0
  37. package/commands/ecomode.md +51 -0
  38. package/commands/frontend.md +271 -0
  39. package/commands/git-master.md +90 -0
  40. package/commands/incident-response.md +292 -0
  41. package/commands/migrate.md +101 -0
  42. package/commands/performance.md +288 -0
  43. package/commands/refactor.md +105 -0
  44. package/commands/security-review.md +288 -0
  45. package/commands/tdd.md +183 -0
  46. package/commands/testing-strategy.md +265 -0
  47. package/dist/cli.d.ts +2 -0
  48. package/dist/cli.js +295 -0
  49. package/dist/core/auto-compound-runner.d.ts +12 -0
  50. package/dist/core/auto-compound-runner.js +460 -0
  51. package/dist/core/config-hooks.d.ts +10 -0
  52. package/dist/core/config-hooks.js +112 -0
  53. package/dist/core/config-injector.d.ts +50 -0
  54. package/dist/core/config-injector.js +455 -0
  55. package/dist/core/doctor.d.ts +1 -0
  56. package/dist/core/doctor.js +163 -0
  57. package/dist/core/errors.d.ts +81 -0
  58. package/dist/core/errors.js +133 -0
  59. package/dist/core/global-config.d.ts +43 -0
  60. package/dist/core/global-config.js +25 -0
  61. package/dist/core/harness.d.ts +24 -0
  62. package/dist/core/harness.js +621 -0
  63. package/dist/core/init.d.ts +7 -0
  64. package/dist/core/init.js +37 -0
  65. package/dist/core/inspect-cli.d.ts +7 -0
  66. package/dist/core/inspect-cli.js +47 -0
  67. package/dist/core/legacy-detector.d.ts +33 -0
  68. package/dist/core/legacy-detector.js +66 -0
  69. package/dist/core/logger.d.ts +34 -0
  70. package/dist/core/logger.js +121 -0
  71. package/dist/core/mcp-config.d.ts +44 -0
  72. package/dist/core/mcp-config.js +177 -0
  73. package/dist/core/notepad.d.ts +31 -0
  74. package/dist/core/notepad.js +88 -0
  75. package/dist/core/paths.d.ts +85 -0
  76. package/dist/core/paths.js +101 -0
  77. package/dist/core/plugin-detector.d.ts +44 -0
  78. package/dist/core/plugin-detector.js +226 -0
  79. package/dist/core/runtime-detector.d.ts +8 -0
  80. package/dist/core/runtime-detector.js +49 -0
  81. package/dist/core/scope-resolver.d.ts +8 -0
  82. package/dist/core/scope-resolver.js +45 -0
  83. package/dist/core/session-logger.d.ts +6 -0
  84. package/dist/core/session-logger.js +111 -0
  85. package/dist/core/session-store.d.ts +28 -0
  86. package/dist/core/session-store.js +218 -0
  87. package/dist/core/settings-lock.d.ts +18 -0
  88. package/dist/core/settings-lock.js +125 -0
  89. package/dist/core/spawn.d.ts +3 -0
  90. package/dist/core/spawn.js +135 -0
  91. package/dist/core/types.d.ts +108 -0
  92. package/dist/core/types.js +1 -0
  93. package/dist/core/uninstall.d.ts +4 -0
  94. package/dist/core/uninstall.js +307 -0
  95. package/dist/core/v1-bootstrap.d.ts +26 -0
  96. package/dist/core/v1-bootstrap.js +155 -0
  97. package/dist/engine/compound-cli.d.ts +24 -0
  98. package/dist/engine/compound-cli.js +250 -0
  99. package/dist/engine/compound-extractor.d.ts +68 -0
  100. package/dist/engine/compound-extractor.js +860 -0
  101. package/dist/engine/compound-lifecycle.d.ts +32 -0
  102. package/dist/engine/compound-lifecycle.js +305 -0
  103. package/dist/engine/compound-loop.d.ts +32 -0
  104. package/dist/engine/compound-loop.js +511 -0
  105. package/dist/engine/match-eval-log.d.ts +139 -0
  106. package/dist/engine/match-eval-log.js +270 -0
  107. package/dist/engine/phrase-blocklist.d.ts +119 -0
  108. package/dist/engine/phrase-blocklist.js +208 -0
  109. package/dist/engine/skill-promoter.d.ts +20 -0
  110. package/dist/engine/skill-promoter.js +115 -0
  111. package/dist/engine/solution-format.d.ts +160 -0
  112. package/dist/engine/solution-format.js +432 -0
  113. package/dist/engine/solution-index.d.ts +13 -0
  114. package/dist/engine/solution-index.js +252 -0
  115. package/dist/engine/solution-matcher.d.ts +364 -0
  116. package/dist/engine/solution-matcher.js +656 -0
  117. package/dist/engine/solution-writer.d.ts +76 -0
  118. package/dist/engine/solution-writer.js +157 -0
  119. package/dist/engine/term-matcher.d.ts +81 -0
  120. package/dist/engine/term-matcher.js +268 -0
  121. package/dist/engine/term-normalizer.d.ts +116 -0
  122. package/dist/engine/term-normalizer.js +171 -0
  123. package/dist/fgx.d.ts +6 -0
  124. package/dist/fgx.js +42 -0
  125. package/dist/forge/cli.d.ts +11 -0
  126. package/dist/forge/cli.js +100 -0
  127. package/dist/forge/evidence-processor.d.ts +21 -0
  128. package/dist/forge/evidence-processor.js +87 -0
  129. package/dist/forge/mismatch-detector.d.ts +44 -0
  130. package/dist/forge/mismatch-detector.js +83 -0
  131. package/dist/forge/onboarding-cli.d.ts +6 -0
  132. package/dist/forge/onboarding-cli.js +89 -0
  133. package/dist/forge/onboarding.d.ts +25 -0
  134. package/dist/forge/onboarding.js +122 -0
  135. package/dist/hooks/compound-reflection.d.ts +45 -0
  136. package/dist/hooks/compound-reflection.js +82 -0
  137. package/dist/hooks/context-guard.d.ts +24 -0
  138. package/dist/hooks/context-guard.js +156 -0
  139. package/dist/hooks/dangerous-patterns.json +18 -0
  140. package/dist/hooks/db-guard.d.ts +17 -0
  141. package/dist/hooks/db-guard.js +105 -0
  142. package/dist/hooks/hook-config.d.ts +29 -0
  143. package/dist/hooks/hook-config.js +92 -0
  144. package/dist/hooks/hook-registry.d.ts +43 -0
  145. package/dist/hooks/hook-registry.js +31 -0
  146. package/dist/hooks/hooks-generator.d.ts +49 -0
  147. package/dist/hooks/hooks-generator.js +99 -0
  148. package/dist/hooks/intent-classifier.d.ts +12 -0
  149. package/dist/hooks/intent-classifier.js +62 -0
  150. package/dist/hooks/keyword-detector.d.ts +25 -0
  151. package/dist/hooks/keyword-detector.js +389 -0
  152. package/dist/hooks/notepad-injector.d.ts +18 -0
  153. package/dist/hooks/notepad-injector.js +51 -0
  154. package/dist/hooks/permission-handler.d.ts +14 -0
  155. package/dist/hooks/permission-handler.js +114 -0
  156. package/dist/hooks/post-tool-failure.d.ts +11 -0
  157. package/dist/hooks/post-tool-failure.js +118 -0
  158. package/dist/hooks/post-tool-handlers.d.ts +17 -0
  159. package/dist/hooks/post-tool-handlers.js +115 -0
  160. package/dist/hooks/post-tool-use.d.ts +29 -0
  161. package/dist/hooks/post-tool-use.js +151 -0
  162. package/dist/hooks/pre-compact.d.ts +10 -0
  163. package/dist/hooks/pre-compact.js +165 -0
  164. package/dist/hooks/pre-tool-use.d.ts +31 -0
  165. package/dist/hooks/pre-tool-use.js +325 -0
  166. package/dist/hooks/prompt-injection-filter.d.ts +56 -0
  167. package/dist/hooks/prompt-injection-filter.js +287 -0
  168. package/dist/hooks/rate-limiter.d.ts +21 -0
  169. package/dist/hooks/rate-limiter.js +86 -0
  170. package/dist/hooks/secret-filter.d.ts +14 -0
  171. package/dist/hooks/secret-filter.js +65 -0
  172. package/dist/hooks/session-recovery.d.ts +27 -0
  173. package/dist/hooks/session-recovery.js +406 -0
  174. package/dist/hooks/shared/atomic-write.d.ts +41 -0
  175. package/dist/hooks/shared/atomic-write.js +148 -0
  176. package/dist/hooks/shared/context-budget.d.ts +37 -0
  177. package/dist/hooks/shared/context-budget.js +45 -0
  178. package/dist/hooks/shared/file-lock.d.ts +56 -0
  179. package/dist/hooks/shared/file-lock.js +253 -0
  180. package/dist/hooks/shared/hook-response.d.ts +33 -0
  181. package/dist/hooks/shared/hook-response.js +62 -0
  182. package/dist/hooks/shared/injection-caps.d.ts +39 -0
  183. package/dist/hooks/shared/injection-caps.js +52 -0
  184. package/dist/hooks/shared/plugin-signal.d.ts +23 -0
  185. package/dist/hooks/shared/plugin-signal.js +104 -0
  186. package/dist/hooks/shared/read-stdin.d.ts +8 -0
  187. package/dist/hooks/shared/read-stdin.js +63 -0
  188. package/dist/hooks/shared/sanitize-id.d.ts +7 -0
  189. package/dist/hooks/shared/sanitize-id.js +9 -0
  190. package/dist/hooks/shared/sanitize.d.ts +7 -0
  191. package/dist/hooks/shared/sanitize.js +22 -0
  192. package/dist/hooks/skill-injector.d.ts +38 -0
  193. package/dist/hooks/skill-injector.js +285 -0
  194. package/dist/hooks/slop-detector.d.ts +18 -0
  195. package/dist/hooks/slop-detector.js +93 -0
  196. package/dist/hooks/solution-injector.d.ts +58 -0
  197. package/dist/hooks/solution-injector.js +436 -0
  198. package/dist/hooks/subagent-tracker.d.ts +10 -0
  199. package/dist/hooks/subagent-tracker.js +90 -0
  200. package/dist/i18n/index.d.ts +43 -0
  201. package/dist/i18n/index.js +224 -0
  202. package/dist/lib.d.ts +14 -0
  203. package/dist/lib.js +14 -0
  204. package/dist/mcp/server.d.ts +8 -0
  205. package/dist/mcp/server.js +40 -0
  206. package/dist/mcp/solution-reader.d.ts +90 -0
  207. package/dist/mcp/solution-reader.js +273 -0
  208. package/dist/mcp/tools.d.ts +16 -0
  209. package/dist/mcp/tools.js +302 -0
  210. package/dist/preset/facet-catalog.d.ts +17 -0
  211. package/dist/preset/facet-catalog.js +46 -0
  212. package/dist/preset/preset-manager.d.ts +31 -0
  213. package/dist/preset/preset-manager.js +111 -0
  214. package/dist/renderer/inspect-renderer.d.ts +11 -0
  215. package/dist/renderer/inspect-renderer.js +123 -0
  216. package/dist/renderer/rule-renderer.d.ts +18 -0
  217. package/dist/renderer/rule-renderer.js +159 -0
  218. package/dist/store/evidence-store.d.ts +23 -0
  219. package/dist/store/evidence-store.js +58 -0
  220. package/dist/store/profile-store.d.ts +12 -0
  221. package/dist/store/profile-store.js +53 -0
  222. package/dist/store/recommendation-store.d.ts +22 -0
  223. package/dist/store/recommendation-store.js +64 -0
  224. package/dist/store/rule-store.d.ts +22 -0
  225. package/dist/store/rule-store.js +62 -0
  226. package/dist/store/session-state-store.d.ts +11 -0
  227. package/dist/store/session-state-store.js +44 -0
  228. package/dist/store/types.d.ts +159 -0
  229. package/dist/store/types.js +7 -0
  230. package/hooks/hook-registry.json +21 -0
  231. package/hooks/hooks.json +185 -0
  232. package/package.json +89 -0
  233. package/plugin.json +20 -0
  234. package/scripts/postinstall.js +826 -0
  235. package/skills/api-design/SKILL.md +262 -0
  236. package/skills/architecture-decision/SKILL.md +309 -0
  237. package/skills/ci-cd/SKILL.md +264 -0
  238. package/skills/code-review/SKILL.md +228 -0
  239. package/skills/compound/SKILL.md +101 -0
  240. package/skills/database/SKILL.md +257 -0
  241. package/skills/debug-detective/SKILL.md +95 -0
  242. package/skills/docker/SKILL.md +268 -0
  243. package/skills/documentation/SKILL.md +270 -0
  244. package/skills/ecomode/SKILL.md +46 -0
  245. package/skills/frontend/SKILL.md +265 -0
  246. package/skills/git-master/SKILL.md +86 -0
  247. package/skills/incident-response/SKILL.md +286 -0
  248. package/skills/migrate/SKILL.md +96 -0
  249. package/skills/performance/SKILL.md +282 -0
  250. package/skills/refactor/SKILL.md +100 -0
  251. package/skills/security-review/SKILL.md +282 -0
  252. package/skills/tdd/SKILL.md +178 -0
  253. package/skills/testing-strategy/SKILL.md +260 -0
  254. package/starter-pack/solutions/starter-api-error-responses.md +37 -0
  255. package/starter-pack/solutions/starter-async-patterns.md +40 -0
  256. package/starter-pack/solutions/starter-caching-strategy.md +40 -0
  257. package/starter-pack/solutions/starter-code-review-checklist.md +39 -0
  258. package/starter-pack/solutions/starter-debugging-systematic.md +40 -0
  259. package/starter-pack/solutions/starter-dependency-injection.md +40 -0
  260. package/starter-pack/solutions/starter-error-handling-patterns.md +38 -0
  261. package/starter-pack/solutions/starter-git-atomic-commits.md +36 -0
  262. package/starter-pack/solutions/starter-input-validation.md +40 -0
  263. package/starter-pack/solutions/starter-n-plus-one-queries.md +37 -0
  264. package/starter-pack/solutions/starter-refactor-safely.md +38 -0
  265. package/starter-pack/solutions/starter-secret-management.md +37 -0
  266. package/starter-pack/solutions/starter-separation-of-concerns.md +36 -0
  267. package/starter-pack/solutions/starter-tdd-red-green-refactor.md +40 -0
  268. package/starter-pack/solutions/starter-typescript-strict-types.md +39 -0
@@ -0,0 +1,270 @@
1
+ /**
2
+ * Match eval log — JSONL ranking-decision writer (T3 of the Round 3 plan).
3
+ *
4
+ * Why this module exists:
5
+ * The bootstrap evaluator (`evaluateSolutionMatcher`) measures matcher
6
+ * quality against a labeled fixture, but production traffic is open-ended.
7
+ * T2 hoisted query normalization out of the per-solution loop, which is
8
+ * fast, but it also hid the "what did we actually rank, and why?" signal
9
+ * from offline review. This module appends a single JSONL line per matcher
10
+ * call capturing the normalized query, the top candidates with their
11
+ * matched terms, and which ones the caller ultimately surfaced.
12
+ *
13
+ * The target consumer is offline analysis: a reviewer can tail or grep
14
+ * the file to spot systematic recall misses or spurious matches without
15
+ * instrumenting production.
16
+ *
17
+ * Privacy posture (T3 security review fix):
18
+ * The raw user prompt is NEVER written to disk. Instead, we store a
19
+ * short SHA-256 prefix (`rawQueryHash`) plus character length
20
+ * (`rawQueryLen`). This keeps dedup and "was the prompt substantial"
21
+ * signals available for offline analysis while eliminating the PII /
22
+ * API-key / credential leakage risk of persisting raw prompts in
23
+ * `~/.forgen/state/match-eval-log.jsonl`. The `normalizedQuery` array
24
+ * already carries the matching-signal payload and is safe to persist
25
+ * because it only contains short tag tokens (never the full prompt).
26
+ *
27
+ * Operational principles:
28
+ * 1. **Off the critical path.** Never throw; never block. A failed write
29
+ * is silently swallowed — the hook must continue to return its
30
+ * solutions even if the log is misconfigured, read-only, or full.
31
+ * 2. **Bounded record size.** Candidates are capped at 5 (the matcher's
32
+ * own top-5 cap). `normalizedQuery` is capped at 64 terms. Each
33
+ * candidate's `matchedTerms` is capped at 16. Element counts are
34
+ * capped but string lengths are not, so ≈2KB is a typical ceiling, not
35
+ * a guarantee; the file lock (principle 4) is what prevents interleaving.
36
+ * 3. **Symlink defense.** `fs.openSync` with `O_NOFOLLOW` refuses to
37
+ * follow a symlink at the log path. Without this guard, an attacker
38
+ * with write access to `~/.forgen/state/` could redirect appends to
39
+ * `~/.ssh/authorized_keys`, `~/.bashrc`, or other sensitive files.
40
+ * 4. **File-lock for concurrency.** Uses `withFileLockSync` to serialize
41
+ * concurrent writers. macOS PIPE_BUF=512 is smaller than the worst-
42
+ * case record size so POSIX atomic append alone isn't enough.
43
+ * 5. **Opt-out via env, fail-closed on invalid config.**
44
+ * `FORGEN_MATCH_EVAL_LOG=off|disabled|0|false|no` disables entirely.
45
+ * `FORGEN_MATCH_EVAL_LOG_SAMPLE=<float 0..1>` samples. An invalid
46
+ * sample value (NaN, out of range, whitespace) falls back to 0
47
+ * (skip) rather than 1 (log everything) — fail-closed for privacy.
48
+ * 6. **File size cap.** `readMatchEvalLog` refuses to parse files
49
+ * larger than 50 MB to prevent OOM in the offline analyzer. Callers
50
+ * are responsible for rotating the log externally.
51
+ */
52
+ import * as fs from 'node:fs';
53
+ import * as path from 'node:path';
54
+ import { constants as fsc } from 'node:fs';
55
+ import { createHash } from 'node:crypto';
56
+ import { MATCH_EVAL_LOG_PATH } from '../core/paths.js';
57
+ import { createLogger } from '../core/logger.js';
58
+ import { withFileLockSync } from '../hooks/shared/file-lock.js';
59
/** Module-scoped logger; this file only ever calls its `debug` method. */
const log = createLogger('match-eval-log');

/** Name of the environment variable that switches the eval log on or off. */
export const MATCH_EVAL_LOG_ENV = 'FORGEN_MATCH_EVAL_LOG';

/** Name of the environment variable holding the sample rate (0.0 – 1.0). */
export const MATCH_EVAL_LOG_SAMPLE_ENV = MATCH_EVAL_LOG_ENV + '_SAMPLE';

/** Upper bound on candidates written per record. */
const MAX_CANDIDATES_LOGGED = 5;

/** Upper bound on normalized-query terms written per record. */
const MAX_NORMALIZED_QUERY_LOGGED = 64;

/** Upper bound on matched terms written for each candidate. */
const MAX_MATCHED_TERMS_PER_CANDIDATE = 16;

/** Read-side guard: log files larger than this many bytes are not loaded. */
const MAX_LOG_FILE_SIZE_BYTES = 50 * 1024 ** 2; // 50 MB
72
/**
 * Report whether the operator has switched the eval log off.
 *
 * Reads the environment variable named by `MATCH_EVAL_LOG_ENV`. An unset
 * variable means "enabled". Otherwise the value is trimmed and lower-cased,
 * and logging counts as disabled when the result is exactly one of
 * `off`, `disabled`, `0`, `false`, or `no`.
 *
 * @returns {boolean} `true` when logging must be skipped.
 */
function isDisabled() {
    const setting = process.env[MATCH_EVAL_LOG_ENV];
    if (setting === undefined) {
        return false;
    }
    switch (setting.trim().toLowerCase()) {
        case 'off':
        case 'disabled':
        case '0':
        case 'false':
        case 'no':
            return true;
        default:
            return false;
    }
}
83
+ /**
84
+ * Read the sample rate from environment. Defaults to 1.0 (log everything).
85
+ * Invalid values (non-numeric, out of range, whitespace-only) fall back to
86
+ * 0 — fail-closed for privacy. Rationale: if an operator mistypes
87
+ * `SAMPLE=01` (intended 0.1) and we default to 1.0, they get 10× more
88
+ * records than they expected. Fail-closed is safer.
89
+ */
90
+ function getSampleRate() {
91
+ const raw = process.env[MATCH_EVAL_LOG_SAMPLE_ENV];
92
+ if (raw === undefined)
93
+ return 1.0;
94
+ const trimmed = raw.trim();
95
+ if (trimmed === '')
96
+ return 0;
97
+ const n = Number.parseFloat(trimmed);
98
+ if (!Number.isFinite(n) || n < 0 || n > 1)
99
+ return 0;
100
+ return n;
101
+ }
102
/**
 * Reduce a raw prompt to the two derived values that get logged in its
 * place: a truncated digest and a length.
 *
 * @param {string} rawQuery The prompt text; it is hashed and measured, and
 *   is not part of the returned object.
 * @returns {{ hash: string, len: number }} `hash` is the first 16 hex
 *   characters of the SHA-256 digest; `len` is the number of code points.
 */
function hashRawQuery(rawQuery) {
    const digestHex = createHash('sha256').update(rawQuery).digest('hex');
    // Array.from walks the string iterator, which yields whole code points,
    // so a surrogate pair counts once rather than as two UTF-16 units.
    const codePointCount = Array.from(rawQuery).length;
    return { hash: digestHex.slice(0, 16), len: codePointCount };
}
110
/**
 * Append one ranking decision to the match-eval-log JSONL file.
 *
 * Never throws: every failure (bad input shape, serialization error,
 * filesystem error) is caught and reported through `log.debug`, so callers
 * can invoke this without a guard of their own.
 *
 * Steps, in order:
 *   1. Return early when logging is disabled or the call is sampled out.
 *   2. Build a size-bounded record holding a hash and length in place of
 *      the raw prompt.
 *   3. Serialize the record before any filesystem call is made.
 *   4. Create the parent directory if needed, then append under a file lock.
 *
 * @param {object} input Matcher call data. Reads `rawQuery`, `source`,
 *   `normalizedQuery`, `candidates` (each with `name`, `relevance`,
 *   `matchedTerms`) and `rankedTopN`.
 * @returns {void}
 */
export function logMatchDecision(input) {
    try {
        if (isDisabled()) {
            return;
        }
        // A rate <= 0 never logs; a rate >= 1 always logs without drawing a
        // random number; anything in between keeps that fraction of calls.
        const rate = getSampleRate();
        const sampledOut = rate <= 0 || (rate < 1 && Math.random() >= rate);
        if (sampledOut) {
            return;
        }
        // Only the digest and length of the prompt go into the record.
        const digest = hashRawQuery(input.rawQuery);
        const trimCandidate = (cand) => ({
            name: cand.name,
            relevance: cand.relevance,
            matchedTerms: cand.matchedTerms.slice(0, MAX_MATCHED_TERMS_PER_CANDIDATE),
        });
        // Every array is truncated to its cap before serialization.
        const entry = {
            source: input.source,
            rawQueryHash: digest.hash,
            rawQueryLen: digest.len,
            normalizedQuery: input.normalizedQuery.slice(0, MAX_NORMALIZED_QUERY_LOGGED),
            candidates: input.candidates.slice(0, MAX_CANDIDATES_LOGGED).map(trimCandidate),
            rankedTopN: input.rankedTopN.slice(0, MAX_CANDIDATES_LOGGED),
            ts: new Date().toISOString(),
        };
        // Serialize up front so a throwing toJSON surfaces before disk I/O.
        const payload = JSON.stringify(entry) + '\n';
        // Idempotent; the 0o700 mode only takes effect when the directory
        // is actually created by this call.
        fs.mkdirSync(path.dirname(MATCH_EVAL_LOG_PATH), { recursive: true, mode: 0o700 });
        // O_NOFOLLOW: fail instead of following a symlink at the log path.
        // O_APPEND:   every write lands at the current end of file.
        // O_CREAT:    create the file (mode 0o600, passed below) if absent.
        const appendFlags = fsc.O_WRONLY | fsc.O_CREAT | fsc.O_APPEND | fsc.O_NOFOLLOW;
        // The write runs inside withFileLockSync, keyed on the log path.
        // NOTE(review): the helper is defined elsewhere; it is presumed to
        // serialize concurrent writers and release the lock itself.
        withFileLockSync(MATCH_EVAL_LOG_PATH, () => {
            const fd = fs.openSync(MATCH_EVAL_LOG_PATH, appendFlags, 0o600);
            try {
                // The mode given to openSync applies only on creation, so
                // tighten a pre-existing file explicitly. Best-effort:
                // fchmod may fail on a file this process does not own.
                try {
                    fs.fchmodSync(fd, 0o600);
                }
                catch { /* ignored on purpose */ }
                fs.writeSync(fd, payload);
            }
            finally {
                fs.closeSync(fd);
            }
        });
    }
    catch (err) {
        // Fail-open: record the reason at debug level and carry on.
        const reason = err instanceof Error ? err.message : String(err);
        log.debug(`logMatchDecision failed (swallowed): ${reason}`);
    }
}
180
+ /**
181
+ * Read all records from the match-eval-log file. Intended for tests and
182
+ * offline analysis tools; NOT for hot-path use.
183
+ *
184
+ * Malformed lines (non-JSON, missing required fields, wrong shape) are
185
+ * silently skipped — preserves the debug value of the rest of the file
186
+ * if one entry gets corrupted by a partial write or tool error.
187
+ *
188
+ * DoS guard: refuses to read files larger than `MAX_LOG_FILE_SIZE_BYTES`
189
+ * to prevent OOM when a long-running log grows unbounded. Returns [] in
190
+ * that case and debug-logs the skip.
191
+ */
192
+ export function readMatchEvalLog() {
193
+ try {
194
+ if (!fs.existsSync(MATCH_EVAL_LOG_PATH))
195
+ return [];
196
+ // Symlink check on read too — don't exfiltrate arbitrary files if the
197
+ // path has been swapped.
198
+ const lst = fs.lstatSync(MATCH_EVAL_LOG_PATH);
199
+ if (lst.isSymbolicLink()) {
200
+ log.debug('readMatchEvalLog: refusing to read a symlinked log path');
201
+ return [];
202
+ }
203
+ if (lst.size > MAX_LOG_FILE_SIZE_BYTES) {
204
+ log.debug(`readMatchEvalLog: file exceeds ${MAX_LOG_FILE_SIZE_BYTES} bytes, skipping`);
205
+ return [];
206
+ }
207
+ const content = fs.readFileSync(MATCH_EVAL_LOG_PATH, 'utf-8');
208
+ const out = [];
209
+ for (const line of content.split('\n')) {
210
+ if (!line.trim())
211
+ continue;
212
+ try {
213
+ const parsed = JSON.parse(line);
214
+ if (isValidRecord(parsed)) {
215
+ out.push(parsed);
216
+ }
217
+ }
218
+ catch {
219
+ // Skip malformed lines
220
+ }
221
+ }
222
+ return out;
223
+ }
224
+ catch {
225
+ return [];
226
+ }
227
+ }
228
+ /**
229
+ * Runtime shape check for a parsed record. Strict validation of every
230
+ * field including per-candidate shape — a downstream consumer that calls
231
+ * `rec.candidates[0].matchedTerms.slice(0, 3)` must not crash on a
232
+ * malformed entry.
233
+ */
234
+ function isValidRecord(v) {
235
+ if (v == null || typeof v !== 'object')
236
+ return false;
237
+ const r = v;
238
+ if (r.source !== 'hook' && r.source !== 'mcp')
239
+ return false;
240
+ if (typeof r.rawQueryHash !== 'string')
241
+ return false;
242
+ if (typeof r.rawQueryLen !== 'number')
243
+ return false;
244
+ if (!Array.isArray(r.normalizedQuery))
245
+ return false;
246
+ if (!r.normalizedQuery.every(t => typeof t === 'string'))
247
+ return false;
248
+ if (!Array.isArray(r.candidates))
249
+ return false;
250
+ for (const c of r.candidates) {
251
+ if (c == null || typeof c !== 'object')
252
+ return false;
253
+ const cc = c;
254
+ if (typeof cc.name !== 'string')
255
+ return false;
256
+ if (typeof cc.relevance !== 'number')
257
+ return false;
258
+ if (!Array.isArray(cc.matchedTerms))
259
+ return false;
260
+ if (!cc.matchedTerms.every(t => typeof t === 'string'))
261
+ return false;
262
+ }
263
+ if (!Array.isArray(r.rankedTopN))
264
+ return false;
265
+ if (!r.rankedTopN.every(t => typeof t === 'string'))
266
+ return false;
267
+ if (typeof r.ts !== 'string')
268
+ return false;
269
+ return true;
270
+ }
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Phrase blocklist — non-dev-context 2-word English compounds.
3
+ *
4
+ * Why this module exists (R4-T2 of the Round 4 plan):
5
+ * The fixture v2 negative bucket exposed 5 false positive triggers
6
+ * ("performance review meeting notes", "system architecture overview
7
+ * document", "database backup recovery procedure", "validation of
8
+ * insurance claims", "solar system planets astronomy"). All five share
9
+ * the same structural problem: a single common dev-adjacent word
10
+ * ("performance", "system", "database", "validation", "system") is
11
+ * simultaneously a legitimate dev tag AND a legitimate English noun.
12
+ * Tag-based matching cannot distinguish "user typed dev term in dev
13
+ * context" from "user typed the same word in a non-dev context"
14
+ * without external semantic signal.
15
+ *
16
+ * T4 BM25 was prototyped as a fix (frequency-based down-weighting) and
17
+ * skipped — see `docs/plans/2026-04-08-t4-bm25-skip-adr.md` for the
18
+ * full rationale. The structural reason BM25 didn't help: with N=15
19
+ * solutions, common dev-adjacent words still cluster in the high-IDF
20
+ * range, so even after IDF the bare-tag match wins.
21
+ *
22
+ * R4-T2's approach is the inverse: instead of trying to make the
23
+ * matcher smarter, surface the non-dev *context* directly. A 2-word
24
+ * English compound like "performance review" or "system architecture"
25
+ * is a strong signal that the surrounding query is NOT a dev question.
26
+ * When such a compound appears in the query, the function below masks
27
+ * its constituent tokens from the prompt tag list, removing the false
28
+ * evidence the matcher would otherwise rank on. Other dev tokens in
29
+ * the same query are preserved, so a dev query that happens to include
30
+ * one of these compounds (e.g., "performance review of caching
31
+ * strategy") still surfaces the legitimate cache match.
32
+ *
33
+ * Curation rules (for entries in PHRASE_BLOCKLIST):
34
+ * 1. **2 words minimum**, lowercase ASCII, single space separator.
35
+ * Single words are too prone to false negatives — "performance"
36
+ * alone is a real dev concept; "performance review" is not.
37
+ * 2. **NEVER block legitimate dev compounds.** "code review", "function
38
+ * call", "error message", "database query", "system design", "type
39
+ * check", "unit test", "build pipeline" — all of these are first-
40
+ * class dev terms and MUST stay matchable.
41
+ * 3. **Prefer concrete English compounds with a known false-positive
42
+ * footprint.** Each entry should trace back to either (a) one of
43
+ * the 5 known fixture v2 trigger queries, or (b) a manual review
44
+ * of top-50 corpus tags for English homographs.
45
+ * 4. **Plurals as separate entries.** "performance review" and
46
+ * "performance reviews" are both common; we list both rather than
47
+ * apply automatic stemming, since stemming would risk over-blocking
48
+ * ("review" → "reviews" → "reviewed" cascade).
49
+ * 5. **No regex / wildcards.** Literal phrase matching keeps the
50
+ * blocklist auditable and avoids ReDoS surface.
51
+ *
52
+ * Roll-out posture:
53
+ * Start with ~15 entries (5 known fixture triggers + 10 homograph
54
+ * candidates), measure on the bootstrap eval, expand only if metrics
55
+ * indicate real-world false positives that aren't covered. The ADR
56
+ * targeted ~50 phrases as an upper bound — exceeding that without
57
+ * measured evidence is a sign that the blocklist is becoming a leaky
58
+ * abstraction for a deeper matcher problem.
59
+ */
60
+ /**
61
+ * Lowercase ASCII 2-word phrases that signal a non-dev context.
62
+ *
63
+ * Audit owner: matcher maintainer. Adding/removing entries MUST be
64
+ * accompanied by a fixture eval re-run and (if the move shifts metrics)
65
+ * a `ROUND3_BASELINE` update in the same PR.
66
+ */
67
+ export declare const PHRASE_BLOCKLIST: readonly string[];
68
+ /**
69
+ * Find every blocked phrase that appears in the query as a whole-word match.
70
+ *
71
+ * Whole-word means the phrase is bounded by start-of-string, end-of-string,
72
+ * any whitespace, or any punctuation/non-ASCII-letter character on both
73
+ * sides. Substring matching alone would over-block ("performance reviewer"
74
+ * must NOT match "performance review"); whitespace-only boundary checks
75
+ * would under-detect natural-language punctuation ("performance review.").
76
+ *
77
+ * Iterates ALL occurrences of each phrase, not just the first — so a query
78
+ * like "performance reviewer and performance review meeting" still detects
79
+ * the second occurrence as a valid match even though the first overlaps a
80
+ * longer word.
81
+ *
82
+ * Returns the list of matched phrases in input order; the same phrase is
83
+ * never reported twice even if it appears multiple times. Empty array
84
+ * when no blocked phrase is present.
85
+ */
86
+ export declare function findBlockedPhrases(rawQuery: string): string[];
87
+ /**
88
+ * Mask the tokens of any blocked phrase from a prompt tag list.
89
+ *
90
+ * Given the raw query (used for phrase detection) and the already-extracted
91
+ * prompt tags, this function:
92
+ * 1. Finds every blocked phrase in the raw query.
93
+ * 2. Computes the union of all phrase-constituent tokens (after running
94
+ * them through `extractTags` so the masking matches the same
95
+ * lowercase / Korean-aware token shape the matcher already uses).
96
+ * 3. Returns a new prompt tag list with the masked tokens removed.
97
+ *
98
+ * If no blocked phrase is found, the input array is returned unchanged
99
+ * (referentially — for the hot path's allocation cost). Otherwise a new
100
+ * filtered array is returned.
101
+ *
102
+ * Example: query "performance review meeting notes"
103
+ * - Blocked phrases found: ["performance review", "meeting notes"]
104
+ * - Masked tokens: {performance, review, meeting, notes}
105
+ * - extractTags("performance review meeting notes") =
106
+ * [performance, review, meeting, notes]
107
+ * - Result: [] (every prompt tag was masked)
108
+ *
109
+ * Example: query "performance review of caching strategy"
110
+ * - Blocked phrases found: ["performance review"]
111
+ * - Masked tokens: {performance, review}
112
+ * - extractTags result: [performance, review, caching, strategy]
113
+ * - Filtered result: [caching, strategy] ← legitimate dev tags survive
114
+ *
115
+ * Korean queries: blocked phrases are ASCII-only, so a Korean query never
116
+ * triggers masking. Mixed queries (Korean + English) only mask the
117
+ * English-side tokens that participate in a blocked phrase.
118
+ */
119
+ export declare function maskBlockedTokens(rawQuery: string, promptTags: readonly string[]): string[];
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Phrase blocklist — non-dev-context 2-word English compounds.
3
+ *
4
+ * Why this module exists (R4-T2 of the Round 4 plan):
5
+ * The fixture v2 negative bucket exposed 5 false positive triggers
6
+ * ("performance review meeting notes", "system architecture overview
7
+ * document", "database backup recovery procedure", "validation of
8
+ * insurance claims", "solar system planets astronomy"). All five share
9
+ * the same structural problem: a single common dev-adjacent word
10
+ * ("performance", "system", "database", "validation", "system") is
11
+ * simultaneously a legitimate dev tag AND a legitimate English noun.
12
+ * Tag-based matching cannot distinguish "user typed dev term in dev
13
+ * context" from "user typed the same word in a non-dev context"
14
+ * without external semantic signal.
15
+ *
16
+ * T4 BM25 was prototyped as a fix (frequency-based down-weighting) and
17
+ * skipped — see `docs/plans/2026-04-08-t4-bm25-skip-adr.md` for the
18
+ * full rationale. The structural reason BM25 didn't help: with N=15
19
+ * solutions, common dev-adjacent words still cluster in the high-IDF
20
+ * range, so even after IDF the bare-tag match wins.
21
+ *
22
+ * R4-T2's approach is the inverse: instead of trying to make the
23
+ * matcher smarter, surface the non-dev *context* directly. A 2-word
24
+ * English compound like "performance review" or "system architecture"
25
+ * is a strong signal that the surrounding query is NOT a dev question.
26
+ * When such a compound appears in the query, the function below masks
27
+ * its constituent tokens from the prompt tag list, removing the false
28
+ * evidence the matcher would otherwise rank on. Other dev tokens in
29
+ * the same query are preserved, so a dev query that happens to include
30
+ * one of these compounds (e.g., "performance review of caching
31
+ * strategy") still surfaces the legitimate cache match.
32
+ *
33
+ * Curation rules (for entries in PHRASE_BLOCKLIST):
34
+ * 1. **2 words minimum**, lowercase ASCII, single space separator.
35
+ * Single words are too prone to false negatives — "performance"
36
+ * alone is a real dev concept; "performance review" is not.
37
+ * 2. **NEVER block legitimate dev compounds.** "code review", "function
38
+ * call", "error message", "database query", "system design", "type
39
+ * check", "unit test", "build pipeline" — all of these are first-
40
+ * class dev terms and MUST stay matchable.
41
+ * 3. **Prefer concrete English compounds with a known false-positive
42
+ * footprint.** Each entry should trace back to either (a) one of
43
+ * the 5 known fixture v2 trigger queries, or (b) a manual review
44
+ * of top-50 corpus tags for English homographs.
45
+ * 4. **Plurals as separate entries.** "performance review" and
46
+ * "performance reviews" are both common; we list both rather than
47
+ * apply automatic stemming, since stemming would risk over-blocking
48
+ * ("review" → "reviews" → "reviewed" cascade).
49
+ * 5. **No regex / wildcards.** Literal phrase matching keeps the
50
+ * blocklist auditable and avoids ReDoS surface.
51
+ *
52
+ * Roll-out posture:
53
+ * Start with ~15 entries (5 known fixture triggers + 10 homograph
54
+ * candidates), measure on the bootstrap eval, expand only if metrics
55
+ * indicate real-world false positives that aren't covered. The ADR
56
+ * targeted ~50 phrases as an upper bound — exceeding that without
57
+ * measured evidence is a sign that the blocklist is becoming a leaky
58
+ * abstraction for a deeper matcher problem.
59
+ */
60
+ import { extractTags } from './solution-format.js';
61
// Group 1: the compounds behind the 5 known fixture v2 false-positive triggers.
const FIXTURE_TRIGGER_PHRASES = [
    'performance review',
    'system architecture',
    'database backup',
    'insurance claim',
    'solar system',
];
// Group 2: plural forms of group 1, listed explicitly (curation rule 4:
// plurals are separate entries, no automatic stemming).
const FIXTURE_TRIGGER_PLURALS = [
    'performance reviews',
    'system architectures',
    'database backups',
    'insurance claims',
];
// Group 3: common non-dev English compounds with dev-tag homographs.
const HOMOGRAPH_COMPOUND_PHRASES = [
    // insurance-domain compounds ("validation ... insurance" trigger path)
    'insurance policy',
    'insurance policies',
    // document/overview compounds ("system architecture overview document" path)
    'overview document',
    'document overview',
    // meeting/notes compounds ("performance review meeting notes" path)
    'meeting notes',
    'meeting minutes',
];
/**
 * Lowercase ASCII two-word phrases whose presence marks a query as non-dev
 * context. Entries keep the order: fixture triggers, their plurals, then
 * homograph compounds.
 *
 * Ownership: the matcher maintainer audits this list. Every addition or
 * removal MUST come with a fixture eval re-run and, when the change shifts
 * metrics, a `ROUND3_BASELINE` update in the same PR.
 *
 * Deliberately NOT listed:
 *   - 'recovery procedure' / 'backup recovery': they only duplicated coverage
 *     of the `database backup recovery procedure` trigger (already caught by
 *     'database backup') and were rejected in code review because they would
 *     silently mask dev SRE queries such as 'disaster recovery procedure' or
 *     'rollback recovery procedure'.
 *   - 'function room' / 'room booking': hypothetical homographs rejected under
 *     curation rule #3, since they have no fixture-traceable false-positive
 *     footprint and would turn the blocklist into a leaky abstraction.
 */
export const PHRASE_BLOCKLIST = [
    ...FIXTURE_TRIGGER_PHRASES,
    ...FIXTURE_TRIGGER_PLURALS,
    ...HOMOGRAPH_COMPOUND_PHRASES,
];
102
/**
 * Word-boundary helper: report whether `ch` is an ASCII digit or a lowercase
 * ASCII letter.
 *
 * Everything else is a boundary: whitespace, punctuation
 * (`. , ; : ! ? ( ) [ ] { } " ' /`), Korean/CJK characters, and a missing
 * character (`undefined`, i.e. an index before the start or past the end of
 * the string).
 *
 * Punctuation has to count as a boundary because real prompts contain it
 * ("performance review.", "(performance review)", "performance review, then
 * revert"); whitespace-only boundaries would let those trigger phrases slip
 * through to the matcher.
 *
 * Uppercase letters are intentionally not recognised: callers lowercase the
 * query before indexing into it.
 */
function isWordChar(ch) {
    if (ch === undefined) {
        return false;
    }
    const unit = ch.charCodeAt(0);
    const isDigit = unit >= 48 && unit <= 57; // '0'..'9'
    const isLowerLetter = unit >= 97 && unit <= 122; // 'a'..'z'
    return isDigit || isLowerLetter;
}
122
/**
 * Find every blocked phrase that appears in the query as a whole-word match.
 *
 * Matching rules:
 *   - Case-insensitive: the query is lowercased before searching.
 *   - Whitespace-tolerant: any run of whitespace in the query (double spaces,
 *     tabs, newlines, NBSP) is collapsed to a single space first, so
 *     "performance\nreview" still matches the single-space blocklist entry
 *     "performance review". Blocklist entries themselves stay literal.
 *   - Whole-word: the character before and after the hit must be a boundary
 *     (start/end of string, whitespace, punctuation, or any non-[a-z0-9]
 *     character). "performance reviewer" must NOT match "performance review",
 *     while "performance review." must.
 *   - All occurrences are examined, so "performance reviewer and performance
 *     review meeting" is still detected through its second occurrence.
 *
 * @param rawQuery - The user's raw query text.
 * @returns The matched phrases in `PHRASE_BLOCKLIST` order (not the order in
 *   which they occur in the query). Each phrase is reported at most once.
 *   Empty array when no blocked phrase is present.
 */
export function findBlockedPhrases(rawQuery) {
    // Normalising whitespace only ever replaces boundary characters with
    // another boundary character, so every previously detected hit is still
    // detected. `\s+` is a linear-time pattern (no backtracking hazard).
    const lower = rawQuery.toLowerCase().replace(/\s+/g, ' ');
    const found = [];
    for (const phrase of PHRASE_BLOCKLIST) {
        // Guards against a duplicated blocklist entry being reported twice.
        if (found.includes(phrase))
            continue;
        let idx = lower.indexOf(phrase);
        while (idx !== -1) {
            // Out-of-range indexes yield `undefined`, which isWordChar treats
            // as a boundary, so start/end of string need no special case.
            const boundedBefore = !isWordChar(lower[idx - 1]);
            const boundedAfter = !isWordChar(lower[idx + phrase.length]);
            if (boundedBefore && boundedAfter) {
                found.push(phrase);
                break; // dedup policy: one hit per phrase is enough
            }
            idx = lower.indexOf(phrase, idx + 1);
        }
    }
    return found;
}
161
/**
 * Remove from a prompt tag list every token that belongs to a blocked phrase
 * found in the raw query.
 *
 * Steps:
 *   1. Detect blocked phrases in `rawQuery` via `findBlockedPhrases`.
 *   2. Tokenize each detected phrase with `extractTags` and take the union of
 *      the resulting tokens. Using the same tokenizer as the matcher keeps the
 *      mask in the same token shape as the tags being filtered.
 *   3. Return `promptTags` without the masked tokens.
 *
 * The input array is never returned by reference and never mutated: when
 * nothing needs masking (no blocked phrase, or the phrases tokenize to
 * nothing) a shallow copy of `promptTags` is returned; otherwise a new
 * filtered array is returned.
 *
 * Example: query "performance review meeting notes"
 *   - blocked phrases: ["performance review", "meeting notes"]
 *   - masked tokens:   {performance, review, meeting, notes}
 *   - tags [performance, review, meeting, notes] -> [] (everything masked)
 *
 * Example: query "performance review of caching strategy"
 *   - blocked phrases: ["performance review"]
 *   - masked tokens:   {performance, review}
 *   - tags [performance, review, caching, strategy] -> [caching, strategy]
 *     (legitimate dev tags survive)
 *
 * Korean queries: blocked phrases are ASCII-only, so a purely Korean query
 * never triggers masking; in a mixed Korean + English query only the
 * English-side tokens of a blocked phrase are masked.
 *
 * @param rawQuery - Raw query text, used only for phrase detection.
 * @param promptTags - Tags already extracted from the query.
 * @returns A new array holding the tags that survive masking.
 */
export function maskBlockedTokens(rawQuery, promptTags) {
    const hits = findBlockedPhrases(rawQuery);
    // Union of the tokens of all detected phrases; stays empty when there are
    // no hits (extractTags is then never called).
    const maskedTokens = new Set();
    hits.forEach((phrase) => {
        for (const token of extractTags(phrase)) {
            maskedTokens.add(token);
        }
    });
    return maskedTokens.size === 0
        ? [...promptTags]
        : promptTags.filter((tag) => !maskedTokens.has(tag));
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Forgen — Skill Promoter
3
+ *
4
+ * Promotes verified/mature solutions to skills under .forgen/me/skills/.
5
+ * Converts a solution (declarative knowledge) into a skill (procedural knowledge).
6
+ */
7
+ export interface PromoteResult {
8
+ success: boolean;
9
+ skillPath?: string;
10
+ reason?: string;
11
+ }
12
+ /** Promote a solution to a skill. */
13
+ export declare function promoteSolution(solutionName: string, triggers?: string[]): PromoteResult;
14
+ /** List the skills. */
15
+ export declare function listSkills(): Array<{
16
+ name: string;
17
+ status: string;
18
+ promotedFrom?: string;
19
+ triggers: string[];
20
+ }>;