@ijfw/memory-server 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/ijfw-dashboard +20 -1
  2. package/package.json +4 -3
  3. package/src/audit-roster.js +89 -12
  4. package/src/brain/tiered-llm.js +57 -7
  5. package/src/cross-orchestrator-cli.js +344 -4
  6. package/src/cross-project-search.js +39 -1
  7. package/src/dashboard-server.js +7 -1
  8. package/src/dream/runner.mjs +560 -8
  9. package/src/handlers/brain-handler.js +101 -1
  10. package/src/importers/discover.js +1 -1
  11. package/src/memory/bench-metrics.js +289 -0
  12. package/src/memory/benchmark.js +1 -1
  13. package/src/memory/search.js +53 -1
  14. package/src/orchestrator/plan-checker.js +1 -1
  15. package/src/profile/audit.js +671 -0
  16. package/src/profile/capture.js +871 -0
  17. package/src/profile/derive-dialectic.js +242 -0
  18. package/src/profile/derive-heuristic.js +733 -0
  19. package/src/profile/derive.js +156 -0
  20. package/src/profile/egress.js +306 -0
  21. package/src/profile/eval/build-real-probes.mjs +197 -0
  22. package/src/profile/eval/corpus-from-reddit.mjs +166 -0
  23. package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
  24. package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
  25. package/src/profile/eval/gate-b-behavior.mjs +420 -0
  26. package/src/profile/eval/gate-b-decision-run.mjs +171 -0
  27. package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
  28. package/src/profile/eval/gate-b-run.mjs +417 -0
  29. package/src/profile/eval/gate-b-run.test.mjs +204 -0
  30. package/src/profile/eval/gate-c-capture.mjs +323 -0
  31. package/src/profile/eval/harness.mjs +551 -0
  32. package/src/profile/eval/instrument-validation.mjs +248 -0
  33. package/src/profile/eval/instrument-validation.test.mjs +125 -0
  34. package/src/profile/eval/multi-subject-harness.mjs +106 -0
  35. package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
  36. package/src/profile/eval/personas.test.mjs +83 -0
  37. package/src/profile/eval/plumbing.test.mjs +69 -0
  38. package/src/profile/eval/prereg.mjs +130 -0
  39. package/src/profile/eval/prereg.test.mjs +78 -0
  40. package/src/profile/eval/real-corpus.test.mjs +103 -0
  41. package/src/profile/eval/real-personas.mjs +109 -0
  42. package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
  43. package/src/profile/eval/run-real-corpus.mjs +358 -0
  44. package/src/profile/eval/slug-quality.mjs +464 -0
  45. package/src/profile/eval/stylometry-features.js +85 -0
  46. package/src/profile/eval/stylometry-reference.js +16 -0
  47. package/src/profile/eval/stylometry.js +224 -0
  48. package/src/profile/eval/stylometry.test.mjs +103 -0
  49. package/src/profile/eval/synthetic-personas.js +91 -0
  50. package/src/profile/eval/verifier-features.mjs +170 -0
  51. package/src/profile/eval/verifier-logreg.mjs +74 -0
  52. package/src/profile/eval/verifier-pair.mjs +122 -0
  53. package/src/profile/eval/verifier-reference.mjs +68 -0
  54. package/src/profile/eval/verifier-scorer.mjs +30 -0
  55. package/src/profile/eval/wrong-target-control.mjs +168 -0
  56. package/src/profile/eval/wrong-target-control.test.mjs +124 -0
  57. package/src/profile/exemplar-capture.js +232 -0
  58. package/src/profile/exemplar-retrieve.js +138 -0
  59. package/src/profile/exemplar-store.js +314 -0
  60. package/src/profile/lock.js +64 -0
  61. package/src/profile/merge.js +624 -0
  62. package/src/profile/path-policy.js +213 -0
  63. package/src/profile/precision-stamp.mjs +151 -0
  64. package/src/profile/render-brief.js +717 -0
  65. package/src/profile/schema.js +244 -0
  66. package/src/profile/sensitivity.js +249 -0
  67. package/src/profile/serve.js +345 -0
  68. package/src/profile/store.js +261 -0
  69. package/src/profile/telemetry.js +289 -0
  70. package/src/recovery/checkpoint.js +7 -1
  71. package/src/server.js +185 -14
  72. package/src/.registry-meta-key.pem +0 -3
@@ -0,0 +1,232 @@
1
+ /**
2
+ * profile/exemplar-capture.js — Voice exemplar capture (V1).
3
+ *
4
+ * Turns a raw piece of the USER's OWN natural-language writing (their prompt
5
+ * text, or a git commit message subject+body) into a bounded, PII-scrubbed
6
+ * Exemplar and appends it to the transient exemplar store. This is the capture
7
+ * side of the "draft in your voice" FEATURE — it is NOT a stylometry/authorship
8
+ * detector. No statistical ruler, no corpus, no LLM, no network.
9
+ *
10
+ * What we DELIBERATELY skip (don't pollute the voice set):
11
+ * - empties / whitespace-only;
12
+ * - slash-commands, IJFW control prompts (`*`, `/`, `#` leading, "ijfw off");
13
+ * - pasted MACHINE OUTPUT — fenced code blocks, stack traces, diffs, JSON/log
14
+ * dumps. We want the user's natural-language writing, not code they pasted;
15
+ * - near-duplicates already in the store (dedup-by-id covers exact repeats;
16
+ * we also normalize trivially so "Fix it." and "fix it" collapse).
17
+ *
18
+ * Best-effort + fully isolated: every entrypoint is wrapped so a malformed
19
+ * payload or a store write error NEVER throws into the caller (the hook must
20
+ * never crash Claude Code). Zero deps, Node built-ins only. NO LLM calls.
21
+ */
22
+
23
+ import { appendExemplar, exemplarId, EXEMPLAR_TEXT_MAX } from './exemplar-store.js';
24
+
25
// Regexes for direct identifiers that must be removed before an exemplar is
// persisted. The list is a LOCAL counterpart (in spirit) of capture.js's
// EDIT_PII_PATTERNS so that this module does not import capture.js; if either
// list grows, update the other by hand.
const PII_PATTERNS = [
  // Email address.
  /[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi,

  // US SSN shape (3-2-4 digits).
  /\b\d{3}-\d{2}-\d{4}\b/g,

  // Card-like run of 13-19 digits, optionally separated by spaces or dashes.
  // eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
  /\b(?:\d[ -]?){13,19}\b/g,

  // Phone-like run of at least eight digits with common separators.
  // eslint-disable-next-line security/detect-unsafe-regex -- linear-time PII redactor: each repetition consumes a mandatory digit, no ambiguous/overlapping quantifier; not ReDoS-exploitable.
  /(?:\+?\d[\s().-]?){7,}\d/g,

  // `name = value` / `name: value` where the name contains a secret-ish word.
  /\b[A-Za-z0-9_-]*(?:secret|token|api[_-]?key|password|passwd|bearer)[A-Za-z0-9_-]*\s*[:=]\s*\S+/gi,

  // Absolute home-directory prefixes leak the OS username. The prefix and the
  // user segment are matched together; whatever follows them is left alone.
  /(?:\/Users\/|\/home\/)[^/\s]+/g, // unix: /Users/<name>, /home/<name>
  /[A-Za-z]:\\Users\\[^\\\s]+/g, // windows: C:\Users\<name>
];
42
+
43
/**
 * PII-scrub: remove every direct identifier matched by PII_PATTERNS.
 *
 * Each match is replaced by a single space (there is no visible placeholder
 * token). `null` / `undefined` input is treated as the empty string; any other
 * value is stringified first.
 *
 * @param {*} text Raw text to scrub.
 * @returns {string} The text with every pattern match blanked out.
 */
function scrub(text) {
  const input = String(text == null ? '' : text);
  return PII_PATTERNS.reduce((acc, pattern) => acc.replace(pattern, ' '), input);
}
49
+
50
/**
 * Heuristic: does `text` look like pasted machine output rather than prose?
 *
 * Deliberately conservative — skipping a borderline snippet is preferred over
 * storing a log dump as "voice". Checks, in order:
 *   1. a fenced code block, a unified-diff header, or a stack-trace frame;
 *   2. a multi-line text containing a shell-prompt style line (`$ `, `# `, `> `);
 *   3. a body that starts/ends with brackets AND parses as strict JSON;
 *   4. symbol density: at least two multi-letter words and at least as many
 *      code-ish symbols as words;
 *   5. three or more non-blank lines, most of which end in `;`, `{` or `}`.
 *
 * @param {*} text Candidate text (falsy values count as empty).
 * @returns {boolean} true when the text should be treated as machine output.
 */
function looksMachine(text) {
  const body = String(text || '');
  const trimmed = body.trim();
  if (trimmed === '') return false;

  // (1) Unambiguous single-regex signatures.
  const signatures = [
    /```/, // fenced code block
    /^\s*(?:diff --git|@@ -\d|index [0-9a-f]{7})/m, // diff
    /^\s*(?:at\s+\S+\s+\(.*:\d+:\d+\)|Traceback \(most recent call last\))/m, // stack trace
  ];
  if (signatures.some((re) => re.test(body))) return true;

  // (2) Shell session dump: only counts when the text spans several lines.
  if (body.includes('\n') && /^\s*[$#>]\s+\S+/m.test(body)) return true;

  // (3) JSON / structured dump: bracket-delimited and strictly parseable.
  if (/^[[{]/.test(trimmed) && /[}\]]\s*$/.test(trimmed)) {
    let parses = true;
    try {
      JSON.parse(trimmed);
    } catch {
      parses = false; // not strict JSON — fall through to the density checks
    }
    if (parses) return true;
  }

  // (4) Symbol density. The floor is low (two words) so that a short one-line
  // code paste can still trigger; prose has far more words than symbols.
  const count = (re) => (body.match(re) || []).length;
  const wordCount = count(/[A-Za-z]{2,}/g);
  const symbolCount = count(/[{}();=<>/\\|&]/g);
  if (wordCount >= 2 && symbolCount >= wordCount) return true;

  // (5) Majority of non-blank lines end like code statements/blocks.
  const nonBlank = body.split('\n').filter((line) => line.trim());
  if (nonBlank.length < 3) return false;
  let codeLines = 0;
  for (const line of nonBlank) {
    if (/[;{}]\s*$/.test(line.trim())) codeLines += 1;
  }
  return codeLines / nonBlank.length > 0.5;
}
85
+
86
/**
 * True when `text` is a control / non-voice prompt that must never be captured:
 * empty after trimming, starting with `*`, `/` or `#`, or containing the
 * phrase "ijfw off" (case-insensitive, any whitespace between the words).
 *
 * @param {*} text Candidate prompt (falsy values count as empty).
 * @returns {boolean}
 */
function isControlPrompt(text) {
  const trimmed = String(text || '').trim();
  return (
    trimmed.length === 0 ||
    /^[*/#]/.test(trimmed) || // slash-command / skip / comment
    /\bijfw\s+off\b/i.test(trimmed) // disable phrase
  );
}
94
+
95
/**
 * Flatten text to single-space-separated tokens with no leading/trailing
 * whitespace. Used for length and heuristic measurement only — the stored
 * exemplar text is not produced by this function.
 *
 * @param {*} text Input (falsy values count as empty).
 * @returns {string}
 */
function collapse(text) {
  const tokens = String(text || '').split(/\s+/);
  return tokens.filter(Boolean).join(' ');
}
99
+
100
/**
 * classifyRegister(text, source) -> 'terse' | 'casual' | 'formal' | 'commit' | 'doc'.
 *
 * Plain rule cascade (no ML); the first matching rule wins:
 *   1. source === 'commit-msg'                                   → 'commit'
 *   2. markdown header / bullet / numbered list, or a blank-line
 *      paragraph break, AND more than 80 flattened characters    → 'doc'
 *   3. at most 60 characters, at most one punctuation mark, and
 *      either a lowercase first letter or no `.`/`!`/`?` at all   → 'terse'
 *   4. at least 110 characters, two or more sentence ends, and an
 *      uppercase first letter                                    → 'formal'
 *   5. anything else                                             → 'casual'
 *
 * Lengths are measured on the whitespace-collapsed text.
 *
 * @param {*} text Text to classify (falsy values count as empty).
 * @param {string} [source] Capture source; only 'commit-msg' is special-cased.
 * @returns {string} One of the five register labels.
 */
export function classifyRegister(text, source) {
  if (source === 'commit-msg') return 'commit';

  const raw = String(text || '');
  const flat = collapse(raw);
  const length = flat.length;

  // Rule 2 — document-like structure.
  const structureSignals = [
    /(^|\n)\s{0,3}#{1,6}\s+\S/, // markdown header
    /(^|\n)\s*[-*+]\s+\S/, // bullet list item
    /(^|\n)\s*\d+\.\s+\S/, // numbered list item
  ];
  const structured =
    structureSignals.some((re) => re.test(raw)) || /\n\s*\n/.test(raw.trim());
  if (structured && length > 80) return 'doc';

  // Rule 3 — short and informal. A short, capitalized and properly punctuated
  // request is NOT terse; it falls through to 'casual'.
  const punctuationCount = (flat.match(/[.,;:!?]/g) || []).length;
  const informal = /^[a-z]/.test(flat) || !/[.!?]/.test(flat);
  if (length <= 60 && informal && punctuationCount <= 1) return 'terse';

  // Rule 4 — the 110-character floor separates one capitalized request from
  // genuine multi-sentence prose.
  const sentenceCount = (flat.match(/[.!?](?:\s|$)/g) || []).length;
  if (length >= 110 && sentenceCount >= 2 && /^[A-Z]/.test(flat)) return 'formal';

  return 'casual';
}
140
+
141
/**
 * buildExemplar({ text, source, ts }) -> Exemplar | null.
 *
 * Builds a fully-formed Exemplar record from one piece of raw writing, or
 * returns null when the input should be skipped: empty/whitespace-only, a
 * control prompt (only checked when source is not 'commit-msg'), machine
 * output, or text that carries fewer than two multi-letter words after
 * scrubbing. The text is PII-scrubbed and bounded to EXEMPLAR_TEXT_MAX BEFORE
 * the id is computed, so the id is stable over the final stored text.
 *
 * @param {object} [args]
 * @param {*} [args.text] Raw writing sample (null/undefined → empty).
 * @param {string} [args.source='prompt'] 'commit-msg' or anything else (→ 'prompt').
 * @param {string|number|Date} [args.ts] Capture time. A missing OR unparseable
 *   value falls back to the current time instead of throwing.
 * @returns {{id: string, text: string, register: string, source: string, ts: string}|null}
 */
export function buildExemplar({ text, source = 'prompt', ts } = {}) {
  const original = String(text == null ? '' : text);
  if (!original.trim()) return null;
  if (source !== 'commit-msg' && isControlPrompt(original)) return null;
  if (looksMachine(original)) return null;

  // Scrub, then bound. Line structure is kept (only runs of spaces/tabs and
  // 3+ consecutive newlines are squeezed) so the snippet still reads like the
  // user's writing.
  let finalText = scrub(original).replace(/[ \t]+/g, ' ').replace(/\n{3,}/g, '\n\n').trim();
  if (!finalText) return null;
  if (finalText.length > EXEMPLAR_TEXT_MAX) {
    finalText = finalText.slice(0, EXEMPLAR_TEXT_MAX).trim();
  }

  // After scrub+bound the snippet must still carry some natural-language
  // signal: at least two runs of two or more letters.
  if ((finalText.match(/[A-Za-z]{2,}/g) || []).length < 2) return null;

  // Date#toISOString throws RangeError on an invalid date, so an unparseable
  // `ts` must be detected up front; it degrades to "now" like a missing one.
  const requested = ts ? new Date(ts) : null;
  const stamp = requested && !Number.isNaN(requested.getTime()) ? requested : new Date();

  const src = source === 'commit-msg' ? 'commit-msg' : 'prompt';
  return {
    id: exemplarId(finalText),
    text: finalText,
    register: classifyRegister(finalText, src),
    source: src,
    ts: stamp.toISOString(),
  };
}
176
+
177
/**
 * captureMessage({ text, ts, opts }) -> { ok, skipped?, id?, removed? } | { ok:false, code, message }.
 *
 * Best-effort capture of the user's own prompt text as a 'prompt'-source
 * exemplar. Never throws: any exception is reported as
 * { ok:false, code:'ECAPTURE' }. Input that buildExemplar rejects yields
 * { ok:true, skipped:true }; a store failure is passed through as
 * { ok:false, code, message }.
 *
 * @param {object} [args]
 * @param {*} [args.text] Prompt text.
 * @param {*} [args.ts] Capture time, forwarded to buildExemplar.
 * @param {object} [args.opts] Options forwarded to appendExemplar.
 * @returns {object} Result envelope as described above.
 */
export function captureMessage({ text, ts, opts } = {}) {
  let outcome;
  try {
    const exemplar = buildExemplar({ text, source: 'prompt', ts });
    if (exemplar) {
      const written = appendExemplar(exemplar, opts || {});
      outcome = written.ok
        ? { ok: true, id: exemplar.id, removed: written.removed }
        : { ok: false, code: written.code, message: written.message };
    } else {
      outcome = { ok: true, skipped: true };
    }
  } catch (err) {
    // Never surface into the hook caller.
    outcome = { ok: false, code: 'ECAPTURE', message: err && err.message };
  }
  return outcome;
}
197
+
198
/**
 * captureCommitMessage(msg, opts?) -> { ok, skipped?, id?, removed? } | { ok:false, code, message }.
 *
 * Best-effort capture of a git commit message (subject + body) as a
 * 'commit-msg' exemplar. `msg` may be the raw `git log -1 --format=%B` style
 * string. Before building the exemplar, these lines are dropped so that only
 * the author's prose remains:
 *   - lines starting with `#` (git comment lines),
 *   - `Co-Authored-By:` / `Signed-off-by:` trailers (case-insensitive),
 *   - lines containing "Generated with [Claude Code]",
 *   - lines starting with the 🤖 emoji.
 * Wiring this into a real git hook is out of scope for V1 — this is the seam.
 *
 * Never throws: any exception is reported as { ok:false, code:'ECAPTURE' }.
 *
 * @param {*} msg Raw commit message (null/undefined → empty → skipped).
 * @param {object|null} [opts] Options forwarded to appendExemplar; `opts.ts`
 *   is also used as the capture time. `null` is treated like `{}`.
 * @returns {object} Result envelope as described above.
 */
export function captureCommitMessage(msg, opts = {}) {
  try {
    // Normalize once, BEFORE the first property read: the default parameter
    // only covers `undefined`, so an explicit `null` would otherwise throw.
    const options = opts || {};
    const raw = String(msg == null ? '' : msg);
    if (!raw.trim()) return { ok: true, skipped: true };

    const isBoilerplate = (line) => {
      const t = line.trim();
      return (
        t.startsWith('#') ||
        /^(?:Co-Authored-By|Signed-off-by):/i.test(t) ||
        /Generated with \[Claude Code\]/i.test(t) ||
        t.startsWith('🤖')
      );
    };
    // Split on CRLF as well as LF so kept lines do not carry a stray '\r'.
    const cleaned = raw
      .split(/\r?\n/)
      .filter((line) => !isBoilerplate(line))
      .join('\n');

    const ex = buildExemplar({ text: cleaned, source: 'commit-msg', ts: options.ts });
    if (!ex) return { ok: true, skipped: true };
    const r = appendExemplar(ex, options);
    if (!r.ok) return { ok: false, code: r.code, message: r.message };
    return { ok: true, id: ex.id, removed: r.removed };
  } catch (err) {
    // Never surface into the caller.
    return { ok: false, code: 'ECAPTURE', message: err && err.message };
  }
}
@@ -0,0 +1,138 @@
1
+ // V2 — exemplar retrieval (voice exemplars feature).
2
+ //
3
+ // ZERO-LLM serve path ("the moat"). This module ranks the user's OWN real
4
+ // writing samples (exemplars) so the agent can few-shot draft in their voice.
5
+ // It MUST NOT import any LLM / network / embedder module. Pure ranking only:
6
+ // register-match boost + lexical similarity (BM25) + recency tiebreak.
7
+ //
8
+ // Exemplar record contract (shared with the store, V1):
9
+ // { id, text, register: 'terse'|'casual'|'formal'|'commit'|'doc',
10
+ // source: 'prompt'|'commit-msg', ts: ISO-string }
11
+
12
+ import { createRequire } from 'node:module';
13
+ import { searchCorpus } from '../search-bm25.js';
14
+
15
// The store module (exemplar-store.js → listExemplars) is only needed when the
// caller does NOT inject a candidate set, so it is loaded LAZILY through a
// synchronous require (the original author notes require-of-ESM is stable in
// Node ≥22). A static import would tie this module's load to the store being
// present; the injected-exemplars path stays fully decoupled from it.
const require = createRequire(import.meta.url);

// Returns whatever the store's listExemplars() yields, or [] when the store
// cannot be loaded, lacks listExemplars, or throws (treated as cold start).
function loadFromStore() {
  let loaded = [];
  try {
    const store = require('./exemplar-store.js');
    const canList = Boolean(store) && typeof store.listExemplars === 'function';
    if (canList) loaded = store.listExemplars();
  } catch {
    loaded = [];
  }
  return loaded;
}
31
+
32
// Weights of the three ranking signals, summed into one composite score.
//   register — largest weight: a same-register exemplar should come first;
//   lexical  — main separator among candidates with the same register result;
//   recency  — small nudge so newer writing wins when the rest is equal.
const W_REGISTER = 1.0;
const W_LEXICAL = 0.6;
const W_RECENCY = 0.1;
38
+
39
// Convert a timestamp string to epoch milliseconds via Date.parse. Anything
// that is not a non-empty string, or that does not parse, maps to 0 and is
// therefore treated as the oldest possible value.
function tsMillis(ts) {
  const parsed = typeof ts === 'string' && ts ? Date.parse(ts) : NaN;
  return Number.isNaN(parsed) ? 0 : parsed;
}
45
+
46
// Build a Map from exemplar id to a recency factor in [0, 1], scaled linearly
// between the oldest (0) and newest (1) timestamp in the given set. When all
// timestamps are equal (or the set is empty) every factor is 0, so recency
// cannot reorder an otherwise tied ranking.
function recencyFactors(exemplars) {
  const stamps = exemplars.map((item) => tsMillis(item.ts));
  const oldest = stamps.reduce((lo, value) => (value < lo ? value : lo), Infinity);
  const newest = stamps.reduce((hi, value) => (value > hi ? value : hi), -Infinity);
  const range = newest - oldest;

  const factors = new Map();
  exemplars.forEach((item, idx) => {
    factors.set(item.id, range > 0 ? (stamps[idx] - oldest) / range : 0);
  });
  return factors;
}
64
+
65
/**
 * Pick up to `k` exemplars for the current drafting task, best first.
 * Ranking is deterministic: the same inputs always give the same order.
 *
 * Composite score = W_REGISTER * (register matches ? 1 : 0)
 *                 + W_LEXICAL  * (BM25 score normalized to the set's maximum)
 *                 + W_RECENCY  * (recency factor within the set).
 * Ties are broken by newer timestamp, then by ascending id.
 *
 * @param {object} [args]
 * @param {string} [args.register] Register to prefer (strong boost).
 * @param {string} [args.taskText] Drafting task text (lexical signal).
 * @param {number} [args.k=3] Max exemplars to return; non-finite, <= 0, or
 *   values that floor to 0 give [].
 * @param {Array} [args.exemplars] Candidate set (DI); when not an array the
 *   candidates are loaded from the store.
 * @param {object} [args.env] Unused here; kept for interface parity.
 * @returns {Array} The original exemplar records, ranked best-first.
 */
export function retrieveExemplars({
  register,
  taskText,
  k = 3,
  exemplars = null,
  env = process.env, // eslint-disable-line no-unused-vars -- interface parity
} = {}) {
  const source = Array.isArray(exemplars) ? exemplars : loadFromStore();
  if (!Array.isArray(source) || source.length === 0) return [];

  // filter() yields a fresh array, so the caller's / store's array is never
  // mutated; non-object entries are dropped.
  const pool = source.filter((item) => item && typeof item === 'object');
  if (pool.length === 0) return [];

  const limit = Number.isFinite(k) && k > 0 ? Math.floor(k) : 0;
  if (limit === 0) return [];

  // Lexical signal: BM25 over the in-memory candidates, normalized to [0, 1]
  // within this set so its weight stays comparable to the register boost.
  const lexical = new Map();
  const query = typeof taskText === 'string' ? taskText.trim() : '';
  if (query) {
    const docs = pool.map((item) => ({
      id: item.id,
      text: typeof item.text === 'string' ? item.text : '',
      meta: null,
    }));
    // Ask for the whole set so every candidate with a score is seen.
    const hits = searchCorpus(query, docs, { limit: docs.length });
    let top = 0;
    for (const hit of hits) {
      if (hit.score > top) top = hit.score;
    }
    if (top > 0) {
      for (const hit of hits) lexical.set(hit.id, hit.score / top);
    }
  }

  const recency = recencyFactors(pool);

  const ranked = pool.map((item) => {
    const registerMatch = register && item.register === register ? 1 : 0;
    const lex = lexical.get(item.id) ?? 0;
    const rec = recency.get(item.id) ?? 0;
    return {
      item,
      score: W_REGISTER * registerMatch + W_LEXICAL * lex + W_RECENCY * rec,
    };
  });

  // Order: composite score desc → newer ts → id asc.
  const byRank = (left, right) => {
    if (right.score !== left.score) return right.score - left.score;
    const rightMs = tsMillis(right.item.ts);
    const leftMs = tsMillis(left.item.ts);
    if (rightMs !== leftMs) return rightMs - leftMs;
    const leftId = String(left.item.id);
    const rightId = String(right.item.id);
    if (leftId < rightId) return -1;
    return leftId > rightId ? 1 : 0;
  };
  ranked.sort(byRank);

  return ranked.slice(0, limit).map((entry) => entry.item);
}
@@ -0,0 +1,314 @@
1
+ /**
2
+ * profile/exemplar-store.js — Voice exemplars (V1: capture + transient store).
3
+ *
4
+ * An "exemplar" is a short raw snippet of the USER's OWN natural-language
5
+ * writing — their prompt text, or a git commit message subject+body. Holding a
6
+ * handful of these lets a downstream drafter few-shot the user's real voice
7
+ * (NOT a stylometry/authorship ruler — this is a FEATURE, not a research proof).
8
+ *
9
+ * TIER (deliberately narrow): this is a LOCAL-ONLY, TRANSIENT tier. It is
10
+ * - bounded (MAX_EXEMPLARS records; oldest-by-ts evicted when over cap),
11
+ * - dedup-by-id,
12
+ * - one-shot wipeable (clearExemplars),
13
+ * and it is NEVER promoted to the durable/global/cross-project user profile.
14
+ * It lives beside the profile in the SAME directory so it inherits the profile
15
+ * path policy for free — including the test-context auto-tmpdir guarantee
16
+ * (path-policy.js homedirProfileDefault), so a forgetful test can NEVER write
17
+ * into the user's real ~/.ijfw/profile.
18
+ *
19
+ * Storage: a single JSONL file under profileDir(). One Exemplar record per line.
20
+ * Reads are symlink-guarded and size-capped; writes are atomic (temp in the same
21
+ * dir → fsync → rename), mirroring store.js. Zero deps, Node built-ins only.
22
+ * NO LLM calls.
23
+ *
24
+ * Exemplar record (the SHARED contract — V2/V4 code against this):
25
+ * {
26
+ * id: string, // stable: `exemplar::${sha8(text)}`
27
+ * text: string, // raw snippet, bounded (~600 chars), PII-scrubbed
28
+ * register: string, // 'terse' | 'casual' | 'formal' | 'commit' | 'doc'
29
+ * source: string, // 'prompt' | 'commit-msg'
30
+ * ts: string // ISO-8601
31
+ * }
32
+ */
33
+
34
+ import {
35
+ openSync,
36
+ writeFileSync,
37
+ fsyncSync,
38
+ closeSync,
39
+ renameSync,
40
+ unlinkSync,
41
+ readFileSync,
42
+ existsSync,
43
+ mkdirSync,
44
+ lstatSync,
45
+ constants as fsConstants,
46
+ } from 'node:fs';
47
+ import { join } from 'node:path';
48
+ import { randomBytes, createHash } from 'node:crypto';
49
+
50
+ import { resolveOverrideDir, homedirProfileDefault } from './path-policy.js';
51
+
52
/** File name of the JSONL store inside the profile directory. */
const EXEMPLAR_FILE = 'exemplars.jsonl';

/** Default cap on retained exemplars; over the cap the oldest by `ts` is evicted. */
export const MAX_EXEMPLARS = 200;

/** Upper bound, in characters, on a single exemplar's text. */
export const EXEMPLAR_TEXT_MAX = 600;

/**
 * Upper bound, in bytes, on the store file we are willing to read (2 MiB).
 * A legitimate store holds at most MAX_EXEMPLARS short records
 * (200 × 600 chars ≈ 120 KiB), so a larger file is treated as corrupt or
 * hand-edited and is not slurped into memory.
 */
const MAX_STORE_BYTES = 2 * 1024 ** 2;
67
+
68
/**
 * Short stable content hash: the first 8 hex characters of the SHA-256 of the
 * stringified input (null/undefined hash as the empty string). Used as the
 * dedupe key inside the exemplar id.
 *
 * @param {*} text Content to hash.
 * @returns {string} 8 lowercase hex characters.
 */
export function sha8(text) {
  const input = text == null ? '' : String(text);
  const digest = createHash('sha256').update(input).digest('hex');
  return digest.substring(0, 8);
}
72
+
73
/**
 * The stable exemplar id for a given (already-final) text:
 * the literal prefix `exemplar::` followed by sha8(text).
 *
 * @param {*} text Final stored text.
 * @returns {string}
 */
export function exemplarId(text) {
  return ['exemplar', sha8(text)].join('::');
}
77
+
78
/**
 * Absolute path of the JSONL exemplar store for the given environment.
 *
 * The directory is resolved from `env` explicitly (never by mutating
 * process.env): an IJFW_PROFILE_DIR override accepted by resolveOverrideDir()
 * wins; otherwise homedirProfileDefault(['.ijfw', 'profile'], env) is used.
 * NOTE(review): the safety properties of those two helpers (homedir scoping,
 * symlink refusal, test-context tmpdir) live in path-policy.js and are not
 * visible from this file.
 *
 * @param {object} [env=process.env] Environment to resolve against.
 * @returns {string} `<profile dir>/exemplars.jsonl`.
 */
export function exemplarStorePath(env = process.env) {
  let dir = resolveOverrideDir(env.IJFW_PROFILE_DIR, env);
  if (!dir) {
    dir = homedirProfileDefault(['.ijfw', 'profile'], env);
  }
  return join(dir, EXEMPLAR_FILE);
}
94
+
95
/** Create `dir` (and any missing parents) unless something already exists at that path. */
function ensureDir(dir) {
  if (existsSync(dir)) return;
  mkdirSync(dir, { recursive: true });
}
98
+
99
/**
 * True iff `p` exists AND is a symbolic link (checked with lstat, so the link
 * itself is inspected, not its target). Any lstat failure — including a
 * missing path — yields false.
 */
function isSymlink(p) {
  let stats;
  try {
    stats = lstatSync(p);
  } catch {
    return false;
  }
  return stats.isSymbolicLink();
}
107
+
108
/**
 * Loose contract check for an exemplar record.
 *
 * A record is valid when it is a non-null object whose `id` is a non-empty
 * string and whose `text`, `register`, `source` and `ts` are all strings.
 * Field VALUES (register names, ISO format, text length) are not checked.
 *
 * @param {*} r Candidate record.
 * @returns {boolean} Always a strict boolean (never null / '' / undefined).
 */
function isValidRecord(r) {
  if (r === null || typeof r !== 'object') return false;
  if (typeof r.id !== 'string' || r.id === '') return false;
  return ['text', 'register', 'source', 'ts'].every((field) => typeof r[field] === 'string');
}
121
+
122
/**
 * Read the raw record set from disk (insertion order = file order).
 *
 * Best-effort and fail-soft — the transient tier is reconstructable — so every
 * problem yields [] rather than an exception:
 *   - the path is missing or cannot be lstat'ed,
 *   - the path is a symlink,
 *   - the path is not a regular file (directory, FIFO, device, …),
 *   - the file is larger than MAX_STORE_BYTES,
 *   - the file cannot be read.
 * Lines that are blank, are not valid JSON, or fail isValidRecord are skipped
 * individually.
 *
 * @param {object} [opts]
 * @param {string} [opts.path] Explicit store path (overrides `env`).
 * @param {object} [opts.env] Environment used to resolve the default path.
 * @returns {Array<object>} Valid records in file order.
 */
function readRaw(opts = {}) {
  const path = opts.path || exemplarStorePath(opts.env || process.env);

  // A single lstat answers "exists?", "symlink?", "regular file?" and "size?"
  // at once. Refusing non-regular files matters: reading a FIFO or device
  // synchronously could block the caller indefinitely.
  let st;
  try {
    st = lstatSync(path);
  } catch {
    return [];
  }
  if (st.isSymbolicLink() || !st.isFile() || st.size > MAX_STORE_BYTES) return [];

  let raw;
  try {
    raw = readFileSync(path, 'utf8');
  } catch {
    return [];
  }

  const out = [];
  for (const line of raw.split('\n')) {
    const t = line.trim();
    if (!t) continue;
    let rec;
    try {
      rec = JSON.parse(t);
    } catch {
      continue; // malformed line — skip it, keep the rest
    }
    if (isValidRecord(rec)) out.push(rec);
  }
  return out;
}
157
+
158
/**
 * Atomically replace the JSONL store with `records` (written in array order).
 *
 * Sequence: refuse a symlinked target → make sure the parent directory exists
 * → write the body to an exclusive temp file beside the target (mode 0o600,
 * O_EXCL | O_NOFOLLOW) → fsync → close → re-check the target is not a symlink
 * (narrows the TOCTOU window) → rename over the target. On any failure the
 * temp file is removed on a best-effort basis.
 *
 * @param {Array<object>} records Records to persist, one JSON object per line.
 * @param {object} [opts]
 * @param {string} [opts.path] Explicit store path (overrides `env`).
 * @param {object} [opts.env] Environment used to resolve the default path.
 * @returns {{ok: true}|{ok: false, code: string, message: string}} Never throws.
 */
function writeRaw(records, opts = {}) {
  const target = opts.path || exemplarStorePath(opts.env || process.env);
  const failure = (code, message) => ({ ok: false, code, message });

  if (isSymlink(target)) {
    return failure('EEXEMPLAR_SYMLINK', `refusing symlinked target: ${target}`);
  }

  try {
    ensureDir(join(target, '..'));
  } catch (err) {
    return failure(err.code || 'EMKDIR', err.message);
  }

  const lines = records.map((r) => JSON.stringify(r));
  const body = lines.length ? `${lines.join('\n')}\n` : '';
  const tmp = `${target}.tmp.${process.pid}.${randomBytes(4).toString('hex')}`;
  const discardTmp = () => {
    try {
      unlinkSync(tmp);
    } catch {
      /* best-effort cleanup */
    }
  };

  let fd = null;
  try {
    const flags =
      fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_NOFOLLOW;
    fd = openSync(tmp, flags, 0o600);
    writeFileSync(fd, body, 'utf8');
    fsyncSync(fd);
    closeSync(fd);
    fd = null; // closed — the catch below must not close it again

    if (isSymlink(target)) {
      discardTmp();
      return failure('EEXEMPLAR_SYMLINK', `target became a symlink: ${target}`);
    }
    renameSync(tmp, target);
    return { ok: true };
  } catch (err) {
    if (fd != null) {
      try {
        closeSync(fd);
      } catch {
        /* already failing; ignore close errors */
      }
    }
    discardTmp();
    return failure(err.code || 'EWRITE', err.message);
  }
}
199
+
200
/**
 * Chronological sort key: `<ts>::<id>`, with a missing part replaced by ''.
 * ISO-8601 timestamps sort lexicographically, and the id breaks ties.
 */
function tsKey(r) {
  return [r.ts || '', r.id || ''].join('::');
}
204
+
205
/**
 * appendExemplar(rec, opts?) -> { ok, removed?, code?, message? }.
 *
 * Adds one exemplar to the transient store:
 *   - dedup by `id`: any stored record with the same id is dropped and the new
 *     record is appended at the end (a refresh, not growth);
 *   - bound: while the set exceeds the cap, the record with the smallest
 *     `ts`/`id` key is evicted. `removed` counts only these evictions, not the
 *     dedup replacement;
 *   - the whole file is then rewritten through writeRaw.
 *
 * @param {object} rec Exemplar record; must satisfy isValidRecord.
 * @param {object} [opts] Store options; `opts.max` (positive integer)
 *   overrides MAX_EXEMPLARS, the rest is forwarded to readRaw / writeRaw.
 * @returns {object} { ok:true, removed } on success; { ok:false,
 *   code:'EINVALID', message } for a bad record; the writeRaw failure object
 *   on a write error.
 */
export function appendExemplar(rec, opts = {}) {
  if (!isValidRecord(rec)) {
    return { ok: false, code: 'EINVALID', message: 'record does not satisfy the exemplar contract' };
  }
  const cap = Number.isInteger(opts.max) && opts.max > 0 ? opts.max : MAX_EXEMPLARS;

  // Prior copies of this id are removed; the fresh record goes last.
  const records = [...readRaw(opts).filter((stored) => stored.id !== rec.id), rec];

  // Evict one oldest record per pass; file order of the survivors is kept.
  let removed = 0;
  for (; records.length > cap; removed += 1) {
    const oldestIdx = records.reduce(
      (best, candidate, idx) => (tsKey(candidate) < tsKey(records[best]) ? idx : best),
      0,
    );
    records.splice(oldestIdx, 1);
  }

  const written = writeRaw(records, opts);
  return written.ok ? { ok: true, removed } : written;
}
245
+
246
/**
 * listExemplars(opts?) -> Exemplar[], NEWEST-first (by ts, then id for ties).
 * Returns a freshly read array; a missing or unreadable store yields [].
 *
 * @param {object} [opts] Forwarded to readRaw (`path` / `env`).
 * @returns {Array<object>}
 */
export function listExemplars(opts = {}) {
  const newestFirst = (a, b) => {
    const keyA = tsKey(a);
    const keyB = tsKey(b);
    if (keyA === keyB) return 0;
    return keyA < keyB ? 1 : -1;
  };
  return readRaw(opts).sort(newestFirst);
}
260
+
261
/**
 * forgetExemplars(idOrPattern, opts?) -> { ok, removed, code?, message? }.
 *
 * Selection rules:
 *   - a RegExp → every record whose id OR text matches is removed. The
 *     regex's `lastIndex` is reset before each test, so a pattern carrying the
 *     `g` or `y` flag behaves the same for every record;
 *   - a string equal to an existing record id → ONLY the record(s) with that
 *     exact id are removed (precise forget);
 *   - any other non-empty string → case-insensitive SUBSTRING match against
 *     id and text (the string is not interpreted as a regex);
 *   - empty / null / undefined → nothing is removed.
 * Removing nothing is still { ok:true, removed:0 } and does not rewrite the
 * file. On a write error the result is { ok:false, removed:0, code, message }.
 *
 * @param {string|RegExp} idOrPattern What to forget.
 * @param {object} [opts] Forwarded to readRaw / writeRaw (`path` / `env`).
 * @returns {object} Result envelope as described above.
 */
export function forgetExemplars(idOrPattern, opts = {}) {
  const existing = readRaw(opts);
  if (!existing.length) return { ok: true, removed: 0 };

  let predicate;
  if (idOrPattern instanceof RegExp) {
    // RegExp#test on a global/sticky regex resumes from lastIndex; without a
    // reset, a match in one record would make the next test start mid-string.
    const matches = (value) => {
      idOrPattern.lastIndex = 0;
      return idOrPattern.test(value);
    };
    predicate = (r) => matches(r.id) || matches(r.text);
  } else {
    const s = String(idOrPattern == null ? '' : idOrPattern);
    if (!s) return { ok: true, removed: 0 };
    if (existing.some((r) => r.id === s)) {
      // Exact id wins: do not also sweep up records that merely mention it.
      predicate = (r) => r.id === s;
    } else {
      const lower = s.toLowerCase();
      predicate = (r) =>
        r.id.toLowerCase().includes(lower) || r.text.toLowerCase().includes(lower);
    }
  }

  const kept = existing.filter((r) => !predicate(r));
  const removed = existing.length - kept.length;
  if (removed === 0) return { ok: true, removed: 0 };

  const w = writeRaw(kept, opts);
  if (!w.ok) return { ok: false, removed: 0, code: w.code, message: w.message };
  return { ok: true, removed };
}
295
+
296
/**
 * clearExemplars(opts?) -> { ok, removed, code?, message? }.
 *
 * One-shot wipe of the transient store. When valid records exist, the store is
 * rewritten as an empty file and their count is returned. When none exist, any
 * file still sitting at the store path (unless it is a symlink) is deleted on
 * a best-effort basis and the result is { ok:true, removed:0 }. A write error
 * yields { ok:false, removed:0, code, message }.
 *
 * @param {object} [opts] Forwarded to readRaw / writeRaw (`path` / `env`).
 * @returns {object} Result envelope as described above.
 */
export function clearExemplars(opts = {}) {
  const count = readRaw(opts).length;

  if (count > 0) {
    const written = writeRaw([], opts);
    return written.ok
      ? { ok: true, removed: count }
      : { ok: false, removed: 0, code: written.code, message: written.message };
  }

  // Nothing readable to clear; still drop a stale file, tolerating its absence.
  const path = opts.path || exemplarStorePath(opts.env || process.env);
  if (existsSync(path) && !isSymlink(path)) {
    try {
      unlinkSync(path);
    } catch {
      /* best-effort removal */
    }
  }
  return { ok: true, removed: 0 };
}