@ijfw/memory-server 1.5.6 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/bin/ijfw-dashboard +20 -1
  2. package/package.json +4 -3
  3. package/src/audit-roster.js +89 -12
  4. package/src/brain/tiered-llm.js +57 -7
  5. package/src/cross-orchestrator-cli.js +390 -4
  6. package/src/cross-project-search.js +39 -1
  7. package/src/dashboard-server.js +23 -1
  8. package/src/dream/runner.mjs +560 -8
  9. package/src/handlers/brain-handler.js +101 -1
  10. package/src/importers/discover.js +1 -1
  11. package/src/memory/bench-metrics.js +289 -0
  12. package/src/memory/benchmark.js +1 -1
  13. package/src/memory/search.js +53 -1
  14. package/src/model-refresh.js +4 -2
  15. package/src/orchestrator/plan-checker.js +1 -1
  16. package/src/profile/audit.js +671 -0
  17. package/src/profile/capture.js +871 -0
  18. package/src/profile/derive-dialectic.js +242 -0
  19. package/src/profile/derive-heuristic.js +733 -0
  20. package/src/profile/derive.js +156 -0
  21. package/src/profile/egress.js +306 -0
  22. package/src/profile/eval/build-real-probes.mjs +197 -0
  23. package/src/profile/eval/corpus-from-reddit.mjs +166 -0
  24. package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
  25. package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
  26. package/src/profile/eval/gate-b-behavior.mjs +420 -0
  27. package/src/profile/eval/gate-b-decision-run.mjs +171 -0
  28. package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
  29. package/src/profile/eval/gate-b-run.mjs +417 -0
  30. package/src/profile/eval/gate-b-run.test.mjs +204 -0
  31. package/src/profile/eval/gate-c-capture.mjs +323 -0
  32. package/src/profile/eval/harness.mjs +551 -0
  33. package/src/profile/eval/instrument-validation.mjs +248 -0
  34. package/src/profile/eval/instrument-validation.test.mjs +125 -0
  35. package/src/profile/eval/multi-subject-harness.mjs +106 -0
  36. package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
  37. package/src/profile/eval/personas.test.mjs +83 -0
  38. package/src/profile/eval/plumbing.test.mjs +69 -0
  39. package/src/profile/eval/prereg.mjs +130 -0
  40. package/src/profile/eval/prereg.test.mjs +78 -0
  41. package/src/profile/eval/real-corpus.test.mjs +103 -0
  42. package/src/profile/eval/real-personas.mjs +109 -0
  43. package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
  44. package/src/profile/eval/run-real-corpus.mjs +358 -0
  45. package/src/profile/eval/slug-quality.mjs +464 -0
  46. package/src/profile/eval/stylometry-features.js +85 -0
  47. package/src/profile/eval/stylometry-reference.js +16 -0
  48. package/src/profile/eval/stylometry.js +224 -0
  49. package/src/profile/eval/stylometry.test.mjs +103 -0
  50. package/src/profile/eval/synthetic-personas.js +91 -0
  51. package/src/profile/eval/verifier-features.mjs +170 -0
  52. package/src/profile/eval/verifier-logreg.mjs +74 -0
  53. package/src/profile/eval/verifier-pair.mjs +122 -0
  54. package/src/profile/eval/verifier-reference.mjs +68 -0
  55. package/src/profile/eval/verifier-scorer.mjs +30 -0
  56. package/src/profile/eval/wrong-target-control.mjs +168 -0
  57. package/src/profile/eval/wrong-target-control.test.mjs +124 -0
  58. package/src/profile/exemplar-capture.js +232 -0
  59. package/src/profile/exemplar-retrieve.js +138 -0
  60. package/src/profile/exemplar-store.js +314 -0
  61. package/src/profile/lock.js +64 -0
  62. package/src/profile/merge.js +624 -0
  63. package/src/profile/path-policy.js +213 -0
  64. package/src/profile/precision-stamp.mjs +151 -0
  65. package/src/profile/render-brief.js +717 -0
  66. package/src/profile/schema.js +244 -0
  67. package/src/profile/sensitivity.js +249 -0
  68. package/src/profile/serve.js +345 -0
  69. package/src/profile/store.js +261 -0
  70. package/src/profile/telemetry.js +289 -0
  71. package/src/recovery/checkpoint.js +7 -1
  72. package/src/server.js +194 -16
  73. package/src/.registry-meta-key.pem +0 -3
@@ -0,0 +1,671 @@
1
+ /**
2
+ * profile/audit.js — Cross-system profile bus, P0.7 + S3 (human-in-the-loop).
3
+ *
4
+ * Inspectability + the right to be forgotten, for the user-global profile.
5
+ * Extends the existing memory-audit / forget UX to the profile tier (design-v2
6
+ * §7 Poisoning guard: "memory-audit + forget extended to the profile").
7
+ *
8
+ * - listInferences(profile, registry?): every inference (global dialectic +
9
+ * per-overlay) surfaced with full provenance, scope, its CITATION (the
10
+ * source locator + any verbatim span the atom carries) and an
11
+ * `inject_eligible` flag — so a user can SEE what was inferred, from where,
12
+ * and whether it has been approved to inject.
13
+ * - approve/reject/inject-eligibility: the human-in-the-loop gate Cursor
14
+ * abandoned (design S3). A newly-derived atom is `pending` (NOT
15
+ * inject-eligible) until the user explicitly approves it. An atom with no
16
+ * citation locator at all can never be approved (cite-or-drop).
17
+ * - forget(profile, idOrPattern): purge matching inferences without mutating
18
+ * the input profile. Returns the new profile + the removed entries. ALSO
19
+ * purges the matching entries from the egress log (best-effort disk write).
20
+ * - forgetAndWrite(idOrPattern): read-forget-write under the global lock; also
21
+ * prunes the approval registry so a forgotten atom leaves no approval behind.
22
+ *
23
+ * APPROVAL REGISTRY: an out-of-band JSON store (`approvals.json`, sibling of the
24
+ * profile) mapping inference id -> { state, ts }. It is deliberately SEPARATE
25
+ * from the profile atom (which is slug-only and privacy-minimized, FIX 4) so the
26
+ * approval decision is a user control surface, not derived data that the CRDT
27
+ * fold could resurrect or a foreign session could forge into the global merge.
28
+ * The registry is FAIL-CLOSED: absent/unknown/`pending`/`rejected` -> not
29
+ * inject-eligible; only an explicit `approved` admits an atom for injection.
30
+ * render-brief/serve (a later slice, S5) consult `injectEligibleIds()` BEFORE
31
+ * surfacing any preference slug — this module owns the gate, not the renderer.
32
+ *
33
+ * Zero deps. NO LLM calls.
34
+ */
35
+
36
+ import {
37
+ openSync,
38
+ writeFileSync,
39
+ fsyncSync,
40
+ closeSync,
41
+ renameSync,
42
+ unlinkSync,
43
+ readFileSync,
44
+ existsSync,
45
+ mkdirSync,
46
+ lstatSync,
47
+ constants as fsConstants,
48
+ } from 'node:fs';
49
+ import { join } from 'node:path';
50
+ import { randomBytes } from 'node:crypto';
51
+
52
+ import { withProfileLock } from './lock.js';
53
+ import { readProfile, writeProfile, profileDir } from './store.js';
54
+ import { purgeEgress, exemplarField } from './egress.js';
55
+ import { listExemplars, forgetExemplars, clearExemplars } from './exemplar-store.js';
56
+ import { citedSpan } from './capture.js';
57
+
/**
 * Assemble the citation record for one inference.
 *
 * The result is primarily a LOCATOR — the source sessions and hosts the atom
 * lists — plus an optional verbatim `span`. The span is taken from the first of
 * `inf.cite`, `inf.evidence_span`, `inf.evidence` that holds a non-empty
 * string; when none does, `span` is null (a span is never invented).
 *
 * `has_locator` is true iff at least one session or host is present.
 *
 * @param {object} inf - inference atom.
 * @returns {{span: (string|null), sessions: Array, hosts: Array,
 *   last_confirmed: *, value: *, has_locator: boolean}}
 */
function citationFor(inf) {
  // Copy so callers can never mutate the atom's own arrays through the row.
  const copyList = (list) => (Array.isArray(list) ? Array.from(list) : []);
  const sessions = copyList(inf.source_sessions);
  const hosts = copyList(inf.source_hosts);

  const spanField = ['cite', 'evidence_span', 'evidence'].find(
    (field) => typeof inf[field] === 'string' && inf[field] !== '',
  );
  const hasLocator = sessions.length !== 0 || hosts.length !== 0;

  return {
    span: spanField === undefined ? null : inf[spanField],
    sessions,
    hosts,
    last_confirmed: inf.last_confirmed,
    value: inf.value === undefined ? null : inf.value,
    has_locator: hasLocator,
  };
}
87
+
/**
 * Surface every inference with provenance, its CITATION, and an inject-
 * eligibility flag. Each row:
 *   { scope, id, kind, subject, value, confidence, evidence_count,
 *     last_confirmed, source_sessions, source_hosts, sensitivity,
 *     citation, approval_state, inject_eligible }.
 * scope is 'global' or 'overlay:<key>'.
 *
 * `registry` is the approval map (id -> { state }). When omitted, every atom is
 * treated as `pending` / NOT inject-eligible (fail-closed). An atom whose
 * citation has no locator is ineligible regardless of registry state.
 *
 * Robustness: a null / non-object entry in a dialectic array is skipped rather
 * than crashing the listing, and `source_sessions` / `source_hosts` are only
 * copied when they are real arrays (otherwise they surface as `[]`).
 *
 * @param {object|null|undefined} profile
 * @param {object|null} [registry]
 * @returns {Array<object>} one row per well-formed inference, global first.
 */
export function listInferences(profile, registry = null) {
  const reg = registry && typeof registry === 'object' ? registry : {};
  const rows = [];
  const push = (scope, inf) => {
    // A malformed entry carries nothing to audit; skip it instead of letting a
    // single bad element take down the whole audit read.
    if (!inf || typeof inf !== 'object') return;
    const citation = citationFor(inf);
    // Own-property lookup only: an id such as "constructor" must never resolve
    // to something inherited from Object.prototype.
    const entry = Object.prototype.hasOwnProperty.call(reg, inf.id) ? reg[inf.id] : undefined;
    const state = entry && typeof entry.state === 'string' ? entry.state : 'pending';
    // Fail-closed: ONLY an explicit `approved` state AND a real citation locator
    // make an atom inject-eligible.
    const inject_eligible = state === 'approved' && citation.has_locator === true;
    rows.push({
      scope,
      id: inf.id,
      kind: inf.kind,
      subject: inf.subject,
      value: inf.value,
      confidence: inf.confidence,
      evidence_count: inf.evidence_count,
      last_confirmed: inf.last_confirmed,
      // Reuse the Array.isArray-guarded lists from the citation (fresh copies,
      // so the row's arrays stay independent of `citation`'s).
      source_sessions: [...citation.sessions],
      source_hosts: [...citation.hosts],
      sensitivity: inf.sensitivity,
      citation,
      approval_state: state,
      inject_eligible,
    });
  };

  if (profile && profile.global && Array.isArray(profile.global.dialectic)) {
    for (const inf of profile.global.dialectic) push('global', inf);
  }
  if (profile && profile.overlays && typeof profile.overlays === 'object') {
    for (const [key, ov] of Object.entries(profile.overlays)) {
      if (ov && Array.isArray(ov.dialectic)) {
        for (const inf of ov.dialectic) push(`overlay:${key}`, inf);
      }
    }
  }
  return rows;
}
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Approval registry — the human-in-the-loop gate (design S3). A small JSON store
145
+ // `approvals.json` (sibling of the profile) mapping inference id -> { state, ts,
146
+ // note? }. SEPARATE from the profile atom on purpose: the approval decision is a
147
+ // user control surface, never derived data that the CRDT fold could resurrect or
148
+ // a foreign session could forge into the global merge. FAIL-CLOSED: a missing
149
+ // file / unknown id reads as `pending` (NOT inject-eligible).
150
+ // ---------------------------------------------------------------------------
151
+
/** File name of the approval registry; `approvalsPath()` joins it onto the profile dir. */
const APPROVALS_FILE = 'approvals.json';
/** The complete, frozen set of states an approval entry may hold. */
const APPROVAL_STATES = Object.freeze(['pending', 'approved', 'rejected']);
/** Read-size cap (4 MiB): the registry is one tiny record per atom; a file past this is corrupt. */
const MAX_APPROVALS_BYTES = 4 * 1024 ** 2;
156
+
/**
 * Absolute-or-relative path of the approval registry file: `APPROVALS_FILE`
 * joined onto whatever `profileDir()` returns.
 *
 * @returns {string}
 */
export function approvalsPath() {
  const dir = profileDir();
  return join(dir, APPROVALS_FILE);
}
160
+
/**
 * Create `dir` (and any missing parents) unless it already exists.
 * Errors from `mkdirSync` propagate to the caller.
 *
 * @param {string} dir
 * @returns {void}
 */
function ensureDir(dir) {
  if (existsSync(dir)) return;
  mkdirSync(dir, { recursive: true });
}
164
+
/**
 * Report whether `p` exists AND is a symbolic link. Any `lstatSync` failure
 * (including a missing path) is answered with `false`.
 *
 * @param {string} p
 * @returns {boolean}
 */
function isSymlink(p) {
  let info;
  try {
    info = lstatSync(p);
  } catch {
    return false;
  }
  return info.isSymbolicLink();
}
173
+
/**
 * readApprovals() -> { ok, registry, code? }.
 *
 * Always returns a registry object and never throws. Every failure mode
 * degrades to an EMPTY registry ("nothing approved"), never to "everything":
 *   - missing file (ENOENT)          -> { ok: true,  registry: {} }
 *   - symlinked target               -> { ok: false, code: 'EAPPROVALS_SYMLINK' }
 *   - not a regular file (dir, FIFO) -> { ok: false, code: 'EAPPROVALS_NOTFILE' }
 *   - larger than the size cap       -> { ok: false, code: 'EAPPROVALS_TOOBIG' }
 *   - unreadable                     -> { ok: false, code: <errno code> }
 *   - unparseable JSON               -> { ok: false, code: 'EAPPROVALS_PARSE' }
 *   - valid JSON, not a plain object -> { ok: true,  registry: {} }
 *
 * The target is lstat'ed exactly once and only a REGULAR file is ever opened,
 * so the size cap cannot be bypassed by a non-file node and a FIFO can never
 * block the read.
 *
 * @returns {{ok: boolean, registry: object, code?: string}}
 */
export function readApprovals() {
  const target = approvalsPath();
  let st;
  try {
    st = lstatSync(target);
  } catch (err) {
    // Only a genuinely absent file is the benign "nothing approved yet" case.
    if (err && err.code === 'ENOENT') return { ok: true, registry: {} };
    return { ok: false, code: (err && err.code) || 'EAPPROVALS_READ', registry: {} };
  }
  if (st.isSymbolicLink()) return { ok: false, code: 'EAPPROVALS_SYMLINK', registry: {} };
  if (!st.isFile()) return { ok: false, code: 'EAPPROVALS_NOTFILE', registry: {} };
  if (st.size > MAX_APPROVALS_BYTES) {
    return { ok: false, code: 'EAPPROVALS_TOOBIG', registry: {} };
  }
  let raw;
  try {
    raw = readFileSync(target, 'utf8');
  } catch (err) {
    return { ok: false, code: err.code || 'EAPPROVALS_READ', registry: {} };
  }
  try {
    const obj = JSON.parse(raw);
    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return { ok: true, registry: {} };
    return { ok: true, registry: obj };
  } catch {
    // Corrupt store -> fail closed (empty = nothing approved), never throw.
    return { ok: false, code: 'EAPPROVALS_PARSE', registry: {} };
  }
}
207
+
/**
 * Persist the approval registry with a write-to-temp-then-rename sequence.
 *
 * Steps, in order: refuse a symlinked target; make sure the profile dir
 * exists; serialize the registry (2-space JSON + trailing newline); create a
 * uniquely named temp file next to the target (exclusive create, no-follow,
 * mode 0600); write + fsync + close it; re-check that the target has not
 * become a symlink; rename the temp file over the target. On any failure after
 * the temp file was created, the temp file is removed on a best-effort basis.
 *
 * @param {object} registry
 * @returns {{ok: true} | {ok: false, code: string, message: string}}
 */
function writeApprovals(registry) {
  const target = approvalsPath();
  if (isSymlink(target)) {
    return { ok: false, code: 'EAPPROVALS_SYMLINK', message: `refusing symlinked target: ${target}` };
  }
  try {
    ensureDir(profileDir());
  } catch (err) {
    return { ok: false, code: err.code || 'EMKDIR', message: err.message };
  }

  const payload = `${JSON.stringify(registry, null, 2)}\n`;
  const suffix = randomBytes(4).toString('hex');
  const tmp = `${target}.tmp.${process.pid}.${suffix}`;
  const openFlags =
    fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_NOFOLLOW;
  // Best-effort cleanup of the temp file; a failed unlink is deliberately ignored.
  const discardTmp = () => {
    try { unlinkSync(tmp); } catch {}
  };

  let fd = null;
  try {
    fd = openSync(tmp, openFlags, 0o600);
    writeFileSync(fd, payload, 'utf8');
    fsyncSync(fd);
    closeSync(fd);
    fd = null; // closed — the catch below must not close it a second time
    if (isSymlink(target)) {
      discardTmp();
      return { ok: false, code: 'EAPPROVALS_SYMLINK', message: `target became a symlink: ${target}` };
    }
    renameSync(tmp, target);
    return { ok: true };
  } catch (err) {
    if (fd != null) {
      try { closeSync(fd); } catch {}
    }
    discardTmp();
    return { ok: false, code: err.code || 'EAPPROVALS_WRITE', message: err.message };
  }
}
245
+
/**
 * setApprovalState(registry, id, state, opts?) -> new registry (PURE; does not
 * mutate the input). `state` must be one of pending|approved|rejected. Stamps a
 * timestamp (`opts.ts` when it is a non-empty string, else the current ISO
 * time) and an optional non-empty `opts.note`.
 *
 * The entry is always stored as an OWN property of the result, even for an id
 * such as "__proto__" — plain assignment would hit the prototype setter there
 * and record nothing, i.e. exactly the silent no-op this function must never
 * perform.
 *
 * @param {object|null|undefined} registry
 * @param {*} id - coerced with String(); must be non-empty.
 * @param {string} state
 * @param {{ts?: string, note?: string}} [opts]
 * @returns {object} a shallow copy of `registry` with the entry set.
 * @throws {TypeError} on a blank id or an unknown state.
 */
export function setApprovalState(registry, id, state, opts = {}) {
  const key = String(id || '');
  if (!key) throw new TypeError('setApprovalState: id must be a non-empty string');
  if (!APPROVAL_STATES.includes(state)) {
    throw new TypeError(`setApprovalState: state must be one of ${APPROVAL_STATES.join('|')} (got ${JSON.stringify(state)})`);
  }
  const next = { ...(registry && typeof registry === 'object' ? registry : {}) };
  const entry = { state, ts: typeof opts.ts === 'string' && opts.ts ? opts.ts : new Date().toISOString() };
  if (typeof opts.note === 'string' && opts.note) entry.note = opts.note;
  // defineProperty (not `next[key] = entry`) so the key is always a plain,
  // enumerable own data property regardless of its name.
  Object.defineProperty(next, key, {
    value: entry,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return next;
}
264
+
/**
 * injectEligibleIds(profile, registry) -> Set<string>.
 *
 * Collects the `id` of every row that `listInferences(profile, registry)`
 * flags as `inject_eligible`. The eligibility rule itself lives in
 * `listInferences`; this function only gathers the ids so callers do not have
 * to re-derive the gate.
 *
 * @param {object|null|undefined} profile
 * @param {object|null} [registry]
 * @returns {Set<string>}
 */
export function injectEligibleIds(profile, registry = null) {
  const eligible = listInferences(profile, registry)
    .filter((row) => row.inject_eligible)
    .map((row) => row.id);
  return new Set(eligible);
}
278
+
/**
 * setApprovalAndWrite(id, state, opts?) — read the profile, validate, then
 * read -> set -> write the approval registry, all inside `withProfileLock`.
 * Resolves to { ok, id, state } on success or { ok: false, code, message }.
 *
 * `opts.lockPath` is forwarded to the lock; every other option is forwarded
 * both to the lock and to `setApprovalState` (which reads `ts` / `note`).
 *
 * Failure codes produced here:
 *   - profile read failed        -> the read's code, or 'EREAD'
 *   - no atom with that id       -> 'ENOATOM'
 *   - approving a locator-less atom -> 'ENOCITATION' (cite-or-drop)
 *   - setApprovalState threw     -> 'EBADSTATE'
 *   - registry write failed      -> the write's code
 */
export function setApprovalAndWrite(id, state, opts = {}) {
  const { lockPath, ...rest } = opts;
  const fail = (code, message) => ({ ok: false, code, message });

  return withProfileLock(async () => {
    const profileRead = readProfile();
    if (!profileRead.ok) return fail(profileRead.code || 'EREAD', profileRead.message);

    // The atom must exist in the CURRENT profile before any state is recorded.
    const wanted = String(id);
    const atom = listInferences(profileRead.profile).find((row) => row.id === wanted);
    if (!atom) return fail('ENOATOM', `no inference with id ${JSON.stringify(wanted)}`);

    const approving = state === 'approved';
    if (approving && !atom.citation.has_locator) {
      return fail('ENOCITATION', 'cannot approve a citation-less atom (cite-or-drop)');
    }

    const current = readApprovals();
    let updated;
    try {
      updated = setApprovalState(current.registry, id, state, rest);
    } catch (err) {
      return fail('EBADSTATE', err.message);
    }

    const written = writeApprovals(updated);
    if (!written.ok) return fail(written.code, written.message);
    return { ok: true, id: wanted, state };
  }, { lockPath, ...rest });
}
312
+
/** Human action "approve": `setApprovalAndWrite` with the state fixed to 'approved'. */
export function approveAndWrite(id, opts = {}) {
  return setApprovalAndWrite(id, 'approved', opts);
}

/** Human action "reject": `setApprovalAndWrite` with the state fixed to 'rejected'. */
export function rejectAndWrite(id, opts = {}) {
  return setApprovalAndWrite(id, 'rejected', opts);
}
316
+
/**
 * Drop the approval entries whose key is in `removedIds`. Does not mutate the
 * registry passed in; builds and returns a new one.
 *
 * @param {object|null|undefined} registry - id -> entry map (non-objects read as empty).
 * @param {Set<string>|Iterable<string>|null|undefined} removedIds
 * @returns {{registry: object, removed: number}} the surviving entries and how
 *   many were dropped.
 */
function purgeApprovals(registry, removedIds) {
  const doomed = removedIds instanceof Set ? removedIds : new Set(removedIds || []);
  const source = registry && typeof registry === 'object' ? registry : {};
  const kept = {};
  let dropped = 0;
  Object.keys(source).forEach((id) => {
    if (doomed.has(id)) {
      dropped += 1;
    } else {
      kept[id] = source[id];
    }
  });
  return { registry: kept, removed: dropped };
}
331
+
/**
 * Upper bound, in characters, on the `.source` of a caller-supplied RegExp
 * (HIGH-2 ReDoS bound). Longer patterns are rejected outright.
 */
const MAX_PATTERN_SOURCE = 200;
334
+
/**
 * Screen a RegExp by inspecting its SOURCE text only — the pattern is never
 * executed against data here.
 *
 * Two checks, in order:
 *   1. the source may not exceed MAX_PATTERN_SOURCE characters;
 *   2. the source may not contain a nested quantifier — a group whose body
 *      ends in a quantifier and is itself immediately quantified, e.g.
 *      (a+)+, (a*)*, (a+)*, (.{1,9})+ .
 *
 * This is a conservative denylist, not a proof of safety: it does not catch
 * every backtracking-prone pattern.
 *
 * @param {RegExp} re
 * @returns {{ok: true, re: RegExp} | {ok: false, code: string, message: string}}
 */
function sanitizeRegExp(re) {
  const source = re.source || '';
  const reject = (code, message) => ({ ok: false, code, message });

  if (source.length > MAX_PATTERN_SOURCE) {
    return reject('EPATTERN_TOO_LONG', `regex source exceeds ${MAX_PATTERN_SOURCE} chars`);
  }

  // "(" … quantifier … ")" followed (after optional whitespace) by a quantifier.
  const nestedQuantifier = /\([^)]*[+*}][)?]*\)\s*[+*{]/;
  return nestedQuantifier.test(source)
    ? reject('EPATTERN_UNSAFE', 'regex has a nested quantifier (ReDoS-prone)')
    : { ok: true, re };
}
361
+
/**
 * Build a predicate `(id) => boolean` from an id/pattern (HIGH-2 hardened):
 *
 * - RegExp: sanitized first via `sanitizeRegExp`. An unsafe source yields a
 *   predicate that THROWS an Error carrying the rejection `code` on its first
 *   call, so callers are expected to gate with `validatePattern` beforehand.
 *   For a safe pattern, `lastIndex` is reset before every test: a RegExp with
 *   the `g` or `y` flag is stateful across `.test()` calls, and without the
 *   reset consecutive matching ids would be skipped — an incomplete forget.
 * - string: EXACT-id OR explicit-segment match — `id === needle`,
 *   `id.startsWith(needle + '::')` (kind prefix), or
 *   `id.endsWith('::' + needle)` (subject segment). Deliberately NOT a bare
 *   substring match, so `forget('e')` cannot hit every id containing an "e".
 * - empty string: matches nothing.
 *
 * @param {RegExp|*} idOrPattern - non-RegExp values are coerced with String().
 * @returns {(id: *) => boolean}
 */
function matcherFor(idOrPattern) {
  if (idOrPattern instanceof RegExp) {
    const checked = sanitizeRegExp(idOrPattern);
    if (!checked.ok) {
      const err = new Error(checked.message);
      err.code = checked.code;
      return () => { throw err; };
    }
    const re = checked.re;
    return (id) => {
      // Each id is an independent test; never resume from a previous match.
      re.lastIndex = 0;
      return re.test(String(id));
    };
  }
  const needle = String(idOrPattern);
  if (!needle) return () => false;
  return (id) => {
    const s = String(id);
    return s === needle || s.startsWith(`${needle}::`) || s.endsWith(`::${needle}`);
  };
}
396
+
/**
 * Check an id/pattern WITHOUT running it against any data, so an unsafe RegExp
 * can be turned away before the global lock is taken (HIGH-2).
 *
 * Only RegExp inputs are inspected (via `sanitizeRegExp`); every other value is
 * accepted as-is.
 *
 * @param {RegExp|*} idOrPattern
 * @returns {{ok: true} | {ok: false, code: string, message: string}}
 */
export function validatePattern(idOrPattern) {
  if (!(idOrPattern instanceof RegExp)) return { ok: true };
  const { ok, code, message } = sanitizeRegExp(idOrPattern);
  return ok ? { ok: true } : { ok: false, code, message };
}
410
+
/**
 * Best-effort wrapper around `egress.purgeEgress`: forwards `removedIds` and
 * returns whatever it returns. If `purgeEgress` throws, the error is swallowed
 * and 0 is returned — a failed egress rewrite must not abort the inference
 * removal that triggered it.
 *
 * NOTE(review): the on-disk behavior (which log is rewritten, atomicity) is
 * implemented in egress.js and is not visible from this module.
 *
 * @param {Array<string>} removedIds
 * @returns {*} the result of `purgeEgress`, or 0 on failure.
 */
function purgeEgressEntries(removedIds) {
  let purged = 0;
  try {
    purged = purgeEgress(removedIds);
  } catch {
    // Egress purge is best-effort; the inference removal is the primary contract.
    purged = 0;
  }
  return purged;
}
431
+
/**
 * forget(profile, idOrPattern) -> { profile, removed, egressRemoved }.
 *
 * Works on a deep (JSON round-trip) copy, so the `profile` argument is never
 * mutated. Every inference whose id satisfies `matcherFor(idOrPattern)` is
 * removed from the global dialectic and from each overlay's dialectic; the
 * removed atoms are returned in encounter order (global first).
 *
 * Side effect: the removed ids are handed to `purgeEgressEntries`, whose result
 * is reported as `egressRemoved`.
 *
 * @param {object|null|undefined} profile
 * @param {RegExp|string} idOrPattern
 * @returns {{profile: object, removed: Array<object>, egressRemoved: *}}
 */
export function forget(profile, idOrPattern) {
  const matches = matcherFor(idOrPattern);
  const removed = [];
  const next = JSON.parse(JSON.stringify(profile || {}));

  // Split one holder's dialectic into kept / removed, in place on the copy.
  const prune = (holder) => {
    if (!holder || !Array.isArray(holder.dialectic)) return;
    const kept = [];
    for (const inf of holder.dialectic) {
      if (matches(inf.id)) {
        removed.push(inf);
      } else {
        kept.push(inf);
      }
    }
    holder.dialectic = kept;
  };

  prune(next.global);
  if (next.overlays && typeof next.overlays === 'object') {
    for (const key of Object.keys(next.overlays)) prune(next.overlays[key]);
  }

  const egressRemoved = purgeEgressEntries(removed.map((r) => r.id));
  return { profile: next, removed, egressRemoved };
}
463
+
/**
 * forgetAndWrite(idOrPattern, opts?) — read → forget → write, inside
 * `withProfileLock`. Resolves to
 *   { ok: true, removed, egressRemoved, approvalsRemoved }
 * or
 *   { ok: false, code, message, removed: [] }.
 *
 * Order of operations:
 *   1. `validatePattern` runs BEFORE the lock is requested (HIGH-2), so an
 *      unsafe RegExp is rejected without ever being tested under the lock.
 *   2. Under the lock: read the profile, `forget` the matches, write it back.
 *   3. Still under the lock, best-effort: drop the removed ids from the
 *      approval registry so a re-derived id cannot inherit a stale `approved`.
 *      A failure here never turns the overall result into a failure;
 *      `approvalsRemoved` simply stays 0.
 *
 * `opts.lockPath` and the remaining options are forwarded to the lock.
 */
export function forgetAndWrite(idOrPattern, opts = {}) {
  const { lockPath, ...lockOpts } = opts;
  const failure = (code, message) => ({ ok: false, code, message, removed: [] });

  const verdict = validatePattern(idOrPattern);
  if (!verdict.ok) return Promise.resolve(failure(verdict.code, verdict.message));

  return withProfileLock(async () => {
    const read = readProfile();
    if (!read.ok) return failure(read.code || 'EREAD', read.message);

    const outcome = forget(read.profile, idOrPattern);
    const written = writeProfile(outcome.profile);
    if (!written.ok) return failure(written.code, written.message);

    let approvalsRemoved = 0;
    try {
      const approvals = readApprovals();
      const pruned = purgeApprovals(approvals.registry, outcome.removed.map((x) => x.id));
      // Only rewrite the registry when something was actually dropped.
      if (pruned.removed > 0 && writeApprovals(pruned.registry).ok) {
        approvalsRemoved = pruned.removed;
      }
    } catch {
      // best-effort — inference removal is the primary contract.
    }

    return {
      ok: true,
      removed: outcome.removed,
      egressRemoved: outcome.egressRemoved,
      approvalsRemoved,
    };
  }, { lockPath, ...lockOpts });
}
503
+
504
+ // ---------------------------------------------------------------------------
505
+ // Voice exemplars (V4) — VISIBLE, FORGETTABLE, disclosure-logged.
506
+ //
507
+ // A voice exemplar is a short raw snippet of the USER's OWN writing, stored
508
+ // locally + transiently by the V1 store (`exemplar-store.js`) and few-shot into
509
+ // prompts so the agent can draft in the user's voice. This is a FEATURE, not a
510
+ // research proof: NO stylometry / AUC / Gate-B here. This slice extends the same
511
+ // audit + right-to-be-forgotten UX the inference tier already has (listVoice-
512
+ // Exemplars mirrors listInferences; forgetVoiceExemplars mirrors forgetAndWrite)
513
+ // to the exemplar tier so a user can SEE every writing sample being used and
514
+ // PURGE any of them. Zero-LLM, zero-network — same as the rest of this module.
515
+ // ---------------------------------------------------------------------------
516
+
/** Plain-language label attached to every voice-exemplar audit row. */
const EXEMPLAR_LABEL = 'writing sample used to match your voice';
/** Maximum length of an exemplar preview, in UTF-16 code units (ellipsis included). */
const EXEMPLAR_PREVIEW_MAX = 80;
521
+
/**
 * scrubbedPreview(text) -> short excerpt of an exemplar's raw `text` for the
 * audit list.
 *
 * Pipeline: `citedSpan` (capture.js — described in this file as the
 * direct-identifier / secret scrub; its implementation is not visible here),
 * then `scrubHomedirPaths` as defense-in-depth so the preview cannot echo an OS
 * username from a home-directory path (MED-2), then a hard cap: anything longer
 * than EXEMPLAR_PREVIEW_MAX is cut to at most EXEMPLAR_PREVIEW_MAX - 1 code
 * units, right-trimmed, and suffixed with "…".
 *
 * The cut never lands in the middle of a surrogate pair: if the last retained
 * code unit is a high surrogate, it is dropped too, so the preview never ends
 * in a lone (unrenderable) surrogate.
 *
 * @param {*} text - null/undefined are treated as the empty string.
 * @returns {string}
 */
function scrubbedPreview(text) {
  const scrubbed = scrubHomedirPaths(citedSpan(text == null ? '' : text));
  if (scrubbed.length <= EXEMPLAR_PREVIEW_MAX) return scrubbed;
  let cut = EXEMPLAR_PREVIEW_MAX - 1;
  const lastUnit = scrubbed.charCodeAt(cut - 1);
  // 0xD800–0xDBFF is the high-surrogate range: its partner sits past the cut.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return `${scrubbed.slice(0, cut).trimEnd()}…`;
}
538
+
/**
 * Mask the username segment of home-directory paths with `<user>`.
 *
 * Handles `/Users/<name>` and `/home/<name>` (POSIX style) and
 * `<drive>:\Users\<name>` (Windows style). The input is coerced with String()
 * and not otherwise altered.
 *
 * @param {*} s
 * @returns {string}
 */
function scrubHomedirPaths(s) {
  const posixHome = /(\/(?:Users|home)\/)[^/\s]+/g;
  const windowsHome = /([A-Za-z]:\\Users\\)[^\\/\s]+/g;
  const afterPosix = String(s).replace(posixHome, '$1<user>');
  return afterPosix.replace(windowsHome, '$1<user>');
}
545
+
/**
 * listVoiceExemplars(opts?) -> Array<{ id, register, source, ts, preview, label }>.
 *
 * Reads the exemplar records through `listExemplars(opts)` and reshapes each
 * object record into an audit row. The raw `text` is never copied into the
 * row; only `scrubbedPreview(text)` is. `label` is the constant
 * EXEMPLAR_LABEL, so every row states what it is.
 *
 * Never throws because of the store: if `listExemplars` throws or returns a
 * non-array, the result is an empty list. Non-object records are skipped.
 *
 * @param {object} [opts] - forwarded unchanged to `listExemplars`.
 * @returns {Array<object>}
 */
export function listVoiceExemplars(opts = {}) {
  let records;
  try {
    records = listExemplars(opts);
  } catch {
    return [];
  }
  if (!Array.isArray(records)) return [];

  return records
    .filter((ex) => ex && typeof ex === 'object')
    .map((ex) => ({
      id: ex.id,
      register: ex.register,
      source: ex.source,
      ts: ex.ts,
      preview: scrubbedPreview(ex.text),
      label: EXEMPLAR_LABEL,
    }));
}
581
+
/**
 * forgetVoiceExemplars(idOrPattern, opts?) ->
 *   Promise<{ ok, removed, removedIds?, egressRemoved?, code?, message? }>.
 *
 * The right-to-be-forgotten path for voice exemplars, run inside
 * `withProfileLock`.
 *
 * - `null` / `undefined` / `'*'` / `'all'` -> clearExemplars(opts) (forget ALL).
 *   Note: an EMPTY STRING is NOT "forget all"; it is passed to forgetExemplars.
 * - any other id or pattern                -> forgetExemplars(idOrPattern, opts).
 *
 * PATTERN GATE: a RegExp is checked with `validatePattern` BEFORE the lock is
 * requested — the same discipline `forgetAndWrite` applies — because here the
 * pattern is tested against exemplar ids AND their text while the global lock
 * is held. An unsafe pattern resolves to
 * { ok: false, code, message, removed: 0 } without taking the lock.
 *
 * EGRESS PURGE: the store reports `removed` as a COUNT, so the ids that will
 * match are snapshotted (via `exemplarMatcher`) before the removal, under the
 * same lock, and `purgeEgress` is called with `exemplarField(id)` for exactly
 * that snapshot. Both the snapshot and the egress purge are best-effort: a
 * failure in either never fails the already-committed removal.
 *
 * `opts.lockPath` goes to the lock; the remaining options go to the lock and
 * to the store calls.
 */
export function forgetVoiceExemplars(idOrPattern, opts = {}) {
  const { lockPath, ...rest } = opts;
  const forgetAll = idOrPattern == null || idOrPattern === '*' || idOrPattern === 'all';

  // Reject an unsafe RegExp before it can run under the fleet-wide lock.
  const verdict = validatePattern(idOrPattern);
  if (!verdict.ok) {
    return Promise.resolve({ ok: false, code: verdict.code, message: verdict.message, removed: 0 });
  }

  return withProfileLock(async () => {
    // Snapshot the ids this forget WILL remove, BEFORE removing them — the
    // store reports a count, not ids, so this is the only way to know which
    // egress fields to expunge.
    let snapshotIds = [];
    try {
      const current = listExemplars(rest);
      if (Array.isArray(current)) {
        const match = exemplarMatcher(idOrPattern, forgetAll);
        snapshotIds = current.filter((ex) => ex && match(ex)).map((ex) => ex.id);
      }
    } catch {
      snapshotIds = []; // snapshot is best-effort; the removal is primary.
    }

    let res;
    try {
      res = forgetAll ? clearExemplars(rest) : forgetExemplars(idOrPattern, rest);
    } catch (err) {
      return { ok: false, code: err.code || 'EEXEMPLAR_FORGET', message: err.message, removed: 0 };
    }
    if (!res || res.ok !== true) {
      return { ok: false, code: (res && res.code) || 'EEXEMPLAR_FORGET', message: res && res.message, removed: 0 };
    }

    let egressRemoved = 0;
    if (snapshotIds.length > 0) {
      try {
        egressRemoved = purgeEgress(snapshotIds.map((id) => exemplarField(id)));
      } catch {
        egressRemoved = 0; // best-effort — exemplar removal is the primary contract.
      }
    }
    // Echo the store's `removed` (a count) back unchanged; also surface the
    // concrete ids whose egress trail was expunged so a caller can report precisely.
    return { ok: true, removed: res.removed, removedIds: snapshotIds, egressRemoved };
  }, { lockPath, ...rest });
}
650
+
/**
 * Build the predicate `(exemplar) => boolean` used ONLY to snapshot the ids a
 * forget is about to remove (for the egress purge):
 *   - forgetAll -> every record matches;
 *   - RegExp    -> matches when the pattern tests true against String(id), or
 *                  else against String(text);
 *   - string    -> exact id, else case-insensitive substring of id or of text;
 *   - empty / null / undefined (without forgetAll) -> nothing matches.
 *
 * Intended to stay in lockstep with exemplar-store.js `forgetExemplars`.
 * NOTE(review): that implementation is not visible here — confirm the two
 * still agree, in particular for RegExps carrying the stateful `g` / `y` flags.
 *
 * @param {RegExp|string|null|undefined} idOrPattern
 * @param {boolean} forgetAll
 * @returns {(record: {id: *, text: *}) => boolean}
 */
function exemplarMatcher(idOrPattern, forgetAll) {
  if (forgetAll) return () => true;

  if (idOrPattern instanceof RegExp) {
    // `some` short-circuits exactly like `||`: text is only tested if id fails.
    return (record) =>
      [record.id, record.text].some((field) => idOrPattern.test(String(field)));
  }

  const needle = idOrPattern == null ? '' : String(idOrPattern);
  if (needle === '') return () => false;

  const folded = needle.toLowerCase();
  return (record) => {
    if (record.id === needle) return true;
    return [record.id, record.text].some((field) => String(field).toLowerCase().includes(folded));
  };
}