@ijfw/memory-server 1.5.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw-dashboard +20 -1
- package/package.json +4 -3
- package/src/audit-roster.js +89 -12
- package/src/brain/tiered-llm.js +57 -7
- package/src/cross-orchestrator-cli.js +344 -4
- package/src/cross-project-search.js +39 -1
- package/src/dashboard-server.js +7 -1
- package/src/dream/runner.mjs +560 -8
- package/src/handlers/brain-handler.js +101 -1
- package/src/importers/discover.js +1 -1
- package/src/memory/bench-metrics.js +289 -0
- package/src/memory/benchmark.js +1 -1
- package/src/memory/search.js +53 -1
- package/src/orchestrator/plan-checker.js +1 -1
- package/src/profile/audit.js +671 -0
- package/src/profile/capture.js +871 -0
- package/src/profile/derive-dialectic.js +242 -0
- package/src/profile/derive-heuristic.js +733 -0
- package/src/profile/derive.js +156 -0
- package/src/profile/egress.js +306 -0
- package/src/profile/eval/build-real-probes.mjs +197 -0
- package/src/profile/eval/corpus-from-reddit.mjs +166 -0
- package/src/profile/eval/corpus-from-reddit.test.mjs +121 -0
- package/src/profile/eval/corpus-from-transcripts.mjs +264 -0
- package/src/profile/eval/gate-b-behavior.mjs +420 -0
- package/src/profile/eval/gate-b-decision-run.mjs +171 -0
- package/src/profile/eval/gate-b-decision-run.test.mjs +141 -0
- package/src/profile/eval/gate-b-run.mjs +417 -0
- package/src/profile/eval/gate-b-run.test.mjs +204 -0
- package/src/profile/eval/gate-c-capture.mjs +323 -0
- package/src/profile/eval/harness.mjs +551 -0
- package/src/profile/eval/instrument-validation.mjs +248 -0
- package/src/profile/eval/instrument-validation.test.mjs +125 -0
- package/src/profile/eval/multi-subject-harness.mjs +106 -0
- package/src/profile/eval/multi-subject-harness.test.mjs +99 -0
- package/src/profile/eval/personas.test.mjs +83 -0
- package/src/profile/eval/plumbing.test.mjs +69 -0
- package/src/profile/eval/prereg.mjs +130 -0
- package/src/profile/eval/prereg.test.mjs +78 -0
- package/src/profile/eval/real-corpus.test.mjs +103 -0
- package/src/profile/eval/real-personas.mjs +109 -0
- package/src/profile/eval/run-real-corpus-concurrent.mjs +407 -0
- package/src/profile/eval/run-real-corpus.mjs +358 -0
- package/src/profile/eval/slug-quality.mjs +464 -0
- package/src/profile/eval/stylometry-features.js +85 -0
- package/src/profile/eval/stylometry-reference.js +16 -0
- package/src/profile/eval/stylometry.js +224 -0
- package/src/profile/eval/stylometry.test.mjs +103 -0
- package/src/profile/eval/synthetic-personas.js +91 -0
- package/src/profile/eval/verifier-features.mjs +170 -0
- package/src/profile/eval/verifier-logreg.mjs +74 -0
- package/src/profile/eval/verifier-pair.mjs +122 -0
- package/src/profile/eval/verifier-reference.mjs +68 -0
- package/src/profile/eval/verifier-scorer.mjs +30 -0
- package/src/profile/eval/wrong-target-control.mjs +168 -0
- package/src/profile/eval/wrong-target-control.test.mjs +124 -0
- package/src/profile/exemplar-capture.js +232 -0
- package/src/profile/exemplar-retrieve.js +138 -0
- package/src/profile/exemplar-store.js +314 -0
- package/src/profile/lock.js +64 -0
- package/src/profile/merge.js +624 -0
- package/src/profile/path-policy.js +213 -0
- package/src/profile/precision-stamp.mjs +151 -0
- package/src/profile/render-brief.js +717 -0
- package/src/profile/schema.js +244 -0
- package/src/profile/sensitivity.js +249 -0
- package/src/profile/serve.js +345 -0
- package/src/profile/store.js +261 -0
- package/src/profile/telemetry.js +289 -0
- package/src/recovery/checkpoint.js +7 -1
- package/src/server.js +185 -14
- package/src/.registry-meta-key.pem +0 -3
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* profile/audit.js — Cross-system profile bus, P0.7 + S3 (human-in-the-loop).
|
|
3
|
+
*
|
|
4
|
+
* Inspectability + the right to be forgotten, for the user-global profile.
|
|
5
|
+
* Extends the existing memory-audit / forget UX to the profile tier (design-v2
|
|
6
|
+
* §7 Poisoning guard: "memory-audit + forget extended to the profile").
|
|
7
|
+
*
|
|
8
|
+
* - listInferences(profile, registry?): every inference (global dialectic +
|
|
9
|
+
* per-overlay) surfaced with full provenance, scope, its CITATION (the
|
|
10
|
+
* source locator + any verbatim span the atom carries) and an
|
|
11
|
+
* `inject_eligible` flag — so a user can SEE what was inferred, from where,
|
|
12
|
+
* and whether it has been approved to inject.
|
|
13
|
+
* - approve/reject/inject-eligibility: the human-in-the-loop gate Cursor
|
|
14
|
+
* abandoned (design S3). A newly-derived atom is `pending` (NOT
|
|
15
|
+
* inject-eligible) until the user explicitly approves it. An atom with no
|
|
16
|
+
* citation locator at all can never be approved (cite-or-drop).
|
|
17
|
+
 * - forget(profile, idOrPattern): purge matching inferences without mutating
 *   the input profile. Returns the new profile + the removed entries. ALSO
 *   purges the matching entries from the egress log (an on-disk side effect).
 * - forgetAndWrite(idOrPattern): read-forget-write under the global lock, then
 *   purges the matching entries from the approval registry — right-to-be-forgotten
 *   completeness: a forgotten atom leaves no approval record behind.
|
|
22
|
+
*
|
|
23
|
+
* APPROVAL REGISTRY: an out-of-band JSON store (`approvals.json`, sibling of the
|
|
24
|
+
* profile) mapping inference id -> { state, ts }. It is deliberately SEPARATE
|
|
25
|
+
* from the profile atom (which is slug-only and privacy-minimized, FIX 4) so the
|
|
26
|
+
* approval decision is a user control surface, not derived data that the CRDT
|
|
27
|
+
* fold could resurrect or a foreign session could forge into the global merge.
|
|
28
|
+
* The registry is FAIL-CLOSED: absent/unknown/`pending`/`rejected` -> not
|
|
29
|
+
* inject-eligible; only an explicit `approved` admits an atom for injection.
|
|
30
|
+
* render-brief/serve (a later slice, S5) consult `injectEligibleIds()` BEFORE
|
|
31
|
+
* surfacing any preference slug — this module owns the gate, not the renderer.
|
|
32
|
+
*
|
|
33
|
+
* Zero deps. NO LLM calls.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import {
|
|
37
|
+
openSync,
|
|
38
|
+
writeFileSync,
|
|
39
|
+
fsyncSync,
|
|
40
|
+
closeSync,
|
|
41
|
+
renameSync,
|
|
42
|
+
unlinkSync,
|
|
43
|
+
readFileSync,
|
|
44
|
+
existsSync,
|
|
45
|
+
mkdirSync,
|
|
46
|
+
lstatSync,
|
|
47
|
+
constants as fsConstants,
|
|
48
|
+
} from 'node:fs';
|
|
49
|
+
import { join } from 'node:path';
|
|
50
|
+
import { randomBytes } from 'node:crypto';
|
|
51
|
+
|
|
52
|
+
import { withProfileLock } from './lock.js';
|
|
53
|
+
import { readProfile, writeProfile, profileDir } from './store.js';
|
|
54
|
+
import { purgeEgress, exemplarField } from './egress.js';
|
|
55
|
+
import { listExemplars, forgetExemplars, clearExemplars } from './exemplar-store.js';
|
|
56
|
+
import { citedSpan } from './capture.js';
|
|
57
|
+
|
|
58
|
+
/**
 * Build the CITATION for an inference (design S3 "verbatim span + source
 * locator"). The durable atom is deliberately SLUG-ONLY, so a citation is
 * primarily the LOCATOR:
 *   { span, sessions[], hosts[], last_confirmed, value, has_locator }
 *
 * `span` is the first non-empty string among `inf.cite`, `inf.evidence_span`
 * and `inf.evidence`, or `null` when the atom carries none — a span is never
 * fabricated.
 *
 * `has_locator` is true iff at least one source session or host entry is
 * MEANINGFUL (not null/undefined and not blank once stringified). An array
 * holding only empty placeholders (e.g. `['']`) grounds nothing, so it must not
 * satisfy the cite-or-drop gate.
 *
 * @param {object} inf - inference atom; a null/non-object value is treated as
 *   an atom with no fields rather than throwing.
 * @returns {{span: (string|null), sessions: Array, hosts: Array,
 *   last_confirmed: *, value: *, has_locator: boolean}}
 */
function citationFor(inf) {
  const atom = inf !== null && typeof inf === 'object' ? inf : {};
  // Copy the arrays so callers can never mutate the atom through the citation.
  const sessions = Array.isArray(atom.source_sessions) ? [...atom.source_sessions] : [];
  const hosts = Array.isArray(atom.source_hosts) ? [...atom.source_hosts] : [];
  const isRealLocator = (entry) => entry != null && String(entry).trim() !== '';

  let span = null;
  for (const k of ['cite', 'evidence_span', 'evidence']) {
    if (typeof atom[k] === 'string' && atom[k]) {
      span = atom[k];
      break;
    }
  }

  return {
    span,
    sessions,
    hosts,
    last_confirmed: atom.last_confirmed,
    value: atom.value === undefined ? null : atom.value,
    has_locator: sessions.some(isRealLocator) || hosts.some(isRealLocator),
  };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Surface every inference with provenance, its CITATION, and an inject-
 * eligibility flag. Each row:
 *   { scope, id, kind, subject, value, confidence, evidence_count,
 *     last_confirmed, source_sessions, source_hosts, sensitivity,
 *     citation, approval_state, inject_eligible }.
 * scope is 'global' or 'overlay:<key>'.
 *
 * `registry` is the approval map (id -> { state }). When omitted, every atom is
 * treated as `pending` / NOT inject-eligible (fail-closed). An atom whose
 * citation has no locator is ineligible regardless of registry state.
 *
 * Malformed dialectic entries (null / non-object) are skipped instead of
 * throwing, so one corrupt slot cannot take down the audit listing or the
 * inject gate built on top of it.
 *
 * @param {object} profile - profile with optional `global.dialectic` and
 *   `overlays[key].dialectic` arrays.
 * @param {object|null} [registry=null] - approval map, id -> { state }.
 * @returns {Array<object>} one row per well-formed inference, global first.
 */
export function listInferences(profile, registry = null) {
  const reg = registry && typeof registry === 'object' ? registry : {};
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const copyArray = (v) => (Array.isArray(v) ? [...v] : []);
  const rows = [];

  const push = (scope, inf) => {
    if (inf === null || typeof inf !== 'object') return;
    const citation = citationFor(inf);
    // Own-property lookup only: an id such as "constructor" must never resolve
    // to something inherited from Object.prototype.
    const entry = hasOwn(reg, inf.id) ? reg[inf.id] : undefined;
    const state = entry && typeof entry.state === 'string' ? entry.state : 'pending';
    // Fail-closed: ONLY an explicit `approved` state AND a real citation
    // locator make an atom inject-eligible.
    const inject_eligible = state === 'approved' && citation.has_locator === true;
    rows.push({
      scope,
      id: inf.id,
      kind: inf.kind,
      subject: inf.subject,
      value: inf.value,
      confidence: inf.confidence,
      evidence_count: inf.evidence_count,
      last_confirmed: inf.last_confirmed,
      source_sessions: copyArray(inf.source_sessions),
      source_hosts: copyArray(inf.source_hosts),
      sensitivity: inf.sensitivity,
      citation,
      approval_state: state,
      inject_eligible,
    });
  };

  if (profile && profile.global && Array.isArray(profile.global.dialectic)) {
    for (const inf of profile.global.dialectic) push('global', inf);
  }
  if (profile && profile.overlays && typeof profile.overlays === 'object') {
    for (const [key, ov] of Object.entries(profile.overlays)) {
      if (ov && Array.isArray(ov.dialectic)) {
        for (const inf of ov.dialectic) push(`overlay:${key}`, inf);
      }
    }
  }
  return rows;
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Approval registry — the human-in-the-loop gate (design S3). A small JSON store
|
|
145
|
+
// `approvals.json` (sibling of the profile) mapping inference id -> { state, ts,
|
|
146
|
+
// note? }. SEPARATE from the profile atom on purpose: the approval decision is a
|
|
147
|
+
// user control surface, never derived data that the CRDT fold could resurrect or
|
|
148
|
+
// a foreign session could forge into the global merge. FAIL-CLOSED: a missing
|
|
149
|
+
// file / unknown id reads as `pending` (NOT inject-eligible).
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
/** File name of the approval registry, joined onto the profile directory. */
const APPROVALS_FILE = 'approvals.json';
/** The only approval states the registry accepts; frozen so it cannot drift at runtime. */
const APPROVAL_STATES = Object.freeze(['pending', 'approved', 'rejected']);
/** Read-size cap (4 MiB): the registry is one tiny record per atom; a file past this is corrupt. */
const MAX_APPROVALS_BYTES = 4 << 20;
|
|
156
|
+
|
|
157
|
+
/**
 * Absolute path of the approval registry file: APPROVALS_FILE inside the
 * directory returned by `profileDir()`.
 *
 * @returns {string}
 */
export function approvalsPath() {
  const dir = profileDir();
  return join(dir, APPROVALS_FILE);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Create `dir` (and any missing parents) unless something already exists at
 * that path. An existing path is left untouched, whatever its type.
 *
 * @param {string} dir
 */
function ensureDir(dir) {
  if (existsSync(dir)) return;
  mkdirSync(dir, { recursive: true });
}
|
|
164
|
+
|
|
165
|
+
/**
 * True iff `p` exists AND is a symbolic link. Any lstat failure (missing path,
 * permission error, ...) reads as "not a symlink".
 *
 * @param {string} p
 * @returns {boolean}
 */
function isSymlink(p) {
  let stats;
  try {
    stats = lstatSync(p);
  } catch {
    return false;
  }
  return stats.isSymbolicLink();
}
|
|
173
|
+
|
|
174
|
+
/**
 * readApprovals() -> { ok, registry, code? }. Never throws.
 *
 * - missing file                      -> { ok: true, registry: {} }
 * - symlinked target                  -> ok:false, EAPPROVALS_SYMLINK
 * - regular file over the size cap    -> ok:false, EAPPROVALS_TOOBIG
 * - read failure                      -> ok:false, err.code or EAPPROVALS_READ
 * - unparseable JSON                  -> ok:false, EAPPROVALS_PARSE
 * - JSON that is not a plain object   -> { ok: true, registry: {} }
 *
 * Every failure carries an EMPTY registry: a tampered or corrupt store degrades
 * to "nothing approved", never to "everything approved".
 */
export function readApprovals() {
  const failClosed = (code) => ({ ok: false, code, registry: {} });
  const file = approvalsPath();

  if (isSymlink(file)) return failClosed('EAPPROVALS_SYMLINK');
  if (!existsSync(file)) return { ok: true, registry: {} };

  // Size gate. If the stat itself fails we fall through and let the read
  // below surface the error.
  let stat = null;
  try {
    stat = lstatSync(file);
  } catch {
    stat = null;
  }
  if (stat !== null && stat.isFile() && stat.size > MAX_APPROVALS_BYTES) {
    return failClosed('EAPPROVALS_TOOBIG');
  }

  let text;
  try {
    text = readFileSync(file, 'utf8');
  } catch (err) {
    return failClosed(err.code || 'EAPPROVALS_READ');
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Corrupt store -> fail closed (empty = nothing approved), never throw.
    return failClosed('EAPPROVALS_PARSE');
  }
  const isPlainMap = Boolean(parsed) && typeof parsed === 'object' && !Array.isArray(parsed);
  return { ok: true, registry: isPlainMap ? parsed : {} };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Atomic write of the approval registry: serialize -> temp file in the same
 * directory (exclusive create, no-follow, mode 0600) -> fsync -> rename over
 * the target. The target is checked for being a symlink both before the write
 * and again just before the rename.
 *
 * Never throws: every failure, including a registry that is not a plain object
 * or cannot be serialized, is reported as a result object. Nothing is written
 * to disk for an invalid registry.
 *
 * @param {object} registry - approval map, id -> { state, ts, note? }.
 * @returns {{ok: true} | {ok: false, code: string, message: string}}
 */
function writeApprovals(registry) {
  const target = approvalsPath();
  if (isSymlink(target)) {
    return { ok: false, code: 'EAPPROVALS_SYMLINK', message: `refusing symlinked target: ${target}` };
  }
  // JSON.stringify(undefined) yields undefined, which would be written out as
  // the literal text "undefined" and corrupt the store — reject up front.
  if (registry === null || typeof registry !== 'object' || Array.isArray(registry)) {
    return { ok: false, code: 'EAPPROVALS_BADREGISTRY', message: 'approval registry must be a plain object' };
  }
  let contents;
  try {
    contents = `${JSON.stringify(registry, null, 2)}\n`;
  } catch (err) {
    // Cycles / BigInt etc. — report, do not throw.
    return { ok: false, code: 'EAPPROVALS_SERIALIZE', message: err.message };
  }
  try {
    ensureDir(profileDir());
  } catch (err) {
    return { ok: false, code: err.code || 'EMKDIR', message: err.message };
  }

  const tmp = `${target}.tmp.${process.pid}.${randomBytes(4).toString('hex')}`;
  let fd;
  try {
    fd = openSync(
      tmp,
      fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_NOFOLLOW,
      0o600,
    );
    writeFileSync(fd, contents, 'utf8');
    fsyncSync(fd);
    closeSync(fd);
    fd = null;
    // Re-check right before the rename: the target may have been swapped for a
    // symlink while we were writing the temp file.
    if (isSymlink(target)) {
      try { unlinkSync(tmp); } catch {}
      return { ok: false, code: 'EAPPROVALS_SYMLINK', message: `target became a symlink: ${target}` };
    }
    renameSync(tmp, target);
    return { ok: true };
  } catch (err) {
    // Best-effort cleanup: release the descriptor and remove the temp file.
    if (fd != null) { try { closeSync(fd); } catch {} }
    try { unlinkSync(tmp); } catch {}
    return { ok: false, code: err.code || 'EAPPROVALS_WRITE', message: err.message };
  }
}
|
|
245
|
+
|
|
246
|
+
/**
 * setApprovalState(registry, id, state, opts?) -> new registry (PURE; does not
 * mutate the input).
 *
 * The record is stored as an OWN data property via Object.defineProperty, so an
 * id that collides with a special key (notably `__proto__`) is recorded like any
 * other id instead of silently rewiring the result's prototype.
 *
 * @param {object|null} registry - current approval map; non-objects are
 *   treated as empty.
 * @param {string} id - inference id; must stringify to a non-empty string.
 * @param {'pending'|'approved'|'rejected'} state
 * @param {{ts?: string, note?: string}} [opts] - `ts` overrides the timestamp
 *   (default: current time as ISO-8601); a non-empty `note` is stored on the
 *   record.
 * @returns {object} shallow copy of `registry` with `id` set to the new record.
 * @throws {TypeError} on a blank id or an unknown state — an invalid approval
 *   must never silently no-op.
 */
export function setApprovalState(registry, id, state, opts = {}) {
  const key = String(id || '');
  if (!key) throw new TypeError('setApprovalState: id must be a non-empty string');
  if (!APPROVAL_STATES.includes(state)) {
    throw new TypeError(`setApprovalState: state must be one of ${APPROVAL_STATES.join('|')} (got ${JSON.stringify(state)})`);
  }
  // The `= {}` default only covers `undefined`; tolerate an explicit null too.
  const options = opts !== null && typeof opts === 'object' ? opts : {};
  const next = { ...(registry && typeof registry === 'object' ? registry : {}) };
  const entry = {
    state,
    ts: typeof options.ts === 'string' && options.ts ? options.ts : new Date().toISOString(),
  };
  if (typeof options.note === 'string' && options.note) entry.note = options.note;
  Object.defineProperty(next, key, {
    value: entry,
    enumerable: true,
    writable: true,
    configurable: true,
  });
  return next;
}
|
|
264
|
+
|
|
265
|
+
/**
 * injectEligibleIds(profile, registry) -> Set<string>.
 *
 * Collects the `id` of every row from `listInferences(profile, registry)` whose
 * `inject_eligible` flag is truthy. The eligibility rule itself lives in
 * `listInferences`; this function only projects it into a Set so consumers do
 * not re-derive the gate.
 *
 * @param {object} profile
 * @param {object|null} [registry=null] - approval map; omitted means nothing
 *   is eligible.
 * @returns {Set<string>}
 */
export function injectEligibleIds(profile, registry = null) {
  const eligible = listInferences(profile, registry)
    .filter((row) => row.inject_eligible)
    .map((row) => row.id);
  return new Set(eligible);
}
|
|
278
|
+
|
|
279
|
+
/**
 * setApprovalAndWrite(id, state, opts?) — read -> set -> write the approval
 * registry inside `withProfileLock`. Resolves to
 * { ok, id, state } on success or { ok: false, code, message } on failure.
 *
 * Guards, in order:
 *   - profile unreadable                         -> the read's code, or EREAD
 *   - no inference with this id in the profile   -> ENOATOM
 *   - `approved` for an atom without a locator   -> ENOCITATION (cite-or-drop)
 *   - setApprovalState rejects the id/state      -> EBADSTATE
 *   - registry write fails                       -> the write's code/message
 *
 * `opts.lockPath` is split off; the remaining options are handed BOTH to
 * `setApprovalState` (ts / note) and, together with `lockPath`, to
 * `withProfileLock`.
 */
export function setApprovalAndWrite(id, state, opts = {}) {
  const { lockPath, ...rest } = opts;
  const fail = (code, message) => ({ ok: false, code, message });

  const task = async () => {
    const profileRead = readProfile();
    if (!profileRead.ok) return fail(profileRead.code || 'EREAD', profileRead.message);

    // The atom must exist in the CURRENT profile — no approving forged ids.
    const atom = listInferences(profileRead.profile).find((r) => r.id === String(id));
    if (!atom) return fail('ENOATOM', `no inference with id ${JSON.stringify(String(id))}`);

    const approving = state === 'approved';
    if (approving && !atom.citation.has_locator) {
      return fail('ENOCITATION', 'cannot approve a citation-less atom (cite-or-drop)');
    }

    const { registry } = readApprovals();
    let updated;
    try {
      updated = setApprovalState(registry, id, state, rest);
    } catch (err) {
      return fail('EBADSTATE', err.message);
    }

    const written = writeApprovals(updated);
    if (!written.ok) return fail(written.code, written.message);
    return { ok: true, id: String(id), state };
  };

  return withProfileLock(task, { lockPath, ...rest });
}
|
|
312
|
+
|
|
313
|
+
/**
 * Convenience wrapper for the human "approve" action: delegates to
 * `setApprovalAndWrite` with state `approved`.
 */
export function approveAndWrite(id, opts = {}) {
  return setApprovalAndWrite(id, 'approved', opts);
}
|
|
315
|
+
/**
 * Convenience wrapper for the human "reject" action: delegates to
 * `setApprovalAndWrite` with state `rejected`.
 */
export function rejectAndWrite(id, opts = {}) {
  return setApprovalAndWrite(id, 'rejected', opts);
}
|
|
316
|
+
|
|
317
|
+
/**
 * Purge approval-registry entries for the given removed ids (PURE on the passed
 * registry). Returns { registry, removed }.
 *
 * The surviving entries are rebuilt with Object.fromEntries, which defines each
 * key as an own data property. Plain assignment would treat a `__proto__` key
 * (which JSON.parse can produce as an own property) as a prototype change and
 * silently lose that record.
 *
 * @param {object|null} registry - approval map; non-objects are treated as empty.
 * @param {Set<string>|Iterable<string>|null} removedIds - ids to drop.
 * @returns {{registry: object, removed: number}} new map plus the number of
 *   entries dropped.
 */
function purgeApprovals(registry, removedIds) {
  const ids = removedIds instanceof Set ? removedIds : new Set(removedIds || []);
  const source = registry && typeof registry === 'object' ? registry : {};
  const entries = Object.entries(source);
  const kept = entries.filter(([id]) => !ids.has(id));
  return { registry: Object.fromEntries(kept), removed: entries.length - kept.length };
}
|
|
331
|
+
|
|
332
|
+
/** Hard cap on a user-supplied RegExp source (HIGH-2 ReDoS bound). */
const MAX_PATTERN_SOURCE = 200;

/**
 * Screen a RegExp whose SOURCE is structurally prone to catastrophic
 * backtracking. Two checks, in order:
 *   1. the source is at most MAX_PATTERN_SOURCE characters long;
 *   2. the source contains no nested quantifier — a group whose body ends in a
 *      quantifier and is itself immediately quantified, e.g. (a+)+, (a*)*,
 *      (a+)*, (.{1,9})+ .
 *
 * This is a conservative denylist: it does not catch every ReDoS shape.
 *
 * @param {RegExp} re
 * @returns {{ok: true, re: RegExp} | {ok: false, code: string, message: string}}
 */
function sanitizeRegExp(re) {
  const source = re.source || '';
  const reject = (code, message) => ({ ok: false, code, message });

  if (source.length > MAX_PATTERN_SOURCE) {
    return reject('EPATTERN_TOO_LONG', `regex source exceeds ${MAX_PATTERN_SOURCE} chars`);
  }

  const nestedQuantifier = /\([^)]*[+*}][)?]*\)\s*[+*{]/;
  return nestedQuantifier.test(source)
    ? reject('EPATTERN_UNSAFE', 'regex has a nested quantifier (ReDoS-prone)')
    : { ok: true, re };
}
|
|
361
|
+
|
|
362
|
+
/**
 * Build a predicate `(id) => boolean` from an id/pattern (HIGH-2 hardened):
 *
 *  - RegExp: sanitized first via `sanitizeRegExp`. An unsafe source yields a
 *    predicate that throws an Error carrying the sanitizer's `code` on first
 *    call. A safe RegExp is COPIED and its `lastIndex` is reset before every
 *    test: with the `g` or `y` flag `.test()` is stateful, so reusing the
 *    caller's object across ids would skip matches (atoms that should be
 *    forgotten would survive) and would mutate the caller's RegExp.
 *  - null / undefined: never matches (rather than being stringified into the
 *    needle "null" / "undefined").
 *  - string: EXACT-id OR explicit-segment match — `id === needle`,
 *    `id.startsWith(needle + '::')` (kind prefix), or
 *    `id.endsWith('::' + needle)` (subject segment). NOT a bare substring:
 *    `forget('e')` must not hit every id containing an "e". An empty needle
 *    never matches.
 *
 * @param {RegExp|string|null|undefined} idOrPattern
 * @returns {(id: *) => boolean}
 */
function matcherFor(idOrPattern) {
  if (idOrPattern instanceof RegExp) {
    const s = sanitizeRegExp(idOrPattern);
    if (!s.ok) {
      const err = new Error(s.message);
      err.code = s.code;
      return () => { throw err; };
    }
    // Private copy: keeps the flags (so sticky/global semantics still anchor
    // at index 0) without sharing lastIndex state with the caller.
    const re = new RegExp(s.re.source, s.re.flags);
    return (id) => {
      re.lastIndex = 0;
      return re.test(String(id));
    };
  }
  if (idOrPattern == null) return () => false;
  const needle = String(idOrPattern);
  if (!needle) return () => false;
  return (id) => {
    const s = String(id);
    return s === needle || s.startsWith(`${needle}::`) || s.endsWith(`::${needle}`);
  };
}
|
|
396
|
+
|
|
397
|
+
/**
 * Pre-validate an id/pattern WITHOUT running it against any data, so an unsafe
 * RegExp can be rejected before the profile lock is taken. Anything that is not
 * a RegExp is accepted as-is; a RegExp is accepted iff `sanitizeRegExp` accepts
 * it.
 *
 * @param {RegExp|string} idOrPattern
 * @returns {{ok: true} | {ok: false, code: string, message: string}}
 */
export function validatePattern(idOrPattern) {
  if (!(idOrPattern instanceof RegExp)) return { ok: true };
  const { ok, code, message } = sanitizeRegExp(idOrPattern);
  return ok ? { ok: true } : { ok: false, code, message };
}
|
|
410
|
+
|
|
411
|
+
/**
 * Purge the egress-log entries for the given removed inference ids by
 * delegating to `egress.purgeEgress`, and return whatever it returns.
 *
 * Never throws: if `purgeEgress` throws, the error is swallowed on purpose and
 * 0 is returned — a failed egress rewrite must not abort the inference removal
 * that triggered it.
 *
 * @param {Array<string>} removedIds
 * @returns {number} value returned by `purgeEgress`, or 0 when it throws.
 */
function purgeEgressEntries(removedIds) {
  let purged = 0;
  try {
    purged = purgeEgress(removedIds);
  } catch {
    // Egress purge is best-effort; the inference removal is the primary contract.
    purged = 0;
  }
  return purged;
}
|
|
431
|
+
|
|
432
|
+
/**
 * forget(profile, idOrPattern) -> { profile, removed, egressRemoved }.
 *
 * Removes every inference whose id matches `idOrPattern` (see `matcherFor`)
 * from the global dialectic AND from every overlay's dialectic, working on a
 * JSON deep copy so the caller's `profile` object is never mutated.
 *
 * NOT side-effect free: the removed ids are handed to `purgeEgressEntries`,
 * which purges the matching egress-log entries. `egressRemoved` is that call's
 * result.
 *
 * Malformed dialectic entries (null / non-object — a JSON round-trip turns an
 * `undefined` array slot into `null`) are left in place untouched rather than
 * crashing the whole forget.
 *
 * @param {object|null} profile
 * @param {RegExp|string} idOrPattern
 * @returns {{profile: object, removed: Array<object>, egressRemoved: number}}
 */
export function forget(profile, idOrPattern) {
  const match = matcherFor(idOrPattern);
  const removed = [];
  const next = JSON.parse(JSON.stringify(profile || {}));

  // Filter callback shared by the global and overlay passes; records what it drops.
  const keep = (inf) => {
    if (inf === null || typeof inf !== 'object') return true;
    if (!match(inf.id)) return true;
    removed.push(inf);
    return false;
  };

  if (next.global && Array.isArray(next.global.dialectic)) {
    next.global.dialectic = next.global.dialectic.filter(keep);
  }
  if (next.overlays && typeof next.overlays === 'object') {
    for (const ov of Object.values(next.overlays)) {
      if (ov && Array.isArray(ov.dialectic)) {
        ov.dialectic = ov.dialectic.filter(keep);
      }
    }
  }

  const egressRemoved = purgeEgressEntries(removed.map((r) => r.id));
  return { profile: next, removed, egressRemoved };
}
|
|
463
|
+
|
|
464
|
+
/**
 * forgetAndWrite(idOrPattern, opts?) — validate -> read -> forget -> write,
 * with the read/forget/write part run inside `withProfileLock`. Resolves to
 *   { ok: true, removed, egressRemoved, approvalsRemoved }
 * or
 *   { ok: false, code, message, removed: [] }.
 *
 * The pattern is validated BEFORE the lock is requested, so an unsafe RegExp is
 * rejected without ever being run while the lock is held.
 *
 * After the profile write succeeds, the approval records of the removed atoms
 * are pruned (best-effort) so a re-derived id cannot inherit a stale `approved`.
 * `approvalsRemoved` is non-zero only when that registry rewrite succeeded.
 *
 * NOTE(review): `forget` triggers the egress purge before the profile write is
 * known to have succeeded; on a failed write the egress entries may already be
 * gone while the inference remains — confirm this ordering is intended.
 */
export function forgetAndWrite(idOrPattern, opts = {}) {
  const { lockPath, ...lockOpts } = opts;
  const failure = (code, message) => ({ ok: false, code, message, removed: [] });

  // HIGH-2: gate the pattern here so the lock hold stays trivial.
  const verdict = validatePattern(idOrPattern);
  if (!verdict.ok) return Promise.resolve(failure(verdict.code, verdict.message));

  const task = async () => {
    const read = readProfile();
    if (!read.ok) return failure(read.code || 'EREAD', read.message);

    const { profile: next, removed, egressRemoved } = forget(read.profile, idOrPattern);
    const written = writeProfile(next);
    if (!written.ok) return failure(written.code, written.message);

    // Right-to-be-forgotten completeness. Best-effort: a failed registry
    // rewrite must not undo the already-committed inference removal.
    let approvalsRemoved = 0;
    try {
      const { registry } = readApprovals();
      const pruned = purgeApprovals(registry, removed.map((atom) => atom.id));
      if (pruned.removed > 0 && writeApprovals(pruned.registry).ok) {
        approvalsRemoved = pruned.removed;
      }
    } catch {
      // best-effort — inference removal is the primary contract.
    }
    return { ok: true, removed, egressRemoved, approvalsRemoved };
  };

  return withProfileLock(task, { lockPath, ...lockOpts });
}
|
|
503
|
+
|
|
504
|
+
// ---------------------------------------------------------------------------
|
|
505
|
+
// Voice exemplars (V4) — VISIBLE, FORGETTABLE, disclosure-logged.
|
|
506
|
+
//
|
|
507
|
+
// A voice exemplar is a short raw snippet of the USER's OWN writing, stored
|
|
508
|
+
// locally + transiently by the V1 store (`exemplar-store.js`) and few-shot into
|
|
509
|
+
// prompts so the agent can draft in the user's voice. This is a FEATURE, not a
|
|
510
|
+
// research proof: NO stylometry / AUC / Gate-B here. This slice extends the same
|
|
511
|
+
// audit + right-to-be-forgotten UX the inference tier already has (listVoice-
|
|
512
|
+
// Exemplars mirrors listInferences; forgetVoiceExemplars mirrors forgetAndWrite)
|
|
513
|
+
// to the exemplar tier so a user can SEE every writing sample being used and
|
|
514
|
+
// PURGE any of them. Zero-LLM, zero-network — same as the rest of this module.
|
|
515
|
+
// ---------------------------------------------------------------------------
|
|
516
|
+
|
|
517
|
+
/** Human-readable label so an auditor knows EXACTLY what these rows are. */
const EXEMPLAR_LABEL = 'writing sample used to match your voice';
/** Preview cap (80 UTF-16 code units, ellipsis included) — a glanceable excerpt, never the full snippet. */
const EXEMPLAR_PREVIEW_MAX = 80;

/**
 * scrubbedPreview(text) -> short excerpt of an exemplar's raw `text` for an
 * audit list.
 *
 * The text is passed through capture.js `citedSpan` and then through
 * `scrubHomedirPaths` (re-applied here as defense-in-depth at the boundary,
 * MED-2). A result longer than EXEMPLAR_PREVIEW_MAX is cut to at most
 * EXEMPLAR_PREVIEW_MAX - 1 code units, trailing whitespace is trimmed, and an
 * ellipsis is appended.
 *
 * The cut never lands inside a surrogate pair: if it would leave a lone high
 * surrogate (e.g. half an emoji) at the end, that code unit is dropped so the
 * preview stays well-formed text.
 *
 * @param {string|null|undefined} text - null/undefined is treated as ''.
 * @returns {string}
 */
function scrubbedPreview(text) {
  const scrubbed = scrubHomedirPaths(citedSpan(text == null ? '' : text));
  if (scrubbed.length <= EXEMPLAR_PREVIEW_MAX) return scrubbed;

  let head = scrubbed.slice(0, EXEMPLAR_PREVIEW_MAX - 1);
  const lastUnit = head.charCodeAt(head.length - 1);
  // 0xD800-0xDBFF is the high-surrogate range; one at the very end means the
  // slice split a pair.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  return `${head.trimEnd()}…`;
}
|
|
538
|
+
|
|
539
|
+
/**
 * Mask the username component of any home-directory path embedded in `s`. Pure.
 *
 * Two shapes are recognised:
 *   - POSIX-style:   `/Users/<name>` and `/home/<name>`
 *   - Windows-style: `<drive>:\Users\<name>`
 * The path prefix is preserved and only the name segment (everything up to the
 * next separator or whitespace) is replaced with the literal `<user>`.
 *
 * @param {*} s value to scrub; coerced with String().
 * @returns {string} the coerced input with username segments masked.
 */
function scrubHomedirPaths(s) {
  const maskedSegment = '$1<user>';
  const posixHome = /(\/(?:Users|home)\/)[^/\s]+/g;
  const windowsHome = /([A-Za-z]:\\Users\\)[^\\/\s]+/g;

  const input = String(s);
  const posixMasked = input.replace(posixHome, maskedSegment);
  return posixMasked.replace(windowsHome, maskedSegment);
}
|
|
545
|
+
|
|
546
|
+
/**
 * listVoiceExemplars(opts?) -> Array<{ id, register, source, ts, preview, label }>.
 *
 * Audit view over the stored voice exemplars. Records are fetched with
 * `listExemplars(opts)` and each object record is projected onto a row that
 * carries its id/register/source/ts, a `preview` built by `scrubbedPreview`
 * from the record's `text`, and the fixed EXEMPLAR_LABEL. The raw `text` is
 * never copied onto the row.
 *
 * Never throws because of the store: if `listExemplars` throws or returns a
 * non-array, the result is an empty list. Entries that are not objects are
 * skipped.
 *
 * @param {object} [opts] forwarded untouched to `listExemplars`.
 * @returns {Array<object>} one audit row per object record.
 */
export function listVoiceExemplars(opts = {}) {
  let stored = null;
  try {
    stored = listExemplars(opts);
  } catch {
    // Store unavailable — `stored` stays null and falls through to [].
  }
  if (!Array.isArray(stored)) return [];

  return stored
    .filter((entry) => entry !== null && typeof entry === 'object')
    .map(({ id, register, source, ts, text }) => ({
      id,
      register,
      source,
      ts,
      preview: scrubbedPreview(text),
      label: EXEMPLAR_LABEL,
    }));
}
|
|
581
|
+
|
|
582
|
+
/**
 * forgetVoiceExemplars(idOrPattern, opts?) ->
 *   { ok, removed, removedIds, egressRemoved, code?, message? }
 * (delivered through withProfileLock; the locked callback is async).
 *
 * The right-to-be-forgotten path for voice exemplars. Runs under the GLOBAL
 * profile lock (same discipline as forgetAndWrite — the exemplar store shares
 * the profile dir, and one lock serializes both) so a concurrent capture/serve
 * can't race the purge. Delegates the actual removal to V1's store:
 *   - `null` / `undefined` / `'*'` / `'all'` -> clearExemplars(opts) (forget ALL)
 *   - any other id or pattern               -> forgetExemplars(idOrPattern, opts)
 * Note that an empty string is NOT "forget all": it is passed to
 * forgetExemplars like any other pattern.
 *
 * Then expunges the egress trail: each forgotten exemplar was disclosed (if at
 * all) under the field produced by `exemplarField(id)`, so `purgeEgress` is
 * handed exactly those field strings. The egress purge is best-effort: a
 * failed egress rewrite must not abort the already-committed exemplar removal.
 *
 * EGRESS-PURGE PRECISION: V1's forgetExemplars/clearExemplars report `removed`
 * as a COUNT, not the removed ids, so the ids are SNAPSHOTTED before forgetting
 * (under the same lock, via `exemplarMatcher`) and egress is purged for exactly
 * that snapshot.
 *
 * Result shape is the same on every path: failures carry `removed: 0`,
 * `removedIds: []` and `egressRemoved: 0` alongside `code` / `message`.
 *
 * @param {string|RegExp|null|undefined} idOrPattern what to forget.
 * @param {object} [opts] `lockPath` plus options forwarded to the store and lock.
 */
export function forgetVoiceExemplars(idOrPattern, opts = {}) {
  const { lockPath, ...rest } = opts;
  const forgetAll = idOrPattern == null || idOrPattern === '*' || idOrPattern === 'all';

  // One place that builds a failure result, so every failure has the same keys.
  const failure = (code, message) => ({
    ok: false,
    code: code || 'EEXEMPLAR_FORGET',
    message,
    removed: 0,
    removedIds: [],
    egressRemoved: 0,
  });

  return withProfileLock(async () => {
    // Snapshot the ids that this forget WILL remove, BEFORE removing them —
    // V1 reports a count, not ids, so this is the only way to know which
    // egress fields to expunge.
    let snapshotIds = [];
    try {
      const current = listExemplars(rest);
      if (Array.isArray(current)) {
        const match = exemplarMatcher(idOrPattern, forgetAll);
        snapshotIds = current.filter((ex) => ex && match(ex)).map((ex) => ex.id);
      }
    } catch {
      snapshotIds = []; // snapshot is best-effort; the removal is primary.
    }

    let res;
    try {
      res = forgetAll ? clearExemplars(rest) : forgetExemplars(idOrPattern, rest);
    } catch (err) {
      // `err` may be any thrown value (including null); never dereference blindly.
      const isObj = err !== null && typeof err === 'object';
      return failure(isObj ? err.code : undefined, isObj ? err.message : String(err));
    }
    if (!res || res.ok !== true) {
      return failure(res && res.code, res && res.message);
    }

    let egressRemoved = 0;
    if (snapshotIds.length > 0) {
      try {
        egressRemoved = purgeEgress(snapshotIds.map((id) => exemplarField(id)));
      } catch {
        egressRemoved = 0; // best-effort — exemplar removal is the primary contract.
      }
    }
    // Echo V1's `removed` (a count) back unchanged; also surface the concrete
    // ids expunged from the egress trail so a caller can report precisely.
    return { ok: true, removed: res.removed, removedIds: snapshotIds, egressRemoved };
  }, { lockPath, ...rest });
}
|
|
650
|
+
|
|
651
|
+
/**
 * Build a predicate `(exemplar) => boolean` used ONLY to snapshot the
 * to-be-removed ids for the egress purge:
 *   - forgetAll -> every record;
 *   - RegExp    -> tested against id OR text;
 *   - string    -> exact id, else case-insensitive substring over id+text;
 *   - empty / null pattern (without forgetAll) -> no record.
 * Intended to stay in lockstep with exemplar-store.js forgetExemplars.
 * NOTE(review): that store is not visible from here — confirm the semantics
 * still agree whenever either side changes.
 *
 * RegExp handling: a pattern carrying the `g` or `y` flag makes `test()`
 * resume from `lastIndex`, so reusing it across records would skip matches.
 * The pattern is therefore cloned (the caller's RegExp is never mutated) and
 * rewound to index 0 before every test.
 *
 * @param {string|RegExp|null|undefined} idOrPattern pattern to match with.
 * @param {boolean} forgetAll when true, the pattern is ignored.
 * @returns {(r: {id: *, text: *}) => boolean}
 */
function exemplarMatcher(idOrPattern, forgetAll) {
  if (forgetAll) return () => true;
  if (idOrPattern instanceof RegExp) {
    const re = new RegExp(idOrPattern.source, idOrPattern.flags);
    const hits = (value) => {
      re.lastIndex = 0; // stateless test regardless of g / y flags
      return re.test(String(value));
    };
    return (r) => hits(r.id) || hits(r.text);
  }
  const s = String(idOrPattern == null ? '' : idOrPattern);
  if (!s) return () => false;
  const lower = s.toLowerCase();
  return (r) =>
    r.id === s ||
    String(r.id).toLowerCase().includes(lower) ||
    String(r.text).toLowerCase().includes(lower);
}
|