@ijfw/memory-server 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,10 @@
8
8
  // Mirrors src/compute/fts5.js patterns:
9
9
  // - WAL journal mode for concurrent readers
10
10
  // - PRAGMA busy_timeout = 5000 + BEGIN IMMEDIATE for racing writers
11
- // - PRAGMA quick_check post-write enforces integrity
11
+ // - PRAGMA quick_check corruption tripwire on a throttled cadence
12
+ // (first write per db file per process, then every Nth write or
13
+ // after a time floor -- never on every single-row insert, because
14
+ // quick_check is a full-database scan)
12
15
  //
13
16
  // Security model (D-PILLAR-SPEC section 12, real fix-wave C3):
14
17
  // indexEntry runs `redactSecrets()` over `entry.body` AND `entry.source`
@@ -182,9 +185,52 @@ function readUserVersion(db) {
182
185
  return Number(row.user_version ?? row.USER_VERSION ?? 0);
183
186
  }
184
187
 
185
- // Insert one row into memory_entries inside a BEGIN IMMEDIATE transaction,
186
- // then run PRAGMA quick_check on the whole db. Throws MemoryIntegrityError
187
- // on anything other than 'ok'. Returns { id } of the inserted row.
188
+ // Corruption tripwire cadence. PRAGMA quick_check walks every page of the
189
+ // database, so running it inside EVERY single-row insert transaction is
190
+ // O(db size) per write while the RESERVED lock is held -- a quadratic
191
+ // total-cost cliff as the warm tier grows. The tripwire is kept, but on a
192
+ // throttle: the FIRST write per db file per process always checks (so a
193
+ // reopen-after-corruption is caught on the next write), then every Nth
194
+ // write or once the time floor elapses, whichever fires first. State is
195
+ // keyed by filename, NOT by handle, because server.js re-opens the db per
196
+ // store -- a per-open or per-handle check would put the full scan right
197
+ // back on the hot path.
198
// Cadence knobs for the PRAGMA quick_check corruption tripwire.
const QUICK_CHECK_EVERY_N = 100; // count trigger: every Nth write per db file
const QUICK_CHECK_MIN_INTERVAL_MS = 5 * 60 * 1000; // time floor: 5 minutes
const __quickCheckState = new Map(); // filename -> { writes, lastTs }

/**
 * Decide whether the write currently in progress should run PRAGMA quick_check.
 *
 * Every call counts as one write for `filename`. The answer is `true` when
 * any of these hold:
 *   - this is the first write recorded for the file in this process,
 *   - the write count for the file is a multiple of QUICK_CHECK_EVERY_N,
 *   - at least QUICK_CHECK_MIN_INTERVAL_MS elapsed since the last check,
 *   - the clock moved backwards relative to the last check.
 *
 * State is keyed by filename (not by handle); a falsy filename shares the
 * single ':unknown:' bucket.
 *
 * @param {string} [filename] - db file path used as the cadence key.
 * @param {number} [now=Date.now()] - current time in unix ms (injectable for tests).
 * @returns {boolean} true when the caller should run the integrity scan now.
 */
function shouldQuickCheck(filename, now = Date.now()) {
  const key = filename || ':unknown:';
  // A non-finite timestamp would be stored in lastTs and make every later
  // elapsed-time comparison false, silently disabling the time floor.
  const ts = Number.isFinite(now) ? now : Date.now();

  let st = __quickCheckState.get(key);
  if (!st) {
    st = { writes: 0, lastTs: 0 };
    __quickCheckState.set(key, st);
  }
  st.writes++;

  const elapsed = ts - st.lastTs;
  // Date.now() is wall-clock time and can step backwards (NTP correction,
  // VM resume, manual change). A negative elapsed would otherwise suppress
  // the time floor until the clock caught up, so treat it as "floor elapsed":
  // check now and re-anchor lastTs to the new clock.
  const clockWentBack = elapsed < 0;

  if (
    st.writes === 1 ||
    st.writes % QUICK_CHECK_EVERY_N === 0 ||
    elapsed >= QUICK_CHECK_MIN_INTERVAL_MS ||
    clockWentBack
  ) {
    st.lastTs = ts;
    return true;
  }
  return false;
}

// Test hook -- cadence logic is invisible from outside (it only changes
// WHEN the scan runs), so tests assert on it directly. `reset` drops all
// per-file cadence state.
export const __quickCheck = {
  shouldQuickCheck,
  QUICK_CHECK_EVERY_N,
  QUICK_CHECK_MIN_INTERVAL_MS,
  reset: () => __quickCheckState.clear(),
};
229
+
230
+ // Insert one row into memory_entries inside a BEGIN IMMEDIATE transaction.
231
+ // On the throttled cadence above, runs PRAGMA quick_check inside the same
232
+ // transaction and throws MemoryIntegrityError on anything other than 'ok'
233
+ // (rolling the insert back -- fail-safe). Returns { id } of the inserted row.
188
234
  //
189
235
  // Caller passes { body, source?, session_id? }. created_at is set here
190
236
  // (unix ms) so callers don't have to remember the convention.
@@ -224,12 +270,14 @@ export function indexEntry(db, entry) {
224
270
  inserted = {
225
271
  id: info && info.lastInsertRowid != null ? Number(info.lastInsertRowid) : null,
226
272
  };
227
- const qc = db.prepare('PRAGMA quick_check').get();
228
- const status = qc && (qc.quick_check ?? qc.QUICK_CHECK);
229
- if (status !== 'ok') {
230
- throw new MemoryIntegrityError(
231
- `PRAGMA quick_check failed after insert into memory_entries: ${status || '(no result)'}.`
232
- );
273
+ if (shouldQuickCheck(db.__ijfw_filename)) {
274
+ const qc = db.prepare('PRAGMA quick_check').get();
275
+ const status = qc && (qc.quick_check ?? qc.QUICK_CHECK);
276
+ if (status !== 'ok') {
277
+ throw new MemoryIntegrityError(
278
+ `PRAGMA quick_check failed after insert into memory_entries: ${status || '(no result)'}.`
279
+ );
280
+ }
233
281
  }
234
282
  });
235
283
  tx();
@@ -282,6 +330,13 @@ export function indexEntry(db, entry) {
282
330
  const ts = row.created_at;
283
331
  const sessionId = row.session_id;
284
332
  const body = row.body;
333
+ // Receipt path must belong to the project that owns THIS db, never the
334
+ // process cwd (MCP hosts commonly spawn servers from $HOME, and openDb
335
+ // supports an explicit projectRoot distinct from cwd). The db lives at
336
+ // <root>/.ijfw/index/memory.db, so dirname(filename) IS the index dir.
337
+ const receiptDir = db.__ijfw_filename
338
+ ? dirname(db.__ijfw_filename)
339
+ : join(process.env.IJFW_PROJECT_DIR || process.cwd(), IJFW_DIR_NAME, INDEX_DIR_NAME);
285
340
  // v1.5.0 audit-LOW-memory-#14: dead-letter receipt for auto-index failures.
286
341
  // Fire-and-forget was already swallowed silently; now we append an
287
342
  // append-only JSONL receipt so silent indexer breakage is detectable in
@@ -293,10 +348,10 @@ export function indexEntry(db, entry) {
293
348
  // Lazy import; node:fs/promises is always available.
294
349
  import('node:fs/promises').then(({ appendFile, mkdir }) => {
295
350
  try {
296
- const indexDir = '.ijfw/index';
351
+ const indexDir = receiptDir;
297
352
  return mkdir(indexDir, { recursive: true })
298
353
  .then(() => appendFile(
299
- `${indexDir}/graph-errors.jsonl`,
354
+ join(indexDir, 'graph-errors.jsonl'),
300
355
  JSON.stringify({
301
356
  ts: new Date().toISOString(),
302
357
  session_id: sessionId || null,
@@ -38,6 +38,12 @@ import { loadMigrations } from './migration-runner.js';
38
38
  // is imported directly so M1 runs synchronously inside the same txn batch.
39
39
  import { indexObsidianRelations } from './obsidian-parser.js';
40
40
  import { autoLink } from './auto-linker.js';
41
+ // Ingest scrub gate (D-PILLAR-SPEC section 12) -- the warm-tier rebuild
42
+ // reads raw markdown from disk, which is NOT guaranteed pre-scrubbed
43
+ // (hand-edited notes, hook-written files, imports never went through
44
+ // handleStore's redaction). autoIndex must apply the same redactSecrets
45
+ // pass as fts5.js#indexEntry or secrets land cleartext in memory.db.
46
+ import { redactSecrets } from '../redactor.js';
41
47
 
42
48
  const MAX_RESULTS = 50;
43
49
  const SNIPPET_HALF = 60;
@@ -259,25 +265,35 @@ function runMemoryMigrationsSync(db, currentVersion, targetVersion) {
259
265
  }
260
266
 
261
267
  function autoIndex(db, files) {
262
- let n = 0;
263
268
  // v1.5.1 R4-H2 — capture the rowid of every inserted entry so the
264
269
  // memory-moat aux indexing (M1 Obsidian relations, M2 auto-link) can run
265
270
  // over the warm-tier rebuild, not just the benchmark harness. The bulk
266
271
  // INSERT stays in one transaction for FTS write performance; M1/M2 run
267
272
  // AFTER commit so a parse/link failure can never abort the rebuild.
273
+ //
274
+ // Rollback safety: ids are collected in a transaction-local array and
275
+ // only published to `inserted` after txfn commits. If the batch rolls
276
+ // back, the rowids it produced no longer exist (and AUTOINCREMENT will
277
+ // reuse them), so running M1/M2 over them would attach links/tags/meta
278
+ // to the WRONG future entries.
268
279
  const inserted = [];
269
280
  const txfn = db.transaction((batch) => {
270
281
  const stmt = db.prepare(
271
282
  'INSERT INTO memory_entries (body, source, session_id, created_at) VALUES (?, ?, ?, ?)'
272
283
  );
284
+ const out = [];
273
285
  for (const item of batch) {
274
286
  const info = stmt.run(item.body, item.source, null, item.created_at);
275
287
  const id = info && info.lastInsertRowid != null ? Number(info.lastInsertRowid) : null;
276
- inserted.push({ id, body: item.body });
277
- n++;
288
+ out.push({ id, body: item.body });
278
289
  }
290
+ return out;
279
291
  });
280
292
 
293
+ // Same ingest scrub gate as fts5.js#indexEntry (IJFW_INGEST_SCRUB=0 is
294
+ // the only escape hatch, local debugging only). Body AND source are
295
+ // scrubbed so the FTS index and downstream M1/M2 only see safe text.
296
+ const scrub = process.env.IJFW_INGEST_SCRUB !== '0';
281
297
  const batch = [];
282
298
  const now = Date.now();
283
299
  for (const f of files) {
@@ -286,10 +302,22 @@ function autoIndex(db, files) {
286
302
  let body;
287
303
  try { body = readFileSync(f.path, 'utf8'); } catch { continue; }
288
304
  if (!body) continue;
289
- batch.push({ body, source: f.relpath || f.path, created_at: now });
305
+ const rawSource = f.relpath || f.path;
306
+ batch.push({
307
+ body: scrub ? redactSecrets(body) : body,
308
+ source: scrub ? redactSecrets(String(rawSource)) : rawSource,
309
+ created_at: now,
310
+ });
290
311
  }
291
312
  if (batch.length === 0) return 0;
292
- try { txfn.immediate(batch); } catch { /* one bad batch should not abort the search */ }
313
+ let n = 0;
314
+ try {
315
+ const committed = txfn.immediate(batch);
316
+ if (Array.isArray(committed)) {
317
+ inserted.push(...committed);
318
+ n = committed.length;
319
+ }
320
+ } catch { /* one bad batch should not abort the search; rollback discards ids */ }
293
321
 
294
322
  // v1.5.1 R4-H2 — M1: Obsidian wikilink/tag/meta indexing into
295
323
  // memory_links/_tags/_meta. Synchronous + idempotent (indexObsidianRelations
@@ -169,7 +169,7 @@ export function propagateStaleMemory(memDb, computeDb, supersededNodeId, options
169
169
  if (namesToFlag.length > 0) {
170
170
  const updateMem = memDb.prepare(
171
171
  `UPDATE memory_entries SET stale_candidate = ? ` +
172
- `WHERE COALESCE(stale_candidate, 0) < ? AND body LIKE ?`
172
+ `WHERE COALESCE(stale_candidate, 0) < ? AND body LIKE ? ESCAPE '\\'`
173
173
  );
174
174
 
175
175
  const txWrap = (typeof memDb.transaction === 'function')
@@ -232,9 +232,11 @@ async function probeGoogle(env, fetchImpl) {
232
232
  if (!key) return null;
233
233
  const { signal, cancel } = makeAbortable();
234
234
  try {
235
+ // Pass the key as a header, not a URL query param, so it never lands in
236
+ // proxy / CDN / firewall access logs (privacy audit finding).
235
237
  const r = await fetchImpl(
236
- `https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(key)}`,
237
- { signal },
238
+ 'https://generativelanguage.googleapis.com/v1beta/models',
239
+ { signal, headers: { 'x-goog-api-key': key } },
238
240
  );
239
241
  if (!r.ok) return null;
240
242
  const json = await r.json();
@@ -42,7 +42,7 @@ function writeJson(rows) {
42
42
  return p;
43
43
  }
44
44
 
45
- // 10 authors × 6 long docs each — comfortably over the floors.
45
+ // 10 authors x 6 long docs each — comfortably over the floors.
46
46
  function tenAuthors() {
47
47
  const rows = [];
48
48
  for (let a = 0; a < 10; a += 1) rows.push(...makeAuthorRows(`u${a}`, 6));
@@ -218,7 +218,7 @@ export async function runGateBProduction(opts = {}) {
218
218
  // budget-guarded cloud transport here: the allowed-set is the closed set of EVERY brief
219
219
  // the pool's own personas + foreigner-pool produce (baseline '' + derived + fewShotOracle
220
220
  // + register-echo) — foreign prose is never a target, only a fingerprint. The budget is
221
- // sized from arms × pool × probes × (pilot + confirmatory) with headroom.
221
+ // sized from arms x pool x probes x (pilot + confirmatory) with headroom.
222
222
  const poolForGuard = [...personas, ...foreigners];
223
223
  const budget = opts.budget || {
224
224
  calls: 0,
@@ -328,7 +328,7 @@ export function buildAllowedSys(personas, cfg = {}) {
328
328
  return sys;
329
329
  }
330
330
 
331
- // Estimate the cloud-call budget: arms × subjects × probes, per spend phase.
331
+ // Estimate the cloud-call budget: arms x subjects x probes, per spend phase.
332
332
  export function estimateCalls({
333
333
  nArms = 4, nSubjects, nProbes,
334
334
  }) {
@@ -225,7 +225,7 @@ export function cohenKappa(raterA = [], raterB = []) {
225
225
 
226
226
  // ---------------------------------------------------------------------------
227
227
  // ECE — Expected Calibration Error on the profile's `confidence` field. Bins
228
- // (confidence, correctness) pairs and measures |avg-confidence accuracy| per
228
+ // (confidence, correctness) pairs and measures |avg-confidence - accuracy| per
229
229
  // bin, weighted by bin mass. A well-calibrated profile that says "0.7 confident"
230
230
  // is right ~70% of the time. This is what makes `confidence` an honest number
231
231
  // instead of decoration.
@@ -52,7 +52,7 @@ export function bonferroniAlpha(familyAlpha, verdictArms) {
52
52
  }
53
53
 
54
54
  // Measured-scale floor: the minimum mean margin that counts as a real effect, expressed
55
- // in the instrument's OWN units = floorK * (betweenMean withinMean) from validateInstrument.
55
+ // in the instrument's OWN units = floorK * (betweenMean - withinMean) from validateInstrument.
56
56
  // This REPLACES the blind absolute constant (the prior attempt's failure class). Frozen
57
57
  // before any cloud spend (floorK is hashed; the derived value is recorded in the run).
58
58
  export function deriveMinMeanMargin(validation, floorK) {
@@ -1,7 +1,7 @@
1
1
  // wrong-target-control.mjs — Gate B v2, Task T5. THE discriminator.
2
2
  //
3
3
  // For each subject P and arm, the margin is:
4
- // m_P = distance(output, NEAREST same-register foreigner) distance(output, OWN test)
4
+ // m_P = distance(output, NEAREST same-register foreigner) - distance(output, OWN test)
5
5
  // m_P > 0 means the styled output landed closer to P's OWN held-out fingerprint than to
6
6
  // the CLOSEST same-register stranger. A generic register-obeyer is ~equidistant from all
7
7
  // same-register targets ⇒ m≈0 ⇒ NULL. Only idiosyncratic voice capture wins.
@@ -118,7 +118,7 @@ export function wrongTargetControl(harnessOut, personas, opts = {}) {
118
118
  }
119
119
  const ownLoss = margins.map((m) => (m < 0 ? 1 : 0));
120
120
  const ci = bootstrapCI(margins, { iters: cfg.bootstrapIters, alpha: cfg.alpha, seed: cfg.seed });
121
- // zeros-vs-wins sign test: b = #(margin>0), c = #(margin<0); two-sided p on |bc|.
121
+ // zeros-vs-wins sign test: b = #(margin>0), c = #(margin<0); two-sided p on |b-c|.
122
122
  const sign = mcnemar(ownLoss, ownWin);
123
123
  perArm[arm] = {
124
124
  arm,
@@ -141,7 +141,7 @@ export function wrongTargetControl(harnessOut, personas, opts = {}) {
141
141
  for (const arm of harnessOut.arms) {
142
142
  if (arm === 'baseline' || !perArm.baseline) continue;
143
143
  const m = mcnemar(perArm.baseline.ownWin, perArm[arm].ownWin);
144
- // mcnemar.pValue is TWO-SIDED (|bc|), so the direction guard m.b > m.c is mandatory:
144
+ // mcnemar.pValue is TWO-SIDED (|b-c|), so the direction guard m.b > m.c is mandatory:
145
145
  // the arm must FLIP MORE subjects to own-match than baseline does, not merely differ.
146
146
  perArm[arm].vsBaseline = {
147
147
  b: m.b, c: m.c, pValue: m.pValue, beatsBaseline: significantAt(m.pValue, cfg.perTestAlpha) && m.b > m.c,
@@ -61,7 +61,7 @@ export const EXEMPLAR_TEXT_MAX = 600;
61
61
  * Max bytes we will read from the on-disk JSONL. The store is bounded by
62
62
  * MAX_EXEMPLARS short records, so a file larger than this is a corrupt/hand-
63
63
  * edited artifact; refusing to slurp it whole avoids an OOM. ~2 MiB is orders
64
- * of magnitude above any legitimate exemplar set (200 × 600 chars ≈ 120 KiB).
64
+ * of magnitude above any legitimate exemplar set (200 x 600 chars ≈ 120 KiB).
65
65
  */
66
66
  const MAX_STORE_BYTES = 2 * 1024 * 1024;
67
67
 
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * The NO-JUDGE behavioral metric (design spec §"The honest bar", claim 2):
5
5
  * "Repeat-correction-rate drop — how often you re-issue the SAME correction,
6
- * bucketed by session age. A working system bends the curve down ( in week 1
7
- * -> by week 4). The most honest single number."
6
+ * bucketed by session age. A working system bends the curve down (3x in week 1
7
+ * -> 0x by week 4). The most honest single number."
8
8
  *
9
9
  * This module records, per preference SLUG, every time the user RE-ISSUES a
10
10
  * correction that the profile should already have learned, and computes the drop
@@ -286,14 +286,25 @@ export function tier2SyntaxCheckCmd(filePath) {
286
286
  ],
287
287
  };
288
288
  case '.py':
289
- return { cmd: 'python3', args: ['-m', 'py_compile', filePath] };
289
+ // Windows ships python.exe, not python3. If neither exists the spawn
290
+ // ENOENT is treated as SKIP by verifyTier2, not a syntax failure.
291
+ return {
292
+ cmd: process.platform === 'win32' ? 'python' : 'python3',
293
+ args: ['-m', 'py_compile', filePath],
294
+ };
290
295
  case '.sh':
291
296
  case '.bash':
297
+ // On Windows this only works when a real bash.exe (Git Bash) is on
298
+ // PATH; otherwise verifyTier2 maps the ENOENT to SKIP.
292
299
  return { cmd: 'bash', args: ['-n', filePath] };
293
300
  case '.ts':
294
301
  case '.tsx': {
295
302
  // Only if tsc on PATH. The agent contract says SKIP when absent.
296
- const which = spawnSync(process.platform === 'win32' ? 'where' : 'which', ['tsc'], {
303
+ // On Windows tsc is a .cmd shim which Node cannot spawn without a
304
+ // shell (CVE-2024-27980), and shelling out with an interpolated
305
+ // filePath would be an injection vector -- so SKIP honestly there.
306
+ if (process.platform === 'win32') return null;
307
+ const which = spawnSync('which', ['tsc'], {
297
308
  encoding: 'utf8',
298
309
  });
299
310
  if (which.status === 0 && which.stdout.trim()) {
@@ -319,6 +330,11 @@ export async function verifyTier2(filePath) {
319
330
  await execFileAsync(spec.cmd, spec.args, { timeout: 15_000 });
320
331
  return { ok: true, skipped: false };
321
332
  } catch (err) {
333
+ // Checker binary missing/not spawnable (ENOENT, or EINVAL for Windows
334
+ // .cmd shims) is "cannot verify", not "syntax error" -- honest SKIP.
335
+ if (err && (err.code === 'ENOENT' || err.code === 'EINVAL')) {
336
+ return { ok: true, skipped: true };
337
+ }
322
338
  const stderr = err.stderr || err.stdout || err.message || '';
323
339
  return {
324
340
  ok: false,
@@ -369,10 +385,15 @@ async function resolveProjectVerifyCmd(projectRoot, verifyCmdOverride) {
369
385
  export async function verifyTier3(projectRoot, verifyCmdOverride) {
370
386
  const cmd = await resolveProjectVerifyCmd(projectRoot, verifyCmdOverride);
371
387
  if (!cmd) return { ok: true, skipped: true };
372
- // Run the command via `sh -c` so script lines like `npm test --silent` work
373
- // verbatim. Timeout is generous (5 min) because real test suites can be slow.
388
+ // Run the command via the platform shell so script lines like
389
+ // `npm test --silent` work verbatim: `sh -c` on POSIX, `cmd /d /s /c` on
390
+ // Windows ('sh' is not on PATH there). Timeout is generous (5 min)
391
+ // because real test suites can be slow.
392
+ const [shellBin, shellArgs] = process.platform === 'win32'
393
+ ? [process.env.ComSpec || 'cmd.exe', ['/d', '/s', '/c', cmd]]
394
+ : ['sh', ['-c', cmd]];
374
395
  return new Promise((resolve) => {
375
- execFile('sh', ['-c', cmd], { cwd: projectRoot, timeout: 5 * 60_000 }, (err, stdout, stderr) => {
396
+ execFile(shellBin, shellArgs, { cwd: projectRoot, timeout: 5 * 60_000 }, (err, stdout, stderr) => {
376
397
  const combined = `${String(stdout || '')}\n${String(stderr || '')}`;
377
398
  if (err) {
378
399
  const evidence = combined.split('\n').slice(0, 20).join('\n');
@@ -215,8 +215,11 @@ export async function maybeWarnDivergence(opts = {}) {
215
215
 
216
216
  /**
217
217
  * Map an MCP tool name (+ args) to the (action, target) tuple used for
218
- * permission checks. Returns null for unrecognised tool names; callers
219
- * should treat null as "no policy applies, allow" (these are bundled-only).
218
+ * permission checks. Returns null for unrecognised tool names. Callers MUST
219
+ * treat null as fail-closed whenever an extension is active: every tool the
220
+ * server advertises has an explicit mapping here, so a null mapping means a
221
+ * future tool was added without a policy entry -- denying is the only answer
222
+ * that keeps the sandbox sound (see gatePermissionAndQuota in server.js).
220
223
  */
221
224
  export function toolNameToActionTarget(toolName, args) {
222
225
  switch (toolName) {
@@ -225,8 +228,23 @@ export function toolNameToActionTarget(toolName, args) {
225
228
  case 'ijfw_memory_recall':
226
229
  case 'ijfw_memory_search':
227
230
  case 'ijfw_memory_prelude':
231
+ case 'ijfw_memory_facts':
228
232
  case 'ijfw_cross_project_search':
229
233
  return { action: 'read', target: 'memory:read' };
234
+ case 'ijfw_brain': {
235
+ // Brain verbs can write to the facts DB (wiki rebuilds, fact upserts),
236
+ // so classify the whole facade as a write -- conservative by design.
237
+ const verb = (args && typeof args.verb === 'string' && args.verb) ? args.verb : '*';
238
+ return { action: 'write', target: `brain:${verb}` };
239
+ }
240
+ case 'ijfw_state': {
241
+ // state-sdk verbs mutate project orchestration state.
242
+ const verb = (args && typeof args.verb === 'string' && args.verb) ? args.verb : '*';
243
+ return { action: 'write', target: `state:${verb}` };
244
+ }
245
+ case 'ijfw_cross_audit_converge':
246
+ // autoFix:true mutates source -- always treat as a write.
247
+ return { action: 'write', target: 'audit:converge' };
230
248
  case 'ijfw_metrics':
231
249
  return { action: 'read', target: 'metrics:read' };
232
250
  case 'ijfw_update_check':