@ijfw/memory-server 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -149,4 +149,119 @@ export async function autoLink(db, entry, opts = {}) {
149
149
  return { skipped: false, neighbors, proposal, applied };
150
150
  }
151
151
 
152
- export default { autoLink };
152
+ // v1.5.1 R5-1.2 -- one-time M2 (A-Mem auto-link) backfill for memory written
153
+ // during v1.5.0, when autoLink was NOT wired into the production write path.
154
+ //
155
+ // UNLIKE the M1 backfill (free, always-on), M2 backfill makes one LLM call
156
+ // per row -- backfilling over a large memory can cost real money. So M2
157
+ // backfill is OPT-IN and budget-gated:
158
+ //
159
+ // - IJFW_AUTOLINK_OFF=1 -> backfill is a no-op (kill switch)
160
+ // - IJFW_AUTOLINK_BACKFILL!=1 -> backfill is a no-op by default
161
+ // (M1-always, M2-opt-in is the safe
162
+ // default per R5-1.2)
163
+ // - IJFW_AUTOLINK_BUDGET_USD unset or <= 0 -> backfill is a no-op. A budget MUST be
164
+ // explicitly configured. The per-call
165
+ // llm-call.js path treats an unset
166
+ // budget as "uncapped"; for a bulk
167
+ // backfill that is unsafe -- a large
168
+ // memory could spend without bound. So
169
+ // the backfill REQUIRES a positive cap.
170
+ // - no API key -> backfill is a no-op (autoLink skips)
171
+ //
172
+ // The per-row autoLink call independently re-checks the SAME env gates (off /
173
+ // budget / key), so even mid-run the backfill respects a budget that drops to
174
+ // zero or a kill switch that flips. Returns aggregate counts.
175
/**
 * Opt-in M2 (auto-link) backfill: walks `memory_entries` in id order and calls
 * `autoLink` once for every row that has a non-empty string body.
 *
 * The run is refused (returns `{ skipped: true, reason, rows: 0 }`) unless all
 * gates pass, checked in this order:
 *   1. `opts.force === true` or `IJFW_AUTOLINK_BACKFILL === '1'`
 *      -> otherwise reason `'backfill_not_enabled'`
 *   2. `IJFW_AUTOLINK_OFF !== '1'` -> otherwise reason `'autolink_off'`
 *   3. `IJFW_AUTOLINK_BUDGET_USD` is set and parses to a number > 0
 *      -> otherwise `'budget_not_set'` (unset) or `'budget_exhausted'`
 *   4. `IJFW_AUTOLINK_API_KEY` or `ANTHROPIC_API_KEY` is set
 *      -> otherwise reason `'no_key'`
 * Note that `force` bypasses only gate 1.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`; the prepared
 *   statement must expose `all(...params)`.
 * @param {object} [opts]
 * @param {boolean} [opts.force] - Bypass the IJFW_AUTOLINK_BACKFILL opt-in gate.
 * @param {number} [opts.batchSize=200] - Rows fetched per SELECT. Fractions are
 *   floored, values below 1 are clamped to 1, and missing / zero / non-numeric
 *   values fall back to 200.
 * @returns {Promise<object>} Either the skip object described above, or
 *   `{ skipped: false, rows, linked, links_added, neighbor_tags_added,
 *   stopped_early, errors }` where `rows` counts autoLink attempts, `linked`
 *   counts attempts that neither threw nor reported `skipped`, and `errors`
 *   counts attempts where `autoLink` threw.
 * @throws {Error} When `db` is missing or has no `prepare` function.
 */
export async function backfillAutoLink(db, opts = {}) {
  if (!db || typeof db.prepare !== 'function') {
    throw new Error('backfillAutoLink: db handle is invalid.');
  }
  // `opts?.` so an explicit null behaves like an omitted options object.
  const force = opts?.force === true;
  // Opt-in gate: the backfill only runs when explicitly enabled.
  if (!force && process.env.IJFW_AUTOLINK_BACKFILL !== '1') {
    return { skipped: true, reason: 'backfill_not_enabled', rows: 0 };
  }
  if (process.env.IJFW_AUTOLINK_OFF === '1') {
    return { skipped: true, reason: 'autolink_off', rows: 0 };
  }
  // A positive budget cap is mandatory for the bulk run; an unset, zero,
  // negative or unparseable value refuses the backfill.
  const budget = process.env.IJFW_AUTOLINK_BUDGET_USD;
  if (budget === undefined || !(Number(budget) > 0)) {
    return {
      skipped: true,
      reason: budget === undefined ? 'budget_not_set' : 'budget_exhausted',
      rows: 0,
    };
  }
  const hasKey = !!(process.env.IJFW_AUTOLINK_API_KEY || process.env.ANTHROPIC_API_KEY);
  if (!hasKey) {
    return { skipped: true, reason: 'no_key', rows: 0 };
  }

  // Normalise the batch size to a positive integer. A fractional or
  // non-numeric value bound to `LIMIT ?` would make the SELECT throw, and the
  // catch below would then end the run reporting zero rows.
  const requestedBatch = Number(opts?.batchSize);
  const batchSize = Number.isFinite(requestedBatch) && requestedBatch !== 0
    ? Math.max(1, Math.floor(requestedBatch))
    : 200;
  const result = {
    skipped: false, rows: 0, linked: 0, links_added: 0,
    neighbor_tags_added: 0, stopped_early: false, errors: 0,
  };

  // Prepare the paging query once; it is identical for every batch.
  let selectBatch;
  try {
    selectBatch = db.prepare(
      'SELECT id, body FROM memory_entries WHERE id > ? ORDER BY id ASC LIMIT ?',
    );
  } catch {
    // Query could not be prepared -- nothing to walk; report an empty run.
    return result;
  }

  let lastId = 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    let batch;
    try {
      batch = selectBatch.all(lastId, batchSize);
    } catch {
      // NOTE(review): any SELECT failure ends the walk silently; confirm that
      // is the intended best-effort behaviour for mid-run database errors.
      break;
    }
    if (!batch || batch.length === 0) break;
    for (const row of batch) {
      // Advance the keyset cursor first so skipped rows are never re-read.
      lastId = row.id;
      if (typeof row.body !== 'string' || row.body.length === 0) continue;
      // Per-row re-check: a kill switch that flips or a budget that is
      // cleared mid-run stops the backfill before the next autoLink call.
      if (process.env.IJFW_AUTOLINK_OFF === '1') {
        result.stopped_early = true;
        return result;
      }
      const liveBudget = process.env.IJFW_AUTOLINK_BUDGET_USD;
      if (liveBudget === undefined || !(Number(liveBudget) > 0)) {
        result.stopped_early = true;
        return result;
      }
      result.rows += 1;
      let res;
      try {
        res = await autoLink(db, { id: row.id, body: row.body });
      } catch {
        // One failing row must not abort the run, but it is counted so the
        // caller can tell a failure apart from a clean skip.
        result.errors += 1;
        continue;
      }
      if (res && res.skipped) {
        // budget_exhausted / autolink_off from autoLink end the whole run;
        // any other skip reason only skips this row.
        if (res.reason === 'budget_exhausted' || res.reason === 'autolink_off') {
          result.stopped_early = true;
          return result;
        }
        continue;
      }
      result.linked += 1;
      if (res && res.applied) {
        result.links_added += res.applied.links_added || 0;
        result.neighbor_tags_added += res.applied.neighbor_tags_added || 0;
      }
    }
    // A short batch means the table is exhausted; skip the extra empty SELECT.
    if (batch.length < batchSize) break;
  }
  return result;
}
266
+
267
+ export default { autoLink, backfillAutoLink };
@@ -29,7 +29,12 @@ export class SchemaVersionError extends Error {
29
29
  // Discover and load every migration module under ./migrations/, sorted by
30
30
  // numeric prefix ascending. Each module must export VERSION (integer),
31
31
  // DESCRIPTION (string), and up(db) (function).
32
- async function loadMigrations() {
32
+ //
33
+ // Exported (v1.5.1 W3.B) so search.js (and any other consumer that needs
34
+ // the sync migration pipeline) can reuse the SAME discovery path instead
35
+ // of maintaining a parallel hardcoded list. Single source of truth: drop
36
+ // a new NNN-name.js into ./migrations/ and every consumer sees it.
37
+ export async function loadMigrations() {
33
38
  let files;
34
39
  try {
35
40
  files = readdirSync(MIGRATIONS_DIR);
@@ -0,0 +1,50 @@
1
+ // IJFW v1.5.1 -- memory migration 009: M1 obsidian-index backfill.
2
+ //
3
+ // Source authority: Trident r5 finding 1.2 (HIGH).
4
+ //
5
+ // Round-4 Fix-1 (commit 3218812) wired M1 (Obsidian wikilink/tag/meta
6
+ // indexing -- indexObsidianRelations) and M2 (A-Mem auto-linking -- autoLink)
7
+ // into the production memory-write path (handleStore, search.js#autoIndex).
8
+ // But the fix is forward-only: every memory_entries row written during
9
+ // v1.5.0 -- when M1/M2 were bypassed -- has empty memory_links / memory_tags
10
+ // / memory_meta. An existing user upgrading to v1.5.1 got auto-linking +
11
+ // wikilink indexing only on NEW entries; their accumulated memory stayed
12
+ // un-indexed.
13
+ //
14
+ // This migration runs a ONE-TIME M1 backfill: it walks every existing
15
+ // memory_entries row and runs indexObsidianRelations over its body. M1 is:
16
+ // - free -- pure markdown parse, zero LLM / network / cost
17
+ // - idempotent -- indexObsidianRelations does DELETE-then-INSERT per id,
18
+ // so re-applying produces identical aux rows
19
+ // which is exactly what makes it safe to run inside a schema migration: it
20
+ // runs once (user_version gates re-application), deterministically, and a
21
+ // crash rolls the whole txn back to user_version 8.
22
+ //
23
+ // M2 (autoLink) is NOT backfilled here -- it makes one LLM call per row and
24
+ // can cost real money over a large memory. M2 backfill is opt-in via the
25
+ // `ijfw memory reindex --m2` CLI verb, which is budget-gated (respects
26
+ // IJFW_AUTOLINK_BUDGET_USD / IJFW_AUTOLINK_OFF / IJFW_AUTOLINK_BACKFILL).
27
+ //
28
+ // Ordering: migration 001 creates memory_entries; migration 006 creates
29
+ // memory_links / memory_tags / memory_meta. Both run before 009, so by the
30
+ // time up() executes the source table and the three aux tables all exist.
31
+ //
32
+ // Crash safety: the migration runner wraps up() in BEGIN IMMEDIATE.
33
+ // backfillObsidianIndex's per-row indexObsidianRelations opens a nested
34
+ // SQLite transaction (savepoint) -- valid inside the outer txn -- and the
35
+ // whole thing rolls back to user_version 8 on any failure.
36
+
37
+ import { backfillObsidianIndex } from '../obsidian-parser.js';
38
+
39
+ export const VERSION = 9;
40
+ export const DESCRIPTION =
41
+ 'memory v1.5.1 -- one-time M1 obsidian-index backfill for pre-fix rows (Trident r5 1.2)';
42
+
43
/**
 * Apply migration 009: run the M1 obsidian-index backfill over the database.
 *
 * @param {object} db - Database handle, passed straight through to
 *   `backfillObsidianIndex`.
 * @returns {object} Whatever `backfillObsidianIndex` returns (its re-index
 *   counts), so the caller can inspect or log the outcome instead of having
 *   it discarded.
 */
export function up(db) {
  // backfillObsidianIndex tolerates a missing memory_entries table (returns
  // zero counts) so a brand-new db that jumps straight to v9 is a clean
  // no-op -- there are no pre-fix rows to backfill on a fresh install.
  return backfillObsidianIndex(db);
}
49
+
50
+ export default { version: VERSION, description: DESCRIPTION, up };
@@ -88,4 +88,65 @@ export function indexObsidianRelations(db, memoryId, text) {
88
88
  return parsed;
89
89
  }
90
90
 
91
- export default { parseObsidian, indexObsidianRelations };
91
+ // v1.5.1 R5-1.2 -- one-time M1 backfill for memory written during v1.5.0,
92
+ // when indexObsidianRelations was NOT wired into the production write path.
93
+ // Round-4 Fix-1 (commit 3218812) wired M1+M2 into handleStore/autoIndex but
94
+ // forward-only: rows already in memory_entries have empty memory_links /
95
+ // memory_tags / memory_meta. This walks EVERY row and re-runs M1 over it.
96
+ //
97
+ // Safe to run over everything:
98
+ // - free -- pure markdown parse, zero LLM / network
99
+ // - idempotent-- indexObsidianRelations clears prior aux rows per id before
100
+ // re-inserting, so a re-run produces identical state
101
+ //
102
+ // The walk reads ids in batches so a very large memory_entries doesn't pin
103
+ // the whole table in memory; each row's indexObsidianRelations call carries
104
+ // its own transaction (DELETE-then-INSERT) so a single bad row never aborts
105
+ // the rest of the backfill.
106
+ //
107
+ // Returns { rows, links, tags, meta, errors } -- counts re-indexed across the run, plus the number of rows whose re-index threw.
108
/**
 * M1 backfill: walks `memory_entries` in id order and re-runs
 * `indexObsidianRelations` over every row that has a non-empty string body.
 *
 * Rows are read in batches using keyset paging (`id > lastId`). A row whose
 * re-index throws is logged to stderr and counted in `errors`; the walk then
 * continues with the next row.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`; the prepared
 *   statement must expose `all(...params)`.
 * @param {object} [opts]
 * @param {number} [opts.batchSize=500] - Rows fetched per SELECT. Fractions are
 *   floored, values below 1 are clamped to 1, and missing / zero / non-numeric
 *   values fall back to 500.
 * @returns {{rows: number, links: number, tags: number, meta: number, errors: number}}
 *   `rows` is the number of rows successfully re-indexed; `links` / `tags` /
 *   `meta` sum the lengths of the arrays returned by `indexObsidianRelations`;
 *   `errors` is the number of rows whose re-index threw.
 * @throws {Error} When `db` is missing or has no `prepare` function.
 */
export function backfillObsidianIndex(db, opts = {}) {
  if (!db || typeof db.prepare !== 'function') {
    throw new Error('backfillObsidianIndex: db handle is invalid.');
  }
  // Normalise the batch size to a positive integer. A fractional or
  // non-numeric value bound to `LIMIT ?` would make the SELECT throw, and the
  // catch below would then end the run reporting zero rows. `opts?.` lets an
  // explicit null behave like an omitted options object.
  const requestedBatch = Number(opts?.batchSize);
  const batchSize = Number.isFinite(requestedBatch) && requestedBatch !== 0
    ? Math.max(1, Math.floor(requestedBatch))
    : 500;
  const result = { rows: 0, links: 0, tags: 0, meta: 0, errors: 0 };

  // Prepare the paging query once; it is identical for every batch.
  let selectBatch;
  try {
    selectBatch = db.prepare(
      'SELECT id, body FROM memory_entries WHERE id > ? ORDER BY id ASC LIMIT ?',
    );
  } catch {
    // memory_entries missing (fresh db before migration 001) -- nothing to do.
    return result;
  }

  let lastId = 0;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    let batch;
    try {
      batch = selectBatch.all(lastId, batchSize);
    } catch {
      // NOTE(review): any SELECT failure ends the walk silently; confirm that
      // is the intended best-effort behaviour for mid-run database errors.
      break;
    }
    if (!batch || batch.length === 0) break;
    for (const row of batch) {
      // Advance the keyset cursor first so skipped rows are never re-read.
      lastId = row.id;
      if (typeof row.body !== 'string' || row.body.length === 0) continue;
      try {
        const parsed = indexObsidianRelations(db, String(row.id), row.body);
        result.rows += 1;
        result.links += parsed.links.length;
        result.tags += parsed.tags.length;
        result.meta += parsed.meta.length;
      } catch (e) {
        result.errors += 1;
        try {
          console.error(
            '[obsidian] backfill failed for id', row.id, ':', e?.message || e,
          );
        } catch { /* never throw out of the backfill */ }
      }
    }
    // A short batch means the table is exhausted; skip the extra empty SELECT.
    if (batch.length < batchSize) break;
  }
  return result;
}
151
+
152
+ export default { parseObsidian, indexObsidianRelations, backfillObsidianIndex };
@@ -31,6 +31,13 @@ import { readFileSync, existsSync, mkdirSync } from 'node:fs';
31
31
  import { dirname, join, resolve, normalize, isAbsolute } from 'node:path';
32
32
 
33
33
  import { expandQuery } from '../compute/synonyms.js';
34
+ import { loadMigrations } from './migration-runner.js';
35
+ // v1.5.1 R4-H2 — auto-index rows must flow through indexEntry so the
36
+ // v1.5.0 memory-moat (M1 Obsidian indexing + M2 A-Mem auto-linking) fires
37
+ // for warm-tier rebuilds, not just the benchmark harness. obsidian-parser
38
+ // is imported directly so M1 runs synchronously inside the same txn batch.
39
+ import { indexObsidianRelations } from './obsidian-parser.js';
40
+ import { autoLink } from './auto-linker.js';
34
41
 
35
42
  const MAX_RESULTS = 50;
36
43
  const SNIPPET_HALF = 60;
@@ -50,30 +57,16 @@ try {
50
57
  }
51
58
 
52
59
  // Resolve migration modules synchronously at module load via top-level
53
- // await. Replayed inside searchMemory's sync path. Keep in lockstep with
54
- // ./migrations/.
55
- const MEMORY_MIGRATIONS = await loadMemoryMigrationsSync();
56
-
57
- async function loadMemoryMigrationsSync() {
58
- const v1 = await import('./migrations/001-fts5-init.js');
59
- const v2 = await import('./migrations/002-tier-semantic.js');
60
- const v3 = await import('./migrations/003-stale-candidate.js');
61
- const v4 = await import('./migrations/004-bitemporal.js');
62
- const v5 = await import('./migrations/005-vector-cache.js');
63
- const v6 = await import('./migrations/006-obsidian-graph.js');
64
- const v7 = await import('./migrations/007-skill-telemetry.js');
65
- const v8 = await import('./migrations/008-write-provenance.js');
66
- return [
67
- { version: v1.VERSION, description: v1.DESCRIPTION, up: v1.up },
68
- { version: v2.VERSION, description: v2.DESCRIPTION, up: v2.up },
69
- { version: v3.VERSION, description: v3.DESCRIPTION, up: v3.up },
70
- { version: v4.VERSION, description: v4.DESCRIPTION, up: v4.up },
71
- { version: v5.VERSION, description: v5.DESCRIPTION, up: v5.up },
72
- { version: v6.VERSION, description: v6.DESCRIPTION, up: v6.up },
73
- { version: v7.VERSION, description: v7.DESCRIPTION, up: v7.up },
74
- { version: v8.VERSION, description: v8.DESCRIPTION, up: v8.up },
75
- ].sort((a, b) => a.version - b.version);
76
- }
60
+ // await. Replayed inside searchMemory's sync path.
61
+ //
62
+ // v1.5.1 W3.B: discovery is delegated to memory/migration-runner.js
63
+ // (readdirSync over ./migrations/) so a single source of truth governs
64
+ // which migrations search.js knows about. Prior to this, search.js
65
+ // carried its OWN hardcoded list -- the v1.5.0 INT.7 hotfix patched
66
+ // the symptom (006/007/008 missing); this kills the dual-registry bug
67
+ // class outright. Drop migration 009 into ./migrations/, and search.js
68
+ // will pick it up automatically.
69
+ const MEMORY_MIGRATIONS = await loadMigrations();
77
70
 
78
71
  function highestMigrationVersion() {
79
72
  if (!MEMORY_MIGRATIONS.length) return 0;
@@ -220,12 +213,20 @@ function runMemoryMigrationsSync(db, currentVersion, targetVersion) {
220
213
 
221
214
  function autoIndex(db, files) {
222
215
  let n = 0;
216
+ // v1.5.1 R4-H2 — capture the rowid of every inserted entry so the
217
+ // memory-moat aux indexing (M1 Obsidian relations, M2 auto-link) can run
218
+ // over the warm-tier rebuild, not just the benchmark harness. The bulk
219
+ // INSERT stays in one transaction for FTS write performance; M1/M2 run
220
+ // AFTER commit so a parse/link failure can never abort the rebuild.
221
+ const inserted = [];
223
222
  const txfn = db.transaction((batch) => {
224
223
  const stmt = db.prepare(
225
224
  'INSERT INTO memory_entries (body, source, session_id, created_at) VALUES (?, ?, ?, ?)'
226
225
  );
227
226
  for (const item of batch) {
228
- stmt.run(item.body, item.source, null, item.created_at);
227
+ const info = stmt.run(item.body, item.source, null, item.created_at);
228
+ const id = info && info.lastInsertRowid != null ? Number(info.lastInsertRowid) : null;
229
+ inserted.push({ id, body: item.body });
229
230
  n++;
230
231
  }
231
232
  });
@@ -242,6 +243,26 @@ function autoIndex(db, files) {
242
243
  }
243
244
  if (batch.length === 0) return 0;
244
245
  try { txfn.immediate(batch); } catch { /* one bad batch should not abort the search */ }
246
+
247
+ // v1.5.1 R4-H2 — M1: Obsidian wikilink/tag/meta indexing into
248
+ // memory_links/_tags/_meta. Synchronous + idempotent (indexObsidianRelations
249
+ // clears prior rows for the id before re-inserting). Best-effort: a missing
250
+ // migration-006 schema or a parse failure must never break the search path.
251
+ // M2: A-Mem auto-linking — fire-and-forget, env-gated (IJFW_AUTOLINK_OFF),
252
+ // budget-capped (IJFW_AUTOLINK_BUDGET_USD); returns skipped cleanly when no
253
+ // API key, so a bulk rebuild without credentials does no LLM work.
254
+ for (const row of inserted) {
255
+ if (row.id == null) continue;
256
+ try {
257
+ indexObsidianRelations(db, String(row.id), row.body);
258
+ } catch { /* M1 best-effort -- never abort the search */ }
259
+ try {
260
+ const p = autoLink(db, { id: row.id, body: row.body });
261
+ if (p && typeof p.catch === 'function') p.catch(() => {});
262
+ // expose for tests that want deterministic completion
263
+ autoIndex.__lastAutoLinkPromise = p;
264
+ } catch { /* M2 dispatch best-effort */ }
265
+ }
245
266
  return n;
246
267
  }
247
268