@ijfw/memory-server 1.5.1 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/package.json +6 -5
  2. package/src/brain/budget-guard.js +86 -0
  3. package/src/brain/citation-resolver.js +41 -0
  4. package/src/brain/context-injection.js +69 -0
  5. package/src/brain/discovery.js +83 -0
  6. package/src/brain/dream-pipeline.js +324 -0
  7. package/src/brain/dump-ingest.js +88 -0
  8. package/src/brain/entity-collapse.js +28 -0
  9. package/src/brain/export.js +112 -0
  10. package/src/brain/extractors/index.js +24 -0
  11. package/src/brain/extractors/markdown.js +27 -0
  12. package/src/brain/extractors/pdf.js +31 -0
  13. package/src/brain/extractors/transcript.js +38 -0
  14. package/src/brain/first-run-scan.js +61 -0
  15. package/src/brain/index.js +1 -0
  16. package/src/brain/layout-sentinel.js +29 -0
  17. package/src/brain/migrate-facts-internal-once.js +87 -0
  18. package/src/brain/path-guard.js +103 -0
  19. package/src/brain/paths.js +26 -0
  20. package/src/brain/promotion-suggester.js +41 -0
  21. package/src/brain/stub-detector.js +33 -0
  22. package/src/brain/tiered-llm.js +83 -0
  23. package/src/brain/wiki-compiler.js +144 -0
  24. package/src/brain/wiki-sentinels.js +45 -0
  25. package/src/brain/wiki-templates.js +94 -0
  26. package/src/cross-orchestrator-cli.js +132 -5
  27. package/src/cross-orchestrator.js +2 -2
  28. package/src/dashboard-server.js +1 -1
  29. package/src/dream/runner.mjs +21 -0
  30. package/src/extension-registry.js +2 -2
  31. package/src/handlers/brain-handler.js +319 -0
  32. package/src/memory/auto-linker.js +5 -1
  33. package/src/memory/benchmark.js +4 -3
  34. package/src/memory/layout-migrations/001-visible-layer.js +131 -0
  35. package/src/memory/layout-migrations/index.js +50 -0
  36. package/src/memory/migration-runner.js +31 -2
  37. package/src/memory/obsidian-parser.js +3 -1
  38. package/src/memory/reader.js +2 -1
  39. package/src/memory/search.js +144 -16
  40. package/src/memory/temporal.js +40 -1
  41. package/src/orchestrator/agents-md-blackboard.js +114 -1
  42. package/src/orchestrator/discipline-selector.js +276 -0
  43. package/src/orchestrator/merge-block-aware.js +15 -5
  44. package/src/orchestrator/state-sdk.js +42 -4
  45. package/src/orchestrator/wave-state.js +38 -0
  46. package/src/recovery/code-fixer.js +1 -1
  47. package/src/server.js +290 -75
  48. package/src/update-apply.js +1 -1
@@ -22,7 +22,8 @@ const PREVIEW_CHARS = 300;
22
22
  /** Parse YAML-style frontmatter (key: value lines between --- fences). */
23
23
  function parseFrontmatter(raw) {
24
24
  const fm = { title: null, description: null, type: null };
25
- const m = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
25
+ const stripped = String(raw).replace(/^/, '').replace(/^\s+/, '');
26
+ const m = stripped.match(/^---\r?\n([\s\S]*?)\r?\n---/);
26
27
  if (!m) return fm;
27
28
  for (const line of m[1].split('\n')) {
28
29
  const kv = line.match(/^(\w+):\s*(.+)/);
@@ -346,6 +346,108 @@ function rowCount(db) {
346
346
  }
347
347
  }
348
348
 
349
+ // --- Structured provenance helpers -----------------------------------------
350
+
351
/**
 * Convert a raw FTS row + fileBySource map to a structured provenance object.
 * Used when opts.format === 'structured'.
 *
 * Fields that aren't computed in the existing pipeline are returned as
 * null / 0 rather than introducing new compute work (Task 28 spec).
 *
 * @param {object} row - raw DB row from searchFts5; reads `source`, `body`,
 *   `rank`, `created_at` and `id`
 * @param {Map} fileBySource - lookup keyed by `row.source`; when the entry has
 *   a `path` it replaces the raw source string in the output
 * @param {string} rawQuery - original user query (for whyMatched extraction);
 *   null / undefined / non-string values are coerced instead of throwing
 * @param {object} db - open DB handle (for backlink count query); may be
 *   null, in which case backlinkCount stays 0
 * @returns {{source: string, anchor: null, snippet: string, confidence: number,
 *   ageDays: number, decayFactor: null, whyMatched: string[],
 *   backlinkCount: number}}
 */
function ftsRowToStructured(row, fileBySource, rawQuery, db) {
  const src = row.source || '';
  const meta = fileBySource.get(src) || null;
  const source = (meta && meta.path) || src;
  const text = String(row.body || '');
  // Snippet: first 200 chars of the body with whitespace runs collapsed.
  const snip = text.slice(0, 200).replace(/\s+/g, ' ').trim();

  // confidence: bm25 rank is negative (more negative = better). Convert to 0..1.
  // rank returned from searchFts5 can be 0 or negative; we use the same
  // score formula as ftsRowToResult (100 - rank) but normalise to 0..1 by
  // clamping to [0, 100] and dividing.
  // NOTE(review): for every rank <= 0 this yields scoreRaw >= 100, so the
  // clamp pins confidence at exactly 1; only positive ranks produce a value
  // below 1. Confirm whether a finer-grained normalisation is wanted.
  const rawRank = Number(row.rank || 0);
  const scoreRaw = 100 - rawRank; // same as ftsRowToResult
  const confidence = Math.min(1, Math.max(0, scoreRaw / 100));

  // ageDays: created_at is unix ms; a missing / zero timestamp reports 0.
  const createdAt = Number(row.created_at || 0);
  const ageDays = createdAt > 0
    ? Math.max(0, (Date.now() - createdAt) / 86400000)
    : 0;

  // decayFactor: not yet computed in pipeline — return null per spec
  const decayFactor = null;

  // whyMatched: tokenise the raw query into distinct, non-empty, lower-cased
  // terms with quote / wildcard / paren characters stripped. The Set removes
  // repeated terms while keeping first-seen order.
  const whyMatched = [...new Set(
    String(rawQuery ?? '')
      .trim()
      .split(/\s+/)
      .map(t => t.replace(/['"*()]/g, '').toLowerCase())
      .filter(t => t.length > 0),
  )];

  // backlinkCount: count rows in memory_links whose to_target equals this
  // row's id (stringified).
  let backlinkCount = 0;
  if (db && row.id != null) {
    try {
      const idStr = String(row.id);
      const r = db.prepare(
        'SELECT COUNT(*) AS n FROM memory_links WHERE to_target = ?'
      ).get(idStr);
      backlinkCount = r ? Number(r.n) : 0;
    } catch { /* memory_links may not exist in older dbs */ }
  }

  return {
    source,
    anchor: null,
    snippet: snip,
    confidence,
    ageDays,
    decayFactor,
    whyMatched,
    backlinkCount,
  };
}
418
+
419
/**
 * Convert a hot-linear result to a structured provenance object.
 * Linear results lack a DB row id so backlinkCount is always 0; ageDays and
 * decayFactor are likewise fixed at 0 / null.
 *
 * @param {object} result - from searchLinear; reads `score`, `path`,
 *   `relpath` and `snippet`
 * @param {string} rawQuery - original user query; null / undefined /
 *   non-string values are coerced instead of throwing
 * @returns {{source: string, anchor: null, snippet: string, confidence: number,
 *   ageDays: number, decayFactor: null, whyMatched: string[],
 *   backlinkCount: number}}
 */
function linearResultToStructured(result, rawQuery) {
  const scoreRaw = Number(result.score || 0);
  // Hot-linear score is titleMatches*3 + bodyMatches; normalise loosely to 0..1
  // by capping at 50 matches (arbitrary but safe). The lower clamp and the NaN
  // guard keep the documented 0..1 range for negative or non-numeric scores.
  const scaled = scoreRaw / 50;
  const confidence = Number.isNaN(scaled)
    ? 0
    : Math.min(1, Math.max(0, scaled));

  // whyMatched: distinct, non-empty, lower-cased query terms with quote /
  // wildcard / paren characters stripped (first-seen order preserved).
  const whyMatched = [...new Set(
    String(rawQuery ?? '')
      .trim()
      .split(/\s+/)
      .map(t => t.replace(/['"*()]/g, '').toLowerCase())
      .filter(t => t.length > 0),
  )];

  return {
    source: result.path || result.relpath || '',
    anchor: null,
    snippet: result.snippet || '',
    confidence,
    ageDays: 0,
    decayFactor: null,
    whyMatched,
    backlinkCount: 0,
  };
}
450
+
349
451
  // --- Public API -------------------------------------------------------------
350
452
 
351
453
  /**
@@ -365,30 +467,41 @@ function rowCount(db) {
365
467
  * the hot-linear fallback is unfiltered (D1 does not yet write tier
366
468
  * metadata into the markdown surface).
367
469
  *
470
+ * format option (Task 28 — structured provenance):
471
+ * opts.format === 'structured' returns an Array of provenance objects:
472
+ * [{source, anchor, snippet, confidence, ageDays, decayFactor,
473
+ * whyMatched, backlinkCount}]
474
+ * Default (no format) returns Array<{path,relpath,title,snippet,score,
475
+ * tier_semantic}> — byte-identical to pre-Task-28 behaviour.
476
+ *
368
477
  * @param {string} q
369
478
  * @param {Array<{path,relpath,title,preview}>} files
370
479
  * @param {number} limit
371
480
  * @param {object|undefined} options
372
- * @returns {Array<{path,relpath,title,snippet,score,tier_semantic}>}
481
+ * @returns {Array<{path,relpath,title,snippet,score,tier_semantic}>|Array<provenance>}
373
482
  */
374
483
  export function searchMemory(q, files, limit = MAX_RESULTS, options) {
375
484
  if (!q || !q.trim() || !files || files.length === 0) return [];
376
485
 
377
- // Normalise options. Allow undefined / { tier_semantic, include_stale } /
378
- // a bare string (treated as the tier_semantic value) for ergonomic call
379
- // sites. include_stale defaults to false -- D4 GA-B2 retrieval guard.
486
+ // Normalise options. Allow undefined / { tier_semantic, include_stale,
487
+ // format } / a bare string (treated as the tier_semantic value) for
488
+ // ergonomic call sites. include_stale defaults to false -- D4 GA-B2
489
+ // retrieval guard. format === 'structured' enables Task-28 provenance.
380
490
  let tier_semantic;
381
491
  let include_stale = false;
492
+ let format;
382
493
  if (typeof options === 'string') {
383
494
  tier_semantic = options;
384
495
  } else if (options && typeof options === 'object') {
385
496
  tier_semantic = options.tier_semantic;
386
497
  include_stale = options.include_stale === true;
498
+ format = options.format;
387
499
  }
388
500
 
389
501
  const { expanded, synonym_matches, applied } = expandQuery(q);
390
502
 
391
503
  let warmHits = null;
504
+ let warmRawRows = null; // preserved for structured format (Task 28)
392
505
  let warmEmpty = false;
393
506
  let db = null;
394
507
 
@@ -424,6 +537,11 @@ export function searchMemory(q, files, limit = MAX_RESULTS, options) {
424
537
  if (f.relpath) fileBySource.set(f.relpath, f);
425
538
  if (f.path) fileBySource.set(f.path, f);
426
539
  }
540
+ if (format === 'structured') {
541
+ // Task 28: map raw rows to provenance objects while db is still open
542
+ // (backlinkCount query needs the handle)
543
+ warmRawRows = rows.map(r => ftsRowToStructured(r, fileBySource, q, db));
544
+ }
427
545
  warmHits = rows.map(r => ftsRowToResult(r, fileBySource));
428
546
  } else {
429
547
  warmEmpty = true;
@@ -432,31 +550,41 @@ export function searchMemory(q, files, limit = MAX_RESULTS, options) {
432
550
  }
433
551
  } catch {
434
552
  warmHits = null;
553
+ warmRawRows = null;
435
554
  } finally {
436
555
  if (db) { try { db.close(); } catch { /* ignore */ } }
437
556
  }
438
557
 
439
558
  let results;
440
559
  if (warmHits && warmHits.length > 0) {
441
- results = warmHits.slice(0, limit);
560
+ results = format === 'structured'
561
+ ? warmRawRows.slice(0, limit)
562
+ : warmHits.slice(0, limit);
442
563
  } else if (tier_semantic) {
443
564
  // Tier filter active and warm tier has no matches -- the hot-linear
444
565
  // tier doesn't carry tier metadata so it can't honour the filter.
445
566
  // Returning [] here keeps the contract honest ("only matching tier").
446
567
  results = [];
447
568
  } else {
448
- results = searchLinear(q, files, limit);
569
+ const linearResults = searchLinear(q, files, limit);
570
+ results = format === 'structured'
571
+ ? linearResults.map(r => linearResultToStructured(r, q))
572
+ : linearResults;
449
573
  }
450
574
 
451
- Object.defineProperty(results, 'synonym_matches', {
452
- value: applied ? synonym_matches : {},
453
- enumerable: false,
454
- });
455
- Object.defineProperty(results, 'tier', {
456
- value: warmHits && warmHits.length > 0
457
- ? 'warm-fts5'
458
- : (warmEmpty ? 'hot-linear-empty-fts5' : 'hot-linear'),
459
- enumerable: false,
460
- });
575
+ // Structured results are plain arrays — no non-enumerable decorations needed.
576
+ // Legacy path: attach non-enumerable metadata as before (byte-identical).
577
+ if (format !== 'structured') {
578
+ Object.defineProperty(results, 'synonym_matches', {
579
+ value: applied ? synonym_matches : {},
580
+ enumerable: false,
581
+ });
582
+ Object.defineProperty(results, 'tier', {
583
+ value: warmHits && warmHits.length > 0
584
+ ? 'warm-fts5'
585
+ : (warmEmpty ? 'hot-linear-empty-fts5' : 'hot-linear'),
586
+ enumerable: false,
587
+ });
588
+ }
461
589
  return results;
462
590
  }
@@ -333,7 +333,13 @@ export function storeFactBitemporal(db, fact, now) {
333
333
  const factId = insertFact(db, f, t);
334
334
  return { invalidated, factId };
335
335
  });
336
- const r = txn(fact, ts);
336
+ // F2.7: .immediate() issues BEGIN IMMEDIATE — see brain-handler conflict.resolve.
337
+ // Sister writers (conflict.resolve) hold IMMEDIATE locks; if we ran DEFERRED here
338
+ // we would hit SQLITE_BUSY when upgrading SHARED→RESERVED on the first write
339
+ // inside the txn body and the user's memory write would silently drop (or
340
+ // throw SQLITE_BUSY at the caller). Lock-mode alignment with sister writers
341
+ // is what makes the cross-connection contract honest.
342
+ const r = txn.immediate(fact, ts);
337
343
  return { invalidated: r.invalidated, factId: r.factId, deduped: false };
338
344
  }
339
345
 
@@ -513,6 +519,38 @@ export function applyDecayToFacts(rows, now, options = {}) {
513
519
  });
514
520
  }
515
521
 
522
/**
 * getHistoryWindow -- bounded slice of facts about (subject[, predicate]) for
 * the wiki compiler's "history" section. Returns at most `limit` rows ordered
 * by valid_from DESC. When rollupOlder is true and the window is full
 * (rows.length === limit), also returns an `older` rollup of facts beyond the
 * window so the wiki page can show "Older: 55 events between 2024-03-01 and
 * 2025-06-30" without bloating the page.
 *
 * Trident F-B2 protection: prevents page-bloat for hot subjects with hundreds
 * of facts.
 *
 * @param {object} db - handle exposing prepare(sql).all(...) / .get(...)
 * @param {string} subject - matched against facts.subject
 * @param {string|null|undefined} predicate - optional filter; skipped when
 *   null or undefined
 * @param {object} [options]
 * @param {number} [options.limit=50] - maximum rows in the window
 * @param {*} [options.since=null] - when truthy, lower bound (inclusive) on
 *   valid_from; it also bounds the `older` rollup
 * @param {boolean} [options.rollupOlder=true] - compute the `older` summary
 * @returns {{rows: object[], older: ({count: number, fromIso: *, toIso: *}|null)}}
 *   `older` is null unless the window is full and at least one fact has a
 *   valid_from strictly earlier than the last row in the window.
 */
export function getHistoryWindow(db, subject, predicate, { limit = 50, since = null, rollupOlder = true } = {}) {
  const params = [subject];
  let where = 'subject = ?';
  if (predicate != null) { where += ' AND predicate = ?'; params.push(predicate); }
  if (since) { where += ' AND valid_from >= ?'; params.push(since); }
  const rows = db.prepare(
    `SELECT id, predicate, object, valid_from, valid_to, memory_id, source, confidence
       FROM facts WHERE ${where} ORDER BY valid_from DESC LIMIT ?`
  ).all(...params, limit);
  let older = null;
  // rows.length > 0 guards the limit === 0 case: an empty result would
  // otherwise satisfy rows.length === limit and crash on rows[-1].valid_from.
  if (rollupOlder && rows.length > 0 && rows.length === limit) {
    const earliest = rows[rows.length - 1].valid_from;
    // NOTE(review): facts that share `earliest` but fell outside the LIMIT are
    // in neither `rows` nor this rollup (strict `<`). Confirm whether ties at
    // the window boundary matter for the wiki page.
    const r = db.prepare(
      `SELECT COUNT(*) AS count, MIN(valid_from) AS fromIso, MAX(valid_from) AS toIso
         FROM facts WHERE ${where} AND valid_from < ?`
    ).get(...params, earliest);
    if (r && r.count > 0) older = r;
  }
  return { rows, older };
}
553
+
516
554
  export default {
517
555
  openTemporalDb,
518
556
  openTemporalDbSync,
@@ -524,6 +562,7 @@ export default {
524
562
  getHistory,
525
563
  getAllFactsWithWindows,
526
564
  applyDecayToFacts,
565
+ getHistoryWindow,
527
566
  DECAY_HALFLIFE_DAYS,
528
567
  DECAY_HALFLIFE_SESSION_DAYS,
529
568
  };
@@ -46,12 +46,17 @@
46
46
  * AGENTS.md hiccup — see `wave-state.js#checkpointWave`).
47
47
  */
48
48
 
49
- import { join } from 'node:path';
49
+ import { join, resolve as pathResolve, dirname as pathDirname } from 'node:path';
50
50
 
51
51
  import { withFsLock, lockPathFor } from '../fs-lock.js';
52
52
  import { readWaveState } from './wave-state.js';
53
53
  import { mergeFile, MergeBlockAwareError } from './merge-block-aware.js';
54
54
  import { query } from './state-sdk.js';
55
+ import {
56
+ selectDisciplineTemplate,
57
+ detectProjectTypeFromRepo,
58
+ } from './discipline-selector.js';
59
+ import { validateSafeRepoPath } from '../brain/path-guard.js';
55
60
 
56
61
  /**
57
62
  * Render the BLACKBOARD marker-block payload from a wave's STATE.md
@@ -93,8 +98,21 @@ export async function populateBlackboardBlock(waveId, projectRoot) {
93
98
  const state = await readWaveState(waveId, projectRoot);
94
99
  if (!state) return { ok: false, reason: 'no-state' };
95
100
 
101
+ // Defense-in-depth: refuse to operate at a filesystem root (e.g. '/' on
102
+ // POSIX, 'C:\\' on Windows). validateSafeRepoPath alone accepts these
103
+ // because '/AGENTS.md' is technically "inside" '/'; OS permissions would
104
+ // then catch the actual write, but the failure mode would be 'merge-error'
105
+ // not 'unsafe-path'. We want a clean structured rejection upstream of any
106
+ // I/O attempt. Test at integration/test-discipline-integration.js:189.
107
+ const resolvedRoot = pathResolve(projectRoot || '.');
108
+ if (resolvedRoot === pathDirname(resolvedRoot)) {
109
+ return { ok: false, reason: 'unsafe-path', error: 'projectRoot is a filesystem root' };
110
+ }
111
+
96
112
  const payload = renderBlackboardPayload(waveId, state);
97
113
  const agentsMdPath = join(projectRoot, 'AGENTS.md');
114
+ const guard = validateSafeRepoPath(projectRoot, agentsMdPath);
115
+ if (!guard.ok) return { ok: false, reason: 'unsafe-path', error: guard.error };
98
116
  const lockPath = lockPathFor(agentsMdPath);
99
117
 
100
118
  let mergeResult;
@@ -150,3 +168,98 @@ export async function populateBlackboardBlock(waveId, projectRoot) {
150
168
 
151
169
  return { ok: true };
152
170
  }
171
+
172
/**
 * Populate the DISCIPLINE marker block in `<projectRoot>/AGENTS.md` with the
 * project-appropriate discipline template body. Held under the §3 #8 AGENTS.md
 * lock; in-process (no spawn). Best-effort SDK event emit after lock release.
 *
 * If `projectType` is not supplied, `detectProjectTypeFromRepo(projectRoot)`
 * is called to infer it. For `unknown` / `mixed` types the DISCIPLINE block
 * is written with an empty body (marker present, body empty) -- this is the
 * correct state, not an error.
 *
 * Path-safety checks run before project-type detection and template
 * selection, so an unsafe `projectRoot` is rejected before either call
 * receives it.
 *
 * Return shapes:
 *   `{ ok: true }`                                        -- merge succeeded
 *   `{ ok: true, noop: true }`                            -- merge reported noop
 *   `{ ok: false, reason: 'unsafe-path', error }`         -- root / path guard refused
 *   `{ ok: false, reason: 'merge-error', error }`         -- merger threw
 *   `{ ok: false, reason: 'template-missing', error }`    -- template file absent
 *
 * @param {string} projectRoot
 * @param {string} [projectType]  optional; inferred when absent
 * @param {{waveId?: string}} [opts]  optional; waveId defaults to 'system'
 * @returns {Promise<{ok: boolean, noop?: boolean, reason?: string, error?: string}>}
 */
export async function populateDisciplineBlock(projectRoot, projectType, opts = {}) {
  // `opts?.` tolerates an explicit null (the `= {}` default only covers undefined).
  const waveId = opts?.waveId || 'system';

  // Defense-in-depth: refuse to operate at a filesystem root (a path that is
  // its own dirname), same rejection as populateBlackboardBlock.
  const resolvedRoot = pathResolve(projectRoot || '.');
  if (resolvedRoot === pathDirname(resolvedRoot)) {
    return { ok: false, reason: 'unsafe-path', error: 'projectRoot is a filesystem root' };
  }

  const agentsMdPath = join(projectRoot, 'AGENTS.md');
  const guard = validateSafeRepoPath(projectRoot, agentsMdPath);
  if (!guard.ok) return { ok: false, reason: 'unsafe-path', error: guard.error };

  // Only after the root is known-safe do we hand it to the type detector
  // (presumably it inspects the repository on disk -- confirm against
  // discipline-selector.js).
  const type = projectType !== undefined && projectType !== null
    ? String(projectType)
    : detectProjectTypeFromRepo(projectRoot);

  let content;
  try {
    content = selectDisciplineTemplate(type);
  } catch (err) {
    return {
      ok: false,
      reason: 'template-missing',
      error: String(err.message || err),
    };
  }

  const lockPath = lockPathFor(agentsMdPath);

  // Capture the merge failure instead of returning from inside the catch so
  // the error-shape mapping below stays in one place.
  let mergeResult;
  let mergeError = null;
  try {
    mergeResult = await withFsLock(lockPath, async () => mergeFile(
      agentsMdPath,
      [{ block: 'DISCIPLINE', content }],
    ));
  } catch (err) {
    mergeError = err;
  }

  if (mergeError) {
    const code = mergeError instanceof MergeBlockAwareError ? mergeError.code : null;
    return {
      ok: false,
      reason: code === 'ERR_TEMPLATE_MISSING' ? 'template-missing' : 'merge-error',
      error: String(mergeError.message || mergeError),
    };
  }

  // SDK-routed observability event -- fire-and-forget AFTER lock release.
  // Mirrors the agents-md.blackboard.set emit pattern above.
  try {
    await query('event.emit', {
      subagentId: 'parent',
      waveId,
      eventType: 'agents-md.discipline.set',
      data: {
        path: mergeResult?.path ?? agentsMdPath,
        bytes: mergeResult?.bytes ?? 0,
        seeded: !!mergeResult?.seeded,
        project_type: type,
      },
      dedupKey: `agents-md.discipline.set:${waveId}:${type}`,
    }, { projectRoot, subagentId: 'parent' });
  } catch {
    // Observability is best-effort; never demote a successful AGENTS.md
    // rewrite because the event tap had a hiccup.
  }

  // Forward noop flag from the short-circuit so callers can detect idempotent
  // calls (5B-L2-05). When mergeResult.noop is true the file was unchanged.
  const result = { ok: true };
  if (mergeResult?.noop) result.noop = true;
  return result;
}