@equationalapplications/core-llm-wiki 4.7.0 → 4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,6 +6,14 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
6
6
 
7
7
  var MiniSearch__default = /*#__PURE__*/_interopDefault(MiniSearch);
8
8
 
9
+ var __typeError = (msg) => {
10
+ throw TypeError(msg);
11
+ };
12
+ var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
13
+ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
14
+ var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
15
+ var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);
16
+
9
17
  // src/db/schema.ts
10
18
  async function setupDatabase(db, prefix) {
11
19
  await db.execAsync(`
@@ -81,6 +89,9 @@ async function setupDatabase(db, prefix) {
81
89
 
82
90
  CREATE INDEX IF NOT EXISTS ${prefix}outbox_entity_id_created_at
83
91
  ON ${prefix}outbox (entity_id, created_at);
92
+
93
+ CREATE INDEX IF NOT EXISTS ${prefix}outbox_created_at
94
+ ON ${prefix}outbox (created_at);
84
95
  `);
85
96
  }
86
97
 
@@ -155,28 +166,6 @@ for (let i = 1; i < MIGRATIONS.length; i++) {
155
166
  }
156
167
  var CURRENT_SCHEMA_VERSION = MIGRATIONS.length > 0 ? MIGRATIONS[MIGRATIONS.length - 1].version : 0;
157
168
 
158
- // src/types.ts
159
- var WikiBusyError = class extends Error {
160
- constructor(operation, entityId) {
161
- super(`${operation} already running for entity ${entityId}`);
162
- this.name = "WikiBusyError";
163
- this.operation = operation;
164
- this.entityId = entityId;
165
- }
166
- };
167
- var PrunePartialFailureError = class extends Error {
168
- constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
169
- super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
170
- this.name = "PrunePartialFailureError";
171
- this.deleted = deleted;
172
- this.failedAt = failedAt;
173
- this.remaining = remaining;
174
- this.deletedTasks = deletedTasks;
175
- this.deletedEvents = deletedEvents;
176
- this.cause = cause;
177
- }
178
- };
179
-
180
169
  // src/repositories/BaseRepository.ts
181
170
  var BaseRepository = class {
182
171
  constructor(db, prefix) {
@@ -220,6 +209,28 @@ function mapRowToFact(row) {
220
209
  access_count: Number(row.access_count ?? 0)
221
210
  };
222
211
  }
212
+ function normalizeEmbeddingBlobValue(blob) {
213
+ if (blob instanceof Uint8Array) return blob;
214
+ if (blob !== null && blob !== void 0 && typeof blob === "object") {
215
+ const obj = blob;
216
+ if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
217
+ return new Uint8Array(obj["data"]);
218
+ }
219
+ const entries = Object.keys(obj);
220
+ if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
221
+ const len = entries.length;
222
+ const arr = new Uint8Array(len);
223
+ for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
224
+ return arr;
225
+ }
226
+ }
227
+ return null;
228
+ }
229
+ function mapRowToFactWithBlobs(row) {
230
+ const base = mapRowToFact(row);
231
+ const embeddingBlob = normalizeEmbeddingBlobValue(row.embedding_blob);
232
+ return embeddingBlob ? { ...base, embedding_blob: embeddingBlob } : base;
233
+ }
223
234
  var EntryRepository = class extends BaseRepository {
224
235
  constructor(db, prefix, outbox) {
225
236
  super(db, prefix);
@@ -315,21 +326,7 @@ var EntryRepository = class extends BaseRepository {
315
326
  * Normalize an embedding blob value to Uint8Array or null.
316
327
  */
317
328
  normalizeEmbeddingBlob(blob) {
318
- if (blob instanceof Uint8Array) return blob;
319
- if (blob !== null && blob !== void 0 && typeof blob === "object") {
320
- const obj = blob;
321
- if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
322
- return new Uint8Array(obj["data"]);
323
- }
324
- const entries = Object.keys(obj);
325
- if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
326
- const len = entries.length;
327
- const arr = new Uint8Array(len);
328
- for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
329
- return arr;
330
- }
331
- }
332
- return null;
329
+ return normalizeEmbeddingBlobValue(blob);
333
330
  }
334
331
  /**
335
332
  * Fetch existing rows by IDs and return id/entity_id/updated_at for import collision resolution.
@@ -509,7 +506,7 @@ var EntryRepository = class extends BaseRepository {
509
506
  }
510
507
  /**
511
508
  * Fetch recent non-deleted entries for an entity (limited), ordered by updated_at DESC.
512
- * Used by _doRunLibrarian().
509
+ * Used by MaintenanceService.doRunLibrarian().
513
510
  */
514
511
  async findRecentByEntityId(entityId, limit, tx) {
515
512
  const executor = this.getExecutor(tx);
@@ -519,6 +516,18 @@ var EntryRepository = class extends BaseRepository {
519
516
  );
520
517
  return rows.map(mapRowToFact);
521
518
  }
519
+ /**
520
+ * Fetch all non-deleted entries for an entity with embedding blobs preserved.
521
+ * Used by ImportExportService for export/import round-tripping.
522
+ */
523
+ async findAllByEntityIdWithBlobs(entityId, tx) {
524
+ const executor = this.getExecutor(tx);
525
+ const rows = await executor.getAllAsync(
526
+ `SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
527
+ [entityId]
528
+ );
529
+ return rows.map(mapRowToFactWithBlobs);
530
+ }
522
531
  /**
523
532
  * Count non-deleted entries for the given entities whose embedding_blob dimension
524
533
  * doesn't match queryVecLength. Used by read() to detect model-switch mismatches.
@@ -605,24 +614,19 @@ var EntryRepository = class extends BaseRepository {
605
614
  }
606
615
  /**
607
616
  * Mark orphaned entries (never accessed, old) as deleted.
608
- * Used by _doRunHeal().
617
+ * Used by MaintenanceService.doRunHeal().
609
618
  */
610
619
  async markOrphaned(entityId, orphanThreshold, tx) {
611
620
  const executor = this.getExecutor(tx);
612
621
  const now = Date.now();
613
- const orphanedRows = await executor.getAllAsync(
614
- `SELECT id FROM ${this.prefix}entries
615
- WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL`,
616
- [entityId, orphanThreshold]
617
- );
618
- if (orphanedRows.length === 0) return 0;
619
- const result = await executor.runAsync(
622
+ const updatedRows = await executor.getAllAsync(
620
623
  `UPDATE ${this.prefix}entries
621
624
  SET deleted_at = ?, updated_at = ?
622
- WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL`,
625
+ WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
626
+ RETURNING id`,
623
627
  [now, now, entityId, orphanThreshold]
624
628
  );
625
- for (const row of orphanedRows) {
629
+ for (const row of updatedRows) {
626
630
  await this.outbox.push({
627
631
  entityId,
628
632
  tableName: "entries",
@@ -631,11 +635,11 @@ var EntryRepository = class extends BaseRepository {
631
635
  payload: { id: row.id, entity_id: entityId, deleted_at: now }
632
636
  }, tx);
633
637
  }
634
- return result.changes;
638
+ return updatedRows.map((r) => r.id);
635
639
  }
636
640
  /**
637
641
  * Downgrade stale inferred entries to 'tentative'.
638
- * Used by _doRunHeal().
642
+ * Used by MaintenanceService.doRunHeal().
639
643
  */
640
644
  async downgradeStaleInferred(entityId, staleThreshold, tx) {
641
645
  const executor = this.getExecutor(tx);
@@ -667,7 +671,7 @@ var EntryRepository = class extends BaseRepository {
667
671
  }
668
672
  /**
669
673
  * Downgrade specific entries to 'tentative' by IDs.
670
- * Used by _doRunHeal().
674
+ * Used by MaintenanceService.doRunHeal().
671
675
  */
672
676
  async downgradeByIds(ids, entityId, tx) {
673
677
  if (ids.length === 0) return;
@@ -690,7 +694,7 @@ var EntryRepository = class extends BaseRepository {
690
694
  }
691
695
  /**
692
696
  * Soft-delete specific entries by IDs.
693
- * Used by _doRunHeal().
697
+ * Used by MaintenanceService.doRunHeal().
694
698
  */
695
699
  async softDeleteByIds(ids, entityId, tx) {
696
700
  if (ids.length === 0) return;
@@ -926,12 +930,18 @@ function generateId(prefix = "") {
926
930
 
927
931
  // src/repositories/OutboxRepository.ts
928
932
  var OutboxRepository = class extends BaseRepository {
933
+ constructor(db, prefix, enableOutbox = false) {
934
+ super(db, prefix);
935
+ this.enableOutbox = enableOutbox;
936
+ }
929
937
  /**
930
938
  * Insert a new outbox event within the provided transaction.
939
+ * No-op when enableOutbox is false.
931
940
  * `tx` is required — callers must always pass the active transaction
932
941
  * so the write is atomic with the main table mutation.
933
942
  */
934
943
  async push(params, tx) {
944
+ if (!this.enableOutbox) return;
935
945
  const executor = this.getExecutor(tx);
936
946
  const id = generateId("out_");
937
947
  const now = Date.now();
@@ -942,12 +952,12 @@ var OutboxRepository = class extends BaseRepository {
942
952
  );
943
953
  }
944
954
  /**
945
- * Fetch pending outbox rows ordered by created_at ASC.
955
+ * Fetch pending outbox rows ordered by created_at ASC, rowid ASC.
946
956
  * Reads directly from `this.db` (not a transaction).
947
957
  */
948
958
  async fetchPending(limit = 50) {
949
959
  return this.db.getAllAsync(
950
- `SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC LIMIT ?`,
960
+ `SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC, rowid ASC LIMIT ?`,
951
961
  [limit]
952
962
  );
953
963
  }
@@ -958,11 +968,15 @@ var OutboxRepository = class extends BaseRepository {
958
968
  */
959
969
  async acknowledge(ids) {
960
970
  if (ids.length === 0) return;
961
- const placeholders = ids.map(() => "?").join(", ");
962
- await this.db.runAsync(
963
- `DELETE FROM ${this.prefix}outbox WHERE id IN (${placeholders})`,
964
- ids
965
- );
971
+ const chunkSize = 500;
972
+ for (let i = 0; i < ids.length; i += chunkSize) {
973
+ const chunk = ids.slice(i, i + chunkSize);
974
+ const placeholders = chunk.map(() => "?").join(", ");
975
+ await this.db.runAsync(
976
+ `DELETE FROM ${this.prefix}outbox WHERE id IN (${placeholders})`,
977
+ chunk
978
+ );
979
+ }
966
980
  }
967
981
  };
968
982
 
@@ -1420,29 +1434,6 @@ var MetadataRepository = class extends BaseRepository {
1420
1434
  }
1421
1435
  };
1422
1436
 
1423
- // src/prompts.ts
1424
- var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
1425
- Return ONLY a valid JSON object matching this schema:
1426
- {
1427
- "facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
1428
- "tasks": [{ "description": "string", "priority": "number (0-10)" }]
1429
- }
1430
- Keep facts concise. Do not return markdown, just raw JSON.`;
1431
- var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
1432
- Return ONLY a valid JSON object matching this schema:
1433
- {
1434
- "downgraded": ["string (fact IDs)"],
1435
- "deleted": ["string (fact IDs)"],
1436
- "newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
1437
- }
1438
- Do not return markdown, just raw JSON.`;
1439
- var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
1440
- Return ONLY a valid JSON object matching this schema:
1441
- {
1442
- "facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
1443
- }
1444
- Extract verbatim factual content. Do not return markdown, just raw JSON.`;
1445
-
1446
1437
  // src/utils/cosine.ts
1447
1438
  function cosineSimilarity(a, b) {
1448
1439
  let dot = 0, normA = 0, normB = 0;
@@ -1484,284 +1475,137 @@ function parseEmbedding(blob, text) {
1484
1475
  return null;
1485
1476
  }
1486
1477
 
1487
- // src/readOptions.ts
1488
- function normalizeEntityIds(entityId) {
1489
- const input = Array.isArray(entityId) ? entityId : [entityId];
1490
- const seen = /* @__PURE__ */ new Set();
1491
- const normalized = [];
1492
- for (const id of input) {
1493
- if (seen.has(id)) continue;
1494
- seen.add(id);
1495
- normalized.push(id);
1478
+ // src/services/SearchService.ts
1479
+ var _SearchService = class _SearchService {
1480
+ constructor(entryRepo) {
1481
+ this.entryRepo = entryRepo;
1482
+ this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
1483
+ this.vectorCache = /* @__PURE__ */ new Map();
1484
+ this.miniSearch = new MiniSearch__default.default({
1485
+ fields: ["title", "body", "tags"],
1486
+ storeFields: ["entity_id"],
1487
+ searchOptions: {
1488
+ boost: { title: 2 },
1489
+ fuzzy: 0.2,
1490
+ prefix: true
1491
+ }
1492
+ });
1496
1493
  }
1497
- return normalized;
1498
- }
1499
- function sanitizeTierWeights(entityIds, tierWeights) {
1500
- if (tierWeights === void 0) return void 0;
1501
- const sanitized = /* @__PURE__ */ Object.create(null);
1502
- for (const entityId of entityIds) {
1503
- const raw = tierWeights[entityId];
1504
- if (raw === void 0 || !Number.isFinite(raw)) {
1505
- sanitized[entityId] = 1;
1494
+ /**
1495
+ * Rebuilds the search index and clears the vector cache for a given entity.
1496
+ * A direct replacement for manually syncing state after a DB transaction.
1497
+ */
1498
+ async sync(entityId) {
1499
+ await this.rebuildIndex(entityId);
1500
+ this.evictCache(entityId);
1501
+ }
1502
+ /**
1503
+ * Clears the parsed vector cache. Useful for mid-loop flush guarantees
1504
+ * or memory pressure evictions.
1505
+ */
1506
+ evictCache(entityId) {
1507
+ if (entityId) {
1508
+ this.vectorCache.delete(entityId);
1506
1509
  } else {
1507
- sanitized[entityId] = Math.max(0, raw);
1510
+ this.vectorCache.clear();
1508
1511
  }
1509
1512
  }
1510
- return sanitized;
1511
- }
1512
- function applyTierWeight(score, entityId, sanitizedTierWeights) {
1513
- const weight = sanitizedTierWeights?.[entityId] ?? 1;
1514
- if (weight === 0) return -Infinity;
1515
- return score * weight;
1516
- }
1517
- function shouldExposeReadMetadata(entityId) {
1518
- return Array.isArray(entityId);
1519
- }
1520
-
1521
- // src/WikiMemory.ts
1522
- var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
1523
- function parseJsonResponse(text) {
1524
- const firstBrace = text.indexOf("{");
1525
- const firstBracket = text.indexOf("[");
1526
- let start;
1527
- let openChar;
1528
- let closeChar;
1529
- if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
1530
- start = firstBrace;
1531
- openChar = "{";
1532
- closeChar = "}";
1533
- } else if (firstBracket !== -1) {
1534
- start = firstBracket;
1535
- openChar = "[";
1536
- closeChar = "]";
1537
- } else {
1538
- throw new SyntaxError("No JSON object/array found in LLM response");
1513
+ /**
1514
+ * Fully resets the search service.
1515
+ */
1516
+ clearAll() {
1517
+ this.vectorCache.clear();
1518
+ this.miniSearch.removeAll();
1519
+ this.miniSearchEntryIdsByEntity.clear();
1539
1520
  }
1540
- let depth = 0;
1541
- let inString = false;
1542
- let escape = false;
1543
- let end = -1;
1544
- for (let i = start; i < text.length; i++) {
1545
- const ch = text[i];
1546
- if (escape) {
1547
- escape = false;
1548
- continue;
1549
- }
1550
- if (ch === "\\" && inString) {
1551
- escape = true;
1552
- continue;
1521
+ /**
1522
+ * Executes a keyword search against the active MiniSearch index.
1523
+ */
1524
+ searchKeyword(query, entityIds, limit) {
1525
+ const entityIdSet = new Set(entityIds);
1526
+ const results = this.miniSearch.search(query, {
1527
+ filter: (r) => entityIdSet.has(r.entity_id),
1528
+ combineWith: "OR"
1529
+ });
1530
+ return results.slice(0, limit);
1531
+ }
1532
+ /**
1533
+ * Pre-fetches MiniSearch scores for candidate hydration, used during hybrid weighting.
1534
+ */
1535
+ getMiniSearchScores(query, entityIds, preFilterLimit) {
1536
+ const entityIdSet = new Set(entityIds);
1537
+ let results = this.miniSearch.search(query, {
1538
+ filter: (r) => entityIdSet.has(r.entity_id),
1539
+ combineWith: "OR"
1540
+ });
1541
+ if (preFilterLimit !== void 0) {
1542
+ results = results.slice(0, preFilterLimit);
1553
1543
  }
1554
- if (ch === '"') {
1555
- inString = !inString;
1556
- continue;
1544
+ if (results.length === 0) return /* @__PURE__ */ new Map();
1545
+ const maxMsScore = Math.max(1, results[0]?.score ?? 1);
1546
+ return new Map(results.map((r) => [r.id, r.score / maxMsScore]));
1547
+ }
1548
+ /**
1549
+ * Score candidate rows using in-process JS cosine similarity.
1550
+ * Applies hybrid blending (if weight set) and tie-break sorting before returning.
1551
+ */
1552
+ async rankSemantic(args) {
1553
+ const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
1554
+ const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
1555
+ let entityCache = this.vectorCache.get(entityId);
1556
+ const tooLarge = populateCache && candidateRows.length > _SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
1557
+ if (tooLarge && entityCache) {
1558
+ this.vectorCache.delete(entityId);
1559
+ entityCache = void 0;
1557
1560
  }
1558
- if (inString) continue;
1559
- if (ch === openChar) {
1560
- depth++;
1561
- continue;
1561
+ const canCache = populateCache && !tooLarge;
1562
+ if (canCache && !entityCache) {
1563
+ entityCache = /* @__PURE__ */ new Map();
1562
1564
  }
1563
- if (ch === closeChar) {
1564
- depth--;
1565
- if (depth === 0) {
1566
- end = i;
1567
- break;
1565
+ const scored = candidateRows.map((row) => {
1566
+ let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
1567
+ if (vector && canCache && entityCache && !entityCache.has(row.id)) {
1568
+ entityCache.set(row.id, vector);
1569
+ }
1570
+ let score = 0;
1571
+ if (vector && vector.length === queryVec.length) {
1572
+ const cosSim = cosineSimilarity(queryVec, vector);
1573
+ if (weight !== void 0) {
1574
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
1575
+ score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
1576
+ } else {
1577
+ score = cosSim;
1578
+ }
1579
+ } else if (weight !== void 0 && weight < 1) {
1580
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
1581
+ score = (1 - weight) * kwScore;
1582
+ } else {
1583
+ score = -2;
1584
+ }
1585
+ return {
1586
+ id: row.id,
1587
+ entity_id: row.entity_id,
1588
+ score,
1589
+ updated_at: row.updated_at,
1590
+ access_count: row.access_count
1591
+ };
1592
+ });
1593
+ if (canCache && entityCache && entityCache.size > 0) {
1594
+ if (!this.vectorCache.has(entityId)) {
1595
+ if (this.vectorCache.size >= _SearchService.MAX_VECTOR_CACHE_ENTITIES) {
1596
+ const oldestKey = this.vectorCache.keys().next().value;
1597
+ if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
1598
+ }
1599
+ this.vectorCache.set(entityId, entityCache);
1568
1600
  }
1569
1601
  }
1602
+ if (!skipSort) {
1603
+ this._tieBreakSort(scored);
1604
+ }
1605
+ return scored.slice(0, limit);
1570
1606
  }
1571
- if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
1572
- return JSON.parse(text.slice(start, end + 1));
1573
- }
1574
- function safeSlice(value, start, end) {
1575
- const length = value.length;
1576
- let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
1577
- let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
1578
- if (safeStart > safeEnd) {
1579
- [safeStart, safeEnd] = [safeEnd, safeStart];
1580
- }
1581
- if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
1582
- safeStart--;
1583
- }
1584
- if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
1585
- safeEnd--;
1586
- }
1587
- return value.slice(safeStart, safeEnd);
1588
- }
1589
- function chunkText(input, maxChunkLength, overlap) {
1590
- const text = input.trim();
1591
- if (text.length === 0) return { chunks: [], truncated: false };
1592
- if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
1593
- throw new Error("maxChunkLength must be an integer >= 2");
1594
- }
1595
- if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
1596
- throw new Error("overlap must be a non-negative integer < maxChunkLength");
1597
- }
1598
- const chunks = [];
1599
- let truncated = false;
1600
- let cursor = 0;
1601
- const halfMax = Math.floor(maxChunkLength / 2);
1602
- while (cursor < text.length) {
1603
- const remaining = text.length - cursor;
1604
- if (remaining <= maxChunkLength) {
1605
- chunks.push(safeSlice(text, cursor, text.length));
1606
- break;
1607
- }
1608
- const windowEnd = cursor + maxChunkLength;
1609
- const minSplit = cursor + halfMax;
1610
- let splitPoint = -1;
1611
- const paraIdx = text.lastIndexOf("\n\n", windowEnd);
1612
- if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
1613
- splitPoint = paraIdx + 2;
1614
- }
1615
- if (splitPoint === -1) {
1616
- let lastTerm = -1;
1617
- for (let i = minSplit; i < windowEnd - 1; i++) {
1618
- const ch = text[i];
1619
- if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
1620
- lastTerm = i + 2;
1621
- }
1622
- }
1623
- if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
1624
- }
1625
- if (splitPoint === -1) {
1626
- for (let i = windowEnd - 1; i >= minSplit; i--) {
1627
- if (/\s/.test(text[i])) {
1628
- splitPoint = i + 1;
1629
- break;
1630
- }
1631
- }
1632
- }
1633
- if (splitPoint === -1) {
1634
- truncated = true;
1635
- splitPoint = windowEnd;
1636
- }
1637
- chunks.push(safeSlice(text, cursor, splitPoint));
1638
- const next = Math.max(splitPoint - overlap, cursor + 1);
1639
- cursor = next;
1640
- }
1641
- return { chunks, truncated };
1642
- }
1643
- async function withConcurrency(tasks, limit) {
1644
- const results = new Array(tasks.length);
1645
- let index = 0;
1646
- let failed = false;
1647
- let firstError;
1648
- async function worker() {
1649
- while (index < tasks.length && !failed) {
1650
- const i = index++;
1651
- try {
1652
- results[i] = await tasks[i]();
1653
- } catch (e) {
1654
- if (!failed) {
1655
- failed = true;
1656
- firstError = e;
1657
- }
1658
- return;
1659
- }
1660
- }
1661
- }
1662
- const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
1663
- await Promise.allSettled(Array.from({ length: workerCount }, worker));
1664
- if (failed) throw firstError;
1665
- return results;
1666
- }
1667
- function clip(value, max) {
1668
- if (typeof value !== "string") return "";
1669
- const s = value.trim();
1670
- return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
1671
- }
1672
- function validateTags(tags) {
1673
- if (!Array.isArray(tags)) return [];
1674
- return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
1675
- }
1676
- function validateFact(fact) {
1677
- if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
1678
- const title = clip(fact.title, 80);
1679
- const body = clip(fact.body, 800);
1680
- if (!title || !body) return null;
1681
- let confidence = fact.confidence;
1682
- if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
1683
- return {
1684
- ...fact,
1685
- title,
1686
- body,
1687
- confidence,
1688
- tags: validateTags(fact.tags)
1689
- };
1690
- }
1691
- function validateTask(task) {
1692
- if (typeof task?.description !== "string") return null;
1693
- const description = clip(task.description, 200);
1694
- if (!description) return null;
1695
- let priority = task.priority;
1696
- if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
1697
- return {
1698
- ...task,
1699
- description,
1700
- priority
1701
- };
1702
- }
1703
- function normalizeSourceRef(value) {
1704
- if (typeof value !== "string") return null;
1705
- const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
1706
- return cleaned.length > 0 ? cleaned : null;
1707
- }
1708
- function normalizeSourceHash(value) {
1709
- if (typeof value !== "string") return null;
1710
- return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
1711
- }
1712
- function titleTokens(title) {
1713
- return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
1714
- }
1715
- function jaccardScore(a, b) {
1716
- if (a.size === 0 && b.size === 0) return 0;
1717
- const intersection = new Set([...a].filter((x) => b.has(x)));
1718
- const union = /* @__PURE__ */ new Set([...a, ...b]);
1719
- return intersection.size / union.size;
1720
- }
1721
- var FUZZY_THRESHOLD = 0.5;
1722
- var MIN_TOKENS_TO_QUALIFY = 3;
1723
- var _WikiMemory = class _WikiMemory {
1724
- constructor(db, options) {
1725
- this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
1726
- this.activeIngestJobs = /* @__PURE__ */ new Set();
1727
- this.statusSubscribers = /* @__PURE__ */ new Map();
1728
- this.miniSearch = new MiniSearch__default.default({
1729
- fields: ["title", "body", "tags"],
1730
- storeFields: ["entity_id"],
1731
- searchOptions: {
1732
- boost: { title: 2 },
1733
- fuzzy: 0.2,
1734
- prefix: true
1735
- }
1736
- });
1737
- this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
1738
- this.vectorCache = /* @__PURE__ */ new Map();
1739
- this.db = db;
1740
- this.options = options;
1741
- this.prefix = options.config?.tablePrefix || "llm_wiki_";
1742
- this.outboxRepo = new OutboxRepository(db, this.prefix);
1743
- this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
1744
- this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
1745
- this.eventRepo = new EventRepository(db, this.prefix);
1746
- this.metadataRepo = new MetadataRepository(db, this.prefix);
1747
- }
1748
- normalizeMiniSearchRow(row) {
1749
- return {
1750
- id: row.id,
1751
- entity_id: row.entity_id,
1752
- title: row.title,
1753
- body: row.body,
1754
- tags: (() => {
1755
- try {
1756
- const parsed = JSON.parse(row.tags);
1757
- return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
1758
- } catch {
1759
- return row.tags;
1760
- }
1761
- })()
1762
- };
1763
- }
1764
- async rebuildMiniSearchIndex(entityId) {
1607
+ // --- Internal Index Management ---
1608
+ async rebuildIndex(entityId) {
1765
1609
  if (entityId) {
1766
1610
  const rows2 = await this.entryRepo.findMiniSearchRows(entityId);
1767
1611
  const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
@@ -1774,7 +1618,10 @@ var _WikiMemory = class _WikiMemory {
1774
1618
  if (documents2.length > 0) {
1775
1619
  this.miniSearch.addAll(documents2);
1776
1620
  }
1777
- this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
1621
+ this.miniSearchEntryIdsByEntity.set(
1622
+ entityId,
1623
+ new Set(documents2.map((document) => document.id))
1624
+ );
1778
1625
  return;
1779
1626
  }
1780
1627
  const rows = await this.entryRepo.findMiniSearchRows();
@@ -1790,223 +1637,79 @@ var _WikiMemory = class _WikiMemory {
1790
1637
  this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
1791
1638
  }
1792
1639
  }
1793
- async storeEmbeddingDimension(dim) {
1794
- const existing = await this.metadataRepo.getMeta("embedding_dimension");
1795
- if (existing) {
1796
- const storedDim = parseInt(existing, 10);
1797
- if (storedDim !== dim) {
1798
- console.warn(
1799
- `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
1800
- );
1801
- await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
1802
- }
1803
- } else {
1804
- await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
1805
- }
1806
- }
1807
- /**
1808
- * After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
1809
- * value to the canonical `embedding_dimension` key and clear the mismatch flag.
1810
- * This ensures future read() calls use embedding-based retrieval rather than staying
1811
- * stuck on the MiniSearch fallback.
1812
- */
1813
- async _reconcileEmbeddingDimension() {
1814
- const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
1815
- if (!mismatchValue) return;
1816
- const newDim = parseInt(mismatchValue, 10);
1817
- const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
1818
- if (residualCount === 0) {
1819
- await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
1820
- await this.metadataRepo.clearDimensionMismatch(this.db);
1821
- }
1822
- }
1823
- async embedFact(fact) {
1824
- const embedFn = this.options.llmProvider.embed;
1825
- if (!embedFn) return false;
1826
- let tagsStr;
1827
- if (Array.isArray(fact.tags)) {
1828
- tagsStr = fact.tags.join(" ");
1829
- } else {
1830
- try {
1831
- const parsed = JSON.parse(fact.tags);
1832
- tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
1833
- } catch {
1834
- tagsStr = fact.tags;
1835
- }
1836
- }
1837
- const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
1838
- try {
1839
- const vector = await embedFn(text);
1840
- if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
1841
- console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
1842
- return false;
1843
- }
1844
- const float32Vector = new Float32Array(vector);
1845
- let hasNonFinite = false;
1846
- for (let i = 0; i < float32Vector.length; i++) {
1847
- if (!isFinite(float32Vector[i])) {
1848
- hasNonFinite = true;
1849
- break;
1640
+ normalizeMiniSearchRow(row) {
1641
+ return {
1642
+ id: row.id,
1643
+ entity_id: row.entity_id,
1644
+ title: row.title,
1645
+ body: row.body,
1646
+ tags: (() => {
1647
+ try {
1648
+ const parsed = JSON.parse(row.tags);
1649
+ return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
1650
+ } catch {
1651
+ return row.tags;
1850
1652
  }
1851
- }
1852
- if (hasNonFinite) {
1853
- console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
1854
- return false;
1855
- }
1856
- await this.storeEmbeddingDimension(float32Vector.length);
1857
- const blob = new Uint8Array(float32Vector.buffer);
1858
- await this.entryRepo.updateEmbeddingBlob(fact.id, blob);
1859
- try {
1860
- await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
1861
- } catch (hookErr) {
1862
- console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
1863
- }
1864
- return true;
1865
- } catch (err) {
1866
- console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
1867
- return false;
1868
- }
1869
- }
1870
- _librarianKey(entityId) {
1871
- return `${this.prefix}:${entityId}:librarian`;
1653
+ })()
1654
+ };
1872
1655
  }
1873
- _healKey(entityId) {
1874
- return `${this.prefix}:${entityId}:heal`;
1656
+ _tieBreakSort(items) {
1657
+ items.sort((a, b) => this._compareScoredRows(a, b));
1875
1658
  }
1876
- _warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
1877
- console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
1878
- }
1879
- /** Maps pre-rename enum strings from older dumps to current source_type values. */
1880
- _normalizeImportedSourceType(raw, ctx) {
1881
- if (raw === "user_document") return "immutable_document";
1882
- if (raw === "agent_inferred") return "librarian_inferred";
1883
- const allowed = ["user_stated", "librarian_inferred", "user_confirmed", "immutable_document"];
1884
- if (allowed.includes(raw)) return raw;
1885
- const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
1886
- throw new Error(
1887
- `importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
1888
- );
1659
+ _compareScoredRows(a, b) {
1660
+ const scoreDiff = b.score - a.score;
1661
+ if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
1662
+ const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
1663
+ if (accessCountDiff !== 0) return accessCountDiff;
1664
+ const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
1665
+ if (updatedAtDiff !== 0) return updatedAtDiff;
1666
+ return a.id.localeCompare(b.id);
1889
1667
  }
1890
- async assertNoLegacySourceTypes() {
1891
- if (!await this.entryRepo.hasLegacySourceTypes()) return;
1892
- const count = await this.entryRepo.countLegacySourceTypes();
1893
- throw new Error(
1894
- `Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
1895
-
1896
- ${this.entryRepo.getLegacyMigrationSQL()}
1668
+ };
1669
+ /**
1670
+ * Maximum number of entities whose parsed embedding vectors are held in
1671
+ * memory. This cap is intentionally conservative so the cache remains safe
1672
+ * on memory-constrained runtimes (e.g., mobile/Expo).
1673
+ */
1674
+ _SearchService.MAX_VECTOR_CACHE_ENTITIES = 16;
1675
+ /**
1676
+ * Maximum number of fact vectors cached per entity. Keep this high enough to
1677
+ * preserve the parsed-embedding reuse optimization for common mid-sized
1678
+ * entities while still maintaining a bounded memory footprint.
1679
+ */
1680
+ _SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
1681
+ var SearchService = _SearchService;
1897
1682
 
1898
- After running the migration SQL, restart your application.`
1899
- );
1900
- }
1901
- async _notifyEmbeddingPersisted(entityId, factId, vector) {
1902
- if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
1903
- const vectorCopy = vector ? vector.slice() : null;
1904
- await this.options.vectorRanker.onEmbeddingPersisted({
1905
- entityId,
1906
- factId,
1907
- vector: vectorCopy
1908
- });
1909
- }
1910
- /**
1911
- * GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
1912
- * Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
1913
- * calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
1914
- */
1915
- async _notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
1916
- if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
1917
- if (this.options.forceDeleteIgnoreRankerHook === true) return;
1918
- const vectorCopy = vector ? vector.slice() : null;
1919
- const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
1920
- if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
1921
- throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
1922
- }
1923
- const timeoutMs = rawTimeout;
1924
- let timeoutHandle;
1925
- const timeoutPromise = new Promise((_, reject) => {
1926
- timeoutHandle = setTimeout(
1927
- () => {
1928
- const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
1929
- timeoutError[HOOK_TIMEOUT_MARKER] = true;
1930
- reject(timeoutError);
1931
- },
1932
- timeoutMs
1933
- );
1934
- });
1935
- const hookPromise = Promise.resolve(
1936
- this.options.vectorRanker.onEmbeddingPersisted({
1937
- entityId,
1938
- factId,
1939
- vector: vectorCopy
1940
- })
1941
- );
1942
- try {
1943
- await Promise.race([hookPromise, timeoutPromise]);
1944
- } catch (err) {
1945
- hookPromise.catch(() => {
1946
- });
1947
- throw err;
1948
- } finally {
1949
- if (timeoutHandle) clearTimeout(timeoutHandle);
1950
- }
1683
+ // src/types.ts
1684
/**
 * Error thrown when an operation cannot start because a conflicting one is
 * already running. Carries the blocking `operation` name and the `entityId`.
 */
var WikiBusyError = class extends Error {
  constructor(operation, entityId) {
    const message = `${operation} already running for entity ${entityId}`;
    super(message);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
1692
/**
 * Error describing a prune run that stopped part-way.
 * Exposes the counts and the failure point given to the constructor:
 * `deleted`, `failedAt`, `remaining`, `deletedTasks`, `deletedEvents`,
 * plus the underlying `cause`.
 */
var PrunePartialFailureError = class extends Error {
  constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
    const summary = `Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`;
    super(summary);
    Object.assign(this, {
      name: "PrunePartialFailureError",
      deleted,
      failedAt,
      remaining,
      deletedTasks,
      deletedEvents,
      cause
    });
  }
};
1704
// Unique symbol used as a property key; presumably tags errors produced by a
// hook timeout so callers can tell them apart (confirm at the use sites).
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
1705
+
1706
+ // src/services/JobManager.ts
1707
+ var JobManager = class {
1708
+ constructor(prefix) {
1709
+ this.prefix = prefix;
1710
+ this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
1711
+ this.activeIngestJobs = /* @__PURE__ */ new Map();
1712
+ this.statusSubscribers = /* @__PURE__ */ new Map();
2010
1713
  }
2011
1714
  _pruneKey(entityId) {
2012
1715
  return `${this.prefix}:${entityId}:prune`;
@@ -2026,6 +1729,12 @@ After running the migration SQL, restart your application.`
2026
1729
  _forgetKey(entityId) {
2027
1730
  return `${this.prefix}:${entityId}:forget`;
2028
1731
  }
1732
+ _librarianKey(entityId) {
1733
+ return `${this.prefix}:${entityId}:librarian`;
1734
+ }
1735
+ _healKey(entityId) {
1736
+ return `${this.prefix}:${entityId}:heal`;
1737
+ }
2029
1738
  _isReembedActive(entityId) {
2030
1739
  return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
2031
1740
  }
@@ -2035,7 +1744,6 @@ After running the migration SQL, restart your application.`
2035
1744
  _isForgetActiveFor(entityId) {
2036
1745
  return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
2037
1746
  }
2038
- /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
2039
1747
  _isAnyMaintenanceActiveWithSuffix(suffix) {
2040
1748
  const entityKeyPrefix = `${this.prefix}:`;
2041
1749
  for (const k of this.activeMaintenanceJobs) {
@@ -2043,794 +1751,874 @@ After running the migration SQL, restart your application.`
2043
1751
  }
2044
1752
  return false;
2045
1753
  }
2046
- /** Returns true if any ingest job is active for the given entity. */
1754
+ _hasIngestJob(entityId, sourceRef) {
1755
+ return this.activeIngestJobs.get(entityId)?.has(sourceRef ?? "") ?? false;
1756
+ }
1757
+ _addIngestJob(entityId, sourceRef) {
1758
+ const sourceKey = sourceRef ?? "";
1759
+ let refs = this.activeIngestJobs.get(entityId);
1760
+ if (!refs) {
1761
+ refs = /* @__PURE__ */ new Set();
1762
+ this.activeIngestJobs.set(entityId, refs);
1763
+ }
1764
+ refs.add(sourceKey);
1765
+ }
1766
+ _removeIngestJob(entityId, sourceRef) {
1767
+ const sourceKey = sourceRef ?? "";
1768
+ const refs = this.activeIngestJobs.get(entityId);
1769
+ if (!refs) return;
1770
+ refs.delete(sourceKey);
1771
+ if (refs.size === 0) {
1772
+ this.activeIngestJobs.delete(entityId);
1773
+ }
1774
+ }
2047
1775
  _isIngestActiveFor(entityId) {
2048
- const entityKeyPrefix = `${this.prefix}:${entityId}:`;
2049
- for (const k of this.activeIngestJobs) {
2050
- if (k.startsWith(entityKeyPrefix)) return true;
1776
+ return this.activeIngestJobs.has(entityId);
1777
+ }
1778
+ acquireLock(operation, entityId, sourceRef) {
1779
+ let blockingOperation = null;
1780
+ if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) {
1781
+ throw new WikiBusyError("import", "*");
2051
1782
  }
2052
- return false;
1783
+ switch (operation) {
1784
+ case "prune":
1785
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
1786
+ else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
1787
+ else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
1788
+ else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
1789
+ else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
1790
+ else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
1791
+ else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
1792
+ break;
1793
+ case "librarian":
1794
+ case "heal": {
1795
+ const opKey = operation === "librarian" ? this._librarianKey(entityId) : this._healKey(entityId);
1796
+ if (this.activeMaintenanceJobs.has(opKey)) blockingOperation = operation;
1797
+ else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
1798
+ else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
1799
+ else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
1800
+ else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
1801
+ break;
1802
+ }
1803
+ case "reembed":
1804
+ if (this.activeMaintenanceJobs.has(this._reembedKey(entityId))) blockingOperation = "reembed";
1805
+ else if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
1806
+ else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
1807
+ else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
1808
+ else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
1809
+ else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
1810
+ else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
1811
+ else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
1812
+ break;
1813
+ case "global_reembed":
1814
+ if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
1815
+ else if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) blockingOperation = "reembed";
1816
+ else if (this._isAnyMaintenanceActiveWithSuffix(":prune")) blockingOperation = "prune";
1817
+ else if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) blockingOperation = "librarian";
1818
+ else if (this._isAnyMaintenanceActiveWithSuffix(":heal")) blockingOperation = "heal";
1819
+ else if (this.activeIngestJobs.size > 0) blockingOperation = "ingest";
1820
+ else if (this._isAnyMaintenanceActiveWithSuffix(":import")) blockingOperation = "import";
1821
+ else if (this._isAnyMaintenanceActiveWithSuffix(":forget")) blockingOperation = "forget";
1822
+ break;
1823
+ case "import":
1824
+ case "forget": {
1825
+ const selfKey = operation === "import" ? this._importKey(entityId) : this._forgetKey(entityId);
1826
+ if (this.activeMaintenanceJobs.has(selfKey)) blockingOperation = operation;
1827
+ else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
1828
+ else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
1829
+ else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
1830
+ else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
1831
+ else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
1832
+ else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
1833
+ else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
1834
+ break;
1835
+ }
1836
+ case "global_import":
1837
+ if (this.activeMaintenanceJobs.has(this._globalImportKey())) blockingOperation = "import";
1838
+ break;
1839
+ case "ingest": {
1840
+ const sourceKey = sourceRef ?? "";
1841
+ if (this._hasIngestJob(entityId, sourceKey)) blockingOperation = "ingest";
1842
+ else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
1843
+ else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
1844
+ else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
1845
+ else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
1846
+ break;
1847
+ }
1848
+ }
1849
+ if (blockingOperation) {
1850
+ throw new WikiBusyError(
1851
+ blockingOperation,
1852
+ operation === "global_reembed" || operation === "global_import" ? "*" : entityId
1853
+ );
1854
+ }
1855
+ if (operation === "ingest") {
1856
+ this._addIngestJob(entityId, sourceRef);
1857
+ } else if (operation === "global_reembed") {
1858
+ this.activeMaintenanceJobs.add(this._globalReembedKey());
1859
+ } else if (operation === "global_import") {
1860
+ this.activeMaintenanceJobs.add(this._globalImportKey());
1861
+ } else {
1862
+ const keyFnName = `_${operation}Key`;
1863
+ const keyFn = this[keyFnName];
1864
+ this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
1865
+ }
1866
+ this._notifyStatusSubscribers(entityId);
1867
+ }
1868
+ releaseLock(operation, entityId, sourceRef) {
1869
+ if (operation === "ingest") {
1870
+ this._removeIngestJob(entityId, sourceRef);
1871
+ } else if (operation === "global_reembed") {
1872
+ this.activeMaintenanceJobs.delete(this._globalReembedKey());
1873
+ } else if (operation === "global_import") {
1874
+ this.activeMaintenanceJobs.delete(this._globalImportKey());
1875
+ } else {
1876
+ const keyFnName = `_${operation}Key`;
1877
+ const keyFn = this[keyFnName];
1878
+ this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
1879
+ }
1880
+ this._notifyStatusSubscribers(entityId);
1881
+ }
1882
+ /**
1883
+ * Returns true if acquireLock(operation, entityId) would throw WikiBusyError.
1884
+ * Use for non-throwing conflict checks (e.g. auto-trigger gating in write()).
1885
+ */
1886
+ isBlocked(operation, entityId) {
1887
+ if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) return true;
1888
+ switch (operation) {
1889
+ case "librarian":
1890
+ return this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
1891
+ case "heal":
1892
+ return this.activeMaintenanceJobs.has(this._healKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
1893
+ case "prune":
1894
+ return this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._healKey(entityId)) || this._isReembedActive(entityId) || this._isIngestActiveFor(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
1895
+ default:
1896
+ return false;
1897
+ }
1898
+ }
1899
+ /**
1900
+ * Auto-heal historically only gated on the heal self-key. Keep that behavior
1901
+ * for write() auto-trigger paths while preserving stricter checks in acquireLock().
1902
+ */
1903
+ tryAcquireAutoHealLock(entityId) {
1904
+ const healKey = this._healKey(entityId);
1905
+ if (this.activeMaintenanceJobs.has(healKey)) return false;
1906
+ this.activeMaintenanceJobs.add(healKey);
1907
+ this._notifyStatusSubscribers(entityId);
1908
+ return true;
1909
+ }
1910
+ /**
1911
+ * Validates then acquires global + per-entity import locks atomically.
1912
+ * Validates all entities before acquiring any lock (same as current importDump semantics).
1913
+ */
1914
+ acquireImportLocks(entityIds) {
1915
+ for (const entityId of entityIds) {
1916
+ if (this.activeMaintenanceJobs.has(this._importKey(entityId))) throw new WikiBusyError("import", entityId);
1917
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) throw new WikiBusyError("librarian", entityId);
1918
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) throw new WikiBusyError("heal", entityId);
1919
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) throw new WikiBusyError("prune", entityId);
1920
+ if (this._isReembedActive(entityId)) throw new WikiBusyError("reembed", entityId);
1921
+ if (this._isIngestActiveFor(entityId)) throw new WikiBusyError("ingest", entityId);
1922
+ if (this._isForgetActiveFor(entityId)) throw new WikiBusyError("forget", entityId);
1923
+ }
1924
+ if (this.activeMaintenanceJobs.has(this._globalImportKey())) throw new WikiBusyError("import", "*");
1925
+ this.activeMaintenanceJobs.add(this._globalImportKey());
1926
+ for (const entityId of entityIds) {
1927
+ this.activeMaintenanceJobs.add(this._importKey(entityId));
1928
+ }
1929
+ }
1930
+ releaseImportLocks(entityIds) {
1931
+ this.activeMaintenanceJobs.delete(this._globalImportKey());
1932
+ for (const entityId of entityIds) {
1933
+ this.activeMaintenanceJobs.delete(this._importKey(entityId));
1934
+ }
1935
+ }
1936
+ getEntityStatus(entityId) {
1937
+ return {
1938
+ ingesting: this._isIngestActiveFor(entityId),
1939
+ librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
1940
+ heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
1941
+ };
1942
+ }
1943
+ subscribeEntityStatus(entityId, callback) {
1944
+ const initial = this.getEntityStatus(entityId);
1945
+ let set = this.statusSubscribers.get(entityId);
1946
+ if (!set) {
1947
+ set = /* @__PURE__ */ new Set();
1948
+ this.statusSubscribers.set(entityId, set);
1949
+ }
1950
+ const entry = { callback, last: this._copyEntityStatus(initial) };
1951
+ set.add(entry);
1952
+ try {
1953
+ callback(this._copyEntityStatus(initial));
1954
+ } catch (err) {
1955
+ console.error(`[JobManager] callback error for entityId="${entityId}" during initial emission`, err);
1956
+ }
1957
+ let active = true;
1958
+ return () => {
1959
+ if (!active) return;
1960
+ active = false;
1961
+ const s = this.statusSubscribers.get(entityId);
1962
+ if (!s) return;
1963
+ s.delete(entry);
1964
+ if (s.size === 0) this.statusSubscribers.delete(entityId);
1965
+ };
2053
1966
  }
2054
1967
  _copyEntityStatus(s) {
2055
1968
  return { ingesting: s.ingesting, librarian: s.librarian, heal: s.heal };
2056
1969
  }
2057
1970
  _notifyStatusSubscribers(entityId) {
1971
+ if (entityId === "*") return;
2058
1972
  const set = this.statusSubscribers.get(entityId);
2059
1973
  if (!set || set.size === 0) return;
2060
1974
  for (const entry of Array.from(set)) {
2061
1975
  if (!set.has(entry)) continue;
2062
1976
  const next = this.getEntityStatus(entityId);
2063
- if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) continue;
1977
+ if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) {
1978
+ continue;
1979
+ }
2064
1980
  entry.last = this._copyEntityStatus(next);
2065
1981
  try {
2066
1982
  entry.callback(this._copyEntityStatus(next));
2067
1983
  } catch (err) {
2068
- console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during transition emission`, err);
1984
+ console.error(`[JobManager] callback error for entityId="${entityId}" during transition emission`, err);
2069
1985
  }
2070
1986
  }
2071
1987
  }
2072
- _validatePruneDuration(value, name) {
2073
- if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
2074
- throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
2075
- }
2076
- }
2077
- async runPrune(entityId, options) {
2078
- const pruneKey = this._pruneKey(entityId);
2079
- const ingestPrefix = `${this.prefix}:${entityId}:`;
2080
- let isIngestRunning = false;
2081
- for (const k of this.activeIngestJobs) {
2082
- if (k.startsWith(ingestPrefix)) {
2083
- isIngestRunning = true;
2084
- break;
2085
- }
2086
- }
2087
- let blockingOperation = null;
2088
- if (this.activeMaintenanceJobs.has(pruneKey)) {
2089
- blockingOperation = "prune";
2090
- } else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
2091
- blockingOperation = "librarian";
2092
- } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
2093
- blockingOperation = "heal";
2094
- } else if (this._isReembedActive(entityId)) {
2095
- blockingOperation = "reembed";
2096
- } else if (isIngestRunning) {
2097
- blockingOperation = "ingest";
2098
- } else if (this._isImportActiveFor(entityId)) {
2099
- blockingOperation = "import";
2100
- } else if (this._isForgetActiveFor(entityId)) {
2101
- blockingOperation = "forget";
2102
- }
2103
- if (blockingOperation !== null) {
2104
- throw new WikiBusyError(blockingOperation, entityId);
2105
- }
2106
- this.activeMaintenanceJobs.add(pruneKey);
2107
- try {
2108
- const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
2109
- const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
2110
- const vacuum = options?.vacuum ?? false;
2111
- this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
2112
- this._validatePruneDuration(retainEventsFor, "retainEventsFor");
2113
- const now = Date.now();
2114
- let deletedEntries = 0;
2115
- let deletedTasks = 0;
2116
- let deletedEvents = 0;
2117
- if (retainSoftDeletedFor !== null) {
2118
- const cutoff = now - retainSoftDeletedFor * 864e5;
2119
- const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
2120
- const succeeded = [];
2121
- let failure = null;
2122
- for (const row of entriesToDelete) {
2123
- try {
2124
- await this._notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
2125
- succeeded.push({ entity_id: row.entity_id, id: row.id });
2126
- } catch (err) {
2127
- failure = { factId: row.id, cause: err };
2128
- break;
2129
- }
2130
- }
2131
- const succeededIds = succeeded.map((r) => r.id);
2132
- await this.db.withTransactionAsync(async (tx) => {
2133
- if (succeededIds.length > 0) {
2134
- deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
2135
- }
2136
- deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
2137
- });
2138
- if (failure) {
2139
- await this.rebuildMiniSearchIndex(entityId);
2140
- this.vectorCache.delete(entityId);
2141
- const remaining = entriesToDelete.length - succeeded.length - 1;
2142
- const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
2143
- if (isTimeout) {
2144
- throw new PrunePartialFailureError(
2145
- succeeded.length,
2146
- failure.factId,
2147
- remaining,
2148
- new Error("Deletion hook timed out"),
2149
- deletedTasks,
2150
- 0
2151
- // events not yet deleted at this point
2152
- );
2153
- }
2154
- const errMsg = failure.cause?.message ?? "";
2155
- const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
2156
- const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
2157
- throw new PrunePartialFailureError(
2158
- succeeded.length,
2159
- failure.factId,
2160
- remaining,
2161
- sanitizedCause,
2162
- deletedTasks,
2163
- 0
2164
- // events not yet deleted at this point
2165
- );
2166
- }
2167
- }
2168
- if (retainEventsFor !== null) {
2169
- const cutoff = now - retainEventsFor * 864e5;
2170
- const eventResult = await this.eventRepo.prune(entityId, cutoff);
2171
- deletedEvents = eventResult.changes;
2172
- }
2173
- if (vacuum) {
2174
- await this.metadataRepo.vacuum();
1988
+ };
1989
+
1990
+ // src/utils/pure.ts
1991
/**
 * Extracts and parses the first JSON object or array embedded in an LLM
 * response (which may wrap the payload in prose or code fences).
 *
 * Scanning starts at the first "{" or "[" and finds its matching closer,
 * ignoring delimiters inside double-quoted strings. If that balanced span is
 * not valid JSON (e.g. a bracketed aside such as "[see below]" preceding the
 * real payload), scanning resumes after it instead of failing immediately.
 * An opener with no matching closer (e.g. truncated output) ends the search,
 * so a fragment nested inside an incomplete payload is never returned.
 *
 * @param {string} text Raw model output.
 * @returns {*} The value produced by JSON.parse for the first parsable span.
 * @throws {SyntaxError} When no balanced span exists ("No JSON object/array
 *   found in LLM response"), or the JSON.parse error of the first balanced
 *   span when none of the candidates is valid JSON.
 */
function parseJsonResponse(text) {
  // Index of the next "{" or "[" at or after `from`, or -1 when none is left.
  const findOpener = (from) => {
    for (let i = from; i < text.length; i++) {
      const ch = text[i];
      if (ch === "{" || ch === "[") return i;
    }
    return -1;
  };
  // Index of the delimiter that closes the opener at `start`, or -1.
  const findCloser = (start) => {
    const openChar = text[start];
    const closeChar = openChar === "{" ? "}" : "]";
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (escaped) {
        // Character after a backslash inside a string is always literal.
        escaped = false;
        continue;
      }
      if (inString) {
        if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === openChar) {
        depth++;
      } else if (ch === closeChar) {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  };
  let firstParseError;
  let cursor = 0;
  for (;;) {
    const start = findOpener(cursor);
    if (start === -1) break;
    const end = findCloser(start);
    if (end === -1) break;
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch (err) {
      // Keep the earliest failure so callers see the same error as before
      // when no later candidate succeeds.
      if (firstParseError === void 0) firstParseError = err;
      cursor = end + 1;
    }
  }
  throw firstParseError ?? new SyntaxError("No JSON object/array found in LLM response");
}
2043
/**
 * Produces an error that is safe to hand to callbacks after a ranker failure.
 *
 * With `sanitizeRankerErrors === false` the original error is passed through
 * (non-Error values are wrapped via `String`). Otherwise the message is
 * replaced by a fixed text that only reveals the error's type name; an
 * existing `cause` is reduced to a stub error naming the cause's type.
 *
 * @param {*} err - The thrown value.
 * @param {boolean} [sanitizeRankerErrors] - Pass `false` to disable scrubbing.
 * @returns {Error} The original, wrapped, or scrubbed error.
 */
function sanitizeRankerError(err, sanitizeRankerErrors) {
  const isError = err instanceof Error;

  if (sanitizeRankerErrors === false) {
    return isError ? err : new Error(String(err));
  }

  const typeName = isError ? err.constructor?.name ?? "Error" : typeof err;

  let errorOptions;
  if (isError && err.cause !== void 0) {
    const causeType = err.cause?.constructor?.name ?? typeof err.cause;
    errorOptions = { cause: new Error(`Caused by: ${causeType}`) };
  }

  const scrubbed = new Error(
    `VectorRanker ${typeName} (message scrubbed for security)`,
    errorOptions
  );
  scrubbed.name = typeName;
  return scrubbed;
}
2056
/**
 * Slices a string without cutting a UTF-16 surrogate pair in half.
 *
 * Negative indices count from the end and out-of-range indices are clamped.
 * If the resolved start is greater than the resolved end, the two are swapped.
 * A start that lands on the low half of a pair moves back one unit so the
 * whole pair is included; an end that lands between the two halves moves back
 * one unit so the whole pair is excluded.
 *
 * @param {string} value - The string to slice.
 * @param {number} start - Start index (may be negative).
 * @param {number} [end] - End index, exclusive (may be negative).
 * @returns {string} The selected substring.
 */
function safeSlice(value, start, end) {
  const size = value.length;

  const resolveIndex = (pos) => (pos < 0 ? Math.max(size + pos, 0) : Math.min(pos, size));
  const isHighSurrogate = (idx) => {
    const code = value.charCodeAt(idx);
    return code >= 0xd800 && code <= 0xdbff;
  };
  const isLowSurrogate = (idx) => {
    const code = value.charCodeAt(idx);
    return code >= 0xdc00 && code <= 0xdfff;
  };
  // True when `idx` sits between the two halves of a surrogate pair.
  const splitsPair = (idx) =>
    idx > 0 && idx < size && isLowSurrogate(idx) && isHighSurrogate(idx - 1);

  let from = resolveIndex(start);
  let to = end === void 0 ? size : resolveIndex(end);
  if (from > to) {
    const swap = from;
    from = to;
    to = swap;
  }

  if (splitsPair(from)) {
    from -= 1;
  }
  if (splitsPair(to)) {
    to -= 1;
  }
  return value.slice(from, to);
}
2071
/**
 * Splits trimmed text into chunks of at most `maxChunkLength` UTF-16 units,
 * with consecutive chunks overlapping by up to `overlap` units.
 *
 * For every window the split position is searched in the second half of the
 * window, in this order: after the last blank-line paragraph break, after the
 * last sentence terminator (`.`, `!`, `?`) followed by whitespace, after the
 * last whitespace character. When none is found the window is cut at its end
 * and `truncated` is reported as `true`.
 *
 * @param {string} input - Text to split; surrounding whitespace is trimmed.
 * @param {number} maxChunkLength - Integer >= 2.
 * @param {number} overlap - Integer in `[0, maxChunkLength)`.
 * @returns {{chunks: string[], truncated: boolean}} Chunks in order, plus a
 *   flag telling whether any chunk was cut without a natural boundary.
 * @throws {Error} When `maxChunkLength` or `overlap` is invalid (checked only
 *   for non-empty input).
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) {
    return { chunks: [], truncated: false };
  }
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }

  const isSpace = (ch) => /\s/.test(ch);
  const isTerminator = (ch) => ch === "." || ch === "!" || ch === "?";
  const halfWindow = Math.floor(maxChunkLength / 2);

  // Returns the preferred cut position inside [earliest, windowEnd], or -1.
  const findBoundary = (earliest, windowEnd) => {
    const paragraphAt = text.lastIndexOf("\n\n", windowEnd);
    if (paragraphAt >= earliest && paragraphAt + 2 <= windowEnd) {
      return paragraphAt + 2;
    }
    // Walking backwards, the first hit is the last terminator in the window.
    for (let i = windowEnd - 2; i >= earliest; i--) {
      if (isTerminator(text[i]) && isSpace(text[i + 1])) {
        return i + 2;
      }
    }
    for (let i = windowEnd - 1; i >= earliest; i--) {
      if (isSpace(text[i])) {
        return i + 1;
      }
    }
    return -1;
  };

  const chunks = [];
  let truncated = false;
  let cursor = 0;
  while (cursor < text.length) {
    if (text.length - cursor <= maxChunkLength) {
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }
    const windowEnd = cursor + maxChunkLength;
    let cut = findBoundary(cursor + halfWindow, windowEnd);
    if (cut === -1) {
      truncated = true;
      cut = windowEnd;
    }
    chunks.push(safeSlice(text, cursor, cut));
    // Always advance by at least one unit so a large overlap cannot stall.
    cursor = Math.max(cut - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
2125
/**
 * Runs task factories with a bounded number of them in flight at once.
 *
 * Results are stored at the index of the task that produced them. After the
 * first failure no further tasks are started; tasks already in flight are
 * awaited, and then the first error is rethrown.
 *
 * The limit is normalised before use: `Infinity` means one worker per task,
 * a finite value >= 1 is floored, and anything else (including `undefined`
 * and `NaN`) falls back to a single worker. Without this, a `NaN` limit
 * produced zero workers and the function returned an array of holes without
 * running any task.
 *
 * @param {Array<function(): (Promise<*>|*)>} tasks - Functions to invoke.
 * @param {number} limit - Maximum number of tasks in flight.
 * @returns {Promise<Array<*>>} Results in task order.
 * @throws The first error thrown or rejected by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  if (tasks.length === 0) {
    return results;
  }

  const requested = Number(limit);
  let poolSize = 1;
  if (requested === Infinity) {
    poolSize = tasks.length;
  } else if (Number.isFinite(requested) && requested >= 1) {
    poolSize = Math.min(Math.floor(requested), tasks.length);
  }

  let nextIndex = 0;
  let failed = false;
  let firstError;

  const worker = async () => {
    while (nextIndex < tasks.length && !failed) {
      // Claim the slot synchronously so no two workers take the same task.
      const slot = nextIndex++;
      try {
        results[slot] = await tasks[slot]();
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
        return;
      }
    }
  };

  // Workers never reject (they record the error instead), so this only waits.
  await Promise.allSettled(Array.from({ length: poolSize }, () => worker()));
  if (failed) {
    throw firstError;
  }
  return results;
}
2149
/**
 * Trims a string and limits it to `max` UTF-16 units.
 *
 * @param {*} value - Candidate text; anything that is not a string yields "".
 * @param {number} max - Maximum length of the result.
 * @returns {string} The trimmed text, cut with `safeSlice` when it is longer
 *   than `max` and stripped of trailing whitespace left by the cut.
 */
function clip(value, max) {
  if (typeof value !== "string") {
    return "";
  }
  const trimmed = value.trim();
  if (trimmed.length <= max) {
    return trimmed;
  }
  const shortened = safeSlice(trimmed, 0, max);
  return shortened.trimEnd();
}
2154
/**
 * Normalises a tag list.
 *
 * Non-string entries are dropped; the rest are trimmed and lower-cased.
 * Tags that end up empty or longer than 40 characters are discarded, and at
 * most the first six surviving tags are returned.
 *
 * @param {*} tags - Candidate tag array.
 * @returns {string[]} Cleaned tags, or an empty array for non-array input.
 */
function validateTags(tags) {
  if (!Array.isArray(tags)) {
    return [];
  }
  const accepted = [];
  for (const candidate of tags) {
    if (accepted.length === 6) {
      break;
    }
    if (typeof candidate !== "string") {
      continue;
    }
    const tag = candidate.trim().toLowerCase();
    if (tag.length > 0 && tag.length <= 40) {
      accepted.push(tag);
    }
  }
  return accepted;
}
2158
/**
 * Validates and normalises a fact object produced by the LLM.
 *
 * `title` and `body` must both be strings that are non-empty after clipping
 * (80 and 800 characters respectively). Any `confidence` other than
 * "certain" or "tentative" becomes "inferred". All other properties of the
 * input are carried over unchanged.
 *
 * @param {*} fact - Candidate fact.
 * @returns {object|null} A copy with cleaned `title`, `body`, `confidence`
 *   and `tags`, or `null` when the fact is unusable.
 */
function validateFact(fact) {
  const hasText = typeof fact?.title === "string" && typeof fact?.body === "string";
  if (!hasText) {
    return null;
  }

  const title = clip(fact.title, 80);
  const body = clip(fact.body, 800);
  if (title.length === 0 || body.length === 0) {
    return null;
  }

  const given = fact.confidence;
  const confidence = given === "certain" || given === "tentative" ? given : "inferred";

  return { ...fact, title, body, confidence, tags: validateTags(fact.tags) };
}
2173
/**
 * Validates and normalises a task object produced by the LLM.
 *
 * `description` must be a string that is non-empty after clipping to 200
 * characters. `priority` is rounded and clamped to the range 0..10; values
 * that are not finite numbers become 0. Other properties are carried over.
 *
 * @param {*} task - Candidate task.
 * @returns {object|null} A copy with cleaned `description` and `priority`,
 *   or `null` when the task is unusable.
 */
function validateTask(task) {
  if (typeof task?.description !== "string") {
    return null;
  }
  const description = clip(task.description, 200);
  if (description.length === 0) {
    return null;
  }

  const given = task.priority;
  const numeric = typeof given === "number" && Number.isFinite(given) ? given : 0;
  const priority = Math.max(0, Math.min(10, Math.round(numeric)));

  return { ...task, description, priority };
}
2186
/**
 * Reduces a source reference to a restricted character set.
 *
 * Every character other than ASCII letters, digits, `.`, `_`, `-` and space
 * is removed; the result is trimmed and then cut to 255 characters.
 *
 * @param {*} value - Candidate reference.
 * @returns {string|null} The cleaned reference, or `null` when the input is
 *   not a string or nothing is left after cleaning.
 */
function normalizeSourceRef(value) {
  if (typeof value !== "string") {
    return null;
  }
  const stripped = value.replace(/[^A-Za-z0-9._\- ]/g, "");
  const bounded = stripped.trim().slice(0, 255);
  return bounded.length === 0 ? null : bounded;
}
2191
/**
 * Accepts a 64-character hexadecimal digest and returns it lower-cased.
 *
 * @param {*} value - Candidate hash.
 * @returns {string|null} The lower-cased hash, or `null` when the input is
 *   not a string of exactly 64 hex digits.
 */
function normalizeSourceHash(value) {
  if (typeof value !== "string") {
    return null;
  }
  const isHexDigest = /^[0-9a-f]{64}$/i.test(value);
  if (!isHexDigest) {
    return null;
  }
  return value.toLowerCase();
}
2195
/**
 * Tokenises a title for similarity comparison.
 *
 * The title is lower-cased, every character other than `a-z`, `0-9` and
 * whitespace is removed, and the remainder is split on whitespace. Only
 * tokens of at least three characters are kept.
 *
 * @param {string} title - Title to tokenise.
 * @returns {Set<string>} The distinct tokens.
 */
function titleTokens(title) {
  const cleaned = title.toLowerCase().replace(/[^a-z0-9\s]/g, "");
  const tokens = new Set();
  for (const word of cleaned.split(/\s+/)) {
    if (word.length >= 3) {
      tokens.add(word);
    }
  }
  return tokens;
}
2198
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardScore(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 0;
  }
  let shared = 0;
  const combined = new Set();
  for (const item of a) {
    combined.add(item);
    if (b.has(item)) {
      shared += 1;
    }
  }
  for (const item of b) {
    combined.add(item);
  }
  return shared / combined.size;
}
2204
+
2205
+ // src/prompts.ts
2206
// Default system prompt for the librarian pass. PromptService.buildLibrarianPrompt
// falls back to it when neither a runtime override nor a global override is given.
// It asks the model for a JSON object with `facts` and `tasks` arrays.
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
Return ONLY a valid JSON object matching this schema:
{
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
}
Keep facts concise. Do not return markdown, just raw JSON.`;
// Default system prompt for the heal pass (fallback in PromptService.buildHealPrompt).
// It asks the model for a JSON object with `downgraded`, `deleted` and `newFacts`.
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
Return ONLY a valid JSON object matching this schema:
{
"downgraded": ["string (fact IDs)"],
"deleted": ["string (fact IDs)"],
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
}
Do not return markdown, just raw JSON.`;
// Default system prompt for document ingestion (fallback in
// PromptService.buildIngestPrompt). It asks the model for a JSON object with a
// `facts` array only.
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
Return ONLY a valid JSON object matching this schema:
{
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
}
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
2227
+
2228
+ // src/services/PromptService.ts
2229
/**
 * Builds the system/user prompt pairs sent to the LLM for the ingest,
 * librarian and heal operations.
 *
 * For each operation the template is chosen in this order: the per-call
 * `runtimeOverride`, then the matching entry of the global overrides, then
 * the built-in default. If the chosen template contains one of the
 * operation's `{{placeholder}}` names, the data is substituted into the
 * system prompt and a fixed short user prompt is used; otherwise the template
 * is used verbatim and the data is placed in the user prompt.
 */
var PromptService = class {
  /**
   * @param {{ingestSystemPrompt?: string, librarianSystemPrompt?: string, healSystemPrompt?: string}} [globalOverrides]
   *   Optional templates that replace the built-in defaults.
   */
  constructor(globalOverrides) {
    this.globalOverrides = globalOverrides;
  }

  /**
   * Replaces `{{ name }}` placeholders in `template` with values from
   * `variables`. Strings are inserted as-is; other values are inserted as
   * pretty-printed JSON.
   *
   * A placeholder is left untouched when `variables` has no own property of
   * that name, when the value is `undefined`, or when the value cannot be
   * serialised. The own-property check matters: without it a name such as
   * `toString` resolves to an inherited function and the text "undefined"
   * would be written into the prompt.
   *
   * @param {string} template - Text containing placeholders.
   * @param {Object<string, *>} variables - Values keyed by placeholder name.
   * @returns {string} The template with known placeholders substituted.
   */
  hydrate(template, variables) {
    return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (placeholder, key) => {
      if (!Object.prototype.hasOwnProperty.call(variables, key)) return placeholder;
      const value = variables[key];
      if (value === void 0) return placeholder;
      if (typeof value === "string") return value;
      // JSON.stringify yields undefined for functions and symbols.
      return JSON.stringify(value, null, 2) ?? placeholder;
    });
  }

  /**
   * @param {string} documentChunk - The chunk of document text to ingest.
   * @param {string} [runtimeOverride] - Template for this call only.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildIngestPrompt(documentChunk, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.ingestSystemPrompt ?? INGEST_SYSTEM_PROMPT;
    if (/\{\{\s*documentChunk\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { documentChunk }),
        userPrompt: "Please extract the facts."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: ["Document Chunk:", `${documentChunk}`].join("\n")
    };
  }

  /**
   * @param {*} events - Recent events, serialised as JSON into the prompt.
   * @param {*} currentFacts - Existing facts, serialised as JSON.
   * @param {string} [runtimeOverride] - Template for this call only.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildLibrarianPrompt(events, currentFacts, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.librarianSystemPrompt ?? LIBRARIAN_SYSTEM_PROMPT;
    if (/\{\{\s*events\s*\}\}/.test(template) || /\{\{\s*currentFacts\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { events, currentFacts }),
        userPrompt: "Please synthesize the context."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: [
        "Events:",
        `${JSON.stringify(events, null, 2)}`,
        "",
        "Current Facts:",
        `${JSON.stringify(currentFacts, null, 2)}`
      ].join("\n")
    };
  }

  /**
   * @param {*} healCandidates - Facts the model may downgrade or delete.
   * @param {*} documentAnchors - Facts supplied for reference only.
   * @param {*} allTasks - Tasks, serialised as JSON.
   * @param {*} recentEvents - Recent events, serialised as JSON.
   * @param {string} [runtimeOverride] - Template for this call only.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildHealPrompt(healCandidates, documentAnchors, allTasks, recentEvents, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.healSystemPrompt ?? HEAL_SYSTEM_PROMPT;
    const usesPlaceholders = /\{\{\s*healCandidates\s*\}\}/.test(template) || /\{\{\s*documentAnchors\s*\}\}/.test(template) || /\{\{\s*allTasks\s*\}\}/.test(template) || /\{\{\s*recentEvents\s*\}\}/.test(template);
    if (usesPlaceholders) {
      return {
        systemPrompt: this.hydrate(template, { healCandidates, documentAnchors, allTasks, recentEvents }),
        userPrompt: "Please heal the memory graph."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: [
        "Heal Candidates:",
        `${JSON.stringify(healCandidates, null, 2)}`,
        "Document Anchors (DO NOT MODIFY OR DELETE):",
        `${JSON.stringify(documentAnchors, null, 2)}`,
        "All Tasks:",
        `${JSON.stringify(allTasks, null, 2)}`,
        "Recent Events:",
        `${JSON.stringify(recentEvents, null, 2)}`,
        "The following document anchors are provided for contradiction detection only. Do not include them in `downgraded`, `deleted`, or `newFacts`."
      ].join("\n")
    };
  }
};
2293
+
2294
+ // src/services/IngestionService.ts
2295
+ var IngestionService = class {
2296
+ constructor(db, prefix, options, entryRepo, searchService, jobManager, embeddingService, promptService) {
2297
+ this.db = db;
2298
+ this.prefix = prefix;
2299
+ this.options = options;
2300
+ this.entryRepo = entryRepo;
2301
+ this.searchService = searchService;
2302
+ this.jobManager = jobManager;
2303
+ this.embeddingService = embeddingService;
2304
+ this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
2305
+ }
2306
+ async ingestDocument(entityId, params) {
2307
+ const sourceRef = normalizeSourceRef(params.sourceRef);
2308
+ if (!sourceRef) throw new Error("Invalid sourceRef");
2309
+ const sourceHash = normalizeSourceHash(params.sourceHash);
2310
+ if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
2311
+ const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
2312
+ const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
2313
+ const chunkOverlap = Math.min(
2314
+ Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
2315
+ maxChunkLength - 1
2316
+ );
2317
+ const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
2318
+ const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
2319
+ if (typeof params.documentChunk !== "string") {
2320
+ throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
2321
+ }
2322
+ this.jobManager.acquireLock("ingest", entityId, sourceRef);
2323
+ try {
2324
+ const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
2325
+ if (chunks.length === 0) return { truncated: false, chunks: 0 };
2326
+ const chunkResults = await withConcurrency(
2327
+ chunks.map((chunk) => async () => {
2328
+ const { systemPrompt, userPrompt } = this.promptService.buildIngestPrompt(chunk, params.promptOverride);
2329
+ const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
2330
+ const result = parseJsonResponse(responseText);
2331
+ return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
2332
+ }),
2333
+ chunkConcurrency
2334
+ );
2335
+ const seen = /* @__PURE__ */ new Set();
2336
+ const allValidFacts = [];
2337
+ for (const facts of chunkResults) {
2338
+ for (const fact of facts) {
2339
+ const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
2340
+ if (!seen.has(normalized)) {
2341
+ seen.add(normalized);
2342
+ allValidFacts.push(fact);
2553
2343
  }
2554
- this.options.onRetrievalFallback?.(error);
2555
2344
  }
2556
2345
  }
2557
- if (!usedEmbed && scoredEntityIds.length > 0) {
2558
- const fallbackEntityIdSet = new Set(scoredEntityIds);
2559
- const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
2560
- const results = this.miniSearch.search(trimmedQuery, {
2561
- filter: (r) => fallbackEntityIdSet.has(r.entity_id),
2562
- combineWith: "OR"
2563
- });
2564
- const candidates = results.slice(0, fallbackOversampledLimit).map((r) => ({
2565
- id: r.id,
2566
- entity_id: r.entity_id,
2567
- score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
2568
- updated_at: null,
2569
- access_count: null
2570
- }));
2571
- this._tieBreakSort(candidates);
2572
- const topCandidates = candidates.slice(0, maxResults);
2573
- const topIds = topCandidates.map((c) => c.id);
2574
- if (topIds.length > 0) {
2575
- facts = await this._hydrateFactsByIds(topIds, entityIds);
2576
- if (exposeMetadata) {
2577
- scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
2578
- }
2346
+ const now = Date.now();
2347
+ const insertedFacts = [];
2348
+ const deletedSourceFactIds = [];
2349
+ await this.db.withTransactionAsync(async (tx) => {
2350
+ deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
2351
+ await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
2352
+ for (const fact of allValidFacts) {
2353
+ const id = generateId("fact_");
2354
+ const wikiFact = {
2355
+ id,
2356
+ entity_id: entityId,
2357
+ title: fact.title,
2358
+ body: fact.body,
2359
+ tags: fact.tags,
2360
+ confidence: fact.confidence,
2361
+ source_type: "immutable_document",
2362
+ source_hash: sourceHash,
2363
+ source_ref: sourceRef,
2364
+ created_at: now,
2365
+ updated_at: now,
2366
+ last_accessed_at: null,
2367
+ access_count: 0,
2368
+ deleted_at: null
2369
+ };
2370
+ await this.entryRepo.upsert(wikiFact, tx);
2371
+ insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
2372
+ }
2373
+ });
2374
+ await this.searchService.sync(entityId);
2375
+ const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
2376
+ for (const factId of uniqueDeletedSourceFactIds) {
2377
+ try {
2378
+ await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
2379
+ } catch (hookErr) {
2380
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
2579
2381
  }
2580
2382
  }
2581
- if (facts.length > 0) {
2582
- const ids = facts.map((f) => f.id);
2583
- const now = Date.now();
2584
- await this.entryRepo.trackAccess(ids, now);
2383
+ for (const fact of insertedFacts) {
2384
+ await this.embeddingService.embedFact(fact);
2585
2385
  }
2586
- } else {
2587
- facts = await this.entryRepo.findRecentByEntityIds(entityIds, maxResults);
2588
- }
2589
- const eventsLimit = Math.min(10 * entityIds.length, 100);
2590
- const [tasks, events] = await Promise.all([
2591
- this.taskRepo.findAllPending(entityIds, entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200)),
2592
- entityIds.length === 1 ? this.eventRepo.getRecent(entityIds[0], eventsLimit) : this.eventRepo.getRecentForEntities(entityIds, eventsLimit)
2593
- ]);
2594
- let factScores;
2595
- if (exposeMetadata && trimmedQuery && scoreByFactId) {
2596
- factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
2597
- }
2598
- const bundle = { facts, tasks, events: events.reverse() };
2599
- if (exposeMetadata) {
2600
- bundle.metadata = { query, entityIds };
2601
- if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
2602
- if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
2386
+ this.searchService.evictCache(entityId);
2387
+ return { truncated, chunks: chunks.length };
2388
+ } finally {
2389
+ this.jobManager.releaseLock("ingest", entityId, sourceRef);
2603
2390
  }
2604
- return bundle;
2605
- }
2606
- /**
2607
- * Returns entity IDs that will participate in scored retrieval.
2608
- * Excludes zero-weight entities unless includeZeroWeightEntities is true.
2609
- */
2610
- _filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
2611
- return entityIds.filter((id) => {
2612
- const w = sanitizedTierWeights?.[id] ?? 1;
2613
- return includeZeroWeightEntities === true || w !== 0;
2614
- });
2615
- }
2616
- /**
2617
- * Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
2618
- */
2619
- _tieBreakSort(items) {
2620
- items.sort((a, b) => this._compareScoredRows(a, b));
2621
- }
2622
- /**
2623
- * Comparator for score + deterministic tie-break fields.
2624
- * Negative return means "a ranks ahead of b" for descending score order.
2625
- */
2626
- _compareScoredRows(a, b) {
2627
- const scoreDiff = b.score - a.score;
2628
- if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
2629
- const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
2630
- if (accessCountDiff !== 0) return accessCountDiff;
2631
- const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
2632
- if (updatedAtDiff !== 0) return updatedAtDiff;
2633
- return a.id.localeCompare(b.id);
2634
2391
  }
2635
- /**
2636
- * Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
2637
- * (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
2638
- */
2639
- async _hydrateFactsByIds(ids, scopedEntityIds, tx) {
2640
- return this.entryRepo.findByIds(ids, scopedEntityIds, tx);
2392
+ };
2393
+
2394
+ // src/services/MaintenanceService.ts
2395
+ var FUZZY_THRESHOLD = 0.5;
2396
+ var MIN_TOKENS_TO_QUALIFY = 3;
2397
+ var MaintenanceService = class {
2398
+ constructor(db, prefix, options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService, promptService) {
2399
+ this.db = db;
2400
+ this.prefix = prefix;
2401
+ this.options = options;
2402
+ this.entryRepo = entryRepo;
2403
+ this.taskRepo = taskRepo;
2404
+ this.eventRepo = eventRepo;
2405
+ this.metadataRepo = metadataRepo;
2406
+ this.searchService = searchService;
2407
+ this.jobManager = jobManager;
2408
+ this.embeddingService = embeddingService;
2409
+ this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
2641
2410
  }
2642
- /**
2643
- * Strip potentially sensitive data from ranker errors before exposing to host callbacks.
2644
- * Preserves error type for debugging but removes message/stack that may contain credentials.
2645
- * Recursively sanitizes one level of .cause; deeper chains collapse to type only.
2646
- */
2647
- _sanitizeRankerError(err) {
2648
- if (this.options.sanitizeRankerErrors === false) {
2649
- return err instanceof Error ? err : new Error(String(err));
2411
+ async runPrune(entityId, options) {
2412
+ this.jobManager.acquireLock("prune", entityId);
2413
+ try {
2414
+ const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
2415
+ const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
2416
+ const vacuum = options?.vacuum ?? false;
2417
+ this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
2418
+ this._validatePruneDuration(retainEventsFor, "retainEventsFor");
2419
+ const now = Date.now();
2420
+ let deletedEntries = 0;
2421
+ let deletedTasks = 0;
2422
+ let deletedEvents = 0;
2423
+ if (retainSoftDeletedFor !== null) {
2424
+ const cutoff = now - retainSoftDeletedFor * 864e5;
2425
+ const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
2426
+ const succeeded = [];
2427
+ let failure = null;
2428
+ for (const row of entriesToDelete) {
2429
+ try {
2430
+ await this.embeddingService.notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
2431
+ succeeded.push({ entity_id: row.entity_id, id: row.id });
2432
+ } catch (err) {
2433
+ failure = { factId: row.id, cause: err };
2434
+ break;
2435
+ }
2436
+ }
2437
+ const succeededIds = succeeded.map((r) => r.id);
2438
+ await this.db.withTransactionAsync(async (tx) => {
2439
+ if (succeededIds.length > 0) {
2440
+ deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
2441
+ }
2442
+ deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
2443
+ });
2444
+ if (failure) {
2445
+ await this.searchService.sync(entityId);
2446
+ const remaining = entriesToDelete.length - succeeded.length - 1;
2447
+ const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
2448
+ if (isTimeout) {
2449
+ throw new PrunePartialFailureError(
2450
+ succeeded.length,
2451
+ failure.factId,
2452
+ remaining,
2453
+ new Error("Deletion hook timed out"),
2454
+ deletedTasks,
2455
+ 0
2456
+ );
2457
+ }
2458
+ const errMsg = failure.cause?.message ?? "";
2459
+ const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
2460
+ const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
2461
+ throw new PrunePartialFailureError(
2462
+ succeeded.length,
2463
+ failure.factId,
2464
+ remaining,
2465
+ sanitizedCause,
2466
+ deletedTasks,
2467
+ 0
2468
+ );
2469
+ }
2470
+ }
2471
+ if (retainEventsFor !== null) {
2472
+ const cutoff = now - retainEventsFor * 864e5;
2473
+ const eventResult = await this.eventRepo.prune(entityId, cutoff);
2474
+ deletedEvents = eventResult.changes;
2475
+ }
2476
+ if (vacuum) {
2477
+ await this.metadataRepo.vacuum();
2478
+ }
2479
+ await this.searchService.sync(entityId);
2480
+ return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
2481
+ } finally {
2482
+ this.jobManager.releaseLock("prune", entityId);
2650
2483
  }
2651
- const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
2652
- const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
2653
- const sanitized = new Error(
2654
- `VectorRanker ${typeName} (message scrubbed for security)`,
2655
- innerCause ? { cause: innerCause } : void 0
2656
- );
2657
- sanitized.name = typeName;
2658
- return sanitized;
2659
2484
  }
2660
- /**
2661
- * Score candidate rows using in-process JS cosine similarity.
2662
- * Applies hybrid blending (if weight set) and tie-break sorting before returning.
2663
- */
2664
- async _rankWithJsCosine(args) {
2665
- const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
2666
- const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
2667
- let entityCache = this.vectorCache.get(entityId);
2668
- const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
2669
- if (tooLarge && entityCache) {
2670
- this.vectorCache.delete(entityId);
2671
- entityCache = void 0;
2485
+ async runLibrarian(entityId, options) {
2486
+ this.jobManager.acquireLock("librarian", entityId);
2487
+ try {
2488
+ await this.doRunLibrarian(entityId, options?.promptOverride);
2489
+ } finally {
2490
+ this.jobManager.releaseLock("librarian", entityId);
2672
2491
  }
2673
- const canCache = populateCache && !tooLarge;
2674
- if (canCache && !entityCache) {
2675
- entityCache = /* @__PURE__ */ new Map();
2492
+ }
2493
+ async runHeal(entityId, options) {
2494
+ this.jobManager.acquireLock("heal", entityId);
2495
+ try {
2496
+ await this.doRunHeal(entityId, options?.promptOverride);
2497
+ } finally {
2498
+ this.jobManager.releaseLock("heal", entityId);
2676
2499
  }
2677
- const scored = candidateRows.map((row) => {
2678
- let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
2679
- if (vector && canCache && entityCache && !entityCache.has(row.id)) {
2680
- entityCache.set(row.id, vector);
2681
- }
2682
- let score = 0;
2683
- if (vector && vector.length === queryVec.length) {
2684
- const cosSim = cosineSimilarity(queryVec, vector);
2685
- if (weight !== void 0) {
2686
- const kwScore = miniSearchScores?.get(row.id) ?? 0;
2687
- score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
2688
- } else {
2689
- score = cosSim;
2500
+ }
2501
+ async runReembed(entityId, opts) {
2502
+ const embedFn = this.options.llmProvider.embed;
2503
+ if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
2504
+ const op = entityId ? "reembed" : "global_reembed";
2505
+ this.jobManager.acquireLock(op, entityId ?? "*");
2506
+ try {
2507
+ const rows = await this.entryRepo.findAllForReembed(entityId);
2508
+ this.searchService.evictCache(entityId);
2509
+ const skipExisting = opts?.skipExisting ?? false;
2510
+ let effectiveSkip = skipExisting;
2511
+ if (skipExisting) {
2512
+ const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
2513
+ if (mismatchValue) {
2514
+ if (entityId) {
2515
+ const mismatchDim = parseInt(mismatchValue, 10);
2516
+ const staleCount = await this.entryRepo.countStaleForEntity(entityId, mismatchDim);
2517
+ if (staleCount > 0) effectiveSkip = false;
2518
+ } else {
2519
+ effectiveSkip = false;
2520
+ }
2690
2521
  }
2691
- } else if (weight !== void 0 && weight < 1) {
2692
- const kwScore = miniSearchScores?.get(row.id) ?? 0;
2693
- score = (1 - weight) * kwScore;
2694
- } else {
2695
- score = -2;
2696
2522
  }
2697
- return {
2698
- id: row.id,
2699
- entity_id: row.entity_id,
2700
- score,
2701
- updated_at: row.updated_at,
2702
- access_count: row.access_count
2703
- };
2704
- });
2705
- if (canCache && entityCache && entityCache.size > 0) {
2706
- if (!this.vectorCache.has(entityId)) {
2707
- if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
2708
- const oldestKey = this.vectorCache.keys().next().value;
2709
- if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
2523
+ let embedded = 0;
2524
+ let skipped = 0;
2525
+ let failed = 0;
2526
+ try {
2527
+ for (const row of rows) {
2528
+ const existingBlob = row.embedding_blob;
2529
+ const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
2530
+ if (effectiveSkip && blobIsValid) {
2531
+ const vec = parseEmbedding(existingBlob, null);
2532
+ if (vec !== null && vec.every((v) => Number.isFinite(v))) {
2533
+ skipped++;
2534
+ continue;
2535
+ }
2536
+ }
2537
+ const success = await this.embeddingService.embedFact(row);
2538
+ if (success) embedded++;
2539
+ else failed++;
2710
2540
  }
2711
- this.vectorCache.set(entityId, entityCache);
2541
+ if (embedded > 0) {
2542
+ await this.embeddingService.reconcileEmbeddingDimension();
2543
+ }
2544
+ } finally {
2545
+ this.searchService.evictCache(entityId);
2712
2546
  }
2547
+ return { embedded, skipped, failed };
2548
+ } finally {
2549
+ this.jobManager.releaseLock(op, entityId ?? "*");
2713
2550
  }
2714
- if (!skipSort) this._tieBreakSort(scored);
2715
- return scored.slice(0, limit);
2716
- }
2717
- /**
2718
- * Delegate semantic ranking to the injected VectorRanker.
2719
- * Caller should pass an oversampledLimit to preserve recall after re-ranking.
2720
- * Returns scored results ready for hybrid blending and tie-break sorting.
2721
- */
2722
- async _rankWithVectorRanker(args) {
2723
- const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
2724
- const ranker = this.options.vectorRanker;
2725
- if (!ranker) {
2726
- throw new Error("vectorRanker not configured");
2727
- }
2728
- const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
2729
- const rankerResults = await ranker.rankBySimilarity({
2730
- entityId,
2731
- queryVec: queryVecCopy,
2732
- candidateIds,
2733
- limit
2734
- });
2735
- const allowedIds = new Set(candidateRows.map((row) => row.id));
2736
- const seen = /* @__PURE__ */ new Set();
2737
- const normalized = [];
2738
- for (const r of rankerResults) {
2739
- if (normalized.length >= limit) break;
2740
- if (seen.has(r.id)) continue;
2741
- if (allowedIds && !allowedIds.has(r.id)) continue;
2742
- if (!Number.isFinite(r.semanticScore)) continue;
2743
- seen.add(r.id);
2744
- normalized.push(r);
2745
- }
2746
- const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
2747
- const scored = normalized.map((r) => {
2748
- let score = r.semanticScore;
2749
- if (weight !== void 0) {
2750
- const kwScore = miniSearchScores?.get(r.id) ?? 0;
2751
- score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
2752
- }
2753
- return {
2754
- id: r.id,
2755
- entity_id: entityIdByCandidateId.get(r.id),
2756
- // allowedIds filter above guarantees membership
2757
- score
2758
- };
2759
- });
2760
- return scored;
2761
- }
2762
- async getMemoryBundle(entityId) {
2763
- return this._getFullBundle(entityId, { maxEvents: 10 });
2764
2551
  }
2765
- async write(entityId, event) {
2766
- const id = generateId("evt_");
2767
- const now = Date.now();
2768
- let eventType = event.event_type;
2769
- if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
2770
- eventType = "observation";
2552
+ async forget(entityId, params) {
2553
+ if (params.clearAll && (params.entryId !== void 0 || params.taskId !== void 0 || params.sourceRef !== void 0 || params.sourceHash !== void 0)) {
2554
+ throw new Error("forget() clearAll is mutually exclusive with entryId, taskId, sourceRef, and sourceHash");
2771
2555
  }
2772
- const newEvent = {
2773
- id,
2774
- entity_id: entityId,
2775
- event_type: eventType,
2776
- summary: event.summary,
2777
- related_entry_id: event.related_entry_id || null,
2778
- created_at: now
2779
- };
2780
- let shouldRunLibrarian = false;
2781
- let librarianCount = 0;
2782
- let librarianJobKey = null;
2783
- await this.db.withTransactionAsync(async (tx) => {
2784
- await this.eventRepo.add(newEvent, tx);
2785
- const threshold = this.options.config?.autoLibrarianThreshold || 20;
2786
- const [count, cp] = await Promise.all([
2787
- this.eventRepo.count(entityId, tx),
2788
- this.metadataRepo.getCheckpoint(entityId, tx)
2789
- ]);
2790
- let memoryCheckpoint = cp.memory ?? 0;
2791
- if (memoryCheckpoint > count) memoryCheckpoint = 0;
2792
- if (count - memoryCheckpoint >= threshold) {
2793
- const jobKey = this._librarianKey(entityId);
2794
- if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
2795
- shouldRunLibrarian = true;
2796
- librarianCount = count;
2797
- librarianJobKey = jobKey;
2798
- await this.metadataRepo.updateCheckpoint(entityId, { memory: count }, tx);
2556
+ this.jobManager.acquireLock("forget", entityId);
2557
+ try {
2558
+ const now = Date.now();
2559
+ let deletedEntries = 0;
2560
+ let deletedTasks = 0;
2561
+ const deletedEntryIds = [];
2562
+ await this.db.withTransactionAsync(async (tx) => {
2563
+ if (params.clearAll) {
2564
+ deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
2565
+ deletedEntries = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
2566
+ deletedTasks = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
2567
+ await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
2568
+ } else {
2569
+ const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
2570
+ const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
2571
+ if (hasIdSelectors && hasSourceSelectors) {
2572
+ throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
2573
+ }
2574
+ const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
2575
+ if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
2576
+ const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
2577
+ if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
2578
+ if (params.entryId) {
2579
+ const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
2580
+ if (entryId) deletedEntryIds.push(entryId);
2581
+ }
2582
+ if (sourceRef || sourceHash) {
2583
+ deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
2584
+ }
2585
+ const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
2586
+ const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
2587
+ const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
2588
+ const [entryResult, taskResult, refResult] = await Promise.all([
2589
+ entryPromise ?? Promise.resolve(false),
2590
+ taskDeletedPromise ?? Promise.resolve(false),
2591
+ refPromise ?? Promise.resolve(0)
2592
+ ]);
2593
+ if (entryResult) deletedEntries++;
2594
+ if (taskResult) deletedTasks++;
2595
+ deletedEntries += refResult;
2799
2596
  }
2800
- }
2801
- });
2802
- if (shouldRunLibrarian && librarianJobKey !== null) {
2803
- this.activeMaintenanceJobs.add(librarianJobKey);
2804
- this._notifyStatusSubscribers(entityId);
2805
- this.runLibrarianThenMaybeHeal(entityId, librarianCount).catch(console.error).finally(() => {
2806
- this.activeMaintenanceJobs.delete(librarianJobKey);
2807
- this._notifyStatusSubscribers(entityId);
2808
2597
  });
2809
- }
2810
- }
2811
- async runLibrarianThenMaybeHeal(entityId, currentEventCount) {
2812
- await this._doRunLibrarian(entityId);
2813
- const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
2814
- const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
2815
- let healCheckpoint = cp.heal ?? 0;
2816
- if (healCheckpoint > currentEventCount) healCheckpoint = 0;
2817
- const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
2818
- if (shouldRunHeal) {
2819
- const healKey = this._healKey(entityId);
2820
- if (!this.activeMaintenanceJobs.has(healKey)) {
2821
- this.activeMaintenanceJobs.add(healKey);
2822
- this._notifyStatusSubscribers(entityId);
2598
+ await this.searchService.sync(entityId);
2599
+ const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
2600
+ for (const factId of uniqueDeletedIds) {
2823
2601
  try {
2824
- await this._doRunHeal(entityId);
2825
- await this.metadataRepo.updateCheckpoint(entityId, { heal: currentEventCount }, this.db);
2826
- } finally {
2827
- this.activeMaintenanceJobs.delete(healKey);
2828
- this._notifyStatusSubscribers(entityId);
2602
+ await this.embeddingService.notifyEmbeddingPersistedOrThrow(entityId, factId, null);
2603
+ } catch (hookErr) {
2604
+ const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
2605
+ if (isTimeout) {
2606
+ throw new Error(`forget(${entityId}/${factId}) failed: ${hookErr.message}`);
2607
+ }
2608
+ const errMsg = hookErr?.message ?? "";
2609
+ if (errMsg.startsWith("Invalid deletionHookTimeoutMs")) {
2610
+ throw new Error(`forget(${entityId}/${factId}) failed: ${errMsg}`, { cause: hookErr });
2611
+ }
2612
+ throw new Error(`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`, { cause: this._sanitizeRankerError(hookErr) });
2829
2613
  }
2830
2614
  }
2615
+ return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
2616
+ } finally {
2617
+ this.jobManager.releaseLock("forget", entityId);
2831
2618
  }
2832
2619
  }
2833
- async _doRunLibrarian(entityId) {
2620
+ /** Core librarian pass (locks handled by {@link runLibrarian}). Package-internal orchestration hook. */
2621
+ async doRunLibrarian(entityId, promptOverride) {
2834
2622
  const events = await this.eventRepo.getRecent(entityId, 50);
2835
2623
  const currentFactsRows = await this.entryRepo.findRecentByEntityId(entityId, 100);
2836
2624
  const currentFacts = currentFactsRows.map((f) => {
@@ -2840,15 +2628,12 @@ After running the migration SQL, restart your application.`
2840
2628
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
2841
2629
  };
2842
2630
  });
2843
- const userPrompt = `Events:
2844
- ${JSON.stringify(events.reverse(), null, 2)}
2845
-
2846
- Current Facts:
2847
- ${JSON.stringify(currentFacts, null, 2)}`;
2848
- const responseText = await this.options.llmProvider.generateText({
2849
- systemPrompt: LIBRARIAN_SYSTEM_PROMPT,
2850
- userPrompt
2851
- });
2631
+ const { systemPrompt, userPrompt } = this.promptService.buildLibrarianPrompt(
2632
+ events.reverse(),
2633
+ currentFacts,
2634
+ promptOverride
2635
+ );
2636
+ const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
2852
2637
  const result = parseJsonResponse(responseText);
2853
2638
  const facts = Array.isArray(result.facts) ? result.facts : [];
2854
2639
  const tasks = Array.isArray(result.tasks) ? result.tasks : [];
@@ -2893,24 +2678,35 @@ ${JSON.stringify(currentFacts, null, 2)}`;
2893
2678
  };
2894
2679
  await this.entryRepo.upsert(factObj, tx);
2895
2680
  insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
2681
+ factsForDedupe.push(factObj);
2896
2682
  }
2897
2683
  for (const task of validTasks) {
2898
2684
  const id = generateId("task_");
2899
- const taskObj = { id, entity_id: entityId, description: task.description, status: "pending", priority: task.priority, created_at: now, updated_at: now, resolved_at: null, deleted_at: null };
2685
+ const taskObj = {
2686
+ id,
2687
+ entity_id: entityId,
2688
+ description: task.description,
2689
+ status: "pending",
2690
+ priority: task.priority,
2691
+ created_at: now,
2692
+ updated_at: now,
2693
+ resolved_at: null,
2694
+ deleted_at: null
2695
+ };
2900
2696
  await this.taskRepo.upsert(taskObj, tx);
2901
2697
  }
2902
2698
  });
2903
- await this.rebuildMiniSearchIndex(entityId);
2904
- this.vectorCache.delete(entityId);
2699
+ await this.searchService.sync(entityId);
2905
2700
  for (const fact of insertedFacts) {
2906
- await this.embedFact(fact);
2701
+ await this.embeddingService.embedFact(fact);
2907
2702
  }
2908
- this.vectorCache.delete(entityId);
2703
+ this.searchService.evictCache(entityId);
2909
2704
  }
2910
- async _doRunHeal(entityId) {
2705
+ /** Core heal pass (locks handled by {@link runHeal}). Package-internal orchestration hook. */
2706
+ async doRunHeal(entityId, promptOverride) {
2911
2707
  const now = Date.now();
2912
- const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config.orphanAfterDays : 30;
2913
- const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config.staleInferredAfterDays : 60;
2708
+ const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config?.orphanAfterDays : 30;
2709
+ const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config?.staleInferredAfterDays : 60;
2914
2710
  const MS_PER_DAY = 24 * 60 * 60 * 1e3;
2915
2711
  if (orphanAfterDays !== null && (typeof orphanAfterDays !== "number" || !Number.isFinite(orphanAfterDays) || orphanAfterDays < 0)) {
2916
2712
  throw new Error("Invalid orphanAfterDays: must be a finite number >= 0 or null");
@@ -2918,810 +2714,1481 @@ ${JSON.stringify(currentFacts, null, 2)}`;
2918
2714
  if (staleInferredAfterDays !== null && (typeof staleInferredAfterDays !== "number" || !Number.isFinite(staleInferredAfterDays) || staleInferredAfterDays < 0)) {
2919
2715
  throw new Error("Invalid staleInferredAfterDays: must be a finite number >= 0 or null");
2920
2716
  }
2717
+ const orphanedIds = [];
2921
2718
  await this.db.withTransactionAsync(async (tx) => {
2922
2719
  if (orphanAfterDays !== null) {
2923
2720
  const orphanThreshold = now - orphanAfterDays * MS_PER_DAY;
2924
- await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx);
2721
+ orphanedIds.push(...await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx));
2925
2722
  }
2926
2723
  if (staleInferredAfterDays !== null) {
2927
2724
  const staleThreshold = now - staleInferredAfterDays * MS_PER_DAY;
2928
2725
  await this.entryRepo.downgradeStaleInferred(entityId, staleThreshold, tx);
2929
2726
  }
2930
2727
  });
2728
+ for (const factId of orphanedIds) {
2729
+ try {
2730
+ await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
2731
+ } catch (hookErr) {
2732
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal orphan pass for ${factId}:`, hookErr);
2733
+ }
2734
+ }
2931
2735
  const allFactsRows = await this.entryRepo.findAllByEntityId(entityId);
2932
2736
  const allTasks = await this.taskRepo.findAllPending([entityId]);
2933
2737
  const recentEvents = await this.eventRepo.getRecent(entityId, 20);
2934
2738
  const healCandidates = allFactsRows.filter((f) => f.source_type !== "immutable_document");
2935
2739
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "immutable_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
2936
- const userPrompt = `Heal Candidates:
2937
- ${JSON.stringify(healCandidates.map((f) => {
2740
+ const healCandidatesForPrompt = healCandidates.map((f) => {
2938
2741
  const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
2939
2742
  return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
2940
- }), null, 2)}
2941
-
2942
- Document Anchors (DO NOT MODIFY OR DELETE):
2943
- ${JSON.stringify(documentAnchors, null, 2)}
2944
-
2945
- All Tasks:
2946
- ${JSON.stringify(allTasks, null, 2)}
2947
-
2948
- Recent Events:
2949
- ${JSON.stringify(recentEvents, null, 2)}
2950
-
2951
- The following document anchors are provided for contradiction detection only. Do not include them in \`downgraded\`, \`deleted\`, or \`newFacts\`.`;
2952
- const responseText = await this.options.llmProvider.generateText({
2953
- systemPrompt: HEAL_SYSTEM_PROMPT,
2954
- userPrompt
2955
2743
  });
2744
+ const { systemPrompt, userPrompt } = this.promptService.buildHealPrompt(
2745
+ healCandidatesForPrompt,
2746
+ documentAnchors,
2747
+ allTasks,
2748
+ recentEvents,
2749
+ promptOverride
2750
+ );
2751
+ const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
2956
2752
  const result = parseJsonResponse(responseText);
2957
2753
  const mutableIds = new Set(healCandidates.map((f) => f.id));
2958
2754
  const downgraded = Array.isArray(result.downgraded) ? result.downgraded : [];
2959
2755
  const deleted = Array.isArray(result.deleted) ? result.deleted : [];
2960
2756
  const newFacts = Array.isArray(result.newFacts) ? result.newFacts : [];
2961
- const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
2962
- const safeDeleted = deleted.filter((id) => mutableIds.has(id));
2757
+ const safeDowngraded = Array.from(new Set(downgraded.filter((id) => mutableIds.has(id))));
2758
+ const safeDeleted = Array.from(new Set(deleted.filter((id) => mutableIds.has(id))));
2963
2759
  const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
2964
2760
  const insertedFacts = [];
2965
2761
  const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
2762
+ const healFactsForDedupe = [...healCandidates];
2966
2763
  await this.db.withTransactionAsync(async (tx) => {
2967
2764
  await this.entryRepo.downgradeByIds(safeDowngraded, entityId, tx);
2968
2765
  await this.entryRepo.softDeleteByIds(safeDeleted, entityId, tx);
2969
2766
  for (const fact of validNewFacts) {
2767
+ const newTokens = titleTokens(fact.title);
2768
+ let skip = false;
2769
+ if (newTokens.size >= MIN_TOKENS_TO_QUALIFY) {
2770
+ for (const existing of healFactsForDedupe) {
2771
+ if (existing.source_type !== "librarian_inferred") continue;
2772
+ const existingTokens = titleTokens(existing.title);
2773
+ if (existingTokens.size >= MIN_TOKENS_TO_QUALIFY) {
2774
+ if (jaccardScore(newTokens, existingTokens) >= FUZZY_THRESHOLD) {
2775
+ skip = true;
2776
+ break;
2777
+ }
2778
+ }
2779
+ }
2780
+ }
2781
+ if (skip) continue;
2970
2782
  const id = generateId("fact_");
2971
2783
  const factObj = {
2972
2784
  id,
2973
2785
  entity_id: entityId,
2974
2786
  title: fact.title,
2975
2787
  body: fact.body,
2976
- tags: fact.tags,
2977
- confidence: fact.confidence,
2978
- source_type: "librarian_inferred",
2979
- source_hash: null,
2980
- source_ref: null,
2981
- created_at: now,
2982
- updated_at: now,
2983
- last_accessed_at: null,
2984
- access_count: 0,
2985
- deleted_at: null
2986
- };
2987
- await this.entryRepo.upsert(factObj, tx);
2988
- insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
2989
- }
2990
- });
2991
- this.vectorCache.delete(entityId);
2992
- await this.rebuildMiniSearchIndex(entityId);
2993
- for (const factId of uniqueDeletedFactIds) {
2994
- try {
2995
- await this._notifyEmbeddingPersisted(entityId, factId, null);
2996
- } catch (hookErr) {
2997
- console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
2788
+ tags: fact.tags,
2789
+ confidence: fact.confidence,
2790
+ source_type: "librarian_inferred",
2791
+ source_hash: null,
2792
+ source_ref: null,
2793
+ created_at: now,
2794
+ updated_at: now,
2795
+ last_accessed_at: null,
2796
+ access_count: 0,
2797
+ deleted_at: null
2798
+ };
2799
+ await this.entryRepo.upsert(factObj, tx);
2800
+ insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
2801
+ healFactsForDedupe.push(factObj);
2802
+ }
2803
+ });
2804
+ await this.searchService.sync(entityId);
2805
+ for (const factId of uniqueDeletedFactIds) {
2806
+ try {
2807
+ await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
2808
+ } catch (hookErr) {
2809
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
2810
+ }
2811
+ }
2812
+ for (const fact of insertedFacts) {
2813
+ await this.embeddingService.embedFact(fact);
2814
+ }
2815
+ this.searchService.evictCache(entityId);
2816
+ }
2817
+ _validatePruneDuration(value, name) {
2818
+ if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
2819
+ throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
2820
+ }
2821
+ }
2822
+ _sanitizeRankerError(err) {
2823
+ return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
2824
+ }
2825
+ };
2826
+
2827
+ // src/services/ImportExportService.ts
2828
+ var ImportExportService = class {
2829
+ constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
2830
+ this.db = db;
2831
+ this.entryRepo = entryRepo;
2832
+ this.taskRepo = taskRepo;
2833
+ this.eventRepo = eventRepo;
2834
+ this.metadataRepo = metadataRepo;
2835
+ this.searchService = searchService;
2836
+ this.jobManager = jobManager;
2837
+ this.embeddingService = embeddingService;
2838
+ }
2839
+ async exportDump(entityIds) {
2840
+ let ids;
2841
+ if (entityIds && entityIds.length > 0) {
2842
+ ids = Array.from(new Set(entityIds));
2843
+ } else {
2844
+ ids = await this.metadataRepo.getDistinctEntityIds();
2845
+ }
2846
+ const entities = {};
2847
+ const BATCH = 3;
2848
+ for (let i = 0; i < ids.length; i += BATCH) {
2849
+ const batch = ids.slice(i, i + BATCH);
2850
+ const batchResults = await Promise.all(
2851
+ batch.map(
2852
+ async (id) => [
2853
+ id,
2854
+ await this.getFullBundle(id, { includeBlobs: true })
2855
+ ]
2856
+ )
2857
+ );
2858
+ for (const [id, bundle] of batchResults) {
2859
+ entities[id] = bundle;
2860
+ }
2861
+ }
2862
+ return { generatedAt: Date.now(), entities };
2863
+ }
2864
+ async importDump(dump, opts) {
2865
+ const merge = opts?.merge ?? false;
2866
+ const entityIds = Object.keys(dump.entities);
2867
+ this.jobManager.acquireImportLocks(entityIds);
2868
+ try {
2869
+ await this.assertNoLegacySourceTypes();
2870
+ for (const [entityId, bundle] of Object.entries(dump.entities)) {
2871
+ await this.doImportEntity(entityId, bundle, merge);
2872
+ }
2873
+ } finally {
2874
+ this.jobManager.releaseImportLocks(entityIds);
2875
+ }
2876
+ }
2877
+ async getFullBundle(entityId, opts) {
2878
+ const [factsRaw, tasks, events] = await Promise.all([
2879
+ opts?.includeBlobs ? this.entryRepo.findAllByEntityIdWithBlobs(entityId) : this.entryRepo.findAllByEntityId(entityId),
2880
+ this.taskRepo.findAllByEntityId(entityId),
2881
+ this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
2882
+ ]);
2883
+ const facts = factsRaw.map((f) => {
2884
+ const {
2885
+ embedding: _embedding,
2886
+ embedding_blob,
2887
+ ...rest
2888
+ } = f;
2889
+ const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
2890
+ const c = new ArrayBuffer(embedding_blob.byteLength);
2891
+ new Uint8Array(c).set(embedding_blob);
2892
+ return new Uint8Array(c);
2893
+ })() : void 0;
2894
+ const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
2895
+ return {
2896
+ ...factBase,
2897
+ tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
2898
+ };
2899
+ });
2900
+ return { facts, tasks, events };
2901
+ }
2902
+ /** Single-entity import transaction + post-processing; package-internal hook for tests. */
2903
+ async doImportEntity(entityId, bundle, merge) {
2904
+ const upsertedFactIds = /* @__PURE__ */ new Set();
2905
+ const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
2906
+ const factsWithPreservedBlob = /* @__PURE__ */ new Map();
2907
+ const preservedBlobDims = /* @__PURE__ */ new Set();
2908
+ const softDeletedFactIds = [];
2909
+ await this.db.withTransactionAsync(async (tx) => {
2910
+ if (!merge) {
2911
+ const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
2912
+ entityId,
2913
+ null,
2914
+ null,
2915
+ tx,
2916
+ false
2917
+ );
2918
+ softDeletedFactIds.push(...deletedLiveFactIds);
2919
+ await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
2920
+ await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
2921
+ await this.metadataRepo.deleteCheckpoint(entityId, tx);
2922
+ }
2923
+ const factIds = bundle.facts.map((fact) => fact.id);
2924
+ const existingFactsById = /* @__PURE__ */ new Map();
2925
+ const existingFacts = await this.entryRepo.findExistingMetadataByIds(
2926
+ factIds,
2927
+ tx
2928
+ );
2929
+ for (const existingFact of existingFacts) {
2930
+ existingFactsById.set(existingFact.id, existingFact);
2931
+ }
2932
+ for (const fact of bundle.facts) {
2933
+ const sourceType = this._normalizeImportedSourceType(
2934
+ String(fact.source_type),
2935
+ {
2936
+ entityId,
2937
+ factId: fact.id
2938
+ }
2939
+ );
2940
+ const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
2941
+ const existing = existingFactsById.get(fact.id);
2942
+ const rawBlobRaw = fact.embedding_blob;
2943
+ let rawBlob = null;
2944
+ if (rawBlobRaw instanceof Uint8Array) {
2945
+ rawBlob = rawBlobRaw;
2946
+ } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
2947
+ const obj = rawBlobRaw;
2948
+ if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
2949
+ rawBlob = new Uint8Array(obj["data"]);
2950
+ } else if (!Array.isArray(rawBlobRaw)) {
2951
+ const entries = Object.keys(obj);
2952
+ if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
2953
+ const len = entries.length;
2954
+ rawBlob = new Uint8Array(len);
2955
+ for (let i = 0; i < len; i++)
2956
+ rawBlob[i] = obj[String(i)] ?? 0;
2957
+ }
2958
+ }
2959
+ }
2960
+ let blobData = null;
2961
+ if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
2962
+ const copy = new ArrayBuffer(rawBlob.byteLength);
2963
+ const alignedBlob = new Uint8Array(copy);
2964
+ alignedBlob.set(rawBlob);
2965
+ const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
2966
+ let allFinite = true;
2967
+ for (let i = 0; i < floats.length; i++) {
2968
+ if (!isFinite(floats[i])) {
2969
+ allFinite = false;
2970
+ break;
2971
+ }
2972
+ }
2973
+ if (allFinite) {
2974
+ blobData = alignedBlob;
2975
+ }
2976
+ }
2977
+ if (existing) {
2978
+ if (existing.entity_id !== entityId) {
2979
+ this._warnCrossEntityCollision(
2980
+ "entry",
2981
+ fact.id,
2982
+ existing.entity_id,
2983
+ entityId
2984
+ );
2985
+ continue;
2986
+ }
2987
+ if (merge && safeUpdatedAt <= existing.updated_at) continue;
2988
+ }
2989
+ const factObj = {
2990
+ id: fact.id,
2991
+ entity_id: entityId,
2992
+ title: fact.title,
2993
+ body: fact.body,
2994
+ tags: Array.isArray(fact.tags) ? fact.tags : [],
2995
+ confidence: fact.confidence,
2996
+ source_type: sourceType,
2997
+ source_hash: fact.source_hash,
2998
+ source_ref: fact.source_ref,
2999
+ created_at: fact.created_at,
3000
+ updated_at: safeUpdatedAt,
3001
+ last_accessed_at: fact.last_accessed_at,
3002
+ access_count: fact.access_count,
3003
+ deleted_at: fact.deleted_at,
3004
+ embedding_blob: blobData ?? void 0
3005
+ };
3006
+ await this.entryRepo.upsertForImport(factObj, tx);
3007
+ if (blobData != null) {
3008
+ factsWithPreservedBlob.set(fact.id, blobData);
3009
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
3010
+ }
3011
+ existingFactsById.set(fact.id, {
3012
+ id: fact.id,
3013
+ entity_id: entityId,
3014
+ updated_at: safeUpdatedAt
3015
+ });
3016
+ upsertedFactIds.add(fact.id);
3017
+ if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
3018
+ }
3019
+ const taskIds = bundle.tasks.map((task) => task.id);
3020
+ const existingTasksById = /* @__PURE__ */ new Map();
3021
+ const existingTasks = await this.taskRepo.findExistingMetadataByIds(
3022
+ taskIds,
3023
+ tx
3024
+ );
3025
+ for (const existingTask of existingTasks) {
3026
+ existingTasksById.set(existingTask.id, existingTask);
3027
+ }
3028
+ for (const task of bundle.tasks) {
3029
+ const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
3030
+ const existing = existingTasksById.get(task.id);
3031
+ if (existing) {
3032
+ if (existing.entity_id !== entityId) {
3033
+ this._warnCrossEntityCollision(
3034
+ "task",
3035
+ task.id,
3036
+ existing.entity_id,
3037
+ entityId
3038
+ );
3039
+ continue;
3040
+ }
3041
+ if (merge && safeUpdatedAt <= existing.updated_at) continue;
3042
+ }
3043
+ await this.taskRepo.upsertForImport(
3044
+ {
3045
+ id: task.id,
3046
+ entity_id: entityId,
3047
+ description: task.description,
3048
+ status: task.status,
3049
+ priority: task.priority,
3050
+ created_at: task.created_at,
3051
+ updated_at: safeUpdatedAt,
3052
+ resolved_at: task.resolved_at,
3053
+ deleted_at: task.deleted_at
3054
+ },
3055
+ tx,
3056
+ safeUpdatedAt
3057
+ );
3058
+ existingTasksById.set(task.id, {
3059
+ id: task.id,
3060
+ entity_id: entityId,
3061
+ updated_at: safeUpdatedAt
3062
+ });
3063
+ }
3064
+ for (const event of bundle.events) {
3065
+ await this.eventRepo.addIgnoreDuplicate(
3066
+ {
3067
+ id: event.id,
3068
+ entity_id: entityId,
3069
+ event_type: event.event_type,
3070
+ summary: event.summary,
3071
+ related_entry_id: event.related_entry_id ?? null,
3072
+ created_at: event.created_at
3073
+ },
3074
+ tx
3075
+ );
3076
+ }
3077
+ });
3078
+ await this.searchService.sync(entityId);
3079
+ for (const fact of bundle.facts) {
3080
+ if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
3081
+ const embedded = await this.embeddingService.embedFact({
3082
+ id: fact.id,
3083
+ entity_id: entityId,
3084
+ title: fact.title,
3085
+ body: fact.body,
3086
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
3087
+ });
3088
+ if (!embedded) {
3089
+ await this.embeddingService.notifyEmbeddingPersisted(entityId, fact.id, null);
3090
+ }
2998
3091
  }
2999
3092
  }
3000
- for (const fact of insertedFacts) {
3001
- await this.embedFact(fact);
3002
- }
3003
- this.vectorCache.delete(entityId);
3004
- }
3005
- async runLibrarian(entityId) {
3006
- const jobKey = this._librarianKey(entityId);
3007
- if (this.activeMaintenanceJobs.has(jobKey)) {
3008
- throw new WikiBusyError("librarian", entityId);
3009
- }
3010
- if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3011
- throw new WikiBusyError("prune", entityId);
3012
- }
3013
- if (this._isReembedActive(entityId)) {
3014
- throw new WikiBusyError("reembed", entityId);
3015
- }
3016
- if (this._isImportActiveFor(entityId)) {
3017
- throw new WikiBusyError("import", entityId);
3093
+ for (const fact of bundle.facts) {
3094
+ const blobData = factsWithPreservedBlob.get(fact.id);
3095
+ if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
3096
+ try {
3097
+ const float32Vector = new Float32Array(
3098
+ blobData.buffer,
3099
+ blobData.byteOffset,
3100
+ blobData.byteLength / 4
3101
+ );
3102
+ await this.embeddingService.notifyEmbeddingPersisted(
3103
+ entityId,
3104
+ fact.id,
3105
+ float32Vector
3106
+ );
3107
+ } catch (hookErr) {
3108
+ console.warn(
3109
+ `[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`,
3110
+ hookErr
3111
+ );
3112
+ }
3113
+ }
3018
3114
  }
3019
- if (this._isForgetActiveFor(entityId)) {
3020
- throw new WikiBusyError("forget", entityId);
3115
+ for (const factId of softDeletedFactIds) {
3116
+ if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
3117
+ try {
3118
+ await this.embeddingService.notifyEmbeddingPersisted(
3119
+ entityId,
3120
+ factId,
3121
+ null
3122
+ );
3123
+ } catch (hookErr) {
3124
+ console.warn(
3125
+ `[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`,
3126
+ hookErr
3127
+ );
3128
+ }
3129
+ }
3021
3130
  }
3022
- this.activeMaintenanceJobs.add(jobKey);
3023
- this._notifyStatusSubscribers(entityId);
3024
3131
  try {
3025
- await this._doRunLibrarian(entityId);
3132
+ const canonicalDimValue = await this.metadataRepo.getMeta(
3133
+ "embedding_dimension"
3134
+ );
3135
+ const canonicalDim = canonicalDimValue ? parseInt(canonicalDimValue, 10) : null;
3136
+ if (preservedBlobDims.size === 1) {
3137
+ const preservedDim = [...preservedBlobDims][0];
3138
+ if (canonicalDim === null || canonicalDim === preservedDim) {
3139
+ await this.embeddingService.storeEmbeddingDimension(preservedDim);
3140
+ const staleMismatchValue = await this.metadataRepo.getMeta(
3141
+ "embedding_dimension_mismatch"
3142
+ );
3143
+ if (staleMismatchValue && parseInt(staleMismatchValue, 10) !== preservedDim) {
3144
+ await this.metadataRepo.setMeta(
3145
+ "embedding_dimension_mismatch",
3146
+ String(preservedDim),
3147
+ this.db
3148
+ );
3149
+ }
3150
+ await this.embeddingService.reconcileEmbeddingDimension();
3151
+ } else {
3152
+ await this.metadataRepo.setMeta(
3153
+ "embedding_dimension_mismatch",
3154
+ String(canonicalDim),
3155
+ this.db
3156
+ );
3157
+ }
3158
+ } else if (preservedBlobDims.size > 1) {
3159
+ if (canonicalDim === null) {
3160
+ const sortedPreservedBlobDims = [...preservedBlobDims].sort(
3161
+ (a, b) => a - b
3162
+ );
3163
+ await this.embeddingService.storeEmbeddingDimension(
3164
+ sortedPreservedBlobDims[0]
3165
+ );
3166
+ await this.metadataRepo.setMeta(
3167
+ "embedding_dimension_mismatch",
3168
+ String(sortedPreservedBlobDims[0]),
3169
+ this.db
3170
+ );
3171
+ } else {
3172
+ await this.metadataRepo.setMeta(
3173
+ "embedding_dimension_mismatch",
3174
+ String(canonicalDim),
3175
+ this.db
3176
+ );
3177
+ }
3178
+ }
3026
3179
  } finally {
3027
- this.activeMaintenanceJobs.delete(jobKey);
3028
- this._notifyStatusSubscribers(entityId);
3180
+ this.searchService.evictCache(entityId);
3029
3181
  }
3030
3182
  }
3031
- async runHeal(entityId) {
3032
- const jobKey = this._healKey(entityId);
3033
- if (this.activeMaintenanceJobs.has(jobKey)) {
3034
- throw new WikiBusyError("heal", entityId);
3035
- }
3036
- if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3037
- throw new WikiBusyError("prune", entityId);
3038
- }
3039
- if (this._isReembedActive(entityId)) {
3040
- throw new WikiBusyError("reembed", entityId);
3041
- }
3042
- if (this._isImportActiveFor(entityId)) {
3043
- throw new WikiBusyError("import", entityId);
3044
- }
3045
- if (this._isForgetActiveFor(entityId)) {
3046
- throw new WikiBusyError("forget", entityId);
3183
+ _warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
3184
+ console.warn(
3185
+ `[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`
3186
+ );
3187
+ }
3188
+ _normalizeImportedSourceType(raw, ctx) {
3189
+ if (raw === "user_document") return "immutable_document";
3190
+ if (raw === "agent_inferred") return "librarian_inferred";
3191
+ const allowed = [
3192
+ "user_stated",
3193
+ "librarian_inferred",
3194
+ "user_confirmed",
3195
+ "immutable_document"
3196
+ ];
3197
+ if (allowed.includes(raw))
3198
+ return raw;
3199
+ const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
3200
+ throw new Error(
3201
+ `importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
3202
+ );
3203
+ }
3204
+ async assertNoLegacySourceTypes() {
3205
+ if (!await this.entryRepo.hasLegacySourceTypes()) return;
3206
+ const count = await this.entryRepo.countLegacySourceTypes();
3207
+ throw new Error(
3208
+ `Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
3209
+
3210
+ ${this.entryRepo.getLegacyMigrationSQL()}
3211
+
3212
+ After running the migration SQL, restart your application.`
3213
+ );
3214
+ }
3215
+ };
3216
+
3217
+ // src/services/EmbeddingService.ts
3218
+ var EmbeddingService = class {
3219
+ constructor(db, options, entryRepo, metadataRepo) {
3220
+ this.db = db;
3221
+ this.options = options;
3222
+ this.entryRepo = entryRepo;
3223
+ this.metadataRepo = metadataRepo;
3224
+ }
3225
+ async storeEmbeddingDimension(dim) {
3226
+ const existing = await this.metadataRepo.getMeta("embedding_dimension");
3227
+ if (existing) {
3228
+ const storedDim = parseInt(existing, 10);
3229
+ if (storedDim !== dim) {
3230
+ console.warn(
3231
+ `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
3232
+ );
3233
+ await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
3234
+ }
3235
+ } else {
3236
+ await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
3047
3237
  }
3048
- this.activeMaintenanceJobs.add(jobKey);
3049
- this._notifyStatusSubscribers(entityId);
3050
- try {
3051
- await this._doRunHeal(entityId);
3052
- } finally {
3053
- this.activeMaintenanceJobs.delete(jobKey);
3054
- this._notifyStatusSubscribers(entityId);
3238
+ }
3239
+ /** Promotes embedding_dimension_mismatch to canonical embedding_dimension when safe. */
3240
+ async reconcileEmbeddingDimension() {
3241
+ const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
3242
+ if (!mismatchValue) return;
3243
+ const newDim = parseInt(mismatchValue, 10);
3244
+ const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
3245
+ if (residualCount === 0) {
3246
+ await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
3247
+ await this.metadataRepo.clearDimensionMismatch(this.db);
3055
3248
  }
3056
3249
  }
3057
- async runReembed(entityId, opts) {
3250
+ async embedFact(fact) {
3058
3251
  const embedFn = this.options.llmProvider.embed;
3059
- if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
3060
- const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
3061
- if (this.activeMaintenanceJobs.has(reembedKey)) {
3062
- throw new WikiBusyError("reembed", entityId ?? "*");
3063
- }
3064
- if (entityId) {
3065
- if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
3066
- throw new WikiBusyError("reembed", entityId);
3067
- }
3068
- if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3069
- throw new WikiBusyError("prune", entityId);
3070
- }
3071
- if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
3072
- throw new WikiBusyError("librarian", entityId);
3073
- }
3074
- if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
3075
- throw new WikiBusyError("heal", entityId);
3076
- }
3077
- if (this._isIngestActiveFor(entityId)) {
3078
- throw new WikiBusyError("ingest", entityId);
3079
- }
3080
- if (this._isImportActiveFor(entityId)) {
3081
- throw new WikiBusyError("import", entityId);
3082
- }
3083
- if (this._isForgetActiveFor(entityId)) {
3084
- throw new WikiBusyError("forget", entityId);
3085
- }
3252
+ if (!embedFn) return false;
3253
+ let tagsStr;
3254
+ if (Array.isArray(fact.tags)) {
3255
+ tagsStr = fact.tags.join(" ");
3086
3256
  } else {
3087
- if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
3088
- throw new WikiBusyError("reembed", "*");
3089
- }
3090
- if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
3091
- throw new WikiBusyError("prune", "*");
3257
+ try {
3258
+ const parsed = JSON.parse(fact.tags);
3259
+ tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
3260
+ } catch {
3261
+ tagsStr = fact.tags;
3092
3262
  }
3093
- if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
3094
- throw new WikiBusyError("librarian", "*");
3263
+ }
3264
+ const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
3265
+ try {
3266
+ const vector = await embedFn(text);
3267
+ if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
3268
+ console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
3269
+ return false;
3095
3270
  }
3096
- if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
3097
- throw new WikiBusyError("heal", "*");
3271
+ const float32Vector = new Float32Array(vector);
3272
+ let hasNonFinite = false;
3273
+ for (let i = 0; i < float32Vector.length; i++) {
3274
+ if (!isFinite(float32Vector[i])) {
3275
+ hasNonFinite = true;
3276
+ break;
3277
+ }
3098
3278
  }
3099
- if (this.activeIngestJobs.size > 0) {
3100
- throw new WikiBusyError("ingest", "*");
3279
+ if (hasNonFinite) {
3280
+ console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
3281
+ return false;
3101
3282
  }
3102
- if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
3103
- throw new WikiBusyError("import", "*");
3283
+ await this.storeEmbeddingDimension(float32Vector.length);
3284
+ const blob = new Uint8Array(float32Vector.buffer);
3285
+ await this.entryRepo.updateEmbeddingBlob(fact.id, blob);
3286
+ try {
3287
+ await this.notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
3288
+ } catch (hookErr) {
3289
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
3104
3290
  }
3105
- if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
3106
- throw new WikiBusyError("forget", "*");
3291
+ return true;
3292
+ } catch (err) {
3293
+ console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
3294
+ return false;
3295
+ }
3296
+ }
3297
+ async notifyEmbeddingPersisted(entityId, factId, vector) {
3298
+ if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
3299
+ const vectorCopy = vector ? vector.slice() : null;
3300
+ await this.options.vectorRanker.onEmbeddingPersisted({
3301
+ entityId,
3302
+ factId,
3303
+ vector: vectorCopy
3304
+ });
3305
+ }
3306
+ async notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
3307
+ if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
3308
+ if (this.options.forceDeleteIgnoreRankerHook === true) return;
3309
+ const vectorCopy = vector ? vector.slice() : null;
3310
+ const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
3311
+ if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
3312
+ throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
3313
+ }
3314
+ const timeoutMs = rawTimeout;
3315
+ let timeoutHandle;
3316
+ const timeoutPromise = new Promise((_, reject) => {
3317
+ timeoutHandle = setTimeout(() => {
3318
+ const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
3319
+ timeoutError[HOOK_TIMEOUT_MARKER] = true;
3320
+ reject(timeoutError);
3321
+ }, timeoutMs);
3322
+ });
3323
+ const hookPromise = Promise.resolve().then(
3324
+ () => this.options.vectorRanker.onEmbeddingPersisted({
3325
+ entityId,
3326
+ factId,
3327
+ vector: vectorCopy
3328
+ })
3329
+ );
3330
+ try {
3331
+ await Promise.race([hookPromise, timeoutPromise]);
3332
+ } catch (err) {
3333
+ hookPromise.catch(() => {
3334
+ });
3335
+ throw err;
3336
+ } finally {
3337
+ if (timeoutHandle) clearTimeout(timeoutHandle);
3338
+ }
3339
+ }
3340
+ };
3341
+
3342
+ // src/readOptions.ts
3343
+ function normalizeEntityIds(entityId) {
3344
+ const input = Array.isArray(entityId) ? entityId : [entityId];
3345
+ const seen = /* @__PURE__ */ new Set();
3346
+ const normalized = [];
3347
+ for (const id of input) {
3348
+ if (seen.has(id)) continue;
3349
+ seen.add(id);
3350
+ normalized.push(id);
3351
+ }
3352
+ return normalized;
3353
+ }
3354
+ function sanitizeTierWeights(entityIds, tierWeights) {
3355
+ if (tierWeights === void 0) return void 0;
3356
+ const sanitized = /* @__PURE__ */ Object.create(null);
3357
+ for (const entityId of entityIds) {
3358
+ const raw = tierWeights[entityId];
3359
+ if (raw === void 0 || !Number.isFinite(raw)) {
3360
+ sanitized[entityId] = 1;
3361
+ } else {
3362
+ sanitized[entityId] = Math.max(0, raw);
3363
+ }
3364
+ }
3365
+ return sanitized;
3366
+ }
3367
+ function applyTierWeight(score, entityId, sanitizedTierWeights) {
3368
+ const weight = sanitizedTierWeights?.[entityId] ?? 1;
3369
+ if (weight === 0) return -Infinity;
3370
+ return score * weight;
3371
+ }
3372
+ function shouldExposeReadMetadata(entityId) {
3373
+ return Array.isArray(entityId);
3374
+ }
3375
+
3376
+ // src/services/RetrievalService.ts
3377
+ var RetrievalService = class {
3378
+ constructor(options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService) {
3379
+ this.options = options;
3380
+ this.entryRepo = entryRepo;
3381
+ this.taskRepo = taskRepo;
3382
+ this.eventRepo = eventRepo;
3383
+ this.metadataRepo = metadataRepo;
3384
+ this.searchService = searchService;
3385
+ }
3386
  /**
   * Assemble a retrieval bundle of facts, pending tasks and recent events for
   * one entity or a list of entities.
   *
   * Fact selection for a non-empty query tries, in order:
   *   1. embedding-based ranking (through `options.vectorRanker` when one is
   *      configured, otherwise through `searchService.rankSemantic`);
   *   2. keyword search, when the embedding path was skipped or threw.
   * An empty (whitespace-only) query returns the most recent facts instead.
   *
   * @param {string|string[]} entityId - One entity ID or an array of IDs. The
   *   array form additionally enables `metadata` / `factScores` on the result.
   * @param {string} query - Search text; trimmed before use.
   * @param {object} [options] - Per-call overrides read here: `maxResults`,
   *   `preFilterLimit` (`null` disables the configured default),
   *   `hybridWeight`, `tierWeights`, `includeZeroWeightEntities`.
   * @returns {Promise<object>} `{ facts, tasks, events }`, plus `metadata` and
   *   optionally `factScores` when `entityId` was an array.
   * @throws {RangeError} When more than 100 distinct entity IDs are supplied.
   * @throws {TypeError} When an entity ID contains a null byte.
   * @throws {Error} The vector ranker's error when `vectorRankerFallback` is "throw".
   */
  async read(entityId, query, options) {
    const config = this.options.config;
    const entityIds = normalizeEntityIds(entityId);
    // Tier weights are only honoured for the array form of entityId.
    const sanitizedTierWeights = shouldExposeReadMetadata(entityId) ? sanitizeTierWeights(entityIds, options?.tierWeights) : void 0;
    const exposeMetadata = shouldExposeReadMetadata(entityId);
    // An empty ID list short-circuits before any repository access.
    if (entityIds.length === 0) {
      const empty = { facts: [], tasks: [], events: [] };
      if (exposeMetadata) {
        empty.metadata = { query, entityIds: [] };
        if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
      }
      return empty;
    }
    const MAX_ENTITY_IDS = 100;
    if (entityIds.length > MAX_ENTITY_IDS) {
      throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
    }
    // "\0" is used further down as the separator of the multi-entity cache key,
    // so it is rejected inside individual IDs.
    const nullByteId = entityIds.find((id) => id.includes("\0"));
    if (nullByteId !== void 0) {
      throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
    }
    // Precedence: per-call option, then config.maxResults, then config.maxFtsResults, then 10.
    const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
    // Non-finite values fall back to 10; finite values are truncated and floored at 0.
    const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
    // An explicit null opts out of pre-filtering even if the config sets a limit.
    const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
    const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
    const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
    // Clamp to [0, 1]; undefined or NaN leaves `weight` undefined.
    const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
    // A weight of exactly 0 bypasses the embedding branch entirely.
    const skipEmbed = weight === 0;
    const embedFn = this.options.llmProvider.embed;
    const trimmedQuery = query.trim();
    let facts = [];
    let scoreByFactId;
    // maxResults === 0: intentionally empty statement, `facts` stays [].
    if (maxResults === 0) ; else if (trimmedQuery) {
      // Set to true once the embedding branch has produced its answer (or decided
      // there is nothing to score); it gates the keyword search further down.
      let usedEmbed = false;
      const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
      if (scoredEntityIds.length === 0) {
        // No entity may contribute scored facts, so the keyword search is suppressed too.
        usedEmbed = true;
      } else if (!skipEmbed && embedFn) {
        // Set right before re-throwing under the "throw" policy so the outer catch
        // propagates the error instead of degrading to keyword search.
        let rankerShouldRethrow = false;
        // Ranker failure held back so onRetrievalFallback fires at most once for it.
        let pendingRankerFallbackError;
        try {
          const queryVec = await embedFn(trimmedQuery);
          if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
            throw new Error(
              "embed() returned an empty or non-finite vector. Falling back to keyword search."
            );
          }
          // Guard 1: the recorded embedding dimension must match the query vector.
          const storedDimValue = await this.metadataRepo.getMeta("embedding_dimension");
          if (storedDimValue) {
            const storedDim = parseInt(storedDimValue, 10);
            if (storedDim !== queryVec.length) {
              throw new Error(
                `Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
              );
            }
          }
          // Guard 2: repository-side count of mismatched rows among the scored entities.
          const mismatchedCount = await this.entryRepo.countDimensionMismatched(scoredEntityIds, queryVec.length);
          if (mismatchedCount > 0) {
            throw new Error(
              `Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
            );
          }
          const useRanker = Boolean(this.options.vectorRanker);
          // null means the keyword pre-filter matched nothing.
          let candidateRows;
          let populateCache = entityIds.length === 1;
          let miniSearchScores;
          if (effectivePreFilterLimit !== void 0) {
            // Pre-filter mode: candidates are the top keyword hits only.
            populateCache = false;
            const preResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, Number.MAX_SAFE_INTEGER);
            if (preResults.length === 0) {
              candidateRows = null;
            } else {
              const topKResults = preResults.slice(0, effectivePreFilterLimit);
              if (topKResults.length === 0) {
                candidateRows = null;
              } else {
                const topKIds = topKResults.map((r) => r.id);
                // The ranker path loads metadata only; the JS path also needs embeddings.
                if (useRanker) {
                  candidateRows = await this.entryRepo.findMetadataByIds(topKIds);
                } else {
                  candidateRows = await this.entryRepo.findWithEmbeddingsByIds(topKIds);
                }
                if (weight !== void 0 && weight < 1) {
                  // Divide by the first hit's score (floored at 1) before blending.
                  const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
                  miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
                }
              }
            }
          } else {
            // No pre-filter: candidates are loaded per scored entity.
            if (useRanker) {
              candidateRows = await this.entryRepo.findMetadataByEntityIds(scoredEntityIds);
            } else {
              candidateRows = await this.entryRepo.findWithEmbeddingsByEntityIds(scoredEntityIds);
            }
            if (weight !== void 0 && weight < 1) {
              miniSearchScores = this.searchService.getMiniSearchScores(trimmedQuery, scoredEntityIds);
            }
          }
          if (candidateRows === null) {
            // Pre-filter found nothing; the keyword search below is skipped as well.
            usedEmbed = true;
          } else {
            const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
            let scored;
            if (useRanker) {
              // The ranker is invoked once per entity, so bucket candidates by entity first.
              const candidateRowsByEntity = /* @__PURE__ */ new Map();
              for (const row of candidateRows) {
                const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
                rows.push(row);
                candidateRowsByEntity.set(row.entity_id, rows);
              }
              try {
                const rankerResultsByEntity = await Promise.all(
                  scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
                    const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
                    // candidateIds is passed to the ranker only in pre-filter mode.
                    const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
                    const ranked = await this._rankWithVectorRanker({
                      entityId: scopedEntityId,
                      queryVec,
                      candidateIds,
                      candidateRows: rowsForEntity,
                      weight,
                      miniSearchScores,
                      limit: Math.max(maxResults * 2, maxResults + 50)
                    });
                    return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
                  })
                );
                scored = rankerResultsByEntity.flat();
                const scoredIds = new Set(scored.map((s) => s.id));
                const metadataById = new Map(
                  candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
                );
                // Attach the tie-break fields used by _compareScoredRows.
                scored = scored.map((row) => {
                  const metadata = metadataById.get(row.id);
                  return {
                    ...row,
                    updated_at: metadata?.updated_at ?? null,
                    access_count: metadata?.access_count ?? null
                  };
                });
                // Backfill: add candidates the ranker did not return.
                const isHybrid = weight !== void 0 && weight < 1;
                const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
                if (maxBackfill > 0) {
                  if (isHybrid) {
                    // Keep the best `maxBackfill` omitted rows by keyword score in a
                    // sorted array maintained by insertion.
                    const topK = [];
                    for (const row of candidateRows) {
                      if (scoredIds.has(row.id)) continue;
                      const kwScore = miniSearchScores?.get(row.id) ?? 0;
                      const candidate = { row, kwScore };
                      if (topK.length < maxBackfill) {
                        // Not full yet: insert before the first entry this candidate outranks.
                        let insertIdx = topK.length;
                        for (let i = 0; i < topK.length; i++) {
                          const cmp = this._compareScoredRows(
                            {
                              id: candidate.row.id,
                              score: candidate.kwScore,
                              updated_at: candidate.row.updated_at,
                              access_count: candidate.row.access_count
                            },
                            {
                              id: topK[i].row.id,
                              score: topK[i].kwScore,
                              updated_at: topK[i].row.updated_at,
                              access_count: topK[i].row.access_count
                            }
                          );
                          if (cmp < 0) {
                            insertIdx = i;
                            break;
                          }
                        }
                        topK.splice(insertIdx, 0, candidate);
                      } else {
                        // Full: only admit the candidate if it beats the current worst entry.
                        const cmpWorst = this._compareScoredRows(
                          {
                            id: candidate.row.id,
                            score: candidate.kwScore,
                            updated_at: candidate.row.updated_at,
                            access_count: candidate.row.access_count
                          },
                          {
                            id: topK[maxBackfill - 1].row.id,
                            score: topK[maxBackfill - 1].kwScore,
                            updated_at: topK[maxBackfill - 1].row.updated_at,
                            access_count: topK[maxBackfill - 1].row.access_count
                          }
                        );
                        if (cmpWorst < 0) {
                          let insertIdx = maxBackfill - 1;
                          for (let i = 0; i < topK.length; i++) {
                            const cmp = this._compareScoredRows(
                              {
                                id: candidate.row.id,
                                score: candidate.kwScore,
                                updated_at: candidate.row.updated_at,
                                access_count: candidate.row.access_count
                              },
                              {
                                id: topK[i].row.id,
                                score: topK[i].kwScore,
                                updated_at: topK[i].row.updated_at,
                                access_count: topK[i].row.access_count
                              }
                            );
                            if (cmp < 0) {
                              insertIdx = i;
                              break;
                            }
                          }
                          topK.splice(insertIdx, 0, candidate);
                          // Drop the displaced worst entry to keep the size at maxBackfill.
                          topK.pop();
                        }
                      }
                    }
                    // Backfilled rows carry only the keyword share of the hybrid score.
                    for (const { row, kwScore } of topK) {
                      scored.push({
                        id: row.id,
                        entity_id: row.entity_id,
                        score: (1 - weight) * kwScore,
                        updated_at: row.updated_at,
                        access_count: row.access_count
                      });
                    }
                  } else {
                    // Non-hybrid: omitted rows get the fixed score -2 and are ordered
                    // among themselves by the tie-break fields.
                    const omitted = [];
                    for (const row of candidateRows) {
                      if (scoredIds.has(row.id)) continue;
                      omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
                    }
                    if (omitted.length > 0) {
                      this._tieBreakSort(omitted);
                      scored.push(...omitted.slice(0, maxBackfill));
                    }
                  }
                }
              } catch (rankerErr) {
                // Ranker failure: notify, then apply the configured fallback policy
                // ("js-cosine" when none is configured).
                const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
                const policy = this.options.vectorRankerFallback ?? "js-cosine";
                this.options.onVectorRankerFallback?.({
                  error: this._sanitizeRankerError(rankerError),
                  policy
                });
                if (policy === "throw") {
                  rankerShouldRethrow = true;
                  throw rankerError;
                } else if (policy === "js-cosine") {
                  let fallbackRows = candidateRows;
                  // The ranker path loaded metadata only; fetch embeddings when the
                  // first row lacks an embedding_blob property.
                  if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
                    const rowIds = fallbackRows.map((r) => r.id);
                    const embeddingRows = await this.entryRepo.findEmbeddingsByIds(rowIds);
                    const embeddingsMap = new Map(embeddingRows.map((row) => [row.id, row]));
                    fallbackRows = fallbackRows.map((r) => ({
                      ...r,
                      embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
                      embedding: embeddingsMap.get(r.id)?.embedding ?? null
                    }));
                  }
                  scored = await this.searchService.rankSemantic({
                    entityId: entityCacheKey,
                    queryVec,
                    candidateRows: fallbackRows,
                    weight,
                    miniSearchScores,
                    populateCache,
                    limit: fallbackRows.length,
                    skipSort: true
                    // read() re-sorts after applying tier weights
                  });
                } else if (policy === "keyword") {
                  const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
                  const topResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, keywordOversampledLimit);
                  const topResultIds = new Set(topResults.map((r) => r.id));
                  const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
                  scored = topResults.map((result) => {
                    const metadata = candidateMap.get(result.id);
                    const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
                    return {
                      id: result.id,
                      entity_id: entityForScore,
                      score: result.score ?? 0,
                      access_count: metadata?.access_count ?? null,
                      updated_at: metadata?.updated_at ?? null
                    };
                  });
                } else {
                  // Any other policy value yields no scored rows.
                  scored = [];
                }
                if (this.options.propagateRankerFailureToRetrievalFallback) {
                  // NOTE(review): this passes the raw `rankerErr`, whereas the call above
                  // passes the normalized `rankerError` - confirm this is intentional.
                  const mirrored = new Error("Vector ranker failed, falling back", {
                    cause: this._sanitizeRankerError(rankerErr)
                  });
                  pendingRankerFallbackError = mirrored;
                }
              }
            } else {
              // No ranker configured: rank in-process. When any tier weight differs
              // from 1, request every row unsorted so weighting happens before the cut.
              const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
              scored = await this.searchService.rankSemantic({
                entityId: entityCacheKey,
                queryVec,
                candidateRows,
                weight,
                miniSearchScores,
                populateCache,
                limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
                skipSort: jsCosineNeedsTierSort
                // read() re-sorts after applying tier weights
              });
            }
            if (scored.length > 0) {
              // Apply tier weights, order deterministically, then cut to maxResults.
              scored = scored.map((row) => ({
                ...row,
                score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
              }));
              this._tieBreakSort(scored);
              const selectedScored = scored.slice(0, maxResults);
              const topIds = selectedScored.map((s) => s.id);
              if (exposeMetadata && trimmedQuery) {
                // Non-finite scores (e.g. -Infinity from a zero weight) are reported as 0.
                scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
              }
              if (topIds.length > 0) {
                const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
                if (facts2.length < topIds.length) {
                  // Fewer rows came back than were ranked: report it, but still
                  // return the rows that did hydrate.
                  const hydrationById = new Set(facts2.map((f) => f.id));
                  const missingIds = topIds.filter((id) => !hydrationById.has(id));
                  const missingCount = missingIds.length;
                  const sample = missingIds.slice(0, 5);
                  const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
                  const error = new Error(
                    `Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
                  );
                  this.options.onRetrievalFallback?.(error);
                }
                facts = facts2;
              }
              // Deliver the held-back ranker failure now that the fallback succeeded.
              if (pendingRankerFallbackError) {
                this.options.onRetrievalFallback?.(pendingRankerFallbackError);
                pendingRankerFallbackError = void 0;
              }
              usedEmbed = true;
            } else {
              // Nothing scored: still report a held-back ranker failure and mark the
              // embedding branch as final (no keyword search afterwards).
              if (pendingRankerFallbackError) {
                this.options.onRetrievalFallback?.(pendingRankerFallbackError);
                pendingRankerFallbackError = void 0;
              }
              usedEmbed = true;
            }
          }
        } catch (err) {
          // Any failure in the embedding branch is reported through
          // onRetrievalFallback and, unless a re-throw was requested, execution
          // continues with the keyword search below (usedEmbed is still false).
          const error = err instanceof Error ? err : new Error(String(err));
          if (rankerShouldRethrow) {
            throw error;
          }
          if (pendingRankerFallbackError) {
            error.cause = pendingRankerFallbackError;
            pendingRankerFallbackError = void 0;
          }
          this.options.onRetrievalFallback?.(error);
        }
      }
      if (!usedEmbed && scoredEntityIds.length > 0) {
        // Keyword search: oversample, tier-weight, sort, then cut to maxResults.
        const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
        const results = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, fallbackOversampledLimit);
        const candidates = results.map((r) => ({
          id: r.id,
          entity_id: r.entity_id,
          score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
          updated_at: null,
          access_count: null
        }));
        this._tieBreakSort(candidates);
        const topCandidates = candidates.slice(0, maxResults);
        const topIds = topCandidates.map((c) => c.id);
        if (topIds.length > 0) {
          facts = await this._hydrateFactsByIds(topIds, entityIds);
          if (exposeMetadata) {
            scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
          }
        }
      }
      if (facts.length > 0) {
        // Record an access for every fact returned from a query.
        const ids = facts.map((f) => f.id);
        const now = Date.now();
        await this.entryRepo.trackAccess(ids, now);
      }
    } else {
      // Blank query: return the most recent facts; no access tracking on this path.
      facts = await this.entryRepo.findRecentByEntityIds(entityIds, maxResults);
    }
    // Limits scale with the number of entities but are capped (events 100, tasks 200);
    // a single-entity task lookup passes no limit.
    const eventsLimit = Math.min(10 * entityIds.length, 100);
    const [tasks, events] = await Promise.all([
      this.taskRepo.findAllPending(entityIds, entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200)),
      entityIds.length === 1 ? this.eventRepo.getRecent(entityIds[0], eventsLimit) : this.eventRepo.getRecentForEntities(entityIds, eventsLimit)
    ]);
    let factScores;
    if (exposeMetadata && trimmedQuery && scoreByFactId) {
      factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
    }
    // events.reverse() reverses the fetched array in place before it is returned.
    const bundle = { facts, tasks, events: events.reverse() };
    if (exposeMetadata) {
      bundle.metadata = { query, entityIds };
      if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
      if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
    }
    return bundle;
  }
3179
3790
  /**
3180
- * Subscribe to {@link EntityStatus} changes for a single entity. The callback
3181
- * is invoked synchronously once with the current status before this method
3182
- * returns, then again on every transition where any of `ingesting`,
3183
- * `librarian`, or `heal` flips. No polling, no duplicate snapshots.
3184
- *
3185
- * Returns an idempotent unsubscribe function.
3186
- *
3187
- * See also {@link getEntityStatus} for a synchronous point-in-time read.
3791
+ * Returns entity IDs that will participate in scored retrieval.
3792
+ * Excludes zero-weight entities unless includeZeroWeightEntities is true.
3188
3793
  */
3189
- subscribeEntityStatus(entityId, callback) {
3190
- const initial = this.getEntityStatus(entityId);
3191
- let set = this.statusSubscribers.get(entityId);
3192
- if (!set) {
3193
- set = /* @__PURE__ */ new Set();
3194
- this.statusSubscribers.set(entityId, set);
3195
- }
3196
- const entry = { callback, last: this._copyEntityStatus(initial) };
3197
- set.add(entry);
3198
- try {
3199
- callback(this._copyEntityStatus(initial));
3200
- } catch (err) {
3201
- console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during initial emission`, err);
3202
- }
3203
- let active = true;
3204
- return () => {
3205
- if (!active) return;
3206
- active = false;
3207
- const s = this.statusSubscribers.get(entityId);
3208
- if (!s) return;
3209
- s.delete(entry);
3210
- if (s.size === 0) this.statusSubscribers.delete(entityId);
3211
- };
3794
+ _filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
3795
+ return entityIds.filter((id) => {
3796
+ const w = sanitizedTierWeights?.[id] ?? 1;
3797
+ return includeZeroWeightEntities === true || w !== 0;
3798
+ });
3212
3799
  }
3213
- clearVectorCache() {
3214
- this.vectorCache.clear();
3800
+ /**
3801
+ * Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
3802
+ */
3803
+ _tieBreakSort(items) {
3804
+ items.sort((a, b) => this._compareScoredRows(a, b));
3215
3805
  }
3216
- async _getFullBundle(entityId, opts) {
3217
- const [factsRaw, tasks, events] = await Promise.all([
3218
- this.entryRepo.findAllByEntityId(entityId),
3219
- this.taskRepo.findAllByEntityId(entityId),
3220
- this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
3221
- ]);
3222
- const facts = factsRaw.map((f) => {
3223
- const { embedding: _embedding, embedding_blob, ...rest } = f;
3224
- const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
3225
- const c = new ArrayBuffer(embedding_blob.byteLength);
3226
- new Uint8Array(c).set(embedding_blob);
3227
- return new Uint8Array(c);
3228
- })() : void 0;
3229
- const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
3806
+ /**
3807
+ * Comparator for score + deterministic tie-break fields.
3808
+ * Negative return means "a ranks ahead of b" for descending score order.
3809
+ */
3810
+ _compareScoredRows(a, b) {
3811
+ const scoreDiff = b.score - a.score;
3812
+ if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
3813
+ const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
3814
+ if (accessCountDiff !== 0) return accessCountDiff;
3815
+ const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
3816
+ if (updatedAtDiff !== 0) return updatedAtDiff;
3817
+ return a.id.localeCompare(b.id);
3818
+ }
3819
+ /**
3820
+ * Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
3821
+ * (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
3822
+ */
3823
+ async _hydrateFactsByIds(ids, scopedEntityIds, tx) {
3824
+ return this.entryRepo.findByIds(ids, scopedEntityIds, tx);
3825
+ }
3826
+ _sanitizeRankerError(err) {
3827
+ return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
3828
+ }
3829
+ /**
3830
+ * Delegate semantic ranking to the injected VectorRanker.
3831
+ * Caller should pass an oversampledLimit to preserve recall after re-ranking.
3832
+ * Returns scored results ready for hybrid blending and tie-break sorting.
3833
+ */
3834
+ async _rankWithVectorRanker(args) {
3835
+ const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
3836
+ const ranker = this.options.vectorRanker;
3837
+ if (!ranker) {
3838
+ throw new Error("vectorRanker not configured");
3839
+ }
3840
+ const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
3841
+ const rankerResults = await ranker.rankBySimilarity({
3842
+ entityId,
3843
+ queryVec: queryVecCopy,
3844
+ candidateIds,
3845
+ limit
3846
+ });
3847
+ const allowedIds = new Set(candidateRows.map((row) => row.id));
3848
+ const seen = /* @__PURE__ */ new Set();
3849
+ const normalized = [];
3850
+ for (const r of rankerResults) {
3851
+ if (normalized.length >= limit) break;
3852
+ if (seen.has(r.id)) continue;
3853
+ if (allowedIds && !allowedIds.has(r.id)) continue;
3854
+ if (!Number.isFinite(r.semanticScore)) continue;
3855
+ seen.add(r.id);
3856
+ normalized.push(r);
3857
+ }
3858
+ const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
3859
+ const scored = normalized.map((r) => {
3860
+ let score = r.semanticScore;
3861
+ if (weight !== void 0) {
3862
+ const kwScore = miniSearchScores?.get(r.id) ?? 0;
3863
+ score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
3864
+ }
3230
3865
  return {
3231
- ...factBase,
3232
- tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
3866
+ id: r.id,
3867
+ entity_id: entityIdByCandidateId.get(r.id),
3868
+ // allowedIds filter above guarantees membership
3869
+ score
3233
3870
  };
3234
3871
  });
3235
- return { facts, tasks, events };
3872
+ return scored;
3236
3873
  }
3237
- async exportDump(entityIds) {
3238
- let ids;
3239
- if (entityIds && entityIds.length > 0) {
3240
- ids = Array.from(new Set(entityIds));
3241
- } else {
3242
- ids = await this.metadataRepo.getDistinctEntityIds();
3243
- }
3244
- const entities = {};
3245
- const BATCH = 3;
3246
- for (let i = 0; i < ids.length; i += BATCH) {
3247
- const batch = ids.slice(i, i + BATCH);
3248
- const batchResults = await Promise.all(
3249
- batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
3250
- );
3251
- for (const [id, bundle] of batchResults) {
3252
- entities[id] = bundle;
3253
- }
3254
- }
3255
- return { generatedAt: Date.now(), entities };
3874
+ };
3875
+
3876
+ // src/services/WriteService.ts
3877
+ var WriteService = class {
3878
+ constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
3879
+ this.db = db;
3880
+ this.options = options;
3881
+ this.eventRepo = eventRepo;
3882
+ this.metadataRepo = metadataRepo;
3883
+ this.jobManager = jobManager;
3884
+ this.maintenanceService = maintenanceService;
3256
3885
  }
3257
- async importDump(dump, opts) {
3258
- const merge = opts?.merge ?? false;
3259
- const entityIds = Object.keys(dump.entities);
3260
- for (const entityId of entityIds) {
3261
- if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
3262
- throw new WikiBusyError("import", entityId);
3263
- }
3264
- if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
3265
- throw new WikiBusyError("librarian", entityId);
3266
- }
3267
- if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
3268
- throw new WikiBusyError("heal", entityId);
3269
- }
3270
- if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3271
- throw new WikiBusyError("prune", entityId);
3272
- }
3273
- if (this._isReembedActive(entityId)) {
3274
- throw new WikiBusyError("reembed", entityId);
3275
- }
3276
- if (this._isIngestActiveFor(entityId)) {
3277
- throw new WikiBusyError("ingest", entityId);
3886
+ async write(entityId, event) {
3887
+ const id = generateId("evt_");
3888
+ const now = Date.now();
3889
+ let eventType = event.event_type;
3890
+ if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
3891
+ eventType = "observation";
3892
+ }
3893
+ const newEvent = {
3894
+ id,
3895
+ entity_id: entityId,
3896
+ event_type: eventType,
3897
+ summary: event.summary,
3898
+ related_entry_id: event.related_entry_id || null,
3899
+ created_at: now
3900
+ };
3901
+ let shouldRunLibrarian = false;
3902
+ let librarianCount = 0;
3903
+ let prevMemoryCheckpoint = 0;
3904
+ await this.db.withTransactionAsync(async (tx) => {
3905
+ await this.eventRepo.add(newEvent, tx);
3906
+ const threshold = this.options.config?.autoLibrarianThreshold || 20;
3907
+ const [count, cp] = await Promise.all([
3908
+ this.eventRepo.count(entityId, tx),
3909
+ this.metadataRepo.getCheckpoint(entityId, tx)
3910
+ ]);
3911
+ let memoryCheckpoint = cp.memory ?? 0;
3912
+ if (memoryCheckpoint > count) memoryCheckpoint = 0;
3913
+ if (count - memoryCheckpoint >= threshold) {
3914
+ if (!this.jobManager.isBlocked("librarian", entityId)) {
3915
+ shouldRunLibrarian = true;
3916
+ librarianCount = count;
3917
+ prevMemoryCheckpoint = memoryCheckpoint;
3918
+ await this.metadataRepo.updateCheckpoint(entityId, { memory: count }, tx);
3919
+ }
3278
3920
  }
3279
- if (this._isForgetActiveFor(entityId)) {
3280
- throw new WikiBusyError("forget", entityId);
3921
+ });
3922
+ if (shouldRunLibrarian) {
3923
+ try {
3924
+ this.jobManager.acquireLock("librarian", entityId);
3925
+ this.runLibrarianThenMaybeHeal(entityId, librarianCount, prevMemoryCheckpoint).catch(console.error).finally(() => {
3926
+ this.jobManager.releaseLock("librarian", entityId);
3927
+ });
3928
+ } catch (e) {
3929
+ if (!(e instanceof WikiBusyError)) throw e;
3930
+ await this.metadataRepo.updateCheckpoint(entityId, { memory: prevMemoryCheckpoint }, this.db);
3281
3931
  }
3282
3932
  }
3283
- if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
3284
- throw new WikiBusyError("import", "*");
3285
- }
3286
- this.activeMaintenanceJobs.add(this._globalImportKey());
3287
- for (const entityId of entityIds) {
3288
- this.activeMaintenanceJobs.add(this._importKey(entityId));
3289
- }
3933
+ }
3934
+ async runLibrarianThenMaybeHeal(entityId, currentEventCount, prevCheckpoint) {
3290
3935
  try {
3291
- await this.assertNoLegacySourceTypes();
3292
- for (const [entityId, bundle] of Object.entries(dump.entities)) {
3293
- await this._doImportEntity(entityId, bundle, merge);
3294
- }
3295
- } finally {
3296
- this.activeMaintenanceJobs.delete(this._globalImportKey());
3297
- for (const entityId of entityIds) {
3298
- this.activeMaintenanceJobs.delete(this._importKey(entityId));
3936
+ await this.maintenanceService.doRunLibrarian(entityId);
3937
+ await this.metadataRepo.updateCheckpoint(entityId, { memory: currentEventCount }, this.db);
3938
+ } catch (e) {
3939
+ await this.metadataRepo.updateCheckpoint(entityId, { memory: prevCheckpoint }, this.db);
3940
+ throw e;
3941
+ }
3942
+ const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
3943
+ const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
3944
+ let healCheckpoint = cp.heal ?? 0;
3945
+ if (healCheckpoint > currentEventCount) healCheckpoint = 0;
3946
+ const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
3947
+ if (shouldRunHeal && this.jobManager.tryAcquireAutoHealLock(entityId)) {
3948
+ try {
3949
+ await this.maintenanceService.doRunHeal(entityId);
3950
+ await this.metadataRepo.updateCheckpoint(entityId, { heal: currentEventCount }, this.db);
3951
+ } finally {
3952
+ this.jobManager.releaseLock("heal", entityId);
3299
3953
  }
3300
3954
  }
3301
3955
  }
3302
- async _doImportEntity(entityId, bundle, merge) {
3303
- const upsertedFactIds = /* @__PURE__ */ new Set();
3304
- const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
3305
- const factsWithPreservedBlob = /* @__PURE__ */ new Map();
3306
- const preservedBlobDims = /* @__PURE__ */ new Set();
3307
- const softDeletedFactIds = [];
3308
- await this.db.withTransactionAsync(async (tx) => {
3309
- if (!merge) {
3310
- const deletedLiveFactIds = await this.entryRepo.findIdsBySource(entityId, null, null, tx, false);
3311
- softDeletedFactIds.push(...deletedLiveFactIds);
3312
- await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
3313
- await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
3314
- await this.metadataRepo.deleteCheckpoint(entityId, tx);
3315
- }
3316
- const factIds = bundle.facts.map((fact) => fact.id);
3317
- const existingFactsById = /* @__PURE__ */ new Map();
3318
- const existingFacts = await this.entryRepo.findExistingMetadataByIds(factIds, tx);
3319
- for (const existingFact of existingFacts) {
3320
- existingFactsById.set(existingFact.id, existingFact);
3321
- }
3322
- for (const fact of bundle.facts) {
3323
- const sourceType = this._normalizeImportedSourceType(String(fact.source_type), {
3324
- entityId,
3325
- factId: fact.id
3326
- });
3327
- JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
3328
- const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
3329
- const existing = existingFactsById.get(fact.id);
3330
- const rawBlobRaw = fact.embedding_blob;
3331
- let rawBlob = null;
3332
- if (rawBlobRaw instanceof Uint8Array) {
3333
- rawBlob = rawBlobRaw;
3334
- } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
3335
- const obj = rawBlobRaw;
3336
- if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
3337
- rawBlob = new Uint8Array(obj["data"]);
3338
- } else if (!Array.isArray(rawBlobRaw)) {
3339
- const entries = Object.keys(obj);
3340
- if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
3341
- const len = entries.length;
3342
- rawBlob = new Uint8Array(len);
3343
- for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
3344
- }
3345
- }
3346
- }
3347
- let blobData = null;
3348
- if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
3349
- const copy = new ArrayBuffer(rawBlob.byteLength);
3350
- const alignedBlob = new Uint8Array(copy);
3351
- alignedBlob.set(rawBlob);
3352
- const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
3353
- let allFinite = true;
3354
- for (let i = 0; i < floats.length; i++) {
3355
- if (!isFinite(floats[i])) {
3356
- allFinite = false;
3357
- break;
3358
- }
3359
- }
3360
- if (allFinite) {
3361
- blobData = alignedBlob;
3362
- }
3363
- }
3364
- if (existing) {
3365
- if (existing.entity_id !== entityId) {
3366
- this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
3367
- continue;
3368
- }
3369
- if (merge) {
3370
- if (safeUpdatedAt <= existing.updated_at) continue;
3371
- }
3372
- }
3373
- const factObj = {
3374
- id: fact.id,
3375
- entity_id: entityId,
3376
- title: fact.title,
3377
- body: fact.body,
3378
- tags: Array.isArray(fact.tags) ? fact.tags : [],
3379
- confidence: fact.confidence,
3380
- source_type: sourceType,
3381
- source_hash: fact.source_hash,
3382
- source_ref: fact.source_ref,
3383
- created_at: fact.created_at,
3384
- updated_at: safeUpdatedAt,
3385
- last_accessed_at: fact.last_accessed_at,
3386
- access_count: fact.access_count,
3387
- deleted_at: fact.deleted_at,
3388
- embedding_blob: blobData ?? void 0
3389
- };
3390
- await this.entryRepo.upsertForImport(factObj, tx);
3391
- if (blobData != null) {
3392
- factsWithPreservedBlob.set(fact.id, blobData);
3393
- if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
3394
- }
3395
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
3396
- upsertedFactIds.add(fact.id);
3397
- if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
3398
- }
3399
- const taskIds = bundle.tasks.map((task) => task.id);
3400
- const existingTasksById = /* @__PURE__ */ new Map();
3401
- const existingTasks = await this.taskRepo.findExistingMetadataByIds(taskIds, tx);
3402
- for (const existingTask of existingTasks) {
3403
- existingTasksById.set(existingTask.id, existingTask);
3404
- }
3405
- for (const task of bundle.tasks) {
3406
- const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
3407
- const existing = existingTasksById.get(task.id);
3408
- if (existing) {
3409
- if (existing.entity_id !== entityId) {
3410
- this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
3411
- continue;
3412
- }
3413
- if (merge) {
3414
- if (safeUpdatedAt <= existing.updated_at) continue;
3415
- }
3416
- }
3417
- await this.taskRepo.upsertForImport({
3418
- id: task.id,
3419
- entity_id: entityId,
3420
- description: task.description,
3421
- status: task.status,
3422
- priority: task.priority,
3423
- created_at: task.created_at,
3424
- updated_at: safeUpdatedAt,
3425
- resolved_at: task.resolved_at,
3426
- deleted_at: task.deleted_at
3427
- }, tx, safeUpdatedAt);
3428
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
3429
- }
3430
- for (const event of bundle.events) {
3431
- await this.eventRepo.addIgnoreDuplicate({
3432
- id: event.id,
3433
- entity_id: entityId,
3434
- event_type: event.event_type,
3435
- summary: event.summary,
3436
- related_entry_id: event.related_entry_id ?? null,
3437
- created_at: event.created_at
3438
- }, tx);
3439
- }
3440
- });
3441
- this.vectorCache.delete(entityId);
3442
- await this.rebuildMiniSearchIndex(entityId);
3443
- for (const fact of bundle.facts) {
3444
- if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
3445
- await this.embedFact({
3446
- id: fact.id,
3447
- entity_id: entityId,
3448
- // Use authoritative entityId from dump key, not fact.entity_id
3449
- title: fact.title,
3450
- body: fact.body,
3451
- tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
3452
- });
3956
+ };
3957
+
3958
+ // src/WikiMemory.ts
3959
+ var _testAccessNonTestEnvWarned;
3960
+ var WikiMemory = class {
3961
+ constructor(db, options) {
3962
+ /** Tracks whether the `__testAccess` console warning has already been emitted, so it fires at most once per instance when NODE_ENV ≠ "test". */
3963
+ __privateAdd(this, _testAccessNonTestEnvWarned, false);
3964
+ this.db = db;
3965
+ this.options = options;
3966
+ this.prefix = options.config?.tablePrefix || "llm_wiki_";
3967
+ this.outboxRepo = new OutboxRepository(db, this.prefix, !!options.config?.enableOutbox);
3968
+ this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
3969
+ this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
3970
+ this.eventRepo = new EventRepository(db, this.prefix);
3971
+ this.metadataRepo = new MetadataRepository(db, this.prefix);
3972
+ this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
3973
+ this.searchService = new SearchService(this.entryRepo);
3974
+ this.jobManager = new JobManager(this.prefix);
3975
+ this.promptService = new PromptService(options.config?.prompts);
3976
+ this.ingestionService = new IngestionService(
3977
+ this.db,
3978
+ this.prefix,
3979
+ this.options,
3980
+ this.entryRepo,
3981
+ this.searchService,
3982
+ this.jobManager,
3983
+ this.embeddingService,
3984
+ this.promptService
3985
+ );
3986
+ this.maintenanceService = new MaintenanceService(
3987
+ this.db,
3988
+ this.prefix,
3989
+ this.options,
3990
+ this.entryRepo,
3991
+ this.taskRepo,
3992
+ this.eventRepo,
3993
+ this.metadataRepo,
3994
+ this.searchService,
3995
+ this.jobManager,
3996
+ this.embeddingService,
3997
+ this.promptService
3998
+ );
3999
+ this.importExportService = new ImportExportService(
4000
+ this.db,
4001
+ this.entryRepo,
4002
+ this.taskRepo,
4003
+ this.eventRepo,
4004
+ this.metadataRepo,
4005
+ this.searchService,
4006
+ this.jobManager,
4007
+ this.embeddingService
4008
+ );
4009
+ this.retrievalService = new RetrievalService(
4010
+ this.options,
4011
+ this.entryRepo,
4012
+ this.taskRepo,
4013
+ this.eventRepo,
4014
+ this.metadataRepo,
4015
+ this.searchService
4016
+ );
4017
+ this.writeService = new WriteService(
4018
+ this.db,
4019
+ this.options,
4020
+ this.eventRepo,
4021
+ this.metadataRepo,
4022
+ this.jobManager,
4023
+ this.maintenanceService
4024
+ );
4025
+ }
4026
+ /**
4027
+ * Explicit escape hatch for test suites: typed access to composed services for mocks/spies.
4028
+ * If `NODE_ENV` is not `"test"`, emits a single `console.warn` per instance (skipped when `process` is undefined).
4029
+ */
4030
+ get __testAccess() {
4031
+ const processEnv = typeof globalThis !== "undefined" ? globalThis.process?.env : void 0;
4032
+ if (processEnv !== void 0 && processEnv.NODE_ENV !== "test" && !__privateGet(this, _testAccessNonTestEnvWarned)) {
4033
+ __privateSet(this, _testAccessNonTestEnvWarned, true);
4034
+ console.warn('Warning: WikiMemory.__testAccess is intended for tests (NODE_ENV !== "test").');
4035
+ }
4036
+ return {
4037
+ embeddingService: this.embeddingService,
4038
+ importExportService: this.importExportService,
4039
+ ingestionService: this.ingestionService,
4040
+ maintenanceService: this.maintenanceService,
4041
+ retrievalService: this.retrievalService,
4042
+ searchService: this.searchService,
4043
+ writeService: this.writeService,
4044
+ promptService: this.promptService,
4045
+ entryRepo: this.entryRepo,
4046
+ metadataRepo: this.metadataRepo,
4047
+ jobManager: this.jobManager
4048
+ };
4049
+ }
4050
+ async setup() {
4051
+ const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
4052
+ await setupDatabase(this.db, this.prefix);
4053
+ let currentVersion;
4054
+ if (!entriesExistedBeforeSetup) {
4055
+ await this.metadataRepo.setMeta("schema_version", String(CURRENT_SCHEMA_VERSION), this.db);
4056
+ currentVersion = CURRENT_SCHEMA_VERSION;
4057
+ } else {
4058
+ const schemaVersionValue = await this.metadataRepo.getMeta("schema_version");
4059
+ if (schemaVersionValue) {
4060
+ currentVersion = parseInt(schemaVersionValue, 10);
4061
+ if (!Number.isFinite(currentVersion)) currentVersion = 0;
4062
+ } else {
4063
+ const ftsDdl = await this.metadataRepo.getTableDdl(`${this.prefix}entries_fts`);
4064
+ const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsDdl ?? "");
4065
+ currentVersion = hasPorter ? 1 : 0;
3453
4066
  }
3454
4067
  }
3455
- for (const fact of bundle.facts) {
3456
- const blobData = factsWithPreservedBlob.get(fact.id);
3457
- if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
3458
- try {
3459
- const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
3460
- await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
3461
- } catch (hookErr) {
3462
- console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
3463
- }
4068
+ for (const migration of MIGRATIONS) {
4069
+ if (migration.version > currentVersion) {
4070
+ await migration.run(this.db, this.prefix);
4071
+ await this.metadataRepo.setMeta("schema_version", String(migration.version), this.db);
4072
+ currentVersion = migration.version;
3464
4073
  }
3465
4074
  }
3466
- for (const factId of softDeletedFactIds) {
3467
- if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
3468
- try {
3469
- await this._notifyEmbeddingPersisted(entityId, factId, null);
3470
- } catch (hookErr) {
3471
- console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
3472
- }
4075
+ if (entriesExistedBeforeSetup) {
4076
+ const schemaVersionCheck = await this.metadataRepo.getMeta("schema_version");
4077
+ if (!schemaVersionCheck) {
4078
+ await this.metadataRepo.setMeta("schema_version", String(currentVersion), this.db);
3473
4079
  }
3474
4080
  }
3475
- try {
3476
- const canonicalDimValue = await this.metadataRepo.getMeta("embedding_dimension");
3477
- const canonicalDim = canonicalDimValue ? parseInt(canonicalDimValue, 10) : null;
3478
- if (preservedBlobDims.size === 1) {
3479
- const preservedDim = [...preservedBlobDims][0];
3480
- if (canonicalDim === null || canonicalDim === preservedDim) {
3481
- await this.storeEmbeddingDimension(preservedDim);
3482
- const staleMismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
3483
- if (staleMismatchValue && parseInt(staleMismatchValue, 10) !== preservedDim) {
3484
- await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(preservedDim), this.db);
3485
- }
3486
- await this._reconcileEmbeddingDimension();
3487
- } else {
3488
- await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
3489
- }
3490
- } else if (preservedBlobDims.size > 1) {
3491
- if (canonicalDim === null) {
3492
- const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
3493
- await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
3494
- await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(sortedPreservedBlobDims[0]), this.db);
3495
- } else {
3496
- await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
4081
+ if (entriesExistedBeforeSetup) {
4082
+ await this.importExportService.assertNoLegacySourceTypes();
4083
+ }
4084
+ const rows = await this.entryRepo.findRowsForSourceRefMigration();
4085
+ await this.db.withTransactionAsync(async (tx) => {
4086
+ for (const row of rows) {
4087
+ const normalized = normalizeSourceRef(row.source_ref);
4088
+ if (normalized !== row.source_ref) {
4089
+ await this.entryRepo.updateSourceRefByRowid(row.rowid, normalized, tx);
3497
4090
  }
3498
4091
  }
3499
- } finally {
3500
- this.vectorCache.delete(entityId);
4092
+ });
4093
+ await this.searchService.sync();
4094
+ }
4095
+ async hasChanged(entityId, sourceRef, sourceHash) {
4096
+ const normalizedRef = normalizeSourceRef(sourceRef);
4097
+ if (!normalizedRef) {
4098
+ throw new Error(`Invalid sourceRef: "${sourceRef}"`);
4099
+ }
4100
+ const normalizedHash = normalizeSourceHash(sourceHash);
4101
+ if (!normalizedHash) {
4102
+ throw new Error(`Invalid sourceHash: must be a 64-character hex string (normalized to lowercase)`);
3501
4103
  }
4104
+ const storedHash = await this.entryRepo.findLatestSourceHash(entityId, normalizedRef);
4105
+ if (storedHash === null) return true;
4106
+ const normalizedStoredHash = normalizeSourceHash(storedHash);
4107
+ return normalizedStoredHash !== normalizedHash;
4108
+ }
4109
+ async runPrune(entityId, options) {
4110
+ return this.maintenanceService.runPrune(entityId, options);
4111
+ }
4112
+ async read(entityId, query, options) {
4113
+ return this.retrievalService.read(entityId, query, options);
4114
+ }
4115
+ async getMemoryBundle(entityId) {
4116
+ return this.importExportService.getFullBundle(entityId, { maxEvents: 10 });
4117
+ }
4118
+ async write(entityId, event) {
4119
+ return this.writeService.write(entityId, event);
4120
+ }
4121
+ /**
4122
+ * @param options.promptOverride - Applies only to this manual call. Does NOT affect
4123
+ * WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
4124
+ * set `options.config.prompts.librarianSystemPrompt` at WikiMemory construction time.
4125
+ */
4126
+ async runLibrarian(entityId, options) {
4127
+ return this.maintenanceService.runLibrarian(entityId, options);
4128
+ }
4129
+ /**
4130
+ * @param options.promptOverride - Applies only to this manual call. Does NOT affect
4131
+ * WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
4132
+ * set `options.config.prompts.healSystemPrompt` at WikiMemory construction time.
4133
+ */
4134
+ async runHeal(entityId, options) {
4135
+ return this.maintenanceService.runHeal(entityId, options);
4136
+ }
4137
+ async runReembed(entityId, opts) {
4138
+ return this.maintenanceService.runReembed(entityId, opts);
4139
+ }
4140
+ getEntityStatus(entityId) {
4141
+ return this.jobManager.getEntityStatus(entityId);
4142
+ }
4143
+ subscribeEntityStatus(entityId, callback) {
4144
+ return this.jobManager.subscribeEntityStatus(entityId, callback);
4145
+ }
4146
+ clearVectorCache() {
4147
+ this.searchService.evictCache();
4148
+ }
4149
+ async exportDump(entityIds) {
4150
+ return this.importExportService.exportDump(entityIds);
4151
+ }
4152
+ async importDump(dump, opts) {
4153
+ return this.importExportService.importDump(dump, opts);
3502
4154
  }
3503
4155
  async forget(entityId, params) {
3504
- let blockingOperation = null;
3505
- if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
3506
- blockingOperation = "librarian";
3507
- } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
3508
- blockingOperation = "heal";
3509
- } else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3510
- blockingOperation = "prune";
3511
- } else if (this._isReembedActive(entityId)) {
3512
- blockingOperation = "reembed";
3513
- } else if (this._isIngestActiveFor(entityId)) {
3514
- blockingOperation = "ingest";
3515
- } else if (this._isImportActiveFor(entityId)) {
3516
- blockingOperation = "import";
3517
- } else if (this._isForgetActiveFor(entityId)) {
3518
- blockingOperation = "forget";
3519
- }
3520
- if (blockingOperation !== null) {
3521
- throw new WikiBusyError(blockingOperation, entityId);
3522
- }
3523
- const forgetKey = this._forgetKey(entityId);
3524
- this.activeMaintenanceJobs.add(forgetKey);
3525
- try {
3526
- const now = Date.now();
3527
- let deletedEntries = 0;
3528
- let deletedTasks = 0;
3529
- const deletedEntryIds = [];
3530
- await this.db.withTransactionAsync(async (tx) => {
3531
- if (params.clearAll) {
3532
- deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
3533
- const entriesRes = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
3534
- const tasksRes = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
3535
- await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
3536
- deletedEntries = entriesRes;
3537
- deletedTasks = tasksRes;
3538
- } else {
3539
- const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
3540
- const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
3541
- if (hasIdSelectors && hasSourceSelectors) {
3542
- throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
3543
- }
3544
- const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
3545
- if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
3546
- const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
3547
- if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
3548
- if (params.entryId) {
3549
- const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
3550
- if (entryId) deletedEntryIds.push(entryId);
3551
- }
3552
- if (sourceRef || sourceHash) {
3553
- deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
3554
- }
3555
- const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
3556
- const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
3557
- const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
3558
- const [entryResult, taskResult, refResult] = await Promise.all([
3559
- entryPromise ?? Promise.resolve(false),
3560
- taskDeletedPromise ?? Promise.resolve(false),
3561
- refPromise ?? Promise.resolve(0)
3562
- ]);
3563
- if (entryResult) deletedEntries++;
3564
- if (taskResult) deletedTasks++;
3565
- deletedEntries += refResult;
3566
- }
3567
- });
3568
- await this.rebuildMiniSearchIndex(entityId);
3569
- this.vectorCache.delete(entityId);
3570
- const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
3571
- for (const factId of uniqueDeletedIds) {
3572
- try {
3573
- await this._notifyEmbeddingPersistedOrThrow(entityId, factId, null);
3574
- } catch (hookErr) {
3575
- const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
3576
- if (isTimeout) {
3577
- throw new Error(
3578
- `forget(${entityId}/${factId}) failed: ${hookErr.message}`
3579
- );
3580
- }
3581
- const errMsg = hookErr?.message ?? "";
3582
- const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
3583
- if (isValidationError) {
3584
- throw new Error(
3585
- `forget(${entityId}/${factId}) failed: ${errMsg}`,
3586
- { cause: hookErr }
3587
- );
3588
- }
3589
- throw new Error(
3590
- `forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`,
3591
- { cause: this._sanitizeRankerError(hookErr) }
3592
- );
3593
- }
3594
- }
3595
- return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
3596
- } finally {
3597
- this.activeMaintenanceJobs.delete(forgetKey);
3598
- }
4156
+ return this.maintenanceService.forget(entityId, params);
3599
4157
  }
4158
+ /**
4159
+ * @param params.promptOverride - Overrides the system prompt for this ingest call only.
4160
+ * For persistent customization, set `options.config.prompts.ingestSystemPrompt` at
4161
+ * WikiMemory construction time.
4162
+ */
3600
4163
  async ingestDocument(entityId, params) {
3601
- const sourceRef = normalizeSourceRef(params.sourceRef);
3602
- if (!sourceRef) throw new Error("Invalid sourceRef");
3603
- const sourceHash = normalizeSourceHash(params.sourceHash);
3604
- if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
3605
- const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
3606
- const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
3607
- const chunkOverlap = Math.min(
3608
- Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
3609
- maxChunkLength - 1
3610
- );
3611
- const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
3612
- const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
3613
- if (typeof params.documentChunk !== "string") {
3614
- throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
3615
- }
3616
- const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
3617
- if (this.activeIngestJobs.has(jobKey)) {
3618
- throw new WikiBusyError("ingest", entityId);
3619
- }
3620
- if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
3621
- throw new WikiBusyError("prune", entityId);
3622
- }
3623
- if (this._isReembedActive(entityId)) {
3624
- throw new WikiBusyError("reembed", entityId);
3625
- }
3626
- if (this._isImportActiveFor(entityId)) {
3627
- throw new WikiBusyError("import", entityId);
3628
- }
3629
- if (this._isForgetActiveFor(entityId)) {
3630
- throw new WikiBusyError("forget", entityId);
3631
- }
3632
- this.activeIngestJobs.add(jobKey);
3633
- this._notifyStatusSubscribers(entityId);
3634
- try {
3635
- const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
3636
- if (chunks.length === 0) {
3637
- return { truncated: false, chunks: 0 };
3638
- }
3639
- const chunkResults = await withConcurrency(
3640
- chunks.map((chunk) => async () => {
3641
- const userPrompt = `Document Chunk:
3642
- ${chunk}`;
3643
- const responseText = await this.options.llmProvider.generateText({
3644
- systemPrompt: INGEST_SYSTEM_PROMPT,
3645
- userPrompt
3646
- });
3647
- const result = parseJsonResponse(responseText);
3648
- return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
3649
- }),
3650
- chunkConcurrency
3651
- );
3652
- const seen = /* @__PURE__ */ new Set();
3653
- const allValidFacts = [];
3654
- for (const facts of chunkResults) {
3655
- for (const fact of facts) {
3656
- const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
3657
- if (!seen.has(normalized)) {
3658
- seen.add(normalized);
3659
- allValidFacts.push(fact);
3660
- }
3661
- }
3662
- }
3663
- const now = Date.now();
3664
- const insertedFacts = [];
3665
- const deletedSourceFactIds = [];
3666
- await this.db.withTransactionAsync(async (tx) => {
3667
- deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
3668
- await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
3669
- for (const fact of allValidFacts) {
3670
- const id = generateId("fact_");
3671
- const wikiFact = {
3672
- id,
3673
- entity_id: entityId,
3674
- title: fact.title,
3675
- body: fact.body,
3676
- tags: fact.tags,
3677
- confidence: fact.confidence,
3678
- source_type: "immutable_document",
3679
- source_hash: sourceHash,
3680
- source_ref: sourceRef,
3681
- created_at: now,
3682
- updated_at: now,
3683
- last_accessed_at: null,
3684
- access_count: 0,
3685
- deleted_at: null
3686
- };
3687
- await this.entryRepo.upsert(wikiFact, tx);
3688
- insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
3689
- }
3690
- });
3691
- await this.rebuildMiniSearchIndex(entityId);
3692
- this.vectorCache.delete(entityId);
3693
- const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
3694
- for (const factId of uniqueDeletedSourceFactIds) {
3695
- try {
3696
- await this._notifyEmbeddingPersisted(entityId, factId, null);
3697
- } catch (hookErr) {
3698
- console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
3699
- }
3700
- }
3701
- for (const fact of insertedFacts) {
3702
- await this.embedFact(fact);
4164
+ return this.ingestionService.ingestDocument(entityId, params);
4165
+ }
4166
+ /**
4167
+ * Returns up to `limit` unprocessed outbox events, oldest first.
4168
+ * Works regardless of the `enableOutbox` value — this allows draining the outbox after it has been disabled.
4169
+ */
4170
+ async getUnprocessedOutboxEvents(limit = 100) {
4171
+ if (Number.isFinite(limit) && limit <= 0) return [];
4172
+ const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.trunc(limit) : 100;
4173
+ const rows = await this.outboxRepo.fetchPending(safeLimit);
4174
+ return rows.map((row) => {
4175
+ let payload = null;
4176
+ try {
4177
+ payload = JSON.parse(row.payload);
4178
+ } catch {
3703
4179
  }
3704
- this.vectorCache.delete(entityId);
3705
- return { truncated, chunks: chunks.length };
3706
- } finally {
3707
- this.activeIngestJobs.delete(jobKey);
3708
- this._notifyStatusSubscribers(entityId);
3709
- }
4180
+ return { ...row, payload };
4181
+ });
4182
+ }
4183
+ /**
4184
+ * Deletes the given event IDs from the outbox table.
4185
+ * Call after successfully committing events to the external system.
4186
+ */
4187
+ async markOutboxEventsProcessed(eventIds) {
4188
+ await this.outboxRepo.acknowledge(eventIds);
3710
4189
  }
3711
4190
  };
3712
- /**
3713
- * Maximum number of entities whose parsed embedding vectors are held in
3714
- * memory. This cap is intentionally conservative so the cache remains safe
3715
- * on memory-constrained runtimes (e.g., mobile/Expo).
3716
- */
3717
- _WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
3718
- /**
3719
- * Maximum number of fact vectors cached per entity. Keep this high enough to
3720
- * preserve the parsed-embedding reuse optimization for common mid-sized
3721
- * entities while still maintaining a bounded memory footprint.
3722
- */
3723
- _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
3724
- var WikiMemory = _WikiMemory;
4191
+ _testAccessNonTestEnvWarned = new WeakMap();
3725
4192
 
3726
4193
  // src/utils/formatContext.ts
3727
4194
  function validateMaxOption(value, name) {
@@ -3989,6 +4456,8 @@ function createWiki(db, options) {
3989
4456
  }
3990
4457
 
3991
4458
  exports.DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT = DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT;
4459
+ exports.HOOK_TIMEOUT_MARKER = HOOK_TIMEOUT_MARKER;
4460
+ exports.PromptService = PromptService;
3992
4461
  exports.PrunePartialFailureError = PrunePartialFailureError;
3993
4462
  exports.WikiBusyError = WikiBusyError;
3994
4463
  exports.WikiMemory = WikiMemory;