@equationalapplications/core-llm-wiki 4.7.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/dist/chunk-6FWG2DG4.mjs +2547 -0
- package/dist/chunk-6FWG2DG4.mjs.map +1 -0
- package/dist/index.d.mts +4 -530
- package/dist/index.d.ts +4 -530
- package/dist/index.js +2489 -2020
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +200 -2274
- package/dist/index.mjs.map +1 -1
- package/dist/testing-CDIDE4Jd.d.mts +1141 -0
- package/dist/testing-CDIDE4Jd.d.ts +1141 -0
- package/dist/testing.d.mts +2 -0
- package/dist/testing.d.ts +2 -0
- package/dist/testing.js +2552 -0
- package/dist/testing.js.map +1 -0
- package/dist/testing.mjs +3 -0
- package/dist/testing.mjs.map +1 -0
- package/package.json +6 -1
package/dist/index.js
CHANGED
|
@@ -6,6 +6,14 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
|
6
6
|
|
|
7
7
|
var MiniSearch__default = /*#__PURE__*/_interopDefault(MiniSearch);
|
|
8
8
|
|
|
9
|
+
var __typeError = (msg) => {
|
|
10
|
+
throw TypeError(msg);
|
|
11
|
+
};
|
|
12
|
+
var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
|
|
13
|
+
var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
|
|
14
|
+
var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
|
|
15
|
+
var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);
|
|
16
|
+
|
|
9
17
|
// src/db/schema.ts
|
|
10
18
|
async function setupDatabase(db, prefix) {
|
|
11
19
|
await db.execAsync(`
|
|
@@ -81,6 +89,9 @@ async function setupDatabase(db, prefix) {
|
|
|
81
89
|
|
|
82
90
|
CREATE INDEX IF NOT EXISTS ${prefix}outbox_entity_id_created_at
|
|
83
91
|
ON ${prefix}outbox (entity_id, created_at);
|
|
92
|
+
|
|
93
|
+
CREATE INDEX IF NOT EXISTS ${prefix}outbox_created_at
|
|
94
|
+
ON ${prefix}outbox (created_at);
|
|
84
95
|
`);
|
|
85
96
|
}
|
|
86
97
|
|
|
@@ -155,28 +166,6 @@ for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
|
155
166
|
}
|
|
156
167
|
var CURRENT_SCHEMA_VERSION = MIGRATIONS.length > 0 ? MIGRATIONS[MIGRATIONS.length - 1].version : 0;
|
|
157
168
|
|
|
158
|
-
// src/types.ts
|
|
159
|
-
var WikiBusyError = class extends Error {
|
|
160
|
-
constructor(operation, entityId) {
|
|
161
|
-
super(`${operation} already running for entity ${entityId}`);
|
|
162
|
-
this.name = "WikiBusyError";
|
|
163
|
-
this.operation = operation;
|
|
164
|
-
this.entityId = entityId;
|
|
165
|
-
}
|
|
166
|
-
};
|
|
167
|
-
var PrunePartialFailureError = class extends Error {
|
|
168
|
-
constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
|
|
169
|
-
super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
|
|
170
|
-
this.name = "PrunePartialFailureError";
|
|
171
|
-
this.deleted = deleted;
|
|
172
|
-
this.failedAt = failedAt;
|
|
173
|
-
this.remaining = remaining;
|
|
174
|
-
this.deletedTasks = deletedTasks;
|
|
175
|
-
this.deletedEvents = deletedEvents;
|
|
176
|
-
this.cause = cause;
|
|
177
|
-
}
|
|
178
|
-
};
|
|
179
|
-
|
|
180
169
|
// src/repositories/BaseRepository.ts
|
|
181
170
|
var BaseRepository = class {
|
|
182
171
|
constructor(db, prefix) {
|
|
@@ -220,6 +209,28 @@ function mapRowToFact(row) {
|
|
|
220
209
|
access_count: Number(row.access_count ?? 0)
|
|
221
210
|
};
|
|
222
211
|
}
|
|
212
|
+
function normalizeEmbeddingBlobValue(blob) {
|
|
213
|
+
if (blob instanceof Uint8Array) return blob;
|
|
214
|
+
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
215
|
+
const obj = blob;
|
|
216
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
217
|
+
return new Uint8Array(obj["data"]);
|
|
218
|
+
}
|
|
219
|
+
const entries = Object.keys(obj);
|
|
220
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
221
|
+
const len = entries.length;
|
|
222
|
+
const arr = new Uint8Array(len);
|
|
223
|
+
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
224
|
+
return arr;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
return null;
|
|
228
|
+
}
|
|
229
|
+
function mapRowToFactWithBlobs(row) {
|
|
230
|
+
const base = mapRowToFact(row);
|
|
231
|
+
const embeddingBlob = normalizeEmbeddingBlobValue(row.embedding_blob);
|
|
232
|
+
return embeddingBlob ? { ...base, embedding_blob: embeddingBlob } : base;
|
|
233
|
+
}
|
|
223
234
|
var EntryRepository = class extends BaseRepository {
|
|
224
235
|
constructor(db, prefix, outbox) {
|
|
225
236
|
super(db, prefix);
|
|
@@ -315,21 +326,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
315
326
|
* Normalize an embedding blob value to Uint8Array or null.
|
|
316
327
|
*/
|
|
317
328
|
normalizeEmbeddingBlob(blob) {
|
|
318
|
-
|
|
319
|
-
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
320
|
-
const obj = blob;
|
|
321
|
-
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
322
|
-
return new Uint8Array(obj["data"]);
|
|
323
|
-
}
|
|
324
|
-
const entries = Object.keys(obj);
|
|
325
|
-
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
326
|
-
const len = entries.length;
|
|
327
|
-
const arr = new Uint8Array(len);
|
|
328
|
-
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
329
|
-
return arr;
|
|
330
|
-
}
|
|
331
|
-
}
|
|
332
|
-
return null;
|
|
329
|
+
return normalizeEmbeddingBlobValue(blob);
|
|
333
330
|
}
|
|
334
331
|
/**
|
|
335
332
|
* Fetch existing rows by IDs and return id/entity_id/updated_at for import collision resolution.
|
|
@@ -509,7 +506,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
509
506
|
}
|
|
510
507
|
/**
|
|
511
508
|
* Fetch recent non-deleted entries for an entity (limited), ordered by updated_at DESC.
|
|
512
|
-
* Used by
|
|
509
|
+
* Used by MaintenanceService.doRunLibrarian().
|
|
513
510
|
*/
|
|
514
511
|
async findRecentByEntityId(entityId, limit, tx) {
|
|
515
512
|
const executor = this.getExecutor(tx);
|
|
@@ -519,6 +516,18 @@ var EntryRepository = class extends BaseRepository {
|
|
|
519
516
|
);
|
|
520
517
|
return rows.map(mapRowToFact);
|
|
521
518
|
}
|
|
519
|
+
/**
|
|
520
|
+
* Fetch all non-deleted entries for an entity with embedding blobs preserved.
|
|
521
|
+
* Used by ImportExportService for export/import round-tripping.
|
|
522
|
+
*/
|
|
523
|
+
async findAllByEntityIdWithBlobs(entityId, tx) {
|
|
524
|
+
const executor = this.getExecutor(tx);
|
|
525
|
+
const rows = await executor.getAllAsync(
|
|
526
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
527
|
+
[entityId]
|
|
528
|
+
);
|
|
529
|
+
return rows.map(mapRowToFactWithBlobs);
|
|
530
|
+
}
|
|
522
531
|
/**
|
|
523
532
|
* Count non-deleted entries for the given entities whose embedding_blob dimension
|
|
524
533
|
* doesn't match queryVecLength. Used by read() to detect model-switch mismatches.
|
|
@@ -605,24 +614,19 @@ var EntryRepository = class extends BaseRepository {
|
|
|
605
614
|
}
|
|
606
615
|
/**
|
|
607
616
|
* Mark orphaned entries (never accessed, old) as deleted.
|
|
608
|
-
* Used by
|
|
617
|
+
* Used by MaintenanceService.doRunHeal().
|
|
609
618
|
*/
|
|
610
619
|
async markOrphaned(entityId, orphanThreshold, tx) {
|
|
611
620
|
const executor = this.getExecutor(tx);
|
|
612
621
|
const now = Date.now();
|
|
613
|
-
const
|
|
614
|
-
`SELECT id FROM ${this.prefix}entries
|
|
615
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL`,
|
|
616
|
-
[entityId, orphanThreshold]
|
|
617
|
-
);
|
|
618
|
-
if (orphanedRows.length === 0) return 0;
|
|
619
|
-
const result = await executor.runAsync(
|
|
622
|
+
const updatedRows = await executor.getAllAsync(
|
|
620
623
|
`UPDATE ${this.prefix}entries
|
|
621
624
|
SET deleted_at = ?, updated_at = ?
|
|
622
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
625
|
+
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
626
|
+
RETURNING id`,
|
|
623
627
|
[now, now, entityId, orphanThreshold]
|
|
624
628
|
);
|
|
625
|
-
for (const row of
|
|
629
|
+
for (const row of updatedRows) {
|
|
626
630
|
await this.outbox.push({
|
|
627
631
|
entityId,
|
|
628
632
|
tableName: "entries",
|
|
@@ -631,11 +635,11 @@ var EntryRepository = class extends BaseRepository {
|
|
|
631
635
|
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
632
636
|
}, tx);
|
|
633
637
|
}
|
|
634
|
-
return
|
|
638
|
+
return updatedRows.map((r) => r.id);
|
|
635
639
|
}
|
|
636
640
|
/**
|
|
637
641
|
* Downgrade stale inferred entries to 'tentative'.
|
|
638
|
-
* Used by
|
|
642
|
+
* Used by MaintenanceService.doRunHeal().
|
|
639
643
|
*/
|
|
640
644
|
async downgradeStaleInferred(entityId, staleThreshold, tx) {
|
|
641
645
|
const executor = this.getExecutor(tx);
|
|
@@ -667,7 +671,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
667
671
|
}
|
|
668
672
|
/**
|
|
669
673
|
* Downgrade specific entries to 'tentative' by IDs.
|
|
670
|
-
* Used by
|
|
674
|
+
* Used by MaintenanceService.doRunHeal().
|
|
671
675
|
*/
|
|
672
676
|
async downgradeByIds(ids, entityId, tx) {
|
|
673
677
|
if (ids.length === 0) return;
|
|
@@ -690,7 +694,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
690
694
|
}
|
|
691
695
|
/**
|
|
692
696
|
* Soft-delete specific entries by IDs.
|
|
693
|
-
* Used by
|
|
697
|
+
* Used by MaintenanceService.doRunHeal().
|
|
694
698
|
*/
|
|
695
699
|
async softDeleteByIds(ids, entityId, tx) {
|
|
696
700
|
if (ids.length === 0) return;
|
|
@@ -926,12 +930,18 @@ function generateId(prefix = "") {
|
|
|
926
930
|
|
|
927
931
|
// src/repositories/OutboxRepository.ts
|
|
928
932
|
var OutboxRepository = class extends BaseRepository {
|
|
933
|
+
constructor(db, prefix, enableOutbox = false) {
|
|
934
|
+
super(db, prefix);
|
|
935
|
+
this.enableOutbox = enableOutbox;
|
|
936
|
+
}
|
|
929
937
|
/**
|
|
930
938
|
* Insert a new outbox event within the provided transaction.
|
|
939
|
+
* No-op when enableOutbox is false.
|
|
931
940
|
* `tx` is required — callers must always pass the active transaction
|
|
932
941
|
* so the write is atomic with the main table mutation.
|
|
933
942
|
*/
|
|
934
943
|
async push(params, tx) {
|
|
944
|
+
if (!this.enableOutbox) return;
|
|
935
945
|
const executor = this.getExecutor(tx);
|
|
936
946
|
const id = generateId("out_");
|
|
937
947
|
const now = Date.now();
|
|
@@ -942,12 +952,12 @@ var OutboxRepository = class extends BaseRepository {
|
|
|
942
952
|
);
|
|
943
953
|
}
|
|
944
954
|
/**
|
|
945
|
-
* Fetch pending outbox rows ordered by created_at ASC.
|
|
955
|
+
* Fetch pending outbox rows ordered by created_at ASC, rowid ASC.
|
|
946
956
|
* Reads directly from `this.db` (not a transaction).
|
|
947
957
|
*/
|
|
948
958
|
async fetchPending(limit = 50) {
|
|
949
959
|
return this.db.getAllAsync(
|
|
950
|
-
`SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC LIMIT ?`,
|
|
960
|
+
`SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC, rowid ASC LIMIT ?`,
|
|
951
961
|
[limit]
|
|
952
962
|
);
|
|
953
963
|
}
|
|
@@ -958,11 +968,15 @@ var OutboxRepository = class extends BaseRepository {
|
|
|
958
968
|
*/
|
|
959
969
|
async acknowledge(ids) {
|
|
960
970
|
if (ids.length === 0) return;
|
|
961
|
-
const
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
971
|
+
const chunkSize = 500;
|
|
972
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
973
|
+
const chunk = ids.slice(i, i + chunkSize);
|
|
974
|
+
const placeholders = chunk.map(() => "?").join(", ");
|
|
975
|
+
await this.db.runAsync(
|
|
976
|
+
`DELETE FROM ${this.prefix}outbox WHERE id IN (${placeholders})`,
|
|
977
|
+
chunk
|
|
978
|
+
);
|
|
979
|
+
}
|
|
966
980
|
}
|
|
967
981
|
};
|
|
968
982
|
|
|
@@ -1420,29 +1434,6 @@ var MetadataRepository = class extends BaseRepository {
|
|
|
1420
1434
|
}
|
|
1421
1435
|
};
|
|
1422
1436
|
|
|
1423
|
-
// src/prompts.ts
|
|
1424
|
-
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
1425
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1426
|
-
{
|
|
1427
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
1428
|
-
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
1429
|
-
}
|
|
1430
|
-
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
1431
|
-
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
|
|
1432
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1433
|
-
{
|
|
1434
|
-
"downgraded": ["string (fact IDs)"],
|
|
1435
|
-
"deleted": ["string (fact IDs)"],
|
|
1436
|
-
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1437
|
-
}
|
|
1438
|
-
Do not return markdown, just raw JSON.`;
|
|
1439
|
-
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
1440
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1441
|
-
{
|
|
1442
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1443
|
-
}
|
|
1444
|
-
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
1445
|
-
|
|
1446
1437
|
// src/utils/cosine.ts
|
|
1447
1438
|
function cosineSimilarity(a, b) {
|
|
1448
1439
|
let dot = 0, normA = 0, normB = 0;
|
|
@@ -1484,284 +1475,137 @@ function parseEmbedding(blob, text) {
|
|
|
1484
1475
|
return null;
|
|
1485
1476
|
}
|
|
1486
1477
|
|
|
1487
|
-
// src/
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1478
|
+
// src/services/SearchService.ts
|
|
1479
|
+
var _SearchService = class _SearchService {
|
|
1480
|
+
constructor(entryRepo) {
|
|
1481
|
+
this.entryRepo = entryRepo;
|
|
1482
|
+
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
1483
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
1484
|
+
this.miniSearch = new MiniSearch__default.default({
|
|
1485
|
+
fields: ["title", "body", "tags"],
|
|
1486
|
+
storeFields: ["entity_id"],
|
|
1487
|
+
searchOptions: {
|
|
1488
|
+
boost: { title: 2 },
|
|
1489
|
+
fuzzy: 0.2,
|
|
1490
|
+
prefix: true
|
|
1491
|
+
}
|
|
1492
|
+
});
|
|
1496
1493
|
}
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1494
|
+
/**
|
|
1495
|
+
* Rebuilds the search index and clears the vector cache for a given entity.
|
|
1496
|
+
* A direct replacement for manually syncing state after a DB transaction.
|
|
1497
|
+
*/
|
|
1498
|
+
async sync(entityId) {
|
|
1499
|
+
await this.rebuildIndex(entityId);
|
|
1500
|
+
this.evictCache(entityId);
|
|
1501
|
+
}
|
|
1502
|
+
/**
|
|
1503
|
+
* Clears the parsed vector cache. Useful for mid-loop flush guarantees
|
|
1504
|
+
* or memory pressure evictions.
|
|
1505
|
+
*/
|
|
1506
|
+
evictCache(entityId) {
|
|
1507
|
+
if (entityId) {
|
|
1508
|
+
this.vectorCache.delete(entityId);
|
|
1506
1509
|
} else {
|
|
1507
|
-
|
|
1510
|
+
this.vectorCache.clear();
|
|
1508
1511
|
}
|
|
1509
1512
|
}
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
function shouldExposeReadMetadata(entityId) {
|
|
1518
|
-
return Array.isArray(entityId);
|
|
1519
|
-
}
|
|
1520
|
-
|
|
1521
|
-
// src/WikiMemory.ts
|
|
1522
|
-
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
1523
|
-
function parseJsonResponse(text) {
|
|
1524
|
-
const firstBrace = text.indexOf("{");
|
|
1525
|
-
const firstBracket = text.indexOf("[");
|
|
1526
|
-
let start;
|
|
1527
|
-
let openChar;
|
|
1528
|
-
let closeChar;
|
|
1529
|
-
if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
|
|
1530
|
-
start = firstBrace;
|
|
1531
|
-
openChar = "{";
|
|
1532
|
-
closeChar = "}";
|
|
1533
|
-
} else if (firstBracket !== -1) {
|
|
1534
|
-
start = firstBracket;
|
|
1535
|
-
openChar = "[";
|
|
1536
|
-
closeChar = "]";
|
|
1537
|
-
} else {
|
|
1538
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
1513
|
+
/**
|
|
1514
|
+
* Fully resets the search service.
|
|
1515
|
+
*/
|
|
1516
|
+
clearAll() {
|
|
1517
|
+
this.vectorCache.clear();
|
|
1518
|
+
this.miniSearch.removeAll();
|
|
1519
|
+
this.miniSearchEntryIdsByEntity.clear();
|
|
1539
1520
|
}
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
const
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1521
|
+
/**
|
|
1522
|
+
* Executes a keyword search against the active MiniSearch index.
|
|
1523
|
+
*/
|
|
1524
|
+
searchKeyword(query, entityIds, limit) {
|
|
1525
|
+
const entityIdSet = new Set(entityIds);
|
|
1526
|
+
const results = this.miniSearch.search(query, {
|
|
1527
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1528
|
+
combineWith: "OR"
|
|
1529
|
+
});
|
|
1530
|
+
return results.slice(0, limit);
|
|
1531
|
+
}
|
|
1532
|
+
/**
|
|
1533
|
+
* Pre-fetches MiniSearch scores for candidate hydration, used during hybrid weighting.
|
|
1534
|
+
*/
|
|
1535
|
+
getMiniSearchScores(query, entityIds, preFilterLimit) {
|
|
1536
|
+
const entityIdSet = new Set(entityIds);
|
|
1537
|
+
let results = this.miniSearch.search(query, {
|
|
1538
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1539
|
+
combineWith: "OR"
|
|
1540
|
+
});
|
|
1541
|
+
if (preFilterLimit !== void 0) {
|
|
1542
|
+
results = results.slice(0, preFilterLimit);
|
|
1553
1543
|
}
|
|
1554
|
-
if (
|
|
1555
|
-
|
|
1556
|
-
|
|
1544
|
+
if (results.length === 0) return /* @__PURE__ */ new Map();
|
|
1545
|
+
const maxMsScore = Math.max(1, results[0]?.score ?? 1);
|
|
1546
|
+
return new Map(results.map((r) => [r.id, r.score / maxMsScore]));
|
|
1547
|
+
}
|
|
1548
|
+
/**
|
|
1549
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
1550
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
1551
|
+
*/
|
|
1552
|
+
async rankSemantic(args) {
|
|
1553
|
+
const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1554
|
+
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
1555
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
1556
|
+
const tooLarge = populateCache && candidateRows.length > _SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1557
|
+
if (tooLarge && entityCache) {
|
|
1558
|
+
this.vectorCache.delete(entityId);
|
|
1559
|
+
entityCache = void 0;
|
|
1557
1560
|
}
|
|
1558
|
-
|
|
1559
|
-
if (
|
|
1560
|
-
|
|
1561
|
-
continue;
|
|
1561
|
+
const canCache = populateCache && !tooLarge;
|
|
1562
|
+
if (canCache && !entityCache) {
|
|
1563
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
1562
1564
|
}
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
if (
|
|
1566
|
-
|
|
1567
|
-
|
|
1565
|
+
const scored = candidateRows.map((row) => {
|
|
1566
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
1567
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
1568
|
+
entityCache.set(row.id, vector);
|
|
1569
|
+
}
|
|
1570
|
+
let score = 0;
|
|
1571
|
+
if (vector && vector.length === queryVec.length) {
|
|
1572
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1573
|
+
if (weight !== void 0) {
|
|
1574
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1575
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1576
|
+
} else {
|
|
1577
|
+
score = cosSim;
|
|
1578
|
+
}
|
|
1579
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
1580
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1581
|
+
score = (1 - weight) * kwScore;
|
|
1582
|
+
} else {
|
|
1583
|
+
score = -2;
|
|
1584
|
+
}
|
|
1585
|
+
return {
|
|
1586
|
+
id: row.id,
|
|
1587
|
+
entity_id: row.entity_id,
|
|
1588
|
+
score,
|
|
1589
|
+
updated_at: row.updated_at,
|
|
1590
|
+
access_count: row.access_count
|
|
1591
|
+
};
|
|
1592
|
+
});
|
|
1593
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
1594
|
+
if (!this.vectorCache.has(entityId)) {
|
|
1595
|
+
if (this.vectorCache.size >= _SearchService.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1596
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
1597
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1598
|
+
}
|
|
1599
|
+
this.vectorCache.set(entityId, entityCache);
|
|
1568
1600
|
}
|
|
1569
1601
|
}
|
|
1602
|
+
if (!skipSort) {
|
|
1603
|
+
this._tieBreakSort(scored);
|
|
1604
|
+
}
|
|
1605
|
+
return scored.slice(0, limit);
|
|
1570
1606
|
}
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
}
|
|
1574
|
-
function safeSlice(value, start, end) {
|
|
1575
|
-
const length = value.length;
|
|
1576
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
1577
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
1578
|
-
if (safeStart > safeEnd) {
|
|
1579
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
1580
|
-
}
|
|
1581
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
1582
|
-
safeStart--;
|
|
1583
|
-
}
|
|
1584
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
1585
|
-
safeEnd--;
|
|
1586
|
-
}
|
|
1587
|
-
return value.slice(safeStart, safeEnd);
|
|
1588
|
-
}
|
|
1589
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
1590
|
-
const text = input.trim();
|
|
1591
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
1592
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
1593
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
1594
|
-
}
|
|
1595
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
1596
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
1597
|
-
}
|
|
1598
|
-
const chunks = [];
|
|
1599
|
-
let truncated = false;
|
|
1600
|
-
let cursor = 0;
|
|
1601
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
1602
|
-
while (cursor < text.length) {
|
|
1603
|
-
const remaining = text.length - cursor;
|
|
1604
|
-
if (remaining <= maxChunkLength) {
|
|
1605
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
1606
|
-
break;
|
|
1607
|
-
}
|
|
1608
|
-
const windowEnd = cursor + maxChunkLength;
|
|
1609
|
-
const minSplit = cursor + halfMax;
|
|
1610
|
-
let splitPoint = -1;
|
|
1611
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
1612
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
1613
|
-
splitPoint = paraIdx + 2;
|
|
1614
|
-
}
|
|
1615
|
-
if (splitPoint === -1) {
|
|
1616
|
-
let lastTerm = -1;
|
|
1617
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
1618
|
-
const ch = text[i];
|
|
1619
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
1620
|
-
lastTerm = i + 2;
|
|
1621
|
-
}
|
|
1622
|
-
}
|
|
1623
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
1624
|
-
}
|
|
1625
|
-
if (splitPoint === -1) {
|
|
1626
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
1627
|
-
if (/\s/.test(text[i])) {
|
|
1628
|
-
splitPoint = i + 1;
|
|
1629
|
-
break;
|
|
1630
|
-
}
|
|
1631
|
-
}
|
|
1632
|
-
}
|
|
1633
|
-
if (splitPoint === -1) {
|
|
1634
|
-
truncated = true;
|
|
1635
|
-
splitPoint = windowEnd;
|
|
1636
|
-
}
|
|
1637
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
1638
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
1639
|
-
cursor = next;
|
|
1640
|
-
}
|
|
1641
|
-
return { chunks, truncated };
|
|
1642
|
-
}
|
|
1643
|
-
async function withConcurrency(tasks, limit) {
|
|
1644
|
-
const results = new Array(tasks.length);
|
|
1645
|
-
let index = 0;
|
|
1646
|
-
let failed = false;
|
|
1647
|
-
let firstError;
|
|
1648
|
-
async function worker() {
|
|
1649
|
-
while (index < tasks.length && !failed) {
|
|
1650
|
-
const i = index++;
|
|
1651
|
-
try {
|
|
1652
|
-
results[i] = await tasks[i]();
|
|
1653
|
-
} catch (e) {
|
|
1654
|
-
if (!failed) {
|
|
1655
|
-
failed = true;
|
|
1656
|
-
firstError = e;
|
|
1657
|
-
}
|
|
1658
|
-
return;
|
|
1659
|
-
}
|
|
1660
|
-
}
|
|
1661
|
-
}
|
|
1662
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
1663
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
1664
|
-
if (failed) throw firstError;
|
|
1665
|
-
return results;
|
|
1666
|
-
}
|
|
1667
|
-
function clip(value, max) {
|
|
1668
|
-
if (typeof value !== "string") return "";
|
|
1669
|
-
const s = value.trim();
|
|
1670
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
1671
|
-
}
|
|
1672
|
-
function validateTags(tags) {
|
|
1673
|
-
if (!Array.isArray(tags)) return [];
|
|
1674
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
1675
|
-
}
|
|
1676
|
-
function validateFact(fact) {
|
|
1677
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
1678
|
-
const title = clip(fact.title, 80);
|
|
1679
|
-
const body = clip(fact.body, 800);
|
|
1680
|
-
if (!title || !body) return null;
|
|
1681
|
-
let confidence = fact.confidence;
|
|
1682
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
1683
|
-
return {
|
|
1684
|
-
...fact,
|
|
1685
|
-
title,
|
|
1686
|
-
body,
|
|
1687
|
-
confidence,
|
|
1688
|
-
tags: validateTags(fact.tags)
|
|
1689
|
-
};
|
|
1690
|
-
}
|
|
1691
|
-
function validateTask(task) {
|
|
1692
|
-
if (typeof task?.description !== "string") return null;
|
|
1693
|
-
const description = clip(task.description, 200);
|
|
1694
|
-
if (!description) return null;
|
|
1695
|
-
let priority = task.priority;
|
|
1696
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
1697
|
-
return {
|
|
1698
|
-
...task,
|
|
1699
|
-
description,
|
|
1700
|
-
priority
|
|
1701
|
-
};
|
|
1702
|
-
}
|
|
1703
|
-
function normalizeSourceRef(value) {
|
|
1704
|
-
if (typeof value !== "string") return null;
|
|
1705
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
1706
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
1707
|
-
}
|
|
1708
|
-
function normalizeSourceHash(value) {
|
|
1709
|
-
if (typeof value !== "string") return null;
|
|
1710
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
1711
|
-
}
|
|
1712
|
-
function titleTokens(title) {
|
|
1713
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
1714
|
-
}
|
|
1715
|
-
function jaccardScore(a, b) {
|
|
1716
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
1717
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
1718
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
1719
|
-
return intersection.size / union.size;
|
|
1720
|
-
}
|
|
1721
|
-
var FUZZY_THRESHOLD = 0.5;
|
|
1722
|
-
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
1723
|
-
var _WikiMemory = class _WikiMemory {
|
|
1724
|
-
constructor(db, options) {
|
|
1725
|
-
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
1726
|
-
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
1727
|
-
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
1728
|
-
this.miniSearch = new MiniSearch__default.default({
|
|
1729
|
-
fields: ["title", "body", "tags"],
|
|
1730
|
-
storeFields: ["entity_id"],
|
|
1731
|
-
searchOptions: {
|
|
1732
|
-
boost: { title: 2 },
|
|
1733
|
-
fuzzy: 0.2,
|
|
1734
|
-
prefix: true
|
|
1735
|
-
}
|
|
1736
|
-
});
|
|
1737
|
-
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
1738
|
-
this.vectorCache = /* @__PURE__ */ new Map();
|
|
1739
|
-
this.db = db;
|
|
1740
|
-
this.options = options;
|
|
1741
|
-
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
1742
|
-
this.outboxRepo = new OutboxRepository(db, this.prefix);
|
|
1743
|
-
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
1744
|
-
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
1745
|
-
this.eventRepo = new EventRepository(db, this.prefix);
|
|
1746
|
-
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
1747
|
-
}
|
|
1748
|
-
normalizeMiniSearchRow(row) {
|
|
1749
|
-
return {
|
|
1750
|
-
id: row.id,
|
|
1751
|
-
entity_id: row.entity_id,
|
|
1752
|
-
title: row.title,
|
|
1753
|
-
body: row.body,
|
|
1754
|
-
tags: (() => {
|
|
1755
|
-
try {
|
|
1756
|
-
const parsed = JSON.parse(row.tags);
|
|
1757
|
-
return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
|
|
1758
|
-
} catch {
|
|
1759
|
-
return row.tags;
|
|
1760
|
-
}
|
|
1761
|
-
})()
|
|
1762
|
-
};
|
|
1763
|
-
}
|
|
1764
|
-
async rebuildMiniSearchIndex(entityId) {
|
|
1607
|
+
// --- Internal Index Management ---
|
|
1608
|
+
async rebuildIndex(entityId) {
|
|
1765
1609
|
if (entityId) {
|
|
1766
1610
|
const rows2 = await this.entryRepo.findMiniSearchRows(entityId);
|
|
1767
1611
|
const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
|
|
@@ -1774,7 +1618,10 @@ var _WikiMemory = class _WikiMemory {
|
|
|
1774
1618
|
if (documents2.length > 0) {
|
|
1775
1619
|
this.miniSearch.addAll(documents2);
|
|
1776
1620
|
}
|
|
1777
|
-
this.miniSearchEntryIdsByEntity.set(
|
|
1621
|
+
this.miniSearchEntryIdsByEntity.set(
|
|
1622
|
+
entityId,
|
|
1623
|
+
new Set(documents2.map((document) => document.id))
|
|
1624
|
+
);
|
|
1778
1625
|
return;
|
|
1779
1626
|
}
|
|
1780
1627
|
const rows = await this.entryRepo.findMiniSearchRows();
|
|
@@ -1790,223 +1637,79 @@ var _WikiMemory = class _WikiMemory {
|
|
|
1790
1637
|
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
1791
1638
|
}
|
|
1792
1639
|
}
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
}
|
|
1806
|
-
}
|
|
1807
|
-
/**
|
|
1808
|
-
* After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
|
|
1809
|
-
* value to the canonical `embedding_dimension` key and clear the mismatch flag.
|
|
1810
|
-
* This ensures future read() calls use embedding-based retrieval rather than staying
|
|
1811
|
-
* stuck on the MiniSearch fallback.
|
|
1812
|
-
*/
|
|
1813
|
-
async _reconcileEmbeddingDimension() {
|
|
1814
|
-
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
1815
|
-
if (!mismatchValue) return;
|
|
1816
|
-
const newDim = parseInt(mismatchValue, 10);
|
|
1817
|
-
const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
|
|
1818
|
-
if (residualCount === 0) {
|
|
1819
|
-
await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
|
|
1820
|
-
await this.metadataRepo.clearDimensionMismatch(this.db);
|
|
1821
|
-
}
|
|
1822
|
-
}
|
|
1823
|
-
async embedFact(fact) {
|
|
1824
|
-
const embedFn = this.options.llmProvider.embed;
|
|
1825
|
-
if (!embedFn) return false;
|
|
1826
|
-
let tagsStr;
|
|
1827
|
-
if (Array.isArray(fact.tags)) {
|
|
1828
|
-
tagsStr = fact.tags.join(" ");
|
|
1829
|
-
} else {
|
|
1830
|
-
try {
|
|
1831
|
-
const parsed = JSON.parse(fact.tags);
|
|
1832
|
-
tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
|
|
1833
|
-
} catch {
|
|
1834
|
-
tagsStr = fact.tags;
|
|
1835
|
-
}
|
|
1836
|
-
}
|
|
1837
|
-
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
1838
|
-
try {
|
|
1839
|
-
const vector = await embedFn(text);
|
|
1840
|
-
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
1841
|
-
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
1842
|
-
return false;
|
|
1843
|
-
}
|
|
1844
|
-
const float32Vector = new Float32Array(vector);
|
|
1845
|
-
let hasNonFinite = false;
|
|
1846
|
-
for (let i = 0; i < float32Vector.length; i++) {
|
|
1847
|
-
if (!isFinite(float32Vector[i])) {
|
|
1848
|
-
hasNonFinite = true;
|
|
1849
|
-
break;
|
|
1640
|
+
normalizeMiniSearchRow(row) {
|
|
1641
|
+
return {
|
|
1642
|
+
id: row.id,
|
|
1643
|
+
entity_id: row.entity_id,
|
|
1644
|
+
title: row.title,
|
|
1645
|
+
body: row.body,
|
|
1646
|
+
tags: (() => {
|
|
1647
|
+
try {
|
|
1648
|
+
const parsed = JSON.parse(row.tags);
|
|
1649
|
+
return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
|
|
1650
|
+
} catch {
|
|
1651
|
+
return row.tags;
|
|
1850
1652
|
}
|
|
1851
|
-
}
|
|
1852
|
-
|
|
1853
|
-
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
1854
|
-
return false;
|
|
1855
|
-
}
|
|
1856
|
-
await this.storeEmbeddingDimension(float32Vector.length);
|
|
1857
|
-
const blob = new Uint8Array(float32Vector.buffer);
|
|
1858
|
-
await this.entryRepo.updateEmbeddingBlob(fact.id, blob);
|
|
1859
|
-
try {
|
|
1860
|
-
await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
|
|
1861
|
-
} catch (hookErr) {
|
|
1862
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
|
|
1863
|
-
}
|
|
1864
|
-
return true;
|
|
1865
|
-
} catch (err) {
|
|
1866
|
-
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
1867
|
-
return false;
|
|
1868
|
-
}
|
|
1869
|
-
}
|
|
1870
|
-
_librarianKey(entityId) {
|
|
1871
|
-
return `${this.prefix}:${entityId}:librarian`;
|
|
1653
|
+
})()
|
|
1654
|
+
};
|
|
1872
1655
|
}
|
|
1873
|
-
|
|
1874
|
-
|
|
1656
|
+
_tieBreakSort(items) {
|
|
1657
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1875
1658
|
}
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
if (
|
|
1883
|
-
|
|
1884
|
-
if (allowed.includes(raw)) return raw;
|
|
1885
|
-
const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
|
|
1886
|
-
throw new Error(
|
|
1887
|
-
`importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
|
|
1888
|
-
);
|
|
1659
|
+
_compareScoredRows(a, b) {
|
|
1660
|
+
const scoreDiff = b.score - a.score;
|
|
1661
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
1662
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1663
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1664
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1665
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1666
|
+
return a.id.localeCompare(b.id);
|
|
1889
1667
|
}
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1668
|
+
};
|
|
1669
|
+
/**
|
|
1670
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
1671
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
1672
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
1673
|
+
*/
|
|
1674
|
+
_SearchService.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
1675
|
+
/**
|
|
1676
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
1677
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
1678
|
+
* entities while still maintaining a bounded memory footprint.
|
|
1679
|
+
*/
|
|
1680
|
+
_SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
1681
|
+
var SearchService = _SearchService;
|
|
1897
1682
|
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
entityId,
|
|
1906
|
-
factId,
|
|
1907
|
-
vector: vectorCopy
|
|
1908
|
-
});
|
|
1909
|
-
}
|
|
1910
|
-
/**
|
|
1911
|
-
* GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
|
|
1912
|
-
* Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
|
|
1913
|
-
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
1914
|
-
*/
|
|
1915
|
-
async _notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
|
|
1916
|
-
if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
|
|
1917
|
-
if (this.options.forceDeleteIgnoreRankerHook === true) return;
|
|
1918
|
-
const vectorCopy = vector ? vector.slice() : null;
|
|
1919
|
-
const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
|
|
1920
|
-
if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
|
|
1921
|
-
throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
|
|
1922
|
-
}
|
|
1923
|
-
const timeoutMs = rawTimeout;
|
|
1924
|
-
let timeoutHandle;
|
|
1925
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
1926
|
-
timeoutHandle = setTimeout(
|
|
1927
|
-
() => {
|
|
1928
|
-
const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
|
|
1929
|
-
timeoutError[HOOK_TIMEOUT_MARKER] = true;
|
|
1930
|
-
reject(timeoutError);
|
|
1931
|
-
},
|
|
1932
|
-
timeoutMs
|
|
1933
|
-
);
|
|
1934
|
-
});
|
|
1935
|
-
const hookPromise = Promise.resolve(
|
|
1936
|
-
this.options.vectorRanker.onEmbeddingPersisted({
|
|
1937
|
-
entityId,
|
|
1938
|
-
factId,
|
|
1939
|
-
vector: vectorCopy
|
|
1940
|
-
})
|
|
1941
|
-
);
|
|
1942
|
-
try {
|
|
1943
|
-
await Promise.race([hookPromise, timeoutPromise]);
|
|
1944
|
-
} catch (err) {
|
|
1945
|
-
hookPromise.catch(() => {
|
|
1946
|
-
});
|
|
1947
|
-
throw err;
|
|
1948
|
-
} finally {
|
|
1949
|
-
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
1950
|
-
}
|
|
1683
|
+
// src/types.ts
|
|
1684
|
+
/**
 * Error raised when an operation cannot start because a conflicting job is
 * already running. Carries the blocking `operation` name and the `entityId`
 * it was reported for.
 */
var WikiBusyError = class extends Error {
  /**
   * @param operation - Name of the operation that is already running.
   * @param entityId - Entity the conflict was detected for.
   */
  constructor(operation, entityId) {
    const message = `${operation} already running for entity ${entityId}`;
    super(message);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
|
|
1692
|
+
/**
 * Error describing a prune run that stopped part-way through. All constructor
 * arguments are exposed as same-named properties on the instance.
 */
var PrunePartialFailureError = class extends Error {
  /**
   * @param deleted - Count reported as deleted in the message.
   * @param failedAt - Identifier reported as the failure point in the message.
   * @param remaining - Count reported as remaining in the message.
   * @param cause - Underlying failure, stored on `cause`.
   * @param deletedTasks - Stored on `deletedTasks`; defaults to 0.
   * @param deletedEvents - Stored on `deletedEvents`; defaults to 0.
   */
  constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
    const summary = `Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`;
    super(summary);
    Object.assign(this, {
      name: "PrunePartialFailureError",
      deleted,
      failedAt,
      remaining,
      deletedTasks,
      deletedEvents,
      cause
    });
  }
};
|
|
1704
|
+
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
1705
|
+
|
|
1706
|
+
// src/services/JobManager.ts
|
|
1707
|
+
var JobManager = class {
|
|
1708
|
+
constructor(prefix) {
|
|
1709
|
+
this.prefix = prefix;
|
|
1710
|
+
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
1711
|
+
this.activeIngestJobs = /* @__PURE__ */ new Map();
|
|
1712
|
+
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
2010
1713
|
}
|
|
2011
1714
|
_pruneKey(entityId) {
|
|
2012
1715
|
return `${this.prefix}:${entityId}:prune`;
|
|
@@ -2026,6 +1729,12 @@ After running the migration SQL, restart your application.`
|
|
|
2026
1729
|
_forgetKey(entityId) {
|
|
2027
1730
|
return `${this.prefix}:${entityId}:forget`;
|
|
2028
1731
|
}
|
|
1732
|
+
_librarianKey(entityId) {
|
|
1733
|
+
return `${this.prefix}:${entityId}:librarian`;
|
|
1734
|
+
}
|
|
1735
|
+
_healKey(entityId) {
|
|
1736
|
+
return `${this.prefix}:${entityId}:heal`;
|
|
1737
|
+
}
|
|
2029
1738
|
_isReembedActive(entityId) {
|
|
2030
1739
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
2031
1740
|
}
|
|
@@ -2035,7 +1744,6 @@ After running the migration SQL, restart your application.`
|
|
|
2035
1744
|
_isForgetActiveFor(entityId) {
|
|
2036
1745
|
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
2037
1746
|
}
|
|
2038
|
-
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
2039
1747
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
2040
1748
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
2041
1749
|
for (const k of this.activeMaintenanceJobs) {
|
|
@@ -2043,794 +1751,874 @@ After running the migration SQL, restart your application.`
|
|
|
2043
1751
|
}
|
|
2044
1752
|
return false;
|
|
2045
1753
|
}
|
|
2046
|
-
|
|
1754
|
+
_hasIngestJob(entityId, sourceRef) {
|
|
1755
|
+
return this.activeIngestJobs.get(entityId)?.has(sourceRef ?? "") ?? false;
|
|
1756
|
+
}
|
|
1757
|
+
_addIngestJob(entityId, sourceRef) {
|
|
1758
|
+
const sourceKey = sourceRef ?? "";
|
|
1759
|
+
let refs = this.activeIngestJobs.get(entityId);
|
|
1760
|
+
if (!refs) {
|
|
1761
|
+
refs = /* @__PURE__ */ new Set();
|
|
1762
|
+
this.activeIngestJobs.set(entityId, refs);
|
|
1763
|
+
}
|
|
1764
|
+
refs.add(sourceKey);
|
|
1765
|
+
}
|
|
1766
|
+
_removeIngestJob(entityId, sourceRef) {
|
|
1767
|
+
const sourceKey = sourceRef ?? "";
|
|
1768
|
+
const refs = this.activeIngestJobs.get(entityId);
|
|
1769
|
+
if (!refs) return;
|
|
1770
|
+
refs.delete(sourceKey);
|
|
1771
|
+
if (refs.size === 0) {
|
|
1772
|
+
this.activeIngestJobs.delete(entityId);
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
2047
1775
|
_isIngestActiveFor(entityId) {
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
1776
|
+
return this.activeIngestJobs.has(entityId);
|
|
1777
|
+
}
|
|
1778
|
+
acquireLock(operation, entityId, sourceRef) {
|
|
1779
|
+
let blockingOperation = null;
|
|
1780
|
+
if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1781
|
+
throw new WikiBusyError("import", "*");
|
|
2051
1782
|
}
|
|
2052
|
-
|
|
1783
|
+
switch (operation) {
|
|
1784
|
+
case "prune":
|
|
1785
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1786
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1787
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1788
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1789
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1790
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1791
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1792
|
+
break;
|
|
1793
|
+
case "librarian":
|
|
1794
|
+
case "heal": {
|
|
1795
|
+
const opKey = operation === "librarian" ? this._librarianKey(entityId) : this._healKey(entityId);
|
|
1796
|
+
if (this.activeMaintenanceJobs.has(opKey)) blockingOperation = operation;
|
|
1797
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1798
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1799
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1800
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1801
|
+
break;
|
|
1802
|
+
}
|
|
1803
|
+
case "reembed":
|
|
1804
|
+
if (this.activeMaintenanceJobs.has(this._reembedKey(entityId))) blockingOperation = "reembed";
|
|
1805
|
+
else if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
|
|
1806
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1807
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1808
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1809
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1810
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1811
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1812
|
+
break;
|
|
1813
|
+
case "global_reembed":
|
|
1814
|
+
if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
|
|
1815
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) blockingOperation = "reembed";
|
|
1816
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":prune")) blockingOperation = "prune";
|
|
1817
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) blockingOperation = "librarian";
|
|
1818
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":heal")) blockingOperation = "heal";
|
|
1819
|
+
else if (this.activeIngestJobs.size > 0) blockingOperation = "ingest";
|
|
1820
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":import")) blockingOperation = "import";
|
|
1821
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":forget")) blockingOperation = "forget";
|
|
1822
|
+
break;
|
|
1823
|
+
case "import":
|
|
1824
|
+
case "forget": {
|
|
1825
|
+
const selfKey = operation === "import" ? this._importKey(entityId) : this._forgetKey(entityId);
|
|
1826
|
+
if (this.activeMaintenanceJobs.has(selfKey)) blockingOperation = operation;
|
|
1827
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1828
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1829
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1830
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1831
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1832
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1833
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1834
|
+
break;
|
|
1835
|
+
}
|
|
1836
|
+
case "global_import":
|
|
1837
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) blockingOperation = "import";
|
|
1838
|
+
break;
|
|
1839
|
+
case "ingest": {
|
|
1840
|
+
const sourceKey = sourceRef ?? "";
|
|
1841
|
+
if (this._hasIngestJob(entityId, sourceKey)) blockingOperation = "ingest";
|
|
1842
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1843
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1844
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1845
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1846
|
+
break;
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
if (blockingOperation) {
|
|
1850
|
+
throw new WikiBusyError(
|
|
1851
|
+
blockingOperation,
|
|
1852
|
+
operation === "global_reembed" || operation === "global_import" ? "*" : entityId
|
|
1853
|
+
);
|
|
1854
|
+
}
|
|
1855
|
+
if (operation === "ingest") {
|
|
1856
|
+
this._addIngestJob(entityId, sourceRef);
|
|
1857
|
+
} else if (operation === "global_reembed") {
|
|
1858
|
+
this.activeMaintenanceJobs.add(this._globalReembedKey());
|
|
1859
|
+
} else if (operation === "global_import") {
|
|
1860
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1861
|
+
} else {
|
|
1862
|
+
const keyFnName = `_${operation}Key`;
|
|
1863
|
+
const keyFn = this[keyFnName];
|
|
1864
|
+
this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
|
|
1865
|
+
}
|
|
1866
|
+
this._notifyStatusSubscribers(entityId);
|
|
1867
|
+
}
|
|
1868
|
+
releaseLock(operation, entityId, sourceRef) {
|
|
1869
|
+
if (operation === "ingest") {
|
|
1870
|
+
this._removeIngestJob(entityId, sourceRef);
|
|
1871
|
+
} else if (operation === "global_reembed") {
|
|
1872
|
+
this.activeMaintenanceJobs.delete(this._globalReembedKey());
|
|
1873
|
+
} else if (operation === "global_import") {
|
|
1874
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1875
|
+
} else {
|
|
1876
|
+
const keyFnName = `_${operation}Key`;
|
|
1877
|
+
const keyFn = this[keyFnName];
|
|
1878
|
+
this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
|
|
1879
|
+
}
|
|
1880
|
+
this._notifyStatusSubscribers(entityId);
|
|
1881
|
+
}
|
|
1882
|
+
/**
|
|
1883
|
+
* Returns true if acquireLock(operation, entityId) would throw WikiBusyError.
|
|
1884
|
+
* Use for non-throwing conflict checks (e.g. auto-trigger gating in write()).
|
|
1885
|
+
*/
|
|
1886
|
+
isBlocked(operation, entityId) {
|
|
1887
|
+
if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) return true;
|
|
1888
|
+
switch (operation) {
|
|
1889
|
+
case "librarian":
|
|
1890
|
+
return this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1891
|
+
case "heal":
|
|
1892
|
+
return this.activeMaintenanceJobs.has(this._healKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1893
|
+
case "prune":
|
|
1894
|
+
return this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._healKey(entityId)) || this._isReembedActive(entityId) || this._isIngestActiveFor(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1895
|
+
default:
|
|
1896
|
+
return false;
|
|
1897
|
+
}
|
|
1898
|
+
}
|
|
1899
|
+
/**
|
|
1900
|
+
* Auto-heal historically only gated on the heal self-key. Keep that behavior
|
|
1901
|
+
* for write() auto-trigger paths while preserving stricter checks in acquireLock().
|
|
1902
|
+
*/
|
|
1903
|
+
tryAcquireAutoHealLock(entityId) {
|
|
1904
|
+
const healKey = this._healKey(entityId);
|
|
1905
|
+
if (this.activeMaintenanceJobs.has(healKey)) return false;
|
|
1906
|
+
this.activeMaintenanceJobs.add(healKey);
|
|
1907
|
+
this._notifyStatusSubscribers(entityId);
|
|
1908
|
+
return true;
|
|
1909
|
+
}
|
|
1910
|
+
/**
|
|
1911
|
+
* Validates then acquires global + per-entity import locks atomically.
|
|
1912
|
+
* Validates all entities before acquiring any lock (same as current importDump semantics).
|
|
1913
|
+
*/
|
|
1914
|
+
acquireImportLocks(entityIds) {
|
|
1915
|
+
for (const entityId of entityIds) {
|
|
1916
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) throw new WikiBusyError("import", entityId);
|
|
1917
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) throw new WikiBusyError("librarian", entityId);
|
|
1918
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) throw new WikiBusyError("heal", entityId);
|
|
1919
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) throw new WikiBusyError("prune", entityId);
|
|
1920
|
+
if (this._isReembedActive(entityId)) throw new WikiBusyError("reembed", entityId);
|
|
1921
|
+
if (this._isIngestActiveFor(entityId)) throw new WikiBusyError("ingest", entityId);
|
|
1922
|
+
if (this._isForgetActiveFor(entityId)) throw new WikiBusyError("forget", entityId);
|
|
1923
|
+
}
|
|
1924
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) throw new WikiBusyError("import", "*");
|
|
1925
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1926
|
+
for (const entityId of entityIds) {
|
|
1927
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1928
|
+
}
|
|
1929
|
+
}
|
|
1930
|
+
releaseImportLocks(entityIds) {
|
|
1931
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1932
|
+
for (const entityId of entityIds) {
|
|
1933
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1934
|
+
}
|
|
1935
|
+
}
|
|
1936
|
+
getEntityStatus(entityId) {
|
|
1937
|
+
return {
|
|
1938
|
+
ingesting: this._isIngestActiveFor(entityId),
|
|
1939
|
+
librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
|
|
1940
|
+
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1941
|
+
};
|
|
1942
|
+
}
|
|
1943
|
+
subscribeEntityStatus(entityId, callback) {
|
|
1944
|
+
const initial = this.getEntityStatus(entityId);
|
|
1945
|
+
let set = this.statusSubscribers.get(entityId);
|
|
1946
|
+
if (!set) {
|
|
1947
|
+
set = /* @__PURE__ */ new Set();
|
|
1948
|
+
this.statusSubscribers.set(entityId, set);
|
|
1949
|
+
}
|
|
1950
|
+
const entry = { callback, last: this._copyEntityStatus(initial) };
|
|
1951
|
+
set.add(entry);
|
|
1952
|
+
try {
|
|
1953
|
+
callback(this._copyEntityStatus(initial));
|
|
1954
|
+
} catch (err) {
|
|
1955
|
+
console.error(`[JobManager] callback error for entityId="${entityId}" during initial emission`, err);
|
|
1956
|
+
}
|
|
1957
|
+
let active = true;
|
|
1958
|
+
return () => {
|
|
1959
|
+
if (!active) return;
|
|
1960
|
+
active = false;
|
|
1961
|
+
const s = this.statusSubscribers.get(entityId);
|
|
1962
|
+
if (!s) return;
|
|
1963
|
+
s.delete(entry);
|
|
1964
|
+
if (s.size === 0) this.statusSubscribers.delete(entityId);
|
|
1965
|
+
};
|
|
2053
1966
|
}
|
|
2054
1967
|
_copyEntityStatus(s) {
|
|
2055
1968
|
return { ingesting: s.ingesting, librarian: s.librarian, heal: s.heal };
|
|
2056
1969
|
}
|
|
2057
1970
|
_notifyStatusSubscribers(entityId) {
|
|
1971
|
+
if (entityId === "*") return;
|
|
2058
1972
|
const set = this.statusSubscribers.get(entityId);
|
|
2059
1973
|
if (!set || set.size === 0) return;
|
|
2060
1974
|
for (const entry of Array.from(set)) {
|
|
2061
1975
|
if (!set.has(entry)) continue;
|
|
2062
1976
|
const next = this.getEntityStatus(entityId);
|
|
2063
|
-
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal)
|
|
1977
|
+
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) {
|
|
1978
|
+
continue;
|
|
1979
|
+
}
|
|
2064
1980
|
entry.last = this._copyEntityStatus(next);
|
|
2065
1981
|
try {
|
|
2066
1982
|
entry.callback(this._copyEntityStatus(next));
|
|
2067
1983
|
} catch (err) {
|
|
2068
|
-
console.error(`[
|
|
1984
|
+
console.error(`[JobManager] callback error for entityId="${entityId}" during transition emission`, err);
|
|
2069
1985
|
}
|
|
2070
1986
|
}
|
|
2071
1987
|
}
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
}
|
|
2103
|
-
if (
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
let failure = null;
|
|
2122
|
-
for (const row of entriesToDelete) {
|
|
2123
|
-
try {
|
|
2124
|
-
await this._notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
2125
|
-
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
2126
|
-
} catch (err) {
|
|
2127
|
-
failure = { factId: row.id, cause: err };
|
|
2128
|
-
break;
|
|
2129
|
-
}
|
|
2130
|
-
}
|
|
2131
|
-
const succeededIds = succeeded.map((r) => r.id);
|
|
2132
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2133
|
-
if (succeededIds.length > 0) {
|
|
2134
|
-
deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
|
|
2135
|
-
}
|
|
2136
|
-
deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
|
|
2137
|
-
});
|
|
2138
|
-
if (failure) {
|
|
2139
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2140
|
-
this.vectorCache.delete(entityId);
|
|
2141
|
-
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
2142
|
-
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2143
|
-
if (isTimeout) {
|
|
2144
|
-
throw new PrunePartialFailureError(
|
|
2145
|
-
succeeded.length,
|
|
2146
|
-
failure.factId,
|
|
2147
|
-
remaining,
|
|
2148
|
-
new Error("Deletion hook timed out"),
|
|
2149
|
-
deletedTasks,
|
|
2150
|
-
0
|
|
2151
|
-
// events not yet deleted at this point
|
|
2152
|
-
);
|
|
2153
|
-
}
|
|
2154
|
-
const errMsg = failure.cause?.message ?? "";
|
|
2155
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2156
|
-
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
2157
|
-
throw new PrunePartialFailureError(
|
|
2158
|
-
succeeded.length,
|
|
2159
|
-
failure.factId,
|
|
2160
|
-
remaining,
|
|
2161
|
-
sanitizedCause,
|
|
2162
|
-
deletedTasks,
|
|
2163
|
-
0
|
|
2164
|
-
// events not yet deleted at this point
|
|
2165
|
-
);
|
|
2166
|
-
}
|
|
2167
|
-
}
|
|
2168
|
-
if (retainEventsFor !== null) {
|
|
2169
|
-
const cutoff = now - retainEventsFor * 864e5;
|
|
2170
|
-
const eventResult = await this.eventRepo.prune(entityId, cutoff);
|
|
2171
|
-
deletedEvents = eventResult.changes;
|
|
2172
|
-
}
|
|
2173
|
-
if (vacuum) {
|
|
2174
|
-
await this.metadataRepo.vacuum();
|
|
1988
|
+
};
|
|
1989
|
+
|
|
1990
|
+
// src/utils/pure.ts
|
|
1991
|
+
/**
 * Extracts and parses the first JSON object or array embedded in `text`.
 *
 * Starting at the first `{` or `[`, the text is scanned for the matching
 * closing character (ignoring anything inside double-quoted strings) and the
 * span is handed to `JSON.parse`. When a balanced span is not valid JSON,
 * for example bracketed prose such as "see [note]" ahead of the real payload,
 * the search resumes after that span instead of giving up.
 *
 * @param {string} text - Raw text that should contain a JSON object or array.
 * @returns {*} The parsed value of the first span that is valid JSON.
 * @throws {SyntaxError} When no candidate parses: the error from the first
 *   failed `JSON.parse`, or "No JSON object/array found in LLM response" when
 *   no balanced candidate exists at all.
 */
function parseJsonResponse(text) {
  // Index of the next "{" or "[" at or after `from`, or -1.
  const nextCandidate = (from) => {
    const brace = text.indexOf("{", from);
    const bracket = text.indexOf("[", from);
    if (brace === -1) return bracket;
    if (bracket === -1) return brace;
    return Math.min(brace, bracket);
  };

  // Index of the character closing the span opened at `start`, or -1 when unbalanced.
  const findClosing = (start) => {
    const openChar = text[start];
    const closeChar = openChar === "{" ? "}" : "]";
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (escaped) {
        escaped = false;
      } else if (ch === "\\" && inString) {
        escaped = true;
      } else if (ch === '"') {
        inString = !inString;
      } else if (inString) {
        // Brackets inside string literals never affect nesting depth.
      } else if (ch === openChar) {
        depth++;
      } else if (ch === closeChar) {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  };

  let firstError;
  let searchFrom = 0;
  while (searchFrom < text.length) {
    const start = nextCandidate(searchFrom);
    if (start === -1) break;
    const end = findClosing(start);
    // An unbalanced candidate means the payload is truncated; do not salvage fragments of it.
    if (end === -1) break;
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch (err) {
      firstError ??= err;
      searchFrom = end + 1;
    }
  }
  throw firstError ?? new SyntaxError("No JSON object/array found in LLM response");
}
|
|
2043
|
+
/**
 * Produces an Error that is safe to surface for a failure raised by a ranker.
 *
 * With `sanitizeRankerErrors === false` the input is passed through (wrapped in
 * an Error via `String(err)` when it is not already one). Otherwise a new Error
 * is returned whose message and `name` expose only the type of the original
 * (constructor name for Errors, `typeof` for anything else). When the original
 * Error has a defined `cause`, the result gets a `cause` naming only that
 * cause's type.
 *
 * @param err - The thrown value.
 * @param sanitizeRankerErrors - Pass exactly `false` to disable scrubbing.
 * @returns An Error instance as described above.
 */
function sanitizeRankerError(err, sanitizeRankerErrors) {
  const isError = err instanceof Error;
  if (sanitizeRankerErrors === false) {
    if (isError) {
      return err;
    }
    return new Error(String(err));
  }

  let typeName;
  if (isError) {
    typeName = err.constructor?.name ?? "Error";
  } else {
    typeName = typeof err;
  }

  let options;
  if (isError && err.cause !== undefined) {
    const causeType = err.cause?.constructor?.name ?? typeof err.cause;
    options = { cause: new Error(`Caused by: ${causeType}`) };
  }

  const sanitized = new Error(`VectorRanker ${typeName} (message scrubbed for security)`, options);
  sanitized.name = typeName;
  return sanitized;
}
|
|
2056
|
+
/**
 * Slices `value` between two indices without cutting a UTF-16 surrogate pair.
 *
 * Negative indices count from the end and out-of-range indices are clamped.
 * If the resolved start is greater than the resolved end the two are swapped.
 * A start that lands on the low half of a pair moves back one unit (the pair
 * is included); an end that lands between the halves of a pair moves back one
 * unit (the pair is excluded).
 *
 * @param {string} value - Source string.
 * @param {number} start - Start index (may be negative).
 * @param {number} [end] - End index, exclusive (may be negative); defaults to the length.
 * @returns {string} The selected substring.
 */
function safeSlice(value, start, end) {
  const total = value.length;

  const isHighSurrogate = (index) => {
    const unit = value.charCodeAt(index);
    return unit >= 0xd800 && unit <= 0xdbff;
  };
  const isLowSurrogate = (index) => {
    const unit = value.charCodeAt(index);
    return unit >= 0xdc00 && unit <= 0xdfff;
  };
  const resolveIndex = (index) => (index < 0 ? Math.max(total + index, 0) : Math.min(index, total));

  let from = resolveIndex(start);
  let to = end === void 0 ? total : resolveIndex(end);

  if (from > to) {
    const swap = from;
    from = to;
    to = swap;
  }

  const startSplitsPair = from > 0 && from < total && isLowSurrogate(from) && isHighSurrogate(from - 1);
  if (startSplitsPair) {
    from -= 1;
  }

  const endSplitsPair = to > 0 && to < total && isHighSurrogate(to - 1) && isLowSurrogate(to);
  if (endSplitsPair) {
    to -= 1;
  }

  return value.slice(from, to);
}
|
|
2071
|
+
/**
 * Splits trimmed `input` into consecutive chunks, preferring natural break
 * points over hard cuts.
 *
 * Each window spans `maxChunkLength` UTF-16 code units from the cursor. Inside
 * the second half of the window the split point is chosen in this order:
 *   1. just after the last blank line ("\n\n"),
 *   2. just after the last sentence terminator (., !, ?) followed by whitespace,
 *   3. just after the last whitespace character,
 *   4. the window end itself (a hard cut; sets `truncated`).
 * The next window starts `overlap` units before the split point, but always at
 * least one unit past the previous cursor so the loop makes progress.
 * Chunks are cut with `safeSlice`, which may shift a boundary by one unit to
 * keep a surrogate pair intact.
 *
 * @param {string} input - Text to split; surrounding whitespace is trimmed first.
 * @param {number} maxChunkLength - Window size, an integer >= 2.
 * @param {number} overlap - Units shared between neighbours, an integer in [0, maxChunkLength).
 * @returns {{chunks: string[], truncated: boolean}} The chunks in document order, and
 *   whether any chunk had to be cut at a non-whitespace position.
 * @throws {Error} When `maxChunkLength` or `overlap` is invalid (only checked for non-empty text).
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }

  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);

  while (cursor < text.length) {
    const remaining = text.length - cursor;
    if (remaining <= maxChunkLength) {
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }

    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // Search from windowEnd - 2 so the two-character break always fits inside
    // the window. Searching from windowEnd could land on a break straddling the
    // window edge, which had to be rejected and hid earlier, usable breaks.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    if (splitPoint === -1) {
      // i stops at windowEnd - 2, so i + 2 never exceeds the window.
      let lastTerm = -1;
      for (let i = minSplit; i < windowEnd - 1; i++) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
          lastTerm = i + 2;
        }
      }
      if (lastTerm !== -1) splitPoint = lastTerm;
    }

    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (/\s/.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    if (splitPoint === -1) {
      // No natural boundary in the second half of the window: hard cut.
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    // Step back by `overlap`, but never stall on the same cursor.
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }

  return { chunks, truncated };
}
|
|
2125
|
+
/**
 * Runs task thunks with at most `limit` of them in flight at once and returns
 * their results in task order.
 *
 * After the first failure no further tasks are started; tasks already running
 * are allowed to settle, then the first error is rethrown.
 *
 * @param {Array<() => (Promise<*>|*)>} tasks - Zero-argument functions to invoke.
 * @param {number} limit - Maximum parallelism. Values below 1, fractional values and
 *   non-numeric values are normalised to an integer >= 1, so an invalid limit can
 *   never result in zero workers (and silently unexecuted tasks).
 * @returns {Promise<Array<*>>} Results, index-aligned with `tasks`.
 * @throws The first error thrown/rejected by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let index = 0;
  let failed = false;
  let firstError;

  async function worker() {
    while (index < tasks.length && !failed) {
      const i = index++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }

  // NaN would otherwise propagate through Math.max/Math.min and produce an
  // empty worker pool, returning an array of holes without running anything.
  const numericLimit = Number(limit);
  const boundedLimit = Number.isNaN(numericLimit) ? 1 : Math.max(Math.floor(numericLimit), 1);
  const workerCount = Math.min(boundedLimit, tasks.length);

  await Promise.allSettled(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
|
|
2149
|
+
/**
 * Trims a string and caps it at `max` UTF-16 code units.
 *
 * Non-string input yields an empty string. When the trimmed text is longer
 * than `max` it is cut with `safeSlice` and trailing whitespace left by the
 * cut is removed.
 *
 * @param {unknown} value - Candidate text.
 * @param {number} max - Maximum length of the result.
 * @returns {string} The trimmed, length-capped text.
 */
function clip(value, max) {
  if (typeof value !== "string") {
    return "";
  }
  const trimmed = value.trim();
  if (trimmed.length <= max) {
    return trimmed;
  }
  return safeSlice(trimmed, 0, max).trimEnd();
}
|
|
2154
|
+
/**
 * Normalises a tag list: keeps string entries only, trims and lower-cases
 * them, drops empty tags and tags longer than 40 characters, and returns at
 * most the first 6 survivors.
 *
 * @param {unknown} tags - Candidate tag list.
 * @returns {string[]} Cleaned tags; empty when `tags` is not an array.
 */
function validateTags(tags) {
  if (!Array.isArray(tags)) {
    return [];
  }
  const accepted = [];
  for (const candidate of tags) {
    if (accepted.length === 6) break;
    if (typeof candidate !== "string") continue;
    const tag = candidate.trim().toLowerCase();
    if (tag.length === 0 || tag.length > 40) continue;
    accepted.push(tag);
  }
  return accepted;
}
|
|
2158
|
+
/**
 * Validates and normalises one fact object.
 *
 * `title` and `body` must both be strings that are non-empty after clipping
 * (80 and 800 characters respectively). `confidence` is kept only when it is
 * "certain" or "tentative"; anything else becomes "inferred". Tags go through
 * `validateTags`. All other properties of the input are copied through.
 *
 * @param {*} fact - Candidate fact.
 * @returns {object|null} A normalised copy, or `null` when the fact is unusable.
 */
function validateFact(fact) {
  const hasTextFields = typeof fact?.title === "string" && typeof fact?.body === "string";
  if (!hasTextFields) {
    return null;
  }

  const title = clip(fact.title, 80);
  const body = clip(fact.body, 800);
  if (!title || !body) {
    return null;
  }

  const declared = fact.confidence;
  const confidence = declared === "certain" || declared === "tentative" ? declared : "inferred";

  return { ...fact, title, body, confidence, tags: validateTags(fact.tags) };
}
|
|
2173
|
+
/**
 * Validates and normalises one task object.
 *
 * `description` must be a string that is non-empty after clipping to 200
 * characters. `priority` is rounded and clamped to the range 0..10; values
 * that are not finite numbers become 0. Other properties are copied through.
 *
 * @param {*} task - Candidate task.
 * @returns {object|null} A normalised copy, or `null` when the task is unusable.
 */
function validateTask(task) {
  if (typeof task?.description !== "string") {
    return null;
  }

  const description = clip(task.description, 200);
  if (!description) {
    return null;
  }

  const rawPriority = task.priority;
  // Number.isFinite is false for every non-number, so no separate typeof check is needed.
  const numericPriority = Number.isFinite(rawPriority) ? rawPriority : 0;
  const priority = Math.min(10, Math.max(0, Math.round(numericPriority)));

  return { ...task, description, priority };
}
|
|
2186
|
+
/**
 * Reduces a source reference to letters, digits, `.`, `_`, `-` and spaces,
 * trims it, and keeps at most the first 255 characters.
 *
 * @param {unknown} value - Candidate reference.
 * @returns {string|null} The cleaned reference, or `null` when the input is not
 *   a string or nothing is left after cleaning.
 */
function normalizeSourceRef(value) {
  if (typeof value !== "string") {
    return null;
  }
  const allowedOnly = value.replace(/[^A-Za-z0-9._\- ]/g, "");
  const bounded = allowedOnly.trim().slice(0, 255);
  if (bounded.length === 0) {
    return null;
  }
  return bounded;
}
|
|
2191
|
+
/**
 * Accepts a 64-character hexadecimal string (either case) and returns it
 * lower-cased.
 *
 * @param {unknown} value - Candidate hash.
 * @returns {string|null} The lower-case hash, or `null` when the input is not
 *   exactly 64 hex characters.
 */
function normalizeSourceHash(value) {
  if (typeof value !== "string") {
    return null;
  }
  if (!/^[0-9a-f]{64}$/i.test(value)) {
    return null;
  }
  return value.toLowerCase();
}
|
|
2195
|
+
/**
 * Breaks a title into a set of comparison tokens: lower-cased, stripped of
 * everything except a-z, 0-9 and whitespace, split on whitespace, keeping only
 * tokens of three or more characters.
 *
 * @param {string} title - Title text.
 * @returns {Set<string>} Distinct tokens in first-seen order.
 */
function titleTokens(title) {
  const words = title
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, "")
    .split(/\s+/);

  const tokens = new Set();
  for (const word of words) {
    if (word.length >= 3) {
      tokens.add(word);
    }
  }
  return tokens;
}
|
|
2198
|
+
/**
 * Jaccard similarity of two sets: size of the intersection divided by size of
 * the union. Two empty sets score 0.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value between 0 and 1.
 */
function jaccardScore(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 0;
  }

  const shared = new Set();
  for (const item of a) {
    if (b.has(item)) {
      shared.add(item);
    }
  }

  const combined = new Set(a);
  for (const item of b) {
    combined.add(item);
  }

  return shared.size / combined.size;
}
|
|
2204
|
+
|
|
2205
|
+
// src/prompts.ts
|
|
2206
|
+
// Default system prompt used by PromptService.buildLibrarianPrompt when neither a runtime override nor a global `librarianSystemPrompt` override is supplied. Asks for a JSON object with `facts` and `tasks` arrays.
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
2207
|
+
Return ONLY a valid JSON object matching this schema:
|
|
2208
|
+
{
|
|
2209
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
2210
|
+
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
2211
|
+
}
|
|
2212
|
+
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
2213
|
+
// Default system prompt used by PromptService.buildHealPrompt when neither a runtime override nor a global `healSystemPrompt` override is supplied. Asks for a JSON object with `downgraded`, `deleted` and `newFacts` arrays.
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
|
|
2214
|
+
Return ONLY a valid JSON object matching this schema:
|
|
2215
|
+
{
|
|
2216
|
+
"downgraded": ["string (fact IDs)"],
|
|
2217
|
+
"deleted": ["string (fact IDs)"],
|
|
2218
|
+
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
2219
|
+
}
|
|
2220
|
+
Do not return markdown, just raw JSON.`;
|
|
2221
|
+
// Default system prompt used by PromptService.buildIngestPrompt when neither a runtime override nor a global `ingestSystemPrompt` override is supplied. Asks for a JSON object with a `facts` array.
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
2222
|
+
Return ONLY a valid JSON object matching this schema:
|
|
2223
|
+
{
|
|
2224
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
2225
|
+
}
|
|
2226
|
+
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
2227
|
+
|
|
2228
|
+
// src/services/PromptService.ts
|
|
2229
|
+
/**
 * Builds the system/user prompt pairs sent to the LLM for ingestion,
 * librarian synthesis and memory healing.
 *
 * For every builder the template is chosen in this order: the runtime
 * override argument, the matching global override, the built-in default. If
 * the chosen template contains a `{{ placeholder }}` for any of that builder's
 * variables, the data is injected into the system prompt and the user prompt
 * is a fixed instruction; otherwise the template is used verbatim as the
 * system prompt and the data is serialised into the user prompt.
 */
var PromptService = class {
  /**
   * @param {object} [globalOverrides] - Optional overrides; the keys read here are
   *   `ingestSystemPrompt`, `librarianSystemPrompt` and `healSystemPrompt`.
   */
  constructor(globalOverrides) {
    this.globalOverrides = globalOverrides;
  }

  /**
   * Replaces `{{ name }}` placeholders in `template` with values from `variables`.
   *
   * String values are inserted as-is; other values are inserted as 2-space
   * indented JSON. A placeholder is left untouched when `variables` has no own
   * property of that name, when the value is `undefined`, or when the value
   * has no JSON representation. Only own properties are consulted so that
   * names such as `toString` or `constructor` are not resolved through the
   * prototype chain.
   *
   * @param {string} template - Text containing zero or more placeholders.
   * @param {object} variables - Placeholder values keyed by name.
   * @returns {string} The hydrated text.
   */
  hydrate(template, variables) {
    return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (placeholder, key) => {
      if (!Object.prototype.hasOwnProperty.call(variables, key)) return placeholder;
      const value = variables[key];
      if (value === void 0) return placeholder;
      if (typeof value === "string") return value;
      // JSON.stringify yields undefined for functions/symbols; keep the placeholder then.
      return JSON.stringify(value, null, 2) ?? placeholder;
    });
  }

  /**
   * @param {string} documentChunk - The chunk of document text to extract facts from.
   * @param {string} [runtimeOverride] - Template that takes precedence over all others.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildIngestPrompt(documentChunk, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.ingestSystemPrompt ?? INGEST_SYSTEM_PROMPT;
    if (/\{\{\s*documentChunk\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { documentChunk }),
        userPrompt: "Please extract the facts."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: `Document Chunk:
${documentChunk}`
    };
  }

  /**
   * @param {*} events - Event data to synthesise (serialised as JSON).
   * @param {*} currentFacts - Existing facts (serialised as JSON).
   * @param {string} [runtimeOverride] - Template that takes precedence over all others.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildLibrarianPrompt(events, currentFacts, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.librarianSystemPrompt ?? LIBRARIAN_SYSTEM_PROMPT;
    if (/\{\{\s*events\s*\}\}/.test(template) || /\{\{\s*currentFacts\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { events, currentFacts }),
        userPrompt: "Please synthesize the context."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: `Events:
${JSON.stringify(events, null, 2)}

Current Facts:
${JSON.stringify(currentFacts, null, 2)}`
    };
  }

  /**
   * @param {*} healCandidates - Facts the model may downgrade or delete (serialised as JSON).
   * @param {*} documentAnchors - Facts supplied for contradiction detection only (serialised as JSON).
   * @param {*} allTasks - Task data (serialised as JSON).
   * @param {*} recentEvents - Event data (serialised as JSON).
   * @param {string} [runtimeOverride] - Template that takes precedence over all others.
   * @returns {{systemPrompt: string, userPrompt: string}}
   */
  buildHealPrompt(healCandidates, documentAnchors, allTasks, recentEvents, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.healSystemPrompt ?? HEAL_SYSTEM_PROMPT;
    if (/\{\{\s*healCandidates\s*\}\}/.test(template) || /\{\{\s*documentAnchors\s*\}\}/.test(template) || /\{\{\s*allTasks\s*\}\}/.test(template) || /\{\{\s*recentEvents\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { healCandidates, documentAnchors, allTasks, recentEvents }),
        userPrompt: "Please heal the memory graph."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: `Heal Candidates:
${JSON.stringify(healCandidates, null, 2)}
Document Anchors (DO NOT MODIFY OR DELETE):
${JSON.stringify(documentAnchors, null, 2)}
All Tasks:
${JSON.stringify(allTasks, null, 2)}
Recent Events:
${JSON.stringify(recentEvents, null, 2)}
The following document anchors are provided for contradiction detection only. Do not include them in \`downgraded\`, \`deleted\`, or \`newFacts\`.`
    };
  }
};
|
|
2293
|
+
|
|
2294
|
+
// src/services/IngestionService.ts
|
|
2295
|
+
/**
 * Turns a source document into stored facts: chunks the text, asks the LLM to
 * extract facts from each chunk, replaces the facts previously stored for the
 * same source reference, then refreshes search state and embeddings.
 */
var IngestionService = class {
  /**
   * Stores the collaborators as instance fields. When no `promptService` is
   * supplied, one is created from `options.config?.prompts`.
   */
  constructor(db, prefix, options, entryRepo, searchService, jobManager, embeddingService, promptService) {
    this.db = db;
    this.prefix = prefix;
    this.options = options;
    this.entryRepo = entryRepo;
    this.searchService = searchService;
    this.jobManager = jobManager;
    this.embeddingService = embeddingService;
    if (promptService === null || promptService === void 0) {
      this.promptService = new PromptService(this.options.config?.prompts);
    } else {
      this.promptService = promptService;
    }
  }

  /**
   * Ingests one document for `entityId`.
   *
   * Per-call settings in `params` (`maxChunkLength`, `chunkOverlap`,
   * `chunkConcurrency`) win over `options.config`, which wins over the
   * defaults 12000 / 400 / 1. The work runs under an "ingest" lock keyed by
   * entity and source reference, which is always released.
   *
   * @param {string} entityId - Entity the facts belong to.
   * @param {object} params - Reads `sourceRef`, `sourceHash`, `documentChunk`, and optionally
   *   `maxChunkLength`, `chunkOverlap`, `chunkConcurrency`, `promptOverride`.
   * @returns {Promise<{truncated: boolean, chunks: number}>} Whether any chunk was hard-cut, and
   *   how many chunks were processed.
   * @throws {Error} On an invalid `sourceRef`, `sourceHash` or non-string `documentChunk`; also
   *   propagates failures from chunking, the LLM call, response parsing, storage and embedding.
   */
  async ingestDocument(entityId, params) {
    const sourceRef = normalizeSourceRef(params.sourceRef);
    if (!sourceRef) {
      throw new Error("Invalid sourceRef");
    }
    const sourceHash = normalizeSourceHash(params.sourceHash);
    if (!sourceHash) {
      throw new Error("Invalid sourceHash (must be 64-char hex string)");
    }

    // Lazy lookup: config is only consulted when the per-call value is nullish.
    const setting = (key, fallback) => params[key] ?? this.options.config?.[key] ?? fallback;

    const maxChunkLength = setting("maxChunkLength", 12e3);

    const requestedOverlap = setting("chunkOverlap", 400);
    const overlapIsUsable = Number.isFinite(requestedOverlap) && requestedOverlap >= 0;
    const chunkOverlap = Math.min(overlapIsUsable ? Math.floor(requestedOverlap) : 400, maxChunkLength - 1);

    const requestedConcurrency = setting("chunkConcurrency", 1);
    const concurrencyIsUsable = Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1;
    const chunkConcurrency = concurrencyIsUsable ? Math.floor(requestedConcurrency) : 1;

    if (typeof params.documentChunk !== "string") {
      throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
    }

    this.jobManager.acquireLock("ingest", entityId, sourceRef);
    try {
      const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
      if (chunks.length === 0) {
        return { truncated: false, chunks: 0 };
      }

      // One extraction job per chunk; each resolves to that chunk's valid facts.
      const extractionJobs = chunks.map((chunk) => async () => {
        const { systemPrompt, userPrompt } = this.promptService.buildIngestPrompt(chunk, params.promptOverride);
        const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
        const parsed = parseJsonResponse(responseText);
        const candidates = Array.isArray(parsed.facts) ? parsed.facts : [];
        return candidates.map(validateFact).filter((f) => f !== null);
      });
      const factsPerChunk = await withConcurrency(extractionJobs, chunkConcurrency);

      // Keep the first fact for each title (case- and whitespace-insensitive).
      const seenTitles = /* @__PURE__ */ new Set();
      const uniqueFacts = [];
      for (const fact of factsPerChunk.flat()) {
        const titleKey = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
        if (seenTitles.has(titleKey)) continue;
        seenTitles.add(titleKey);
        uniqueFacts.push(fact);
      }

      const now = Date.now();
      const insertedFacts = [];
      const deletedSourceFactIds = [];

      await this.db.withTransactionAsync(async (tx) => {
        const previousIds = await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false);
        deletedSourceFactIds.push(...previousIds);
        await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);

        for (const fact of uniqueFacts) {
          const id = generateId("fact_");
          await this.entryRepo.upsert(
            {
              id,
              entity_id: entityId,
              title: fact.title,
              body: fact.body,
              tags: fact.tags,
              confidence: fact.confidence,
              source_type: "immutable_document",
              source_hash: sourceHash,
              source_ref: sourceRef,
              created_at: now,
              updated_at: now,
              last_accessed_at: null,
              access_count: 0,
              deleted_at: null
            },
            tx
          );
          insertedFacts.push({
            id,
            entity_id: entityId,
            title: fact.title,
            body: fact.body,
            tags: JSON.stringify(fact.tags)
          });
        }
      });

      await this.searchService.sync(entityId);

      // Best-effort notification for each replaced fact; a failing hook is logged, not fatal.
      for (const factId of new Set(deletedSourceFactIds)) {
        try {
          await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
        } catch (hookErr) {
          console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
        }
      }

      for (const inserted of insertedFacts) {
        await this.embeddingService.embedFact(inserted);
      }

      this.searchService.evictCache(entityId);
      return { truncated, chunks: chunks.length };
    } finally {
      this.jobManager.releaseLock("ingest", entityId, sourceRef);
    }
  }
};
|
|
2393
|
+
|
|
2394
|
+
// src/services/MaintenanceService.ts
|
|
2395
|
+
// NOTE(review): not referenced in the visible part of this file; presumably the minimum title-similarity score (see jaccardScore) for a fuzzy match in MaintenanceService - confirm against its use.
var FUZZY_THRESHOLD = 0.5;
|
|
2396
|
+
// NOTE(review): not referenced in the visible part of this file; presumably the minimum number of title tokens (see titleTokens) required before fuzzy matching is attempted - confirm against its use.
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
2397
|
+
var MaintenanceService = class {
|
|
2398
|
+
constructor(db, prefix, options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService, promptService) {
|
|
2399
|
+
this.db = db;
|
|
2400
|
+
this.prefix = prefix;
|
|
2401
|
+
this.options = options;
|
|
2402
|
+
this.entryRepo = entryRepo;
|
|
2403
|
+
this.taskRepo = taskRepo;
|
|
2404
|
+
this.eventRepo = eventRepo;
|
|
2405
|
+
this.metadataRepo = metadataRepo;
|
|
2406
|
+
this.searchService = searchService;
|
|
2407
|
+
this.jobManager = jobManager;
|
|
2408
|
+
this.embeddingService = embeddingService;
|
|
2409
|
+
this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
|
|
2641
2410
|
}
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2411
|
+
async runPrune(entityId, options) {
|
|
2412
|
+
this.jobManager.acquireLock("prune", entityId);
|
|
2413
|
+
try {
|
|
2414
|
+
const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
|
|
2415
|
+
const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
|
|
2416
|
+
const vacuum = options?.vacuum ?? false;
|
|
2417
|
+
this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
|
|
2418
|
+
this._validatePruneDuration(retainEventsFor, "retainEventsFor");
|
|
2419
|
+
const now = Date.now();
|
|
2420
|
+
let deletedEntries = 0;
|
|
2421
|
+
let deletedTasks = 0;
|
|
2422
|
+
let deletedEvents = 0;
|
|
2423
|
+
if (retainSoftDeletedFor !== null) {
|
|
2424
|
+
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
2425
|
+
const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
|
|
2426
|
+
const succeeded = [];
|
|
2427
|
+
let failure = null;
|
|
2428
|
+
for (const row of entriesToDelete) {
|
|
2429
|
+
try {
|
|
2430
|
+
await this.embeddingService.notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
2431
|
+
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
2432
|
+
} catch (err) {
|
|
2433
|
+
failure = { factId: row.id, cause: err };
|
|
2434
|
+
break;
|
|
2435
|
+
}
|
|
2436
|
+
}
|
|
2437
|
+
const succeededIds = succeeded.map((r) => r.id);
|
|
2438
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2439
|
+
if (succeededIds.length > 0) {
|
|
2440
|
+
deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
|
|
2441
|
+
}
|
|
2442
|
+
deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
|
|
2443
|
+
});
|
|
2444
|
+
if (failure) {
|
|
2445
|
+
await this.searchService.sync(entityId);
|
|
2446
|
+
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
2447
|
+
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2448
|
+
if (isTimeout) {
|
|
2449
|
+
throw new PrunePartialFailureError(
|
|
2450
|
+
succeeded.length,
|
|
2451
|
+
failure.factId,
|
|
2452
|
+
remaining,
|
|
2453
|
+
new Error("Deletion hook timed out"),
|
|
2454
|
+
deletedTasks,
|
|
2455
|
+
0
|
|
2456
|
+
);
|
|
2457
|
+
}
|
|
2458
|
+
const errMsg = failure.cause?.message ?? "";
|
|
2459
|
+
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2460
|
+
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
2461
|
+
throw new PrunePartialFailureError(
|
|
2462
|
+
succeeded.length,
|
|
2463
|
+
failure.factId,
|
|
2464
|
+
remaining,
|
|
2465
|
+
sanitizedCause,
|
|
2466
|
+
deletedTasks,
|
|
2467
|
+
0
|
|
2468
|
+
);
|
|
2469
|
+
}
|
|
2470
|
+
}
|
|
2471
|
+
if (retainEventsFor !== null) {
|
|
2472
|
+
const cutoff = now - retainEventsFor * 864e5;
|
|
2473
|
+
const eventResult = await this.eventRepo.prune(entityId, cutoff);
|
|
2474
|
+
deletedEvents = eventResult.changes;
|
|
2475
|
+
}
|
|
2476
|
+
if (vacuum) {
|
|
2477
|
+
await this.metadataRepo.vacuum();
|
|
2478
|
+
}
|
|
2479
|
+
await this.searchService.sync(entityId);
|
|
2480
|
+
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
2481
|
+
} finally {
|
|
2482
|
+
this.jobManager.releaseLock("prune", entityId);
|
|
2650
2483
|
}
|
|
2651
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
2652
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
2653
|
-
const sanitized = new Error(
|
|
2654
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
2655
|
-
innerCause ? { cause: innerCause } : void 0
|
|
2656
|
-
);
|
|
2657
|
-
sanitized.name = typeName;
|
|
2658
|
-
return sanitized;
|
|
2659
2484
|
}
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
2667
|
-
let entityCache = this.vectorCache.get(entityId);
|
|
2668
|
-
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
2669
|
-
if (tooLarge && entityCache) {
|
|
2670
|
-
this.vectorCache.delete(entityId);
|
|
2671
|
-
entityCache = void 0;
|
|
2485
|
+
async runLibrarian(entityId, options) {
|
|
2486
|
+
this.jobManager.acquireLock("librarian", entityId);
|
|
2487
|
+
try {
|
|
2488
|
+
await this.doRunLibrarian(entityId, options?.promptOverride);
|
|
2489
|
+
} finally {
|
|
2490
|
+
this.jobManager.releaseLock("librarian", entityId);
|
|
2672
2491
|
}
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2492
|
+
}
|
|
2493
|
+
async runHeal(entityId, options) {
|
|
2494
|
+
this.jobManager.acquireLock("heal", entityId);
|
|
2495
|
+
try {
|
|
2496
|
+
await this.doRunHeal(entityId, options?.promptOverride);
|
|
2497
|
+
} finally {
|
|
2498
|
+
this.jobManager.releaseLock("heal", entityId);
|
|
2676
2499
|
}
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2500
|
+
}
|
|
2501
|
+
async runReembed(entityId, opts) {
|
|
2502
|
+
const embedFn = this.options.llmProvider.embed;
|
|
2503
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
2504
|
+
const op = entityId ? "reembed" : "global_reembed";
|
|
2505
|
+
this.jobManager.acquireLock(op, entityId ?? "*");
|
|
2506
|
+
try {
|
|
2507
|
+
const rows = await this.entryRepo.findAllForReembed(entityId);
|
|
2508
|
+
this.searchService.evictCache(entityId);
|
|
2509
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
2510
|
+
let effectiveSkip = skipExisting;
|
|
2511
|
+
if (skipExisting) {
|
|
2512
|
+
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
2513
|
+
if (mismatchValue) {
|
|
2514
|
+
if (entityId) {
|
|
2515
|
+
const mismatchDim = parseInt(mismatchValue, 10);
|
|
2516
|
+
const staleCount = await this.entryRepo.countStaleForEntity(entityId, mismatchDim);
|
|
2517
|
+
if (staleCount > 0) effectiveSkip = false;
|
|
2518
|
+
} else {
|
|
2519
|
+
effectiveSkip = false;
|
|
2520
|
+
}
|
|
2690
2521
|
}
|
|
2691
|
-
} else if (weight !== void 0 && weight < 1) {
|
|
2692
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
2693
|
-
score = (1 - weight) * kwScore;
|
|
2694
|
-
} else {
|
|
2695
|
-
score = -2;
|
|
2696
2522
|
}
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2523
|
+
let embedded = 0;
|
|
2524
|
+
let skipped = 0;
|
|
2525
|
+
let failed = 0;
|
|
2526
|
+
try {
|
|
2527
|
+
for (const row of rows) {
|
|
2528
|
+
const existingBlob = row.embedding_blob;
|
|
2529
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
2530
|
+
if (effectiveSkip && blobIsValid) {
|
|
2531
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
2532
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
2533
|
+
skipped++;
|
|
2534
|
+
continue;
|
|
2535
|
+
}
|
|
2536
|
+
}
|
|
2537
|
+
const success = await this.embeddingService.embedFact(row);
|
|
2538
|
+
if (success) embedded++;
|
|
2539
|
+
else failed++;
|
|
2710
2540
|
}
|
|
2711
|
-
|
|
2541
|
+
if (embedded > 0) {
|
|
2542
|
+
await this.embeddingService.reconcileEmbeddingDimension();
|
|
2543
|
+
}
|
|
2544
|
+
} finally {
|
|
2545
|
+
this.searchService.evictCache(entityId);
|
|
2712
2546
|
}
|
|
2547
|
+
return { embedded, skipped, failed };
|
|
2548
|
+
} finally {
|
|
2549
|
+
this.jobManager.releaseLock(op, entityId ?? "*");
|
|
2713
2550
|
}
|
|
2714
|
-
if (!skipSort) this._tieBreakSort(scored);
|
|
2715
|
-
return scored.slice(0, limit);
|
|
2716
|
-
}
|
|
2717
|
-
/**
|
|
2718
|
-
* Delegate semantic ranking to the injected VectorRanker.
|
|
2719
|
-
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
2720
|
-
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
2721
|
-
*/
|
|
2722
|
-
async _rankWithVectorRanker(args) {
|
|
2723
|
-
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
2724
|
-
const ranker = this.options.vectorRanker;
|
|
2725
|
-
if (!ranker) {
|
|
2726
|
-
throw new Error("vectorRanker not configured");
|
|
2727
|
-
}
|
|
2728
|
-
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
2729
|
-
const rankerResults = await ranker.rankBySimilarity({
|
|
2730
|
-
entityId,
|
|
2731
|
-
queryVec: queryVecCopy,
|
|
2732
|
-
candidateIds,
|
|
2733
|
-
limit
|
|
2734
|
-
});
|
|
2735
|
-
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
2736
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2737
|
-
const normalized = [];
|
|
2738
|
-
for (const r of rankerResults) {
|
|
2739
|
-
if (normalized.length >= limit) break;
|
|
2740
|
-
if (seen.has(r.id)) continue;
|
|
2741
|
-
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
2742
|
-
if (!Number.isFinite(r.semanticScore)) continue;
|
|
2743
|
-
seen.add(r.id);
|
|
2744
|
-
normalized.push(r);
|
|
2745
|
-
}
|
|
2746
|
-
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
2747
|
-
const scored = normalized.map((r) => {
|
|
2748
|
-
let score = r.semanticScore;
|
|
2749
|
-
if (weight !== void 0) {
|
|
2750
|
-
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
2751
|
-
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
2752
|
-
}
|
|
2753
|
-
return {
|
|
2754
|
-
id: r.id,
|
|
2755
|
-
entity_id: entityIdByCandidateId.get(r.id),
|
|
2756
|
-
// allowedIds filter above guarantees membership
|
|
2757
|
-
score
|
|
2758
|
-
};
|
|
2759
|
-
});
|
|
2760
|
-
return scored;
|
|
2761
|
-
}
|
|
2762
|
-
async getMemoryBundle(entityId) {
|
|
2763
|
-
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
2764
2551
|
}
|
|
2765
|
-
async
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
let eventType = event.event_type;
|
|
2769
|
-
if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
|
|
2770
|
-
eventType = "observation";
|
|
2552
|
+
async forget(entityId, params) {
|
|
2553
|
+
if (params.clearAll && (params.entryId !== void 0 || params.taskId !== void 0 || params.sourceRef !== void 0 || params.sourceHash !== void 0)) {
|
|
2554
|
+
throw new Error("forget() clearAll is mutually exclusive with entryId, taskId, sourceRef, and sourceHash");
|
|
2771
2555
|
}
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2556
|
+
this.jobManager.acquireLock("forget", entityId);
|
|
2557
|
+
try {
|
|
2558
|
+
const now = Date.now();
|
|
2559
|
+
let deletedEntries = 0;
|
|
2560
|
+
let deletedTasks = 0;
|
|
2561
|
+
const deletedEntryIds = [];
|
|
2562
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2563
|
+
if (params.clearAll) {
|
|
2564
|
+
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
|
|
2565
|
+
deletedEntries = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2566
|
+
deletedTasks = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2567
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
|
|
2568
|
+
} else {
|
|
2569
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
2570
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
2571
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
2572
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
2573
|
+
}
|
|
2574
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
2575
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
2576
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
2577
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2578
|
+
if (params.entryId) {
|
|
2579
|
+
const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
|
|
2580
|
+
if (entryId) deletedEntryIds.push(entryId);
|
|
2581
|
+
}
|
|
2582
|
+
if (sourceRef || sourceHash) {
|
|
2583
|
+
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
|
|
2584
|
+
}
|
|
2585
|
+
const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
2586
|
+
const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
2587
|
+
const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
|
|
2588
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
2589
|
+
entryPromise ?? Promise.resolve(false),
|
|
2590
|
+
taskDeletedPromise ?? Promise.resolve(false),
|
|
2591
|
+
refPromise ?? Promise.resolve(0)
|
|
2592
|
+
]);
|
|
2593
|
+
if (entryResult) deletedEntries++;
|
|
2594
|
+
if (taskResult) deletedTasks++;
|
|
2595
|
+
deletedEntries += refResult;
|
|
2799
2596
|
}
|
|
2800
|
-
}
|
|
2801
|
-
});
|
|
2802
|
-
if (shouldRunLibrarian && librarianJobKey !== null) {
|
|
2803
|
-
this.activeMaintenanceJobs.add(librarianJobKey);
|
|
2804
|
-
this._notifyStatusSubscribers(entityId);
|
|
2805
|
-
this.runLibrarianThenMaybeHeal(entityId, librarianCount).catch(console.error).finally(() => {
|
|
2806
|
-
this.activeMaintenanceJobs.delete(librarianJobKey);
|
|
2807
|
-
this._notifyStatusSubscribers(entityId);
|
|
2808
2597
|
});
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
await this._doRunLibrarian(entityId);
|
|
2813
|
-
const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
|
|
2814
|
-
const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
|
|
2815
|
-
let healCheckpoint = cp.heal ?? 0;
|
|
2816
|
-
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
2817
|
-
const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
|
|
2818
|
-
if (shouldRunHeal) {
|
|
2819
|
-
const healKey = this._healKey(entityId);
|
|
2820
|
-
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
2821
|
-
this.activeMaintenanceJobs.add(healKey);
|
|
2822
|
-
this._notifyStatusSubscribers(entityId);
|
|
2598
|
+
await this.searchService.sync(entityId);
|
|
2599
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2600
|
+
for (const factId of uniqueDeletedIds) {
|
|
2823
2601
|
try {
|
|
2824
|
-
await this.
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2602
|
+
await this.embeddingService.notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
2603
|
+
} catch (hookErr) {
|
|
2604
|
+
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2605
|
+
if (isTimeout) {
|
|
2606
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ${hookErr.message}`);
|
|
2607
|
+
}
|
|
2608
|
+
const errMsg = hookErr?.message ?? "";
|
|
2609
|
+
if (errMsg.startsWith("Invalid deletionHookTimeoutMs")) {
|
|
2610
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ${errMsg}`, { cause: hookErr });
|
|
2611
|
+
}
|
|
2612
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`, { cause: this._sanitizeRankerError(hookErr) });
|
|
2829
2613
|
}
|
|
2830
2614
|
}
|
|
2615
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
2616
|
+
} finally {
|
|
2617
|
+
this.jobManager.releaseLock("forget", entityId);
|
|
2831
2618
|
}
|
|
2832
2619
|
}
|
|
2833
|
-
|
|
2620
|
+
/** Core librarian pass (locks handled by {@link runLibrarian}). Package-internal orchestration hook. */
|
|
2621
|
+
async doRunLibrarian(entityId, promptOverride) {
|
|
2834
2622
|
const events = await this.eventRepo.getRecent(entityId, 50);
|
|
2835
2623
|
const currentFactsRows = await this.entryRepo.findRecentByEntityId(entityId, 100);
|
|
2836
2624
|
const currentFacts = currentFactsRows.map((f) => {
|
|
@@ -2840,15 +2628,12 @@ After running the migration SQL, restart your application.`
|
|
|
2840
2628
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
2841
2629
|
};
|
|
2842
2630
|
});
|
|
2843
|
-
const userPrompt =
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2849
|
-
systemPrompt: LIBRARIAN_SYSTEM_PROMPT,
|
|
2850
|
-
userPrompt
|
|
2851
|
-
});
|
|
2631
|
+
const { systemPrompt, userPrompt } = this.promptService.buildLibrarianPrompt(
|
|
2632
|
+
events.reverse(),
|
|
2633
|
+
currentFacts,
|
|
2634
|
+
promptOverride
|
|
2635
|
+
);
|
|
2636
|
+
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2852
2637
|
const result = parseJsonResponse(responseText);
|
|
2853
2638
|
const facts = Array.isArray(result.facts) ? result.facts : [];
|
|
2854
2639
|
const tasks = Array.isArray(result.tasks) ? result.tasks : [];
|
|
@@ -2893,24 +2678,35 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
2893
2678
|
};
|
|
2894
2679
|
await this.entryRepo.upsert(factObj, tx);
|
|
2895
2680
|
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2681
|
+
factsForDedupe.push(factObj);
|
|
2896
2682
|
}
|
|
2897
2683
|
for (const task of validTasks) {
|
|
2898
2684
|
const id = generateId("task_");
|
|
2899
|
-
const taskObj = {
|
|
2685
|
+
const taskObj = {
|
|
2686
|
+
id,
|
|
2687
|
+
entity_id: entityId,
|
|
2688
|
+
description: task.description,
|
|
2689
|
+
status: "pending",
|
|
2690
|
+
priority: task.priority,
|
|
2691
|
+
created_at: now,
|
|
2692
|
+
updated_at: now,
|
|
2693
|
+
resolved_at: null,
|
|
2694
|
+
deleted_at: null
|
|
2695
|
+
};
|
|
2900
2696
|
await this.taskRepo.upsert(taskObj, tx);
|
|
2901
2697
|
}
|
|
2902
2698
|
});
|
|
2903
|
-
await this.
|
|
2904
|
-
this.vectorCache.delete(entityId);
|
|
2699
|
+
await this.searchService.sync(entityId);
|
|
2905
2700
|
for (const fact of insertedFacts) {
|
|
2906
|
-
await this.embedFact(fact);
|
|
2701
|
+
await this.embeddingService.embedFact(fact);
|
|
2907
2702
|
}
|
|
2908
|
-
this.
|
|
2703
|
+
this.searchService.evictCache(entityId);
|
|
2909
2704
|
}
|
|
2910
|
-
|
|
2705
|
+
/** Core heal pass (locks handled by {@link runHeal}). Package-internal orchestration hook. */
|
|
2706
|
+
async doRunHeal(entityId, promptOverride) {
|
|
2911
2707
|
const now = Date.now();
|
|
2912
|
-
const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config
|
|
2913
|
-
const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config
|
|
2708
|
+
const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config?.orphanAfterDays : 30;
|
|
2709
|
+
const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config?.staleInferredAfterDays : 60;
|
|
2914
2710
|
const MS_PER_DAY = 24 * 60 * 60 * 1e3;
|
|
2915
2711
|
if (orphanAfterDays !== null && (typeof orphanAfterDays !== "number" || !Number.isFinite(orphanAfterDays) || orphanAfterDays < 0)) {
|
|
2916
2712
|
throw new Error("Invalid orphanAfterDays: must be a finite number >= 0 or null");
|
|
@@ -2918,810 +2714,1481 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
2918
2714
|
if (staleInferredAfterDays !== null && (typeof staleInferredAfterDays !== "number" || !Number.isFinite(staleInferredAfterDays) || staleInferredAfterDays < 0)) {
|
|
2919
2715
|
throw new Error("Invalid staleInferredAfterDays: must be a finite number >= 0 or null");
|
|
2920
2716
|
}
|
|
2717
|
+
const orphanedIds = [];
|
|
2921
2718
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2922
2719
|
if (orphanAfterDays !== null) {
|
|
2923
2720
|
const orphanThreshold = now - orphanAfterDays * MS_PER_DAY;
|
|
2924
|
-
await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx);
|
|
2721
|
+
orphanedIds.push(...await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx));
|
|
2925
2722
|
}
|
|
2926
2723
|
if (staleInferredAfterDays !== null) {
|
|
2927
2724
|
const staleThreshold = now - staleInferredAfterDays * MS_PER_DAY;
|
|
2928
2725
|
await this.entryRepo.downgradeStaleInferred(entityId, staleThreshold, tx);
|
|
2929
2726
|
}
|
|
2930
2727
|
});
|
|
2728
|
+
for (const factId of orphanedIds) {
|
|
2729
|
+
try {
|
|
2730
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
|
|
2731
|
+
} catch (hookErr) {
|
|
2732
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal orphan pass for ${factId}:`, hookErr);
|
|
2733
|
+
}
|
|
2734
|
+
}
|
|
2931
2735
|
const allFactsRows = await this.entryRepo.findAllByEntityId(entityId);
|
|
2932
2736
|
const allTasks = await this.taskRepo.findAllPending([entityId]);
|
|
2933
2737
|
const recentEvents = await this.eventRepo.getRecent(entityId, 20);
|
|
2934
2738
|
const healCandidates = allFactsRows.filter((f) => f.source_type !== "immutable_document");
|
|
2935
2739
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "immutable_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
2936
|
-
const
|
|
2937
|
-
${JSON.stringify(healCandidates.map((f) => {
|
|
2740
|
+
const healCandidatesForPrompt = healCandidates.map((f) => {
|
|
2938
2741
|
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
2939
2742
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
2940
|
-
}), null, 2)}
|
|
2941
|
-
|
|
2942
|
-
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
2943
|
-
${JSON.stringify(documentAnchors, null, 2)}
|
|
2944
|
-
|
|
2945
|
-
All Tasks:
|
|
2946
|
-
${JSON.stringify(allTasks, null, 2)}
|
|
2947
|
-
|
|
2948
|
-
Recent Events:
|
|
2949
|
-
${JSON.stringify(recentEvents, null, 2)}
|
|
2950
|
-
|
|
2951
|
-
The following document anchors are provided for contradiction detection only. Do not include them in \`downgraded\`, \`deleted\`, or \`newFacts\`.`;
|
|
2952
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2953
|
-
systemPrompt: HEAL_SYSTEM_PROMPT,
|
|
2954
|
-
userPrompt
|
|
2955
2743
|
});
|
|
2744
|
+
const { systemPrompt, userPrompt } = this.promptService.buildHealPrompt(
|
|
2745
|
+
healCandidatesForPrompt,
|
|
2746
|
+
documentAnchors,
|
|
2747
|
+
allTasks,
|
|
2748
|
+
recentEvents,
|
|
2749
|
+
promptOverride
|
|
2750
|
+
);
|
|
2751
|
+
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2956
2752
|
const result = parseJsonResponse(responseText);
|
|
2957
2753
|
const mutableIds = new Set(healCandidates.map((f) => f.id));
|
|
2958
2754
|
const downgraded = Array.isArray(result.downgraded) ? result.downgraded : [];
|
|
2959
2755
|
const deleted = Array.isArray(result.deleted) ? result.deleted : [];
|
|
2960
2756
|
const newFacts = Array.isArray(result.newFacts) ? result.newFacts : [];
|
|
2961
|
-
const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
|
|
2962
|
-
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
2757
|
+
const safeDowngraded = Array.from(new Set(downgraded.filter((id) => mutableIds.has(id))));
|
|
2758
|
+
const safeDeleted = Array.from(new Set(deleted.filter((id) => mutableIds.has(id))));
|
|
2963
2759
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
2964
2760
|
const insertedFacts = [];
|
|
2965
2761
|
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
2762
|
+
const healFactsForDedupe = [...healCandidates];
|
|
2966
2763
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2967
2764
|
await this.entryRepo.downgradeByIds(safeDowngraded, entityId, tx);
|
|
2968
2765
|
await this.entryRepo.softDeleteByIds(safeDeleted, entityId, tx);
|
|
2969
2766
|
for (const fact of validNewFacts) {
|
|
2767
|
+
const newTokens = titleTokens(fact.title);
|
|
2768
|
+
let skip = false;
|
|
2769
|
+
if (newTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2770
|
+
for (const existing of healFactsForDedupe) {
|
|
2771
|
+
if (existing.source_type !== "librarian_inferred") continue;
|
|
2772
|
+
const existingTokens = titleTokens(existing.title);
|
|
2773
|
+
if (existingTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2774
|
+
if (jaccardScore(newTokens, existingTokens) >= FUZZY_THRESHOLD) {
|
|
2775
|
+
skip = true;
|
|
2776
|
+
break;
|
|
2777
|
+
}
|
|
2778
|
+
}
|
|
2779
|
+
}
|
|
2780
|
+
}
|
|
2781
|
+
if (skip) continue;
|
|
2970
2782
|
const id = generateId("fact_");
|
|
2971
2783
|
const factObj = {
|
|
2972
2784
|
id,
|
|
2973
2785
|
entity_id: entityId,
|
|
2974
2786
|
title: fact.title,
|
|
2975
2787
|
body: fact.body,
|
|
2976
|
-
tags: fact.tags,
|
|
2977
|
-
confidence: fact.confidence,
|
|
2978
|
-
source_type: "librarian_inferred",
|
|
2979
|
-
source_hash: null,
|
|
2980
|
-
source_ref: null,
|
|
2981
|
-
created_at: now,
|
|
2982
|
-
updated_at: now,
|
|
2983
|
-
last_accessed_at: null,
|
|
2984
|
-
access_count: 0,
|
|
2985
|
-
deleted_at: null
|
|
2986
|
-
};
|
|
2987
|
-
await this.entryRepo.upsert(factObj, tx);
|
|
2988
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
await this.
|
|
2993
|
-
for (const factId of uniqueDeletedFactIds) {
|
|
2994
|
-
try {
|
|
2995
|
-
await this.
|
|
2996
|
-
} catch (hookErr) {
|
|
2997
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
2788
|
+
tags: fact.tags,
|
|
2789
|
+
confidence: fact.confidence,
|
|
2790
|
+
source_type: "librarian_inferred",
|
|
2791
|
+
source_hash: null,
|
|
2792
|
+
source_ref: null,
|
|
2793
|
+
created_at: now,
|
|
2794
|
+
updated_at: now,
|
|
2795
|
+
last_accessed_at: null,
|
|
2796
|
+
access_count: 0,
|
|
2797
|
+
deleted_at: null
|
|
2798
|
+
};
|
|
2799
|
+
await this.entryRepo.upsert(factObj, tx);
|
|
2800
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2801
|
+
healFactsForDedupe.push(factObj);
|
|
2802
|
+
}
|
|
2803
|
+
});
|
|
2804
|
+
await this.searchService.sync(entityId);
|
|
2805
|
+
for (const factId of uniqueDeletedFactIds) {
|
|
2806
|
+
try {
|
|
2807
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
|
|
2808
|
+
} catch (hookErr) {
|
|
2809
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
2810
|
+
}
|
|
2811
|
+
}
|
|
2812
|
+
for (const fact of insertedFacts) {
|
|
2813
|
+
await this.embeddingService.embedFact(fact);
|
|
2814
|
+
}
|
|
2815
|
+
this.searchService.evictCache(entityId);
|
|
2816
|
+
}
|
|
2817
|
+
_validatePruneDuration(value, name) {
|
|
2818
|
+
if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
|
|
2819
|
+
throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
|
|
2820
|
+
}
|
|
2821
|
+
}
|
|
2822
|
+
_sanitizeRankerError(err) {
|
|
2823
|
+
return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
|
|
2824
|
+
}
|
|
2825
|
+
};
|
|
2826
|
+
|
|
2827
|
+
// src/services/ImportExportService.ts
|
|
2828
|
+
var ImportExportService = class {
|
|
2829
|
+
constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
|
|
2830
|
+
this.db = db;
|
|
2831
|
+
this.entryRepo = entryRepo;
|
|
2832
|
+
this.taskRepo = taskRepo;
|
|
2833
|
+
this.eventRepo = eventRepo;
|
|
2834
|
+
this.metadataRepo = metadataRepo;
|
|
2835
|
+
this.searchService = searchService;
|
|
2836
|
+
this.jobManager = jobManager;
|
|
2837
|
+
this.embeddingService = embeddingService;
|
|
2838
|
+
}
|
|
2839
|
+
async exportDump(entityIds) {
|
|
2840
|
+
let ids;
|
|
2841
|
+
if (entityIds && entityIds.length > 0) {
|
|
2842
|
+
ids = Array.from(new Set(entityIds));
|
|
2843
|
+
} else {
|
|
2844
|
+
ids = await this.metadataRepo.getDistinctEntityIds();
|
|
2845
|
+
}
|
|
2846
|
+
const entities = {};
|
|
2847
|
+
const BATCH = 3;
|
|
2848
|
+
for (let i = 0; i < ids.length; i += BATCH) {
|
|
2849
|
+
const batch = ids.slice(i, i + BATCH);
|
|
2850
|
+
const batchResults = await Promise.all(
|
|
2851
|
+
batch.map(
|
|
2852
|
+
async (id) => [
|
|
2853
|
+
id,
|
|
2854
|
+
await this.getFullBundle(id, { includeBlobs: true })
|
|
2855
|
+
]
|
|
2856
|
+
)
|
|
2857
|
+
);
|
|
2858
|
+
for (const [id, bundle] of batchResults) {
|
|
2859
|
+
entities[id] = bundle;
|
|
2860
|
+
}
|
|
2861
|
+
}
|
|
2862
|
+
return { generatedAt: Date.now(), entities };
|
|
2863
|
+
}
|
|
2864
|
+
async importDump(dump, opts) {
|
|
2865
|
+
const merge = opts?.merge ?? false;
|
|
2866
|
+
const entityIds = Object.keys(dump.entities);
|
|
2867
|
+
this.jobManager.acquireImportLocks(entityIds);
|
|
2868
|
+
try {
|
|
2869
|
+
await this.assertNoLegacySourceTypes();
|
|
2870
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
2871
|
+
await this.doImportEntity(entityId, bundle, merge);
|
|
2872
|
+
}
|
|
2873
|
+
} finally {
|
|
2874
|
+
this.jobManager.releaseImportLocks(entityIds);
|
|
2875
|
+
}
|
|
2876
|
+
}
|
|
2877
|
+
async getFullBundle(entityId, opts) {
|
|
2878
|
+
const [factsRaw, tasks, events] = await Promise.all([
|
|
2879
|
+
opts?.includeBlobs ? this.entryRepo.findAllByEntityIdWithBlobs(entityId) : this.entryRepo.findAllByEntityId(entityId),
|
|
2880
|
+
this.taskRepo.findAllByEntityId(entityId),
|
|
2881
|
+
this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
|
|
2882
|
+
]);
|
|
2883
|
+
const facts = factsRaw.map((f) => {
|
|
2884
|
+
const {
|
|
2885
|
+
embedding: _embedding,
|
|
2886
|
+
embedding_blob,
|
|
2887
|
+
...rest
|
|
2888
|
+
} = f;
|
|
2889
|
+
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
2890
|
+
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
2891
|
+
new Uint8Array(c).set(embedding_blob);
|
|
2892
|
+
return new Uint8Array(c);
|
|
2893
|
+
})() : void 0;
|
|
2894
|
+
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
2895
|
+
return {
|
|
2896
|
+
...factBase,
|
|
2897
|
+
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
2898
|
+
};
|
|
2899
|
+
});
|
|
2900
|
+
return { facts, tasks, events };
|
|
2901
|
+
}
|
|
2902
|
+
/** Single-entity import transaction + post-processing; package-internal hook for tests. */
|
|
2903
|
+
async doImportEntity(entityId, bundle, merge) {
|
|
2904
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
2905
|
+
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
2906
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
2907
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
2908
|
+
const softDeletedFactIds = [];
|
|
2909
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2910
|
+
if (!merge) {
|
|
2911
|
+
const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
|
|
2912
|
+
entityId,
|
|
2913
|
+
null,
|
|
2914
|
+
null,
|
|
2915
|
+
tx,
|
|
2916
|
+
false
|
|
2917
|
+
);
|
|
2918
|
+
softDeletedFactIds.push(...deletedLiveFactIds);
|
|
2919
|
+
await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2920
|
+
await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2921
|
+
await this.metadataRepo.deleteCheckpoint(entityId, tx);
|
|
2922
|
+
}
|
|
2923
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
2924
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
2925
|
+
const existingFacts = await this.entryRepo.findExistingMetadataByIds(
|
|
2926
|
+
factIds,
|
|
2927
|
+
tx
|
|
2928
|
+
);
|
|
2929
|
+
for (const existingFact of existingFacts) {
|
|
2930
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
2931
|
+
}
|
|
2932
|
+
for (const fact of bundle.facts) {
|
|
2933
|
+
const sourceType = this._normalizeImportedSourceType(
|
|
2934
|
+
String(fact.source_type),
|
|
2935
|
+
{
|
|
2936
|
+
entityId,
|
|
2937
|
+
factId: fact.id
|
|
2938
|
+
}
|
|
2939
|
+
);
|
|
2940
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
2941
|
+
const existing = existingFactsById.get(fact.id);
|
|
2942
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
2943
|
+
let rawBlob = null;
|
|
2944
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
2945
|
+
rawBlob = rawBlobRaw;
|
|
2946
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
2947
|
+
const obj = rawBlobRaw;
|
|
2948
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
2949
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
2950
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
2951
|
+
const entries = Object.keys(obj);
|
|
2952
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
2953
|
+
const len = entries.length;
|
|
2954
|
+
rawBlob = new Uint8Array(len);
|
|
2955
|
+
for (let i = 0; i < len; i++)
|
|
2956
|
+
rawBlob[i] = obj[String(i)] ?? 0;
|
|
2957
|
+
}
|
|
2958
|
+
}
|
|
2959
|
+
}
|
|
2960
|
+
let blobData = null;
|
|
2961
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
2962
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
2963
|
+
const alignedBlob = new Uint8Array(copy);
|
|
2964
|
+
alignedBlob.set(rawBlob);
|
|
2965
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
2966
|
+
let allFinite = true;
|
|
2967
|
+
for (let i = 0; i < floats.length; i++) {
|
|
2968
|
+
if (!isFinite(floats[i])) {
|
|
2969
|
+
allFinite = false;
|
|
2970
|
+
break;
|
|
2971
|
+
}
|
|
2972
|
+
}
|
|
2973
|
+
if (allFinite) {
|
|
2974
|
+
blobData = alignedBlob;
|
|
2975
|
+
}
|
|
2976
|
+
}
|
|
2977
|
+
if (existing) {
|
|
2978
|
+
if (existing.entity_id !== entityId) {
|
|
2979
|
+
this._warnCrossEntityCollision(
|
|
2980
|
+
"entry",
|
|
2981
|
+
fact.id,
|
|
2982
|
+
existing.entity_id,
|
|
2983
|
+
entityId
|
|
2984
|
+
);
|
|
2985
|
+
continue;
|
|
2986
|
+
}
|
|
2987
|
+
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
2988
|
+
}
|
|
2989
|
+
const factObj = {
|
|
2990
|
+
id: fact.id,
|
|
2991
|
+
entity_id: entityId,
|
|
2992
|
+
title: fact.title,
|
|
2993
|
+
body: fact.body,
|
|
2994
|
+
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
2995
|
+
confidence: fact.confidence,
|
|
2996
|
+
source_type: sourceType,
|
|
2997
|
+
source_hash: fact.source_hash,
|
|
2998
|
+
source_ref: fact.source_ref,
|
|
2999
|
+
created_at: fact.created_at,
|
|
3000
|
+
updated_at: safeUpdatedAt,
|
|
3001
|
+
last_accessed_at: fact.last_accessed_at,
|
|
3002
|
+
access_count: fact.access_count,
|
|
3003
|
+
deleted_at: fact.deleted_at,
|
|
3004
|
+
embedding_blob: blobData ?? void 0
|
|
3005
|
+
};
|
|
3006
|
+
await this.entryRepo.upsertForImport(factObj, tx);
|
|
3007
|
+
if (blobData != null) {
|
|
3008
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
3009
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
3010
|
+
}
|
|
3011
|
+
existingFactsById.set(fact.id, {
|
|
3012
|
+
id: fact.id,
|
|
3013
|
+
entity_id: entityId,
|
|
3014
|
+
updated_at: safeUpdatedAt
|
|
3015
|
+
});
|
|
3016
|
+
upsertedFactIds.add(fact.id);
|
|
3017
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
3018
|
+
}
|
|
3019
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
3020
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
3021
|
+
const existingTasks = await this.taskRepo.findExistingMetadataByIds(
|
|
3022
|
+
taskIds,
|
|
3023
|
+
tx
|
|
3024
|
+
);
|
|
3025
|
+
for (const existingTask of existingTasks) {
|
|
3026
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
3027
|
+
}
|
|
3028
|
+
for (const task of bundle.tasks) {
|
|
3029
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
3030
|
+
const existing = existingTasksById.get(task.id);
|
|
3031
|
+
if (existing) {
|
|
3032
|
+
if (existing.entity_id !== entityId) {
|
|
3033
|
+
this._warnCrossEntityCollision(
|
|
3034
|
+
"task",
|
|
3035
|
+
task.id,
|
|
3036
|
+
existing.entity_id,
|
|
3037
|
+
entityId
|
|
3038
|
+
);
|
|
3039
|
+
continue;
|
|
3040
|
+
}
|
|
3041
|
+
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
3042
|
+
}
|
|
3043
|
+
await this.taskRepo.upsertForImport(
|
|
3044
|
+
{
|
|
3045
|
+
id: task.id,
|
|
3046
|
+
entity_id: entityId,
|
|
3047
|
+
description: task.description,
|
|
3048
|
+
status: task.status,
|
|
3049
|
+
priority: task.priority,
|
|
3050
|
+
created_at: task.created_at,
|
|
3051
|
+
updated_at: safeUpdatedAt,
|
|
3052
|
+
resolved_at: task.resolved_at,
|
|
3053
|
+
deleted_at: task.deleted_at
|
|
3054
|
+
},
|
|
3055
|
+
tx,
|
|
3056
|
+
safeUpdatedAt
|
|
3057
|
+
);
|
|
3058
|
+
existingTasksById.set(task.id, {
|
|
3059
|
+
id: task.id,
|
|
3060
|
+
entity_id: entityId,
|
|
3061
|
+
updated_at: safeUpdatedAt
|
|
3062
|
+
});
|
|
3063
|
+
}
|
|
3064
|
+
for (const event of bundle.events) {
|
|
3065
|
+
await this.eventRepo.addIgnoreDuplicate(
|
|
3066
|
+
{
|
|
3067
|
+
id: event.id,
|
|
3068
|
+
entity_id: entityId,
|
|
3069
|
+
event_type: event.event_type,
|
|
3070
|
+
summary: event.summary,
|
|
3071
|
+
related_entry_id: event.related_entry_id ?? null,
|
|
3072
|
+
created_at: event.created_at
|
|
3073
|
+
},
|
|
3074
|
+
tx
|
|
3075
|
+
);
|
|
3076
|
+
}
|
|
3077
|
+
});
|
|
3078
|
+
await this.searchService.sync(entityId);
|
|
3079
|
+
for (const fact of bundle.facts) {
|
|
3080
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
3081
|
+
const embedded = await this.embeddingService.embedFact({
|
|
3082
|
+
id: fact.id,
|
|
3083
|
+
entity_id: entityId,
|
|
3084
|
+
title: fact.title,
|
|
3085
|
+
body: fact.body,
|
|
3086
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
3087
|
+
});
|
|
3088
|
+
if (!embedded) {
|
|
3089
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, fact.id, null);
|
|
3090
|
+
}
|
|
2998
3091
|
}
|
|
2999
3092
|
}
|
|
3000
|
-
for (const fact of
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
|
|
3017
|
-
|
|
3093
|
+
for (const fact of bundle.facts) {
|
|
3094
|
+
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
3095
|
+
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
3096
|
+
try {
|
|
3097
|
+
const float32Vector = new Float32Array(
|
|
3098
|
+
blobData.buffer,
|
|
3099
|
+
blobData.byteOffset,
|
|
3100
|
+
blobData.byteLength / 4
|
|
3101
|
+
);
|
|
3102
|
+
await this.embeddingService.notifyEmbeddingPersisted(
|
|
3103
|
+
entityId,
|
|
3104
|
+
fact.id,
|
|
3105
|
+
float32Vector
|
|
3106
|
+
);
|
|
3107
|
+
} catch (hookErr) {
|
|
3108
|
+
console.warn(
|
|
3109
|
+
`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`,
|
|
3110
|
+
hookErr
|
|
3111
|
+
);
|
|
3112
|
+
}
|
|
3113
|
+
}
|
|
3018
3114
|
}
|
|
3019
|
-
|
|
3020
|
-
|
|
3115
|
+
for (const factId of softDeletedFactIds) {
|
|
3116
|
+
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
3117
|
+
try {
|
|
3118
|
+
await this.embeddingService.notifyEmbeddingPersisted(
|
|
3119
|
+
entityId,
|
|
3120
|
+
factId,
|
|
3121
|
+
null
|
|
3122
|
+
);
|
|
3123
|
+
} catch (hookErr) {
|
|
3124
|
+
console.warn(
|
|
3125
|
+
`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`,
|
|
3126
|
+
hookErr
|
|
3127
|
+
);
|
|
3128
|
+
}
|
|
3129
|
+
}
|
|
3021
3130
|
}
|
|
3022
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
3023
|
-
this._notifyStatusSubscribers(entityId);
|
|
3024
3131
|
try {
|
|
3025
|
-
await this.
|
|
3132
|
+
const canonicalDimValue = await this.metadataRepo.getMeta(
|
|
3133
|
+
"embedding_dimension"
|
|
3134
|
+
);
|
|
3135
|
+
const canonicalDim = canonicalDimValue ? parseInt(canonicalDimValue, 10) : null;
|
|
3136
|
+
if (preservedBlobDims.size === 1) {
|
|
3137
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
3138
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
3139
|
+
await this.embeddingService.storeEmbeddingDimension(preservedDim);
|
|
3140
|
+
const staleMismatchValue = await this.metadataRepo.getMeta(
|
|
3141
|
+
"embedding_dimension_mismatch"
|
|
3142
|
+
);
|
|
3143
|
+
if (staleMismatchValue && parseInt(staleMismatchValue, 10) !== preservedDim) {
|
|
3144
|
+
await this.metadataRepo.setMeta(
|
|
3145
|
+
"embedding_dimension_mismatch",
|
|
3146
|
+
String(preservedDim),
|
|
3147
|
+
this.db
|
|
3148
|
+
);
|
|
3149
|
+
}
|
|
3150
|
+
await this.embeddingService.reconcileEmbeddingDimension();
|
|
3151
|
+
} else {
|
|
3152
|
+
await this.metadataRepo.setMeta(
|
|
3153
|
+
"embedding_dimension_mismatch",
|
|
3154
|
+
String(canonicalDim),
|
|
3155
|
+
this.db
|
|
3156
|
+
);
|
|
3157
|
+
}
|
|
3158
|
+
} else if (preservedBlobDims.size > 1) {
|
|
3159
|
+
if (canonicalDim === null) {
|
|
3160
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort(
|
|
3161
|
+
(a, b) => a - b
|
|
3162
|
+
);
|
|
3163
|
+
await this.embeddingService.storeEmbeddingDimension(
|
|
3164
|
+
sortedPreservedBlobDims[0]
|
|
3165
|
+
);
|
|
3166
|
+
await this.metadataRepo.setMeta(
|
|
3167
|
+
"embedding_dimension_mismatch",
|
|
3168
|
+
String(sortedPreservedBlobDims[0]),
|
|
3169
|
+
this.db
|
|
3170
|
+
);
|
|
3171
|
+
} else {
|
|
3172
|
+
await this.metadataRepo.setMeta(
|
|
3173
|
+
"embedding_dimension_mismatch",
|
|
3174
|
+
String(canonicalDim),
|
|
3175
|
+
this.db
|
|
3176
|
+
);
|
|
3177
|
+
}
|
|
3178
|
+
}
|
|
3026
3179
|
} finally {
|
|
3027
|
-
this.
|
|
3028
|
-
this._notifyStatusSubscribers(entityId);
|
|
3180
|
+
this.searchService.evictCache(entityId);
|
|
3029
3181
|
}
|
|
3030
3182
|
}
|
|
3031
|
-
|
|
3032
|
-
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
if (
|
|
3046
|
-
|
|
3183
|
+
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
3184
|
+
console.warn(
|
|
3185
|
+
`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`
|
|
3186
|
+
);
|
|
3187
|
+
}
|
|
3188
|
+
_normalizeImportedSourceType(raw, ctx) {
|
|
3189
|
+
if (raw === "user_document") return "immutable_document";
|
|
3190
|
+
if (raw === "agent_inferred") return "librarian_inferred";
|
|
3191
|
+
const allowed = [
|
|
3192
|
+
"user_stated",
|
|
3193
|
+
"librarian_inferred",
|
|
3194
|
+
"user_confirmed",
|
|
3195
|
+
"immutable_document"
|
|
3196
|
+
];
|
|
3197
|
+
if (allowed.includes(raw))
|
|
3198
|
+
return raw;
|
|
3199
|
+
const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
|
|
3200
|
+
throw new Error(
|
|
3201
|
+
`importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
|
|
3202
|
+
);
|
|
3203
|
+
}
|
|
3204
|
+
async assertNoLegacySourceTypes() {
|
|
3205
|
+
if (!await this.entryRepo.hasLegacySourceTypes()) return;
|
|
3206
|
+
const count = await this.entryRepo.countLegacySourceTypes();
|
|
3207
|
+
throw new Error(
|
|
3208
|
+
`Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
|
|
3209
|
+
|
|
3210
|
+
${this.entryRepo.getLegacyMigrationSQL()}
|
|
3211
|
+
|
|
3212
|
+
After running the migration SQL, restart your application.`
|
|
3213
|
+
);
|
|
3214
|
+
}
|
|
3215
|
+
};
|
|
3216
|
+
|
|
3217
|
+
// src/services/EmbeddingService.ts
|
|
3218
|
+
/**
 * Computes and persists fact embeddings, tracks the embedding dimension in
 * the metadata store, and forwards persisted vectors to the optional
 * `options.vectorRanker.onEmbeddingPersisted` hook.
 */
var EmbeddingService = class {
  /**
   * @param {object} db - Database handle; passed as the last argument to metadata writes.
   * @param {object} options - Reads `llmProvider.embed`, `vectorRanker`,
   *   `forceDeleteIgnoreRankerHook` and `deletionHookTimeoutMs`.
   * @param {object} entryRepo - Must provide `countStaleEmbeddings` and `updateEmbeddingBlob`.
   * @param {object} metadataRepo - Must provide `getMeta`, `setMeta` and `clearDimensionMismatch`.
   */
  constructor(db, options, entryRepo, metadataRepo) {
    this.db = db;
    this.options = options;
    this.entryRepo = entryRepo;
    this.metadataRepo = metadataRepo;
  }

  /**
   * Records `dim` as the canonical "embedding_dimension" when none is stored.
   * When a different dimension is already stored, the canonical value is left
   * alone; a warning is logged and `dim` is written to
   * "embedding_dimension_mismatch" instead.
   *
   * @param {number} dim - Length of the vector that was just produced.
   * @returns {Promise<void>}
   */
  async storeEmbeddingDimension(dim) {
    const existing = await this.metadataRepo.getMeta("embedding_dimension");
    if (!existing) {
      await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
      return;
    }
    const storedDim = Number.parseInt(existing, 10);
    if (storedDim === dim) {
      return;
    }
    console.warn(
      `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
    );
    await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
  }

  /**
   * Promotes "embedding_dimension_mismatch" to the canonical
   * "embedding_dimension" when safe, i.e. when
   * `entryRepo.countStaleEmbeddings(newDim)` reports zero. The mismatch
   * marker is cleared afterwards. Does nothing when no mismatch is recorded.
   *
   * @returns {Promise<void>}
   */
  async reconcileEmbeddingDimension() {
    const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
    if (!mismatchValue) {
      return;
    }
    const newDim = Number.parseInt(mismatchValue, 10);
    const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
    if (residualCount !== 0) {
      return;
    }
    await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
    await this.metadataRepo.clearDimensionMismatch(this.db);
  }

  /**
   * Embeds "<title> <body> <tags>" for a fact and stores the result as a
   * float32 blob.
   *
   * Never throws: every failure is logged and reported as `false`. That
   * covers a missing embed function, an empty or non-finite vector, values
   * that overflow float32, and repository errors.
   *
   * @param {{id: string, entity_id: string, title: string, body: string, tags?: string[]|string|null}} fact
   *   `tags` may be an array, a JSON-encoded array, a plain string, or nullish.
   * @returns {Promise<boolean>} `true` only when the blob was written.
   */
  async embedFact(fact) {
    const embedFn = this.options.llmProvider.embed;
    if (!embedFn) {
      return false;
    }

    let tagsStr;
    if (Array.isArray(fact.tags)) {
      tagsStr = fact.tags.join(" ");
    } else if (fact.tags == null) {
      // Missing tags contribute nothing; otherwise the template below would
      // embed the literal text "undefined" / "null".
      tagsStr = "";
    } else {
      try {
        const parsed = JSON.parse(fact.tags);
        tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
      } catch {
        // Not JSON: use the raw value as the tag text.
        tagsStr = fact.tags;
      }
    }
    const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();

    try {
      const vector = await embedFn(text);
      const isUsable = vector.length !== 0 && vector.every((v) => typeof v === "number" && Number.isFinite(v));
      if (!isUsable) {
        console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
        return false;
      }

      // Finite doubles can still become +/-Infinity once narrowed to float32.
      const float32Vector = new Float32Array(vector);
      if (float32Vector.some((v) => !Number.isFinite(v))) {
        console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
        return false;
      }

      await this.storeEmbeddingDimension(float32Vector.length);
      const blob = new Uint8Array(float32Vector.buffer);
      await this.entryRepo.updateEmbeddingBlob(fact.id, blob);

      // The blob is already persisted; a failing hook must not undo that.
      try {
        await this.notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
      } catch (hookErr) {
        console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
      }
      return true;
    } catch (err) {
      console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
      return false;
    }
  }

  /**
   * Invokes `vectorRanker.onEmbeddingPersisted` (if configured) with a copy
   * of the vector, so the hook cannot mutate the caller's buffer. Hook errors
   * propagate to the caller.
   *
   * @param {string} entityId
   * @param {string} factId
   * @param {Float32Array|null} vector - `null` is forwarded as `null`.
   * @returns {Promise<void>}
   */
  async notifyEmbeddingPersisted(entityId, factId, vector) {
    if (!this.options.vectorRanker?.onEmbeddingPersisted) {
      return;
    }
    const vectorCopy = vector ? vector.slice() : null;
    await this.options.vectorRanker.onEmbeddingPersisted({
      entityId,
      factId,
      vector: vectorCopy
    });
  }

  /**
   * Like {@link notifyEmbeddingPersisted}, but bounded by
   * `options.deletionHookTimeoutMs` (default 30000 ms) and skipped entirely
   * when `options.forceDeleteIgnoreRankerHook === true`.
   *
   * @param {string} entityId
   * @param {string} factId
   * @param {Float32Array|null} vector
   * @returns {Promise<void>}
   * @throws {Error} When the timeout option is not a positive finite number,
   *   when the hook rejects, or when the hook exceeds the timeout. The
   *   timeout error is tagged with `HOOK_TIMEOUT_MARKER`.
   */
  async notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
    if (!this.options.vectorRanker?.onEmbeddingPersisted) {
      return;
    }
    if (this.options.forceDeleteIgnoreRankerHook === true) {
      return;
    }
    const vectorCopy = vector ? vector.slice() : null;
    const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
    if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
      throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
    }
    const timeoutMs = rawTimeout;

    let timeoutHandle;
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => {
        const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
        timeoutError[HOOK_TIMEOUT_MARKER] = true;
        reject(timeoutError);
      }, timeoutMs);
    });
    // Started from a resolved promise so a synchronous throw inside the hook
    // becomes a rejection instead of escaping before the race is set up.
    const hookPromise = Promise.resolve().then(
      () => this.options.vectorRanker.onEmbeddingPersisted({
        entityId,
        factId,
        vector: vectorCopy
      })
    );

    try {
      await Promise.race([hookPromise, timeoutPromise]);
    } catch (err) {
      // If the timeout won, the hook may still reject later; attach a no-op
      // handler so that late rejection is not reported as unhandled.
      hookPromise.catch(() => {
      });
      throw err;
    } finally {
      if (timeoutHandle) clearTimeout(timeoutHandle);
    }
  }
};
|
|
3341
|
+
|
|
3342
|
+
// src/readOptions.ts
|
|
3343
|
+
/**
 * Normalizes the `entityId` argument of a read into a de-duplicated list.
 *
 * @param {string|string[]} entityId - A single id or an array of ids.
 * @returns {string[]} A new array of the distinct ids, in first-seen order.
 *   A single id becomes a one-element array; the input array is not mutated.
 */
function normalizeEntityIds(entityId) {
  const input = Array.isArray(entityId) ? entityId : [entityId];
  // A Set iterates in insertion order, so spreading it keeps the first
  // occurrence of each id and drops later repeats.
  return [...new Set(input)];
}
|
|
3354
|
+
/**
 * Builds a clean weight table for exactly the given entity ids.
 *
 * @param {string[]} entityIds - Ids that should appear in the result.
 * @param {Record<string, number>|null|undefined} tierWeights - Caller-supplied
 *   weights. `undefined` and `null` both mean "no weights supplied".
 * @returns {Record<string, number>|undefined} `undefined` when no weights
 *   were supplied. Otherwise a prototype-less object that maps every id in
 *   `entityIds` to a weight: a missing or non-finite weight becomes 1, and a
 *   negative weight is clamped to 0.
 */
function sanitizeTierWeights(entityIds, tierWeights) {
  // `== null` also covers an explicit null, which previously crashed on the
  // property lookup below.
  if (tierWeights == null) return undefined;
  // No prototype, so ids such as "constructor" or "__proto__" are plain keys.
  const sanitized = Object.create(null);
  for (const entityId of entityIds) {
    const raw = tierWeights[entityId];
    sanitized[entityId] = Number.isFinite(raw) ? Math.max(0, raw) : 1;
  }
  return sanitized;
}
|
|
3367
|
+
/**
 * Scales a relevance score by the weight recorded for its entity.
 *
 * @param {number} score - Unweighted score.
 * @param {string} entityId - Entity the scored row belongs to.
 * @param {Record<string, number>|undefined} sanitizedTierWeights - Weight
 *   table; an absent table or absent entry counts as weight 1.
 * @returns {number} `score * weight`, or `-Infinity` when the weight is
 *   exactly 0, so a zero-weight entity ranks below every real score instead
 *   of tying at 0.
 */
function applyTierWeight(score, entityId, sanitizedTierWeights) {
  const weight = sanitizedTierWeights?.[entityId] ?? 1;
  return weight === 0 ? -Infinity : score * weight;
}
|
|
3372
|
+
/**
 * Reports whether a read was requested with an array of entity ids.
 * `RetrievalService.read` uses the result to decide whether to attach a
 * `metadata` object to what it returns.
 *
 * @param {string|string[]} entityId - The `entityId` argument as the caller passed it.
 * @returns {boolean} `true` for any array (including an empty one), `false` otherwise.
 */
function shouldExposeReadMetadata(entityId) {
  return Array.isArray(entityId);
}
|
|
3375
|
+
|
|
3376
|
+
// src/services/RetrievalService.ts
|
|
3377
|
+
var RetrievalService = class {
|
|
3378
|
+
constructor(options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService) {
|
|
3379
|
+
this.options = options;
|
|
3380
|
+
this.entryRepo = entryRepo;
|
|
3381
|
+
this.taskRepo = taskRepo;
|
|
3382
|
+
this.eventRepo = eventRepo;
|
|
3383
|
+
this.metadataRepo = metadataRepo;
|
|
3384
|
+
this.searchService = searchService;
|
|
3385
|
+
}
|
|
3386
|
+
async read(entityId, query, options) {
|
|
3387
|
+
const config = this.options.config;
|
|
3388
|
+
const entityIds = normalizeEntityIds(entityId);
|
|
3389
|
+
const sanitizedTierWeights = shouldExposeReadMetadata(entityId) ? sanitizeTierWeights(entityIds, options?.tierWeights) : void 0;
|
|
3390
|
+
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
3391
|
+
if (entityIds.length === 0) {
|
|
3392
|
+
const empty = { facts: [], tasks: [], events: [] };
|
|
3393
|
+
if (exposeMetadata) {
|
|
3394
|
+
empty.metadata = { query, entityIds: [] };
|
|
3395
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
3107
3396
|
}
|
|
3397
|
+
return empty;
|
|
3108
3398
|
}
|
|
3109
|
-
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3399
|
+
const MAX_ENTITY_IDS = 100;
|
|
3400
|
+
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
3401
|
+
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
3402
|
+
}
|
|
3403
|
+
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
3404
|
+
if (nullByteId !== void 0) {
|
|
3405
|
+
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
3406
|
+
}
|
|
3407
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
3408
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
3409
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
3410
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
3411
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
3412
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
3413
|
+
const skipEmbed = weight === 0;
|
|
3414
|
+
const embedFn = this.options.llmProvider.embed;
|
|
3415
|
+
const trimmedQuery = query.trim();
|
|
3416
|
+
let facts = [];
|
|
3417
|
+
let scoreByFactId;
|
|
3418
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
3419
|
+
let usedEmbed = false;
|
|
3420
|
+
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
3421
|
+
if (scoredEntityIds.length === 0) {
|
|
3422
|
+
usedEmbed = true;
|
|
3423
|
+
} else if (!skipEmbed && embedFn) {
|
|
3424
|
+
let rankerShouldRethrow = false;
|
|
3425
|
+
let pendingRankerFallbackError;
|
|
3426
|
+
try {
|
|
3427
|
+
const queryVec = await embedFn(trimmedQuery);
|
|
3428
|
+
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
3429
|
+
throw new Error(
|
|
3430
|
+
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
3431
|
+
);
|
|
3432
|
+
}
|
|
3433
|
+
const storedDimValue = await this.metadataRepo.getMeta("embedding_dimension");
|
|
3434
|
+
if (storedDimValue) {
|
|
3435
|
+
const storedDim = parseInt(storedDimValue, 10);
|
|
3436
|
+
if (storedDim !== queryVec.length) {
|
|
3437
|
+
throw new Error(
|
|
3438
|
+
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
3439
|
+
);
|
|
3440
|
+
}
|
|
3441
|
+
}
|
|
3442
|
+
const mismatchedCount = await this.entryRepo.countDimensionMismatched(scoredEntityIds, queryVec.length);
|
|
3443
|
+
if (mismatchedCount > 0) {
|
|
3444
|
+
throw new Error(
|
|
3445
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
3446
|
+
);
|
|
3447
|
+
}
|
|
3448
|
+
const useRanker = Boolean(this.options.vectorRanker);
|
|
3449
|
+
let candidateRows;
|
|
3450
|
+
let populateCache = entityIds.length === 1;
|
|
3451
|
+
let miniSearchScores;
|
|
3452
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
3453
|
+
populateCache = false;
|
|
3454
|
+
const preResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, Number.MAX_SAFE_INTEGER);
|
|
3455
|
+
if (preResults.length === 0) {
|
|
3456
|
+
candidateRows = null;
|
|
3457
|
+
} else {
|
|
3458
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
3459
|
+
if (topKResults.length === 0) {
|
|
3460
|
+
candidateRows = null;
|
|
3461
|
+
} else {
|
|
3462
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
3463
|
+
if (useRanker) {
|
|
3464
|
+
candidateRows = await this.entryRepo.findMetadataByIds(topKIds);
|
|
3465
|
+
} else {
|
|
3466
|
+
candidateRows = await this.entryRepo.findWithEmbeddingsByIds(topKIds);
|
|
3467
|
+
}
|
|
3468
|
+
if (weight !== void 0 && weight < 1) {
|
|
3469
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
3470
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
3471
|
+
}
|
|
3472
|
+
}
|
|
3473
|
+
}
|
|
3126
3474
|
} else {
|
|
3127
|
-
|
|
3475
|
+
if (useRanker) {
|
|
3476
|
+
candidateRows = await this.entryRepo.findMetadataByEntityIds(scoredEntityIds);
|
|
3477
|
+
} else {
|
|
3478
|
+
candidateRows = await this.entryRepo.findWithEmbeddingsByEntityIds(scoredEntityIds);
|
|
3479
|
+
}
|
|
3480
|
+
if (weight !== void 0 && weight < 1) {
|
|
3481
|
+
miniSearchScores = this.searchService.getMiniSearchScores(trimmedQuery, scoredEntityIds);
|
|
3482
|
+
}
|
|
3483
|
+
}
|
|
3484
|
+
if (candidateRows === null) {
|
|
3485
|
+
usedEmbed = true;
|
|
3486
|
+
} else {
|
|
3487
|
+
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
3488
|
+
let scored;
|
|
3489
|
+
if (useRanker) {
|
|
3490
|
+
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
3491
|
+
for (const row of candidateRows) {
|
|
3492
|
+
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
3493
|
+
rows.push(row);
|
|
3494
|
+
candidateRowsByEntity.set(row.entity_id, rows);
|
|
3495
|
+
}
|
|
3496
|
+
try {
|
|
3497
|
+
const rankerResultsByEntity = await Promise.all(
|
|
3498
|
+
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
3499
|
+
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
3500
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
3501
|
+
const ranked = await this._rankWithVectorRanker({
|
|
3502
|
+
entityId: scopedEntityId,
|
|
3503
|
+
queryVec,
|
|
3504
|
+
candidateIds,
|
|
3505
|
+
candidateRows: rowsForEntity,
|
|
3506
|
+
weight,
|
|
3507
|
+
miniSearchScores,
|
|
3508
|
+
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
3509
|
+
});
|
|
3510
|
+
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
3511
|
+
})
|
|
3512
|
+
);
|
|
3513
|
+
scored = rankerResultsByEntity.flat();
|
|
3514
|
+
const scoredIds = new Set(scored.map((s) => s.id));
|
|
3515
|
+
const metadataById = new Map(
|
|
3516
|
+
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
3517
|
+
);
|
|
3518
|
+
scored = scored.map((row) => {
|
|
3519
|
+
const metadata = metadataById.get(row.id);
|
|
3520
|
+
return {
|
|
3521
|
+
...row,
|
|
3522
|
+
updated_at: metadata?.updated_at ?? null,
|
|
3523
|
+
access_count: metadata?.access_count ?? null
|
|
3524
|
+
};
|
|
3525
|
+
});
|
|
3526
|
+
const isHybrid = weight !== void 0 && weight < 1;
|
|
3527
|
+
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
3528
|
+
if (maxBackfill > 0) {
|
|
3529
|
+
if (isHybrid) {
|
|
3530
|
+
const topK = [];
|
|
3531
|
+
for (const row of candidateRows) {
|
|
3532
|
+
if (scoredIds.has(row.id)) continue;
|
|
3533
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
3534
|
+
const candidate = { row, kwScore };
|
|
3535
|
+
if (topK.length < maxBackfill) {
|
|
3536
|
+
let insertIdx = topK.length;
|
|
3537
|
+
for (let i = 0; i < topK.length; i++) {
|
|
3538
|
+
const cmp = this._compareScoredRows(
|
|
3539
|
+
{
|
|
3540
|
+
id: candidate.row.id,
|
|
3541
|
+
score: candidate.kwScore,
|
|
3542
|
+
updated_at: candidate.row.updated_at,
|
|
3543
|
+
access_count: candidate.row.access_count
|
|
3544
|
+
},
|
|
3545
|
+
{
|
|
3546
|
+
id: topK[i].row.id,
|
|
3547
|
+
score: topK[i].kwScore,
|
|
3548
|
+
updated_at: topK[i].row.updated_at,
|
|
3549
|
+
access_count: topK[i].row.access_count
|
|
3550
|
+
}
|
|
3551
|
+
);
|
|
3552
|
+
if (cmp < 0) {
|
|
3553
|
+
insertIdx = i;
|
|
3554
|
+
break;
|
|
3555
|
+
}
|
|
3556
|
+
}
|
|
3557
|
+
topK.splice(insertIdx, 0, candidate);
|
|
3558
|
+
} else {
|
|
3559
|
+
const cmpWorst = this._compareScoredRows(
|
|
3560
|
+
{
|
|
3561
|
+
id: candidate.row.id,
|
|
3562
|
+
score: candidate.kwScore,
|
|
3563
|
+
updated_at: candidate.row.updated_at,
|
|
3564
|
+
access_count: candidate.row.access_count
|
|
3565
|
+
},
|
|
3566
|
+
{
|
|
3567
|
+
id: topK[maxBackfill - 1].row.id,
|
|
3568
|
+
score: topK[maxBackfill - 1].kwScore,
|
|
3569
|
+
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
3570
|
+
access_count: topK[maxBackfill - 1].row.access_count
|
|
3571
|
+
}
|
|
3572
|
+
);
|
|
3573
|
+
if (cmpWorst < 0) {
|
|
3574
|
+
let insertIdx = maxBackfill - 1;
|
|
3575
|
+
for (let i = 0; i < topK.length; i++) {
|
|
3576
|
+
const cmp = this._compareScoredRows(
|
|
3577
|
+
{
|
|
3578
|
+
id: candidate.row.id,
|
|
3579
|
+
score: candidate.kwScore,
|
|
3580
|
+
updated_at: candidate.row.updated_at,
|
|
3581
|
+
access_count: candidate.row.access_count
|
|
3582
|
+
},
|
|
3583
|
+
{
|
|
3584
|
+
id: topK[i].row.id,
|
|
3585
|
+
score: topK[i].kwScore,
|
|
3586
|
+
updated_at: topK[i].row.updated_at,
|
|
3587
|
+
access_count: topK[i].row.access_count
|
|
3588
|
+
}
|
|
3589
|
+
);
|
|
3590
|
+
if (cmp < 0) {
|
|
3591
|
+
insertIdx = i;
|
|
3592
|
+
break;
|
|
3593
|
+
}
|
|
3594
|
+
}
|
|
3595
|
+
topK.splice(insertIdx, 0, candidate);
|
|
3596
|
+
topK.pop();
|
|
3597
|
+
}
|
|
3598
|
+
}
|
|
3599
|
+
}
|
|
3600
|
+
for (const { row, kwScore } of topK) {
|
|
3601
|
+
scored.push({
|
|
3602
|
+
id: row.id,
|
|
3603
|
+
entity_id: row.entity_id,
|
|
3604
|
+
score: (1 - weight) * kwScore,
|
|
3605
|
+
updated_at: row.updated_at,
|
|
3606
|
+
access_count: row.access_count
|
|
3607
|
+
});
|
|
3608
|
+
}
|
|
3609
|
+
} else {
|
|
3610
|
+
const omitted = [];
|
|
3611
|
+
for (const row of candidateRows) {
|
|
3612
|
+
if (scoredIds.has(row.id)) continue;
|
|
3613
|
+
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
3614
|
+
}
|
|
3615
|
+
if (omitted.length > 0) {
|
|
3616
|
+
this._tieBreakSort(omitted);
|
|
3617
|
+
scored.push(...omitted.slice(0, maxBackfill));
|
|
3618
|
+
}
|
|
3619
|
+
}
|
|
3620
|
+
}
|
|
3621
|
+
} catch (rankerErr) {
|
|
3622
|
+
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
3623
|
+
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
3624
|
+
this.options.onVectorRankerFallback?.({
|
|
3625
|
+
error: this._sanitizeRankerError(rankerError),
|
|
3626
|
+
policy
|
|
3627
|
+
});
|
|
3628
|
+
if (policy === "throw") {
|
|
3629
|
+
rankerShouldRethrow = true;
|
|
3630
|
+
throw rankerError;
|
|
3631
|
+
} else if (policy === "js-cosine") {
|
|
3632
|
+
let fallbackRows = candidateRows;
|
|
3633
|
+
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
3634
|
+
const rowIds = fallbackRows.map((r) => r.id);
|
|
3635
|
+
const embeddingRows = await this.entryRepo.findEmbeddingsByIds(rowIds);
|
|
3636
|
+
const embeddingsMap = new Map(embeddingRows.map((row) => [row.id, row]));
|
|
3637
|
+
fallbackRows = fallbackRows.map((r) => ({
|
|
3638
|
+
...r,
|
|
3639
|
+
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
3640
|
+
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
3641
|
+
}));
|
|
3642
|
+
}
|
|
3643
|
+
scored = await this.searchService.rankSemantic({
|
|
3644
|
+
entityId: entityCacheKey,
|
|
3645
|
+
queryVec,
|
|
3646
|
+
candidateRows: fallbackRows,
|
|
3647
|
+
weight,
|
|
3648
|
+
miniSearchScores,
|
|
3649
|
+
populateCache,
|
|
3650
|
+
limit: fallbackRows.length,
|
|
3651
|
+
skipSort: true
|
|
3652
|
+
// read() re-sorts after applying tier weights
|
|
3653
|
+
});
|
|
3654
|
+
} else if (policy === "keyword") {
|
|
3655
|
+
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
3656
|
+
const topResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, keywordOversampledLimit);
|
|
3657
|
+
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
3658
|
+
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
3659
|
+
scored = topResults.map((result) => {
|
|
3660
|
+
const metadata = candidateMap.get(result.id);
|
|
3661
|
+
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
3662
|
+
return {
|
|
3663
|
+
id: result.id,
|
|
3664
|
+
entity_id: entityForScore,
|
|
3665
|
+
score: result.score ?? 0,
|
|
3666
|
+
access_count: metadata?.access_count ?? null,
|
|
3667
|
+
updated_at: metadata?.updated_at ?? null
|
|
3668
|
+
};
|
|
3669
|
+
});
|
|
3670
|
+
} else {
|
|
3671
|
+
scored = [];
|
|
3672
|
+
}
|
|
3673
|
+
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
3674
|
+
const mirrored = new Error("Vector ranker failed, falling back", {
|
|
3675
|
+
cause: this._sanitizeRankerError(rankerErr)
|
|
3676
|
+
});
|
|
3677
|
+
pendingRankerFallbackError = mirrored;
|
|
3678
|
+
}
|
|
3679
|
+
}
|
|
3680
|
+
} else {
|
|
3681
|
+
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
3682
|
+
scored = await this.searchService.rankSemantic({
|
|
3683
|
+
entityId: entityCacheKey,
|
|
3684
|
+
queryVec,
|
|
3685
|
+
candidateRows,
|
|
3686
|
+
weight,
|
|
3687
|
+
miniSearchScores,
|
|
3688
|
+
populateCache,
|
|
3689
|
+
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
3690
|
+
skipSort: jsCosineNeedsTierSort
|
|
3691
|
+
// read() re-sorts after applying tier weights
|
|
3692
|
+
});
|
|
3693
|
+
}
|
|
3694
|
+
if (scored.length > 0) {
|
|
3695
|
+
scored = scored.map((row) => ({
|
|
3696
|
+
...row,
|
|
3697
|
+
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
3698
|
+
}));
|
|
3699
|
+
this._tieBreakSort(scored);
|
|
3700
|
+
const selectedScored = scored.slice(0, maxResults);
|
|
3701
|
+
const topIds = selectedScored.map((s) => s.id);
|
|
3702
|
+
if (exposeMetadata && trimmedQuery) {
|
|
3703
|
+
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
3704
|
+
}
|
|
3705
|
+
if (topIds.length > 0) {
|
|
3706
|
+
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
3707
|
+
if (facts2.length < topIds.length) {
|
|
3708
|
+
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
3709
|
+
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
3710
|
+
const missingCount = missingIds.length;
|
|
3711
|
+
const sample = missingIds.slice(0, 5);
|
|
3712
|
+
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
3713
|
+
const error = new Error(
|
|
3714
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
3715
|
+
);
|
|
3716
|
+
this.options.onRetrievalFallback?.(error);
|
|
3717
|
+
}
|
|
3718
|
+
facts = facts2;
|
|
3719
|
+
}
|
|
3720
|
+
if (pendingRankerFallbackError) {
|
|
3721
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
3722
|
+
pendingRankerFallbackError = void 0;
|
|
3723
|
+
}
|
|
3724
|
+
usedEmbed = true;
|
|
3725
|
+
} else {
|
|
3726
|
+
if (pendingRankerFallbackError) {
|
|
3727
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
3728
|
+
pendingRankerFallbackError = void 0;
|
|
3729
|
+
}
|
|
3730
|
+
usedEmbed = true;
|
|
3731
|
+
}
|
|
3732
|
+
}
|
|
3733
|
+
} catch (err) {
|
|
3734
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
3735
|
+
if (rankerShouldRethrow) {
|
|
3736
|
+
throw error;
|
|
3737
|
+
}
|
|
3738
|
+
if (pendingRankerFallbackError) {
|
|
3739
|
+
error.cause = pendingRankerFallbackError;
|
|
3740
|
+
pendingRankerFallbackError = void 0;
|
|
3128
3741
|
}
|
|
3742
|
+
this.options.onRetrievalFallback?.(error);
|
|
3129
3743
|
}
|
|
3130
3744
|
}
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
|
|
3140
|
-
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3745
|
+
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
3746
|
+
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
3747
|
+
const results = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, fallbackOversampledLimit);
|
|
3748
|
+
const candidates = results.map((r) => ({
|
|
3749
|
+
id: r.id,
|
|
3750
|
+
entity_id: r.entity_id,
|
|
3751
|
+
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
3752
|
+
updated_at: null,
|
|
3753
|
+
access_count: null
|
|
3754
|
+
}));
|
|
3755
|
+
this._tieBreakSort(candidates);
|
|
3756
|
+
const topCandidates = candidates.slice(0, maxResults);
|
|
3757
|
+
const topIds = topCandidates.map((c) => c.id);
|
|
3758
|
+
if (topIds.length > 0) {
|
|
3759
|
+
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
3760
|
+
if (exposeMetadata) {
|
|
3761
|
+
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
3144
3762
|
}
|
|
3145
|
-
const success = await this.embedFact(row);
|
|
3146
|
-
if (success) embedded++;
|
|
3147
|
-
else failed++;
|
|
3148
|
-
}
|
|
3149
|
-
if (embedded > 0) {
|
|
3150
|
-
await this._reconcileEmbeddingDimension();
|
|
3151
|
-
}
|
|
3152
|
-
} finally {
|
|
3153
|
-
if (entityId) {
|
|
3154
|
-
this.vectorCache.delete(entityId);
|
|
3155
|
-
} else {
|
|
3156
|
-
this.vectorCache.clear();
|
|
3157
3763
|
}
|
|
3158
3764
|
}
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
|
|
3163
|
-
}
|
|
3164
|
-
getEntityStatus(entityId) {
|
|
3165
|
-
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
3166
|
-
let ingesting = false;
|
|
3167
|
-
for (const k of this.activeIngestJobs) {
|
|
3168
|
-
if (k.startsWith(ingestPrefix)) {
|
|
3169
|
-
ingesting = true;
|
|
3170
|
-
break;
|
|
3765
|
+
if (facts.length > 0) {
|
|
3766
|
+
const ids = facts.map((f) => f.id);
|
|
3767
|
+
const now = Date.now();
|
|
3768
|
+
await this.entryRepo.trackAccess(ids, now);
|
|
3171
3769
|
}
|
|
3770
|
+
} else {
|
|
3771
|
+
facts = await this.entryRepo.findRecentByEntityIds(entityIds, maxResults);
|
|
3172
3772
|
}
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3773
|
+
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
3774
|
+
const [tasks, events] = await Promise.all([
|
|
3775
|
+
this.taskRepo.findAllPending(entityIds, entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200)),
|
|
3776
|
+
entityIds.length === 1 ? this.eventRepo.getRecent(entityIds[0], eventsLimit) : this.eventRepo.getRecentForEntities(entityIds, eventsLimit)
|
|
3777
|
+
]);
|
|
3778
|
+
let factScores;
|
|
3779
|
+
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
3780
|
+
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
3781
|
+
}
|
|
3782
|
+
const bundle = { facts, tasks, events: events.reverse() };
|
|
3783
|
+
if (exposeMetadata) {
|
|
3784
|
+
bundle.metadata = { query, entityIds };
|
|
3785
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
3786
|
+
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
3787
|
+
}
|
|
3788
|
+
return bundle;
|
|
3178
3789
|
}
|
|
3179
3790
|
/**
|
|
3180
|
-
*
|
|
3181
|
-
*
|
|
3182
|
-
* returns, then again on every transition where any of `ingesting`,
|
|
3183
|
-
* `librarian`, or `heal` flips. No polling, no duplicate snapshots.
|
|
3184
|
-
*
|
|
3185
|
-
* Returns an idempotent unsubscribe function.
|
|
3186
|
-
*
|
|
3187
|
-
* See also {@link getEntityStatus} for a synchronous point-in-time read.
|
|
3791
|
+
* Returns entity IDs that will participate in scored retrieval.
|
|
3792
|
+
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
3188
3793
|
*/
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
this.statusSubscribers.set(entityId, set);
|
|
3195
|
-
}
|
|
3196
|
-
const entry = { callback, last: this._copyEntityStatus(initial) };
|
|
3197
|
-
set.add(entry);
|
|
3198
|
-
try {
|
|
3199
|
-
callback(this._copyEntityStatus(initial));
|
|
3200
|
-
} catch (err) {
|
|
3201
|
-
console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during initial emission`, err);
|
|
3202
|
-
}
|
|
3203
|
-
let active = true;
|
|
3204
|
-
return () => {
|
|
3205
|
-
if (!active) return;
|
|
3206
|
-
active = false;
|
|
3207
|
-
const s = this.statusSubscribers.get(entityId);
|
|
3208
|
-
if (!s) return;
|
|
3209
|
-
s.delete(entry);
|
|
3210
|
-
if (s.size === 0) this.statusSubscribers.delete(entityId);
|
|
3211
|
-
};
|
|
3794
|
+
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
3795
|
+
return entityIds.filter((id) => {
|
|
3796
|
+
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
3797
|
+
return includeZeroWeightEntities === true || w !== 0;
|
|
3798
|
+
});
|
|
3212
3799
|
}
|
|
3213
|
-
|
|
3214
|
-
|
|
3800
|
+
/**
|
|
3801
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
3802
|
+
*/
|
|
3803
|
+
_tieBreakSort(items) {
|
|
3804
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
3215
3805
|
}
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
3226
|
-
|
|
3227
|
-
|
|
3228
|
-
|
|
3229
|
-
|
|
3806
|
+
/**
|
|
3807
|
+
* Comparator for score + deterministic tie-break fields.
|
|
3808
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
3809
|
+
*/
|
|
3810
|
+
_compareScoredRows(a, b) {
|
|
3811
|
+
const scoreDiff = b.score - a.score;
|
|
3812
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
3813
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
3814
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
3815
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
3816
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
3817
|
+
return a.id.localeCompare(b.id);
|
|
3818
|
+
}
|
|
3819
|
+
/**
|
|
3820
|
+
* Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
|
|
3821
|
+
* (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
|
|
3822
|
+
*/
|
|
3823
|
+
async _hydrateFactsByIds(ids, scopedEntityIds, tx) {
|
|
3824
|
+
return this.entryRepo.findByIds(ids, scopedEntityIds, tx);
|
|
3825
|
+
}
|
|
3826
|
+
_sanitizeRankerError(err) {
|
|
3827
|
+
return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
|
|
3828
|
+
}
|
|
3829
|
+
/**
|
|
3830
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
3831
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
3832
|
+
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
3833
|
+
*/
|
|
3834
|
+
async _rankWithVectorRanker(args) {
|
|
3835
|
+
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
3836
|
+
const ranker = this.options.vectorRanker;
|
|
3837
|
+
if (!ranker) {
|
|
3838
|
+
throw new Error("vectorRanker not configured");
|
|
3839
|
+
}
|
|
3840
|
+
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
3841
|
+
const rankerResults = await ranker.rankBySimilarity({
|
|
3842
|
+
entityId,
|
|
3843
|
+
queryVec: queryVecCopy,
|
|
3844
|
+
candidateIds,
|
|
3845
|
+
limit
|
|
3846
|
+
});
|
|
3847
|
+
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
3848
|
+
const seen = /* @__PURE__ */ new Set();
|
|
3849
|
+
const normalized = [];
|
|
3850
|
+
for (const r of rankerResults) {
|
|
3851
|
+
if (normalized.length >= limit) break;
|
|
3852
|
+
if (seen.has(r.id)) continue;
|
|
3853
|
+
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
3854
|
+
if (!Number.isFinite(r.semanticScore)) continue;
|
|
3855
|
+
seen.add(r.id);
|
|
3856
|
+
normalized.push(r);
|
|
3857
|
+
}
|
|
3858
|
+
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
3859
|
+
const scored = normalized.map((r) => {
|
|
3860
|
+
let score = r.semanticScore;
|
|
3861
|
+
if (weight !== void 0) {
|
|
3862
|
+
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
3863
|
+
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
3864
|
+
}
|
|
3230
3865
|
return {
|
|
3231
|
-
|
|
3232
|
-
|
|
3866
|
+
id: r.id,
|
|
3867
|
+
entity_id: entityIdByCandidateId.get(r.id),
|
|
3868
|
+
// allowedIds filter above guarantees membership
|
|
3869
|
+
score
|
|
3233
3870
|
};
|
|
3234
3871
|
});
|
|
3235
|
-
return
|
|
3872
|
+
return scored;
|
|
3236
3873
|
}
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
|
|
3248
|
-
const batchResults = await Promise.all(
|
|
3249
|
-
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
3250
|
-
);
|
|
3251
|
-
for (const [id, bundle] of batchResults) {
|
|
3252
|
-
entities[id] = bundle;
|
|
3253
|
-
}
|
|
3254
|
-
}
|
|
3255
|
-
return { generatedAt: Date.now(), entities };
|
|
3874
|
+
};
|
|
3875
|
+
|
|
3876
|
+
// src/services/WriteService.ts
|
|
3877
|
+
var WriteService = class {
|
|
3878
|
+
constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
|
|
3879
|
+
this.db = db;
|
|
3880
|
+
this.options = options;
|
|
3881
|
+
this.eventRepo = eventRepo;
|
|
3882
|
+
this.metadataRepo = metadataRepo;
|
|
3883
|
+
this.jobManager = jobManager;
|
|
3884
|
+
this.maintenanceService = maintenanceService;
|
|
3256
3885
|
}
|
|
3257
|
-
async
|
|
3258
|
-
const
|
|
3259
|
-
const
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
|
|
3272
|
-
|
|
3273
|
-
|
|
3274
|
-
|
|
3275
|
-
|
|
3276
|
-
|
|
3277
|
-
|
|
3886
|
+
async write(entityId, event) {
|
|
3887
|
+
const id = generateId("evt_");
|
|
3888
|
+
const now = Date.now();
|
|
3889
|
+
let eventType = event.event_type;
|
|
3890
|
+
if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
|
|
3891
|
+
eventType = "observation";
|
|
3892
|
+
}
|
|
3893
|
+
const newEvent = {
|
|
3894
|
+
id,
|
|
3895
|
+
entity_id: entityId,
|
|
3896
|
+
event_type: eventType,
|
|
3897
|
+
summary: event.summary,
|
|
3898
|
+
related_entry_id: event.related_entry_id || null,
|
|
3899
|
+
created_at: now
|
|
3900
|
+
};
|
|
3901
|
+
let shouldRunLibrarian = false;
|
|
3902
|
+
let librarianCount = 0;
|
|
3903
|
+
let prevMemoryCheckpoint = 0;
|
|
3904
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
3905
|
+
await this.eventRepo.add(newEvent, tx);
|
|
3906
|
+
const threshold = this.options.config?.autoLibrarianThreshold || 20;
|
|
3907
|
+
const [count, cp] = await Promise.all([
|
|
3908
|
+
this.eventRepo.count(entityId, tx),
|
|
3909
|
+
this.metadataRepo.getCheckpoint(entityId, tx)
|
|
3910
|
+
]);
|
|
3911
|
+
let memoryCheckpoint = cp.memory ?? 0;
|
|
3912
|
+
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
3913
|
+
if (count - memoryCheckpoint >= threshold) {
|
|
3914
|
+
if (!this.jobManager.isBlocked("librarian", entityId)) {
|
|
3915
|
+
shouldRunLibrarian = true;
|
|
3916
|
+
librarianCount = count;
|
|
3917
|
+
prevMemoryCheckpoint = memoryCheckpoint;
|
|
3918
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: count }, tx);
|
|
3919
|
+
}
|
|
3278
3920
|
}
|
|
3279
|
-
|
|
3280
|
-
|
|
3921
|
+
});
|
|
3922
|
+
if (shouldRunLibrarian) {
|
|
3923
|
+
try {
|
|
3924
|
+
this.jobManager.acquireLock("librarian", entityId);
|
|
3925
|
+
this.runLibrarianThenMaybeHeal(entityId, librarianCount, prevMemoryCheckpoint).catch(console.error).finally(() => {
|
|
3926
|
+
this.jobManager.releaseLock("librarian", entityId);
|
|
3927
|
+
});
|
|
3928
|
+
} catch (e) {
|
|
3929
|
+
if (!(e instanceof WikiBusyError)) throw e;
|
|
3930
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: prevMemoryCheckpoint }, this.db);
|
|
3281
3931
|
}
|
|
3282
3932
|
}
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
}
|
|
3286
|
-
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
3287
|
-
for (const entityId of entityIds) {
|
|
3288
|
-
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
3289
|
-
}
|
|
3933
|
+
}
|
|
3934
|
+
async runLibrarianThenMaybeHeal(entityId, currentEventCount, prevCheckpoint) {
|
|
3290
3935
|
try {
|
|
3291
|
-
await this.
|
|
3292
|
-
|
|
3293
|
-
|
|
3294
|
-
}
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3936
|
+
await this.maintenanceService.doRunLibrarian(entityId);
|
|
3937
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: currentEventCount }, this.db);
|
|
3938
|
+
} catch (e) {
|
|
3939
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: prevCheckpoint }, this.db);
|
|
3940
|
+
throw e;
|
|
3941
|
+
}
|
|
3942
|
+
const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
|
|
3943
|
+
const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
|
|
3944
|
+
let healCheckpoint = cp.heal ?? 0;
|
|
3945
|
+
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
3946
|
+
const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
|
|
3947
|
+
if (shouldRunHeal && this.jobManager.tryAcquireAutoHealLock(entityId)) {
|
|
3948
|
+
try {
|
|
3949
|
+
await this.maintenanceService.doRunHeal(entityId);
|
|
3950
|
+
await this.metadataRepo.updateCheckpoint(entityId, { heal: currentEventCount }, this.db);
|
|
3951
|
+
} finally {
|
|
3952
|
+
this.jobManager.releaseLock("heal", entityId);
|
|
3299
3953
|
}
|
|
3300
3954
|
}
|
|
3301
3955
|
}
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3344
|
-
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
|
|
3357
|
-
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3365
|
-
|
|
3366
|
-
|
|
3367
|
-
|
|
3368
|
-
|
|
3369
|
-
|
|
3370
|
-
|
|
3371
|
-
|
|
3372
|
-
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3404
|
-
|
|
3405
|
-
|
|
3406
|
-
|
|
3407
|
-
|
|
3408
|
-
|
|
3409
|
-
|
|
3410
|
-
|
|
3411
|
-
|
|
3412
|
-
}
|
|
3413
|
-
if (merge) {
|
|
3414
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
3415
|
-
}
|
|
3416
|
-
}
|
|
3417
|
-
await this.taskRepo.upsertForImport({
|
|
3418
|
-
id: task.id,
|
|
3419
|
-
entity_id: entityId,
|
|
3420
|
-
description: task.description,
|
|
3421
|
-
status: task.status,
|
|
3422
|
-
priority: task.priority,
|
|
3423
|
-
created_at: task.created_at,
|
|
3424
|
-
updated_at: safeUpdatedAt,
|
|
3425
|
-
resolved_at: task.resolved_at,
|
|
3426
|
-
deleted_at: task.deleted_at
|
|
3427
|
-
}, tx, safeUpdatedAt);
|
|
3428
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
3429
|
-
}
|
|
3430
|
-
for (const event of bundle.events) {
|
|
3431
|
-
await this.eventRepo.addIgnoreDuplicate({
|
|
3432
|
-
id: event.id,
|
|
3433
|
-
entity_id: entityId,
|
|
3434
|
-
event_type: event.event_type,
|
|
3435
|
-
summary: event.summary,
|
|
3436
|
-
related_entry_id: event.related_entry_id ?? null,
|
|
3437
|
-
created_at: event.created_at
|
|
3438
|
-
}, tx);
|
|
3439
|
-
}
|
|
3440
|
-
});
|
|
3441
|
-
this.vectorCache.delete(entityId);
|
|
3442
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3443
|
-
for (const fact of bundle.facts) {
|
|
3444
|
-
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
3445
|
-
await this.embedFact({
|
|
3446
|
-
id: fact.id,
|
|
3447
|
-
entity_id: entityId,
|
|
3448
|
-
// Use authoritative entityId from dump key, not fact.entity_id
|
|
3449
|
-
title: fact.title,
|
|
3450
|
-
body: fact.body,
|
|
3451
|
-
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
3452
|
-
});
|
|
3956
|
+
};
|
|
3957
|
+
|
|
3958
|
+
// src/WikiMemory.ts
|
|
3959
|
+
var _testAccessNonTestEnvWarned;
|
|
3960
|
+
var WikiMemory = class {
|
|
3961
|
+
constructor(db, options) {
|
|
3962
|
+
/** Emits `__testAccess` console warning at most once per instance when NODE_ENV ≠ "test". */
|
|
3963
|
+
__privateAdd(this, _testAccessNonTestEnvWarned, false);
|
|
3964
|
+
this.db = db;
|
|
3965
|
+
this.options = options;
|
|
3966
|
+
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
3967
|
+
this.outboxRepo = new OutboxRepository(db, this.prefix, !!options.config?.enableOutbox);
|
|
3968
|
+
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
3969
|
+
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
3970
|
+
this.eventRepo = new EventRepository(db, this.prefix);
|
|
3971
|
+
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
3972
|
+
this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
|
|
3973
|
+
this.searchService = new SearchService(this.entryRepo);
|
|
3974
|
+
this.jobManager = new JobManager(this.prefix);
|
|
3975
|
+
this.promptService = new PromptService(options.config?.prompts);
|
|
3976
|
+
this.ingestionService = new IngestionService(
|
|
3977
|
+
this.db,
|
|
3978
|
+
this.prefix,
|
|
3979
|
+
this.options,
|
|
3980
|
+
this.entryRepo,
|
|
3981
|
+
this.searchService,
|
|
3982
|
+
this.jobManager,
|
|
3983
|
+
this.embeddingService,
|
|
3984
|
+
this.promptService
|
|
3985
|
+
);
|
|
3986
|
+
this.maintenanceService = new MaintenanceService(
|
|
3987
|
+
this.db,
|
|
3988
|
+
this.prefix,
|
|
3989
|
+
this.options,
|
|
3990
|
+
this.entryRepo,
|
|
3991
|
+
this.taskRepo,
|
|
3992
|
+
this.eventRepo,
|
|
3993
|
+
this.metadataRepo,
|
|
3994
|
+
this.searchService,
|
|
3995
|
+
this.jobManager,
|
|
3996
|
+
this.embeddingService,
|
|
3997
|
+
this.promptService
|
|
3998
|
+
);
|
|
3999
|
+
this.importExportService = new ImportExportService(
|
|
4000
|
+
this.db,
|
|
4001
|
+
this.entryRepo,
|
|
4002
|
+
this.taskRepo,
|
|
4003
|
+
this.eventRepo,
|
|
4004
|
+
this.metadataRepo,
|
|
4005
|
+
this.searchService,
|
|
4006
|
+
this.jobManager,
|
|
4007
|
+
this.embeddingService
|
|
4008
|
+
);
|
|
4009
|
+
this.retrievalService = new RetrievalService(
|
|
4010
|
+
this.options,
|
|
4011
|
+
this.entryRepo,
|
|
4012
|
+
this.taskRepo,
|
|
4013
|
+
this.eventRepo,
|
|
4014
|
+
this.metadataRepo,
|
|
4015
|
+
this.searchService
|
|
4016
|
+
);
|
|
4017
|
+
this.writeService = new WriteService(
|
|
4018
|
+
this.db,
|
|
4019
|
+
this.options,
|
|
4020
|
+
this.eventRepo,
|
|
4021
|
+
this.metadataRepo,
|
|
4022
|
+
this.jobManager,
|
|
4023
|
+
this.maintenanceService
|
|
4024
|
+
);
|
|
4025
|
+
}
|
|
4026
|
+
/**
|
|
4027
|
+
* Explicit escape hatch for test suites: typed access to composed services for mocks/spies.
|
|
4028
|
+
* If `NODE_ENV` is not `"test"`, emits a single `console.warn` per instance (skipped when `process` is undefined).
|
|
4029
|
+
*/
|
|
4030
|
+
get __testAccess() {
|
|
4031
|
+
const processEnv = typeof globalThis !== "undefined" ? globalThis.process?.env : void 0;
|
|
4032
|
+
if (processEnv !== void 0 && processEnv.NODE_ENV !== "test" && !__privateGet(this, _testAccessNonTestEnvWarned)) {
|
|
4033
|
+
__privateSet(this, _testAccessNonTestEnvWarned, true);
|
|
4034
|
+
console.warn('Warning: WikiMemory.__testAccess is intended for tests (NODE_ENV !== "test").');
|
|
4035
|
+
}
|
|
4036
|
+
return {
|
|
4037
|
+
embeddingService: this.embeddingService,
|
|
4038
|
+
importExportService: this.importExportService,
|
|
4039
|
+
ingestionService: this.ingestionService,
|
|
4040
|
+
maintenanceService: this.maintenanceService,
|
|
4041
|
+
retrievalService: this.retrievalService,
|
|
4042
|
+
searchService: this.searchService,
|
|
4043
|
+
writeService: this.writeService,
|
|
4044
|
+
promptService: this.promptService,
|
|
4045
|
+
entryRepo: this.entryRepo,
|
|
4046
|
+
metadataRepo: this.metadataRepo,
|
|
4047
|
+
jobManager: this.jobManager
|
|
4048
|
+
};
|
|
4049
|
+
}
|
|
4050
|
+
async setup() {
|
|
4051
|
+
const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
|
|
4052
|
+
await setupDatabase(this.db, this.prefix);
|
|
4053
|
+
let currentVersion;
|
|
4054
|
+
if (!entriesExistedBeforeSetup) {
|
|
4055
|
+
await this.metadataRepo.setMeta("schema_version", String(CURRENT_SCHEMA_VERSION), this.db);
|
|
4056
|
+
currentVersion = CURRENT_SCHEMA_VERSION;
|
|
4057
|
+
} else {
|
|
4058
|
+
const schemaVersionValue = await this.metadataRepo.getMeta("schema_version");
|
|
4059
|
+
if (schemaVersionValue) {
|
|
4060
|
+
currentVersion = parseInt(schemaVersionValue, 10);
|
|
4061
|
+
if (!Number.isFinite(currentVersion)) currentVersion = 0;
|
|
4062
|
+
} else {
|
|
4063
|
+
const ftsDdl = await this.metadataRepo.getTableDdl(`${this.prefix}entries_fts`);
|
|
4064
|
+
const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsDdl ?? "");
|
|
4065
|
+
currentVersion = hasPorter ? 1 : 0;
|
|
3453
4066
|
}
|
|
3454
4067
|
}
|
|
3455
|
-
for (const
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
3461
|
-
} catch (hookErr) {
|
|
3462
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
3463
|
-
}
|
|
4068
|
+
for (const migration of MIGRATIONS) {
|
|
4069
|
+
if (migration.version > currentVersion) {
|
|
4070
|
+
await migration.run(this.db, this.prefix);
|
|
4071
|
+
await this.metadataRepo.setMeta("schema_version", String(migration.version), this.db);
|
|
4072
|
+
currentVersion = migration.version;
|
|
3464
4073
|
}
|
|
3465
4074
|
}
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
} catch (hookErr) {
|
|
3471
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
|
|
3472
|
-
}
|
|
4075
|
+
if (entriesExistedBeforeSetup) {
|
|
4076
|
+
const schemaVersionCheck = await this.metadataRepo.getMeta("schema_version");
|
|
4077
|
+
if (!schemaVersionCheck) {
|
|
4078
|
+
await this.metadataRepo.setMeta("schema_version", String(currentVersion), this.db);
|
|
3473
4079
|
}
|
|
3474
4080
|
}
|
|
3475
|
-
|
|
3476
|
-
|
|
3477
|
-
|
|
3478
|
-
|
|
3479
|
-
|
|
3480
|
-
|
|
3481
|
-
|
|
3482
|
-
|
|
3483
|
-
|
|
3484
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(preservedDim), this.db);
|
|
3485
|
-
}
|
|
3486
|
-
await this._reconcileEmbeddingDimension();
|
|
3487
|
-
} else {
|
|
3488
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
|
|
3489
|
-
}
|
|
3490
|
-
} else if (preservedBlobDims.size > 1) {
|
|
3491
|
-
if (canonicalDim === null) {
|
|
3492
|
-
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
3493
|
-
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
3494
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(sortedPreservedBlobDims[0]), this.db);
|
|
3495
|
-
} else {
|
|
3496
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
|
|
4081
|
+
if (entriesExistedBeforeSetup) {
|
|
4082
|
+
await this.importExportService.assertNoLegacySourceTypes();
|
|
4083
|
+
}
|
|
4084
|
+
const rows = await this.entryRepo.findRowsForSourceRefMigration();
|
|
4085
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
4086
|
+
for (const row of rows) {
|
|
4087
|
+
const normalized = normalizeSourceRef(row.source_ref);
|
|
4088
|
+
if (normalized !== row.source_ref) {
|
|
4089
|
+
await this.entryRepo.updateSourceRefByRowid(row.rowid, normalized, tx);
|
|
3497
4090
|
}
|
|
3498
4091
|
}
|
|
3499
|
-
}
|
|
3500
|
-
|
|
4092
|
+
});
|
|
4093
|
+
await this.searchService.sync();
|
|
4094
|
+
}
|
|
4095
|
+
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
4096
|
+
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
4097
|
+
if (!normalizedRef) {
|
|
4098
|
+
throw new Error(`Invalid sourceRef: "${sourceRef}"`);
|
|
4099
|
+
}
|
|
4100
|
+
const normalizedHash = normalizeSourceHash(sourceHash);
|
|
4101
|
+
if (!normalizedHash) {
|
|
4102
|
+
throw new Error(`Invalid sourceHash: must be a 64-character hex string (normalized to lowercase)`);
|
|
3501
4103
|
}
|
|
4104
|
+
const storedHash = await this.entryRepo.findLatestSourceHash(entityId, normalizedRef);
|
|
4105
|
+
if (storedHash === null) return true;
|
|
4106
|
+
const normalizedStoredHash = normalizeSourceHash(storedHash);
|
|
4107
|
+
return normalizedStoredHash !== normalizedHash;
|
|
4108
|
+
}
|
|
4109
|
+
async runPrune(entityId, options) {
|
|
4110
|
+
return this.maintenanceService.runPrune(entityId, options);
|
|
4111
|
+
}
|
|
4112
|
+
async read(entityId, query, options) {
|
|
4113
|
+
return this.retrievalService.read(entityId, query, options);
|
|
4114
|
+
}
|
|
4115
|
+
async getMemoryBundle(entityId) {
|
|
4116
|
+
return this.importExportService.getFullBundle(entityId, { maxEvents: 10 });
|
|
4117
|
+
}
|
|
4118
|
+
async write(entityId, event) {
|
|
4119
|
+
return this.writeService.write(entityId, event);
|
|
4120
|
+
}
|
|
4121
|
+
/**
|
|
4122
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
4123
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
4124
|
+
* set `options.config.prompts.librarianSystemPrompt` at WikiMemory construction time.
|
|
4125
|
+
*/
|
|
4126
|
+
async runLibrarian(entityId, options) {
|
|
4127
|
+
return this.maintenanceService.runLibrarian(entityId, options);
|
|
4128
|
+
}
|
|
4129
|
+
/**
|
|
4130
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
4131
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
4132
|
+
* set `options.config.prompts.healSystemPrompt` at WikiMemory construction time.
|
|
4133
|
+
*/
|
|
4134
|
+
async runHeal(entityId, options) {
|
|
4135
|
+
return this.maintenanceService.runHeal(entityId, options);
|
|
4136
|
+
}
|
|
4137
|
+
async runReembed(entityId, opts) {
|
|
4138
|
+
return this.maintenanceService.runReembed(entityId, opts);
|
|
4139
|
+
}
|
|
4140
|
+
getEntityStatus(entityId) {
|
|
4141
|
+
return this.jobManager.getEntityStatus(entityId);
|
|
4142
|
+
}
|
|
4143
|
+
subscribeEntityStatus(entityId, callback) {
|
|
4144
|
+
return this.jobManager.subscribeEntityStatus(entityId, callback);
|
|
4145
|
+
}
|
|
4146
|
+
clearVectorCache() {
|
|
4147
|
+
this.searchService.evictCache();
|
|
4148
|
+
}
|
|
4149
|
+
async exportDump(entityIds) {
|
|
4150
|
+
return this.importExportService.exportDump(entityIds);
|
|
4151
|
+
}
|
|
4152
|
+
async importDump(dump, opts) {
|
|
4153
|
+
return this.importExportService.importDump(dump, opts);
|
|
3502
4154
|
}
|
|
3503
4155
|
async forget(entityId, params) {
|
|
3504
|
-
|
|
3505
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
3506
|
-
blockingOperation = "librarian";
|
|
3507
|
-
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
3508
|
-
blockingOperation = "heal";
|
|
3509
|
-
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3510
|
-
blockingOperation = "prune";
|
|
3511
|
-
} else if (this._isReembedActive(entityId)) {
|
|
3512
|
-
blockingOperation = "reembed";
|
|
3513
|
-
} else if (this._isIngestActiveFor(entityId)) {
|
|
3514
|
-
blockingOperation = "ingest";
|
|
3515
|
-
} else if (this._isImportActiveFor(entityId)) {
|
|
3516
|
-
blockingOperation = "import";
|
|
3517
|
-
} else if (this._isForgetActiveFor(entityId)) {
|
|
3518
|
-
blockingOperation = "forget";
|
|
3519
|
-
}
|
|
3520
|
-
if (blockingOperation !== null) {
|
|
3521
|
-
throw new WikiBusyError(blockingOperation, entityId);
|
|
3522
|
-
}
|
|
3523
|
-
const forgetKey = this._forgetKey(entityId);
|
|
3524
|
-
this.activeMaintenanceJobs.add(forgetKey);
|
|
3525
|
-
try {
|
|
3526
|
-
const now = Date.now();
|
|
3527
|
-
let deletedEntries = 0;
|
|
3528
|
-
let deletedTasks = 0;
|
|
3529
|
-
const deletedEntryIds = [];
|
|
3530
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3531
|
-
if (params.clearAll) {
|
|
3532
|
-
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
|
|
3533
|
-
const entriesRes = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3534
|
-
const tasksRes = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3535
|
-
await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
|
|
3536
|
-
deletedEntries = entriesRes;
|
|
3537
|
-
deletedTasks = tasksRes;
|
|
3538
|
-
} else {
|
|
3539
|
-
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
3540
|
-
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
3541
|
-
if (hasIdSelectors && hasSourceSelectors) {
|
|
3542
|
-
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
3543
|
-
}
|
|
3544
|
-
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
3545
|
-
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
3546
|
-
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
3547
|
-
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
3548
|
-
if (params.entryId) {
|
|
3549
|
-
const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
|
|
3550
|
-
if (entryId) deletedEntryIds.push(entryId);
|
|
3551
|
-
}
|
|
3552
|
-
if (sourceRef || sourceHash) {
|
|
3553
|
-
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
|
|
3554
|
-
}
|
|
3555
|
-
const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
3556
|
-
const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
3557
|
-
const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
|
|
3558
|
-
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
3559
|
-
entryPromise ?? Promise.resolve(false),
|
|
3560
|
-
taskDeletedPromise ?? Promise.resolve(false),
|
|
3561
|
-
refPromise ?? Promise.resolve(0)
|
|
3562
|
-
]);
|
|
3563
|
-
if (entryResult) deletedEntries++;
|
|
3564
|
-
if (taskResult) deletedTasks++;
|
|
3565
|
-
deletedEntries += refResult;
|
|
3566
|
-
}
|
|
3567
|
-
});
|
|
3568
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3569
|
-
this.vectorCache.delete(entityId);
|
|
3570
|
-
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
3571
|
-
for (const factId of uniqueDeletedIds) {
|
|
3572
|
-
try {
|
|
3573
|
-
await this._notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
3574
|
-
} catch (hookErr) {
|
|
3575
|
-
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
3576
|
-
if (isTimeout) {
|
|
3577
|
-
throw new Error(
|
|
3578
|
-
`forget(${entityId}/${factId}) failed: ${hookErr.message}`
|
|
3579
|
-
);
|
|
3580
|
-
}
|
|
3581
|
-
const errMsg = hookErr?.message ?? "";
|
|
3582
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
3583
|
-
if (isValidationError) {
|
|
3584
|
-
throw new Error(
|
|
3585
|
-
`forget(${entityId}/${factId}) failed: ${errMsg}`,
|
|
3586
|
-
{ cause: hookErr }
|
|
3587
|
-
);
|
|
3588
|
-
}
|
|
3589
|
-
throw new Error(
|
|
3590
|
-
`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`,
|
|
3591
|
-
{ cause: this._sanitizeRankerError(hookErr) }
|
|
3592
|
-
);
|
|
3593
|
-
}
|
|
3594
|
-
}
|
|
3595
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
3596
|
-
} finally {
|
|
3597
|
-
this.activeMaintenanceJobs.delete(forgetKey);
|
|
3598
|
-
}
|
|
4156
|
+
return this.maintenanceService.forget(entityId, params);
|
|
3599
4157
|
}
|
|
4158
|
+
/**
|
|
4159
|
+
* @param params.promptOverride - Overrides the system prompt for this ingest call only.
|
|
4160
|
+
* For persistent customization, set `options.config.prompts.ingestSystemPrompt` at
|
|
4161
|
+
* WikiMemory construction time.
|
|
4162
|
+
*/
|
|
3600
4163
|
async ingestDocument(entityId, params) {
|
|
3601
|
-
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
);
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
|
|
3615
|
-
|
|
3616
|
-
const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
|
|
3617
|
-
if (this.activeIngestJobs.has(jobKey)) {
|
|
3618
|
-
throw new WikiBusyError("ingest", entityId);
|
|
3619
|
-
}
|
|
3620
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3621
|
-
throw new WikiBusyError("prune", entityId);
|
|
3622
|
-
}
|
|
3623
|
-
if (this._isReembedActive(entityId)) {
|
|
3624
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3625
|
-
}
|
|
3626
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3627
|
-
throw new WikiBusyError("import", entityId);
|
|
3628
|
-
}
|
|
3629
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3630
|
-
throw new WikiBusyError("forget", entityId);
|
|
3631
|
-
}
|
|
3632
|
-
this.activeIngestJobs.add(jobKey);
|
|
3633
|
-
this._notifyStatusSubscribers(entityId);
|
|
3634
|
-
try {
|
|
3635
|
-
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
3636
|
-
if (chunks.length === 0) {
|
|
3637
|
-
return { truncated: false, chunks: 0 };
|
|
3638
|
-
}
|
|
3639
|
-
const chunkResults = await withConcurrency(
|
|
3640
|
-
chunks.map((chunk) => async () => {
|
|
3641
|
-
const userPrompt = `Document Chunk:
|
|
3642
|
-
${chunk}`;
|
|
3643
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
3644
|
-
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
3645
|
-
userPrompt
|
|
3646
|
-
});
|
|
3647
|
-
const result = parseJsonResponse(responseText);
|
|
3648
|
-
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
3649
|
-
}),
|
|
3650
|
-
chunkConcurrency
|
|
3651
|
-
);
|
|
3652
|
-
const seen = /* @__PURE__ */ new Set();
|
|
3653
|
-
const allValidFacts = [];
|
|
3654
|
-
for (const facts of chunkResults) {
|
|
3655
|
-
for (const fact of facts) {
|
|
3656
|
-
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
3657
|
-
if (!seen.has(normalized)) {
|
|
3658
|
-
seen.add(normalized);
|
|
3659
|
-
allValidFacts.push(fact);
|
|
3660
|
-
}
|
|
3661
|
-
}
|
|
3662
|
-
}
|
|
3663
|
-
const now = Date.now();
|
|
3664
|
-
const insertedFacts = [];
|
|
3665
|
-
const deletedSourceFactIds = [];
|
|
3666
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3667
|
-
deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
|
|
3668
|
-
await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
|
|
3669
|
-
for (const fact of allValidFacts) {
|
|
3670
|
-
const id = generateId("fact_");
|
|
3671
|
-
const wikiFact = {
|
|
3672
|
-
id,
|
|
3673
|
-
entity_id: entityId,
|
|
3674
|
-
title: fact.title,
|
|
3675
|
-
body: fact.body,
|
|
3676
|
-
tags: fact.tags,
|
|
3677
|
-
confidence: fact.confidence,
|
|
3678
|
-
source_type: "immutable_document",
|
|
3679
|
-
source_hash: sourceHash,
|
|
3680
|
-
source_ref: sourceRef,
|
|
3681
|
-
created_at: now,
|
|
3682
|
-
updated_at: now,
|
|
3683
|
-
last_accessed_at: null,
|
|
3684
|
-
access_count: 0,
|
|
3685
|
-
deleted_at: null
|
|
3686
|
-
};
|
|
3687
|
-
await this.entryRepo.upsert(wikiFact, tx);
|
|
3688
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
3689
|
-
}
|
|
3690
|
-
});
|
|
3691
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3692
|
-
this.vectorCache.delete(entityId);
|
|
3693
|
-
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
3694
|
-
for (const factId of uniqueDeletedSourceFactIds) {
|
|
3695
|
-
try {
|
|
3696
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
3697
|
-
} catch (hookErr) {
|
|
3698
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
3699
|
-
}
|
|
3700
|
-
}
|
|
3701
|
-
for (const fact of insertedFacts) {
|
|
3702
|
-
await this.embedFact(fact);
|
|
4164
|
+
return this.ingestionService.ingestDocument(entityId, params);
|
|
4165
|
+
}
|
|
4166
|
+
/**
|
|
4167
|
+
* Returns up to `limit` unprocessed outbox events, oldest first.
|
|
4168
|
+
* Works regardless of enableOutbox value — allows draining after disabling.
|
|
4169
|
+
*/
|
|
4170
|
+
async getUnprocessedOutboxEvents(limit = 100) {
|
|
4171
|
+
if (Number.isFinite(limit) && limit <= 0) return [];
|
|
4172
|
+
const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.trunc(limit) : 100;
|
|
4173
|
+
const rows = await this.outboxRepo.fetchPending(safeLimit);
|
|
4174
|
+
return rows.map((row) => {
|
|
4175
|
+
let payload = null;
|
|
4176
|
+
try {
|
|
4177
|
+
payload = JSON.parse(row.payload);
|
|
4178
|
+
} catch {
|
|
3703
4179
|
}
|
|
3704
|
-
|
|
3705
|
-
|
|
3706
|
-
|
|
3707
|
-
|
|
3708
|
-
|
|
3709
|
-
|
|
4180
|
+
return { ...row, payload };
|
|
4181
|
+
});
|
|
4182
|
+
}
|
|
4183
|
+
/**
|
|
4184
|
+
* Deletes the given event IDs from the outbox table.
|
|
4185
|
+
* Call after successfully committing events to the external system.
|
|
4186
|
+
*/
|
|
4187
|
+
async markOutboxEventsProcessed(eventIds) {
|
|
4188
|
+
await this.outboxRepo.acknowledge(eventIds);
|
|
3710
4189
|
}
|
|
3711
4190
|
};
|
|
3712
|
-
|
|
3713
|
-
* Maximum number of entities whose parsed embedding vectors are held in
|
|
3714
|
-
* memory. This cap is intentionally conservative so the cache remains safe
|
|
3715
|
-
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
3716
|
-
*/
|
|
3717
|
-
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
3718
|
-
/**
|
|
3719
|
-
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
3720
|
-
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
3721
|
-
* entities while still maintaining a bounded memory footprint.
|
|
3722
|
-
*/
|
|
3723
|
-
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
3724
|
-
var WikiMemory = _WikiMemory;
|
|
4191
|
+
_testAccessNonTestEnvWarned = new WeakMap();
|
|
3725
4192
|
|
|
3726
4193
|
// src/utils/formatContext.ts
|
|
3727
4194
|
function validateMaxOption(value, name) {
|
|
@@ -3989,6 +4456,8 @@ function createWiki(db, options) {
|
|
|
3989
4456
|
}
|
|
3990
4457
|
|
|
3991
4458
|
exports.DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT = DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT;
|
|
4459
|
+
exports.HOOK_TIMEOUT_MARKER = HOOK_TIMEOUT_MARKER;
|
|
4460
|
+
exports.PromptService = PromptService;
|
|
3992
4461
|
exports.PrunePartialFailureError = PrunePartialFailureError;
|
|
3993
4462
|
exports.WikiBusyError = WikiBusyError;
|
|
3994
4463
|
exports.WikiMemory = WikiMemory;
|