@equationalapplications/core-llm-wiki 4.7.0 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -0
- package/dist/chunk-2FGDZKC2.mjs +2547 -0
- package/dist/chunk-2FGDZKC2.mjs.map +1 -0
- package/dist/index.d.mts +4 -530
- package/dist/index.d.ts +4 -530
- package/dist/index.js +2412 -1980
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +156 -2267
- package/dist/index.mjs.map +1 -1
- package/dist/testing-hfpeX01Q.d.mts +1112 -0
- package/dist/testing-hfpeX01Q.d.ts +1112 -0
- package/dist/testing.d.mts +2 -0
- package/dist/testing.d.ts +2 -0
- package/dist/testing.js +2552 -0
- package/dist/testing.js.map +1 -0
- package/dist/testing.mjs +3 -0
- package/dist/testing.mjs.map +1 -0
- package/package.json +6 -1
package/dist/index.js
CHANGED
|
@@ -6,6 +6,14 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
|
6
6
|
|
|
7
7
|
var MiniSearch__default = /*#__PURE__*/_interopDefault(MiniSearch);
|
|
8
8
|
|
|
9
|
+
var __typeError = (msg) => {
|
|
10
|
+
throw TypeError(msg);
|
|
11
|
+
};
|
|
12
|
+
var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
|
|
13
|
+
var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
|
|
14
|
+
var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
|
|
15
|
+
var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);
|
|
16
|
+
|
|
9
17
|
// src/db/schema.ts
|
|
10
18
|
async function setupDatabase(db, prefix) {
|
|
11
19
|
await db.execAsync(`
|
|
@@ -155,28 +163,6 @@ for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
|
155
163
|
}
|
|
156
164
|
var CURRENT_SCHEMA_VERSION = MIGRATIONS.length > 0 ? MIGRATIONS[MIGRATIONS.length - 1].version : 0;
|
|
157
165
|
|
|
158
|
-
// src/types.ts
|
|
159
|
-
var WikiBusyError = class extends Error {
|
|
160
|
-
constructor(operation, entityId) {
|
|
161
|
-
super(`${operation} already running for entity ${entityId}`);
|
|
162
|
-
this.name = "WikiBusyError";
|
|
163
|
-
this.operation = operation;
|
|
164
|
-
this.entityId = entityId;
|
|
165
|
-
}
|
|
166
|
-
};
|
|
167
|
-
var PrunePartialFailureError = class extends Error {
|
|
168
|
-
constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
|
|
169
|
-
super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
|
|
170
|
-
this.name = "PrunePartialFailureError";
|
|
171
|
-
this.deleted = deleted;
|
|
172
|
-
this.failedAt = failedAt;
|
|
173
|
-
this.remaining = remaining;
|
|
174
|
-
this.deletedTasks = deletedTasks;
|
|
175
|
-
this.deletedEvents = deletedEvents;
|
|
176
|
-
this.cause = cause;
|
|
177
|
-
}
|
|
178
|
-
};
|
|
179
|
-
|
|
180
166
|
// src/repositories/BaseRepository.ts
|
|
181
167
|
var BaseRepository = class {
|
|
182
168
|
constructor(db, prefix) {
|
|
@@ -220,6 +206,28 @@ function mapRowToFact(row) {
|
|
|
220
206
|
access_count: Number(row.access_count ?? 0)
|
|
221
207
|
};
|
|
222
208
|
}
|
|
209
|
+
function normalizeEmbeddingBlobValue(blob) {
|
|
210
|
+
if (blob instanceof Uint8Array) return blob;
|
|
211
|
+
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
212
|
+
const obj = blob;
|
|
213
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
214
|
+
return new Uint8Array(obj["data"]);
|
|
215
|
+
}
|
|
216
|
+
const entries = Object.keys(obj);
|
|
217
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
218
|
+
const len = entries.length;
|
|
219
|
+
const arr = new Uint8Array(len);
|
|
220
|
+
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
221
|
+
return arr;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
return null;
|
|
225
|
+
}
|
|
226
|
+
function mapRowToFactWithBlobs(row) {
|
|
227
|
+
const base = mapRowToFact(row);
|
|
228
|
+
const embeddingBlob = normalizeEmbeddingBlobValue(row.embedding_blob);
|
|
229
|
+
return embeddingBlob ? { ...base, embedding_blob: embeddingBlob } : base;
|
|
230
|
+
}
|
|
223
231
|
var EntryRepository = class extends BaseRepository {
|
|
224
232
|
constructor(db, prefix, outbox) {
|
|
225
233
|
super(db, prefix);
|
|
@@ -315,21 +323,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
315
323
|
* Normalize an embedding blob value to Uint8Array or null.
|
|
316
324
|
*/
|
|
317
325
|
normalizeEmbeddingBlob(blob) {
|
|
318
|
-
|
|
319
|
-
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
320
|
-
const obj = blob;
|
|
321
|
-
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
322
|
-
return new Uint8Array(obj["data"]);
|
|
323
|
-
}
|
|
324
|
-
const entries = Object.keys(obj);
|
|
325
|
-
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
326
|
-
const len = entries.length;
|
|
327
|
-
const arr = new Uint8Array(len);
|
|
328
|
-
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
329
|
-
return arr;
|
|
330
|
-
}
|
|
331
|
-
}
|
|
332
|
-
return null;
|
|
326
|
+
return normalizeEmbeddingBlobValue(blob);
|
|
333
327
|
}
|
|
334
328
|
/**
|
|
335
329
|
* Fetch existing rows by IDs and return id/entity_id/updated_at for import collision resolution.
|
|
@@ -509,7 +503,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
509
503
|
}
|
|
510
504
|
/**
|
|
511
505
|
* Fetch recent non-deleted entries for an entity (limited), ordered by updated_at DESC.
|
|
512
|
-
* Used by
|
|
506
|
+
* Used by MaintenanceService.doRunLibrarian().
|
|
513
507
|
*/
|
|
514
508
|
async findRecentByEntityId(entityId, limit, tx) {
|
|
515
509
|
const executor = this.getExecutor(tx);
|
|
@@ -519,6 +513,18 @@ var EntryRepository = class extends BaseRepository {
|
|
|
519
513
|
);
|
|
520
514
|
return rows.map(mapRowToFact);
|
|
521
515
|
}
|
|
516
|
+
/**
|
|
517
|
+
* Fetch all non-deleted entries for an entity with embedding blobs preserved.
|
|
518
|
+
* Used by ImportExportService for export/import round-tripping.
|
|
519
|
+
*/
|
|
520
|
+
async findAllByEntityIdWithBlobs(entityId, tx) {
|
|
521
|
+
const executor = this.getExecutor(tx);
|
|
522
|
+
const rows = await executor.getAllAsync(
|
|
523
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
524
|
+
[entityId]
|
|
525
|
+
);
|
|
526
|
+
return rows.map(mapRowToFactWithBlobs);
|
|
527
|
+
}
|
|
522
528
|
/**
|
|
523
529
|
* Count non-deleted entries for the given entities whose embedding_blob dimension
|
|
524
530
|
* doesn't match queryVecLength. Used by read() to detect model-switch mismatches.
|
|
@@ -605,24 +611,19 @@ var EntryRepository = class extends BaseRepository {
|
|
|
605
611
|
}
|
|
606
612
|
/**
|
|
607
613
|
* Mark orphaned entries (never accessed, old) as deleted.
|
|
608
|
-
* Used by
|
|
614
|
+
* Used by MaintenanceService.doRunHeal().
|
|
609
615
|
*/
|
|
610
616
|
async markOrphaned(entityId, orphanThreshold, tx) {
|
|
611
617
|
const executor = this.getExecutor(tx);
|
|
612
618
|
const now = Date.now();
|
|
613
|
-
const
|
|
614
|
-
`SELECT id FROM ${this.prefix}entries
|
|
615
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL`,
|
|
616
|
-
[entityId, orphanThreshold]
|
|
617
|
-
);
|
|
618
|
-
if (orphanedRows.length === 0) return 0;
|
|
619
|
-
const result = await executor.runAsync(
|
|
619
|
+
const updatedRows = await executor.getAllAsync(
|
|
620
620
|
`UPDATE ${this.prefix}entries
|
|
621
621
|
SET deleted_at = ?, updated_at = ?
|
|
622
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
622
|
+
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
623
|
+
RETURNING id`,
|
|
623
624
|
[now, now, entityId, orphanThreshold]
|
|
624
625
|
);
|
|
625
|
-
for (const row of
|
|
626
|
+
for (const row of updatedRows) {
|
|
626
627
|
await this.outbox.push({
|
|
627
628
|
entityId,
|
|
628
629
|
tableName: "entries",
|
|
@@ -631,11 +632,11 @@ var EntryRepository = class extends BaseRepository {
|
|
|
631
632
|
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
632
633
|
}, tx);
|
|
633
634
|
}
|
|
634
|
-
return
|
|
635
|
+
return updatedRows.map((r) => r.id);
|
|
635
636
|
}
|
|
636
637
|
/**
|
|
637
638
|
* Downgrade stale inferred entries to 'tentative'.
|
|
638
|
-
* Used by
|
|
639
|
+
* Used by MaintenanceService.doRunHeal().
|
|
639
640
|
*/
|
|
640
641
|
async downgradeStaleInferred(entityId, staleThreshold, tx) {
|
|
641
642
|
const executor = this.getExecutor(tx);
|
|
@@ -667,7 +668,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
667
668
|
}
|
|
668
669
|
/**
|
|
669
670
|
* Downgrade specific entries to 'tentative' by IDs.
|
|
670
|
-
* Used by
|
|
671
|
+
* Used by MaintenanceService.doRunHeal().
|
|
671
672
|
*/
|
|
672
673
|
async downgradeByIds(ids, entityId, tx) {
|
|
673
674
|
if (ids.length === 0) return;
|
|
@@ -690,7 +691,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
690
691
|
}
|
|
691
692
|
/**
|
|
692
693
|
* Soft-delete specific entries by IDs.
|
|
693
|
-
* Used by
|
|
694
|
+
* Used by MaintenanceService.doRunHeal().
|
|
694
695
|
*/
|
|
695
696
|
async softDeleteByIds(ids, entityId, tx) {
|
|
696
697
|
if (ids.length === 0) return;
|
|
@@ -1420,29 +1421,6 @@ var MetadataRepository = class extends BaseRepository {
|
|
|
1420
1421
|
}
|
|
1421
1422
|
};
|
|
1422
1423
|
|
|
1423
|
-
// src/prompts.ts
|
|
1424
|
-
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
1425
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1426
|
-
{
|
|
1427
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
1428
|
-
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
1429
|
-
}
|
|
1430
|
-
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
1431
|
-
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
|
|
1432
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1433
|
-
{
|
|
1434
|
-
"downgraded": ["string (fact IDs)"],
|
|
1435
|
-
"deleted": ["string (fact IDs)"],
|
|
1436
|
-
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1437
|
-
}
|
|
1438
|
-
Do not return markdown, just raw JSON.`;
|
|
1439
|
-
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
1440
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1441
|
-
{
|
|
1442
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1443
|
-
}
|
|
1444
|
-
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
1445
|
-
|
|
1446
1424
|
// src/utils/cosine.ts
|
|
1447
1425
|
function cosineSimilarity(a, b) {
|
|
1448
1426
|
let dot = 0, normA = 0, normB = 0;
|
|
@@ -1484,267 +1462,168 @@ function parseEmbedding(blob, text) {
|
|
|
1484
1462
|
return null;
|
|
1485
1463
|
}
|
|
1486
1464
|
|
|
1487
|
-
// src/
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1465
|
+
// src/services/SearchService.ts
|
|
1466
|
+
var _SearchService = class _SearchService {
|
|
1467
|
+
constructor(entryRepo) {
|
|
1468
|
+
this.entryRepo = entryRepo;
|
|
1469
|
+
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
1470
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
1471
|
+
this.miniSearch = new MiniSearch__default.default({
|
|
1472
|
+
fields: ["title", "body", "tags"],
|
|
1473
|
+
storeFields: ["entity_id"],
|
|
1474
|
+
searchOptions: {
|
|
1475
|
+
boost: { title: 2 },
|
|
1476
|
+
fuzzy: 0.2,
|
|
1477
|
+
prefix: true
|
|
1478
|
+
}
|
|
1479
|
+
});
|
|
1496
1480
|
}
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1481
|
+
/**
|
|
1482
|
+
* Rebuilds the search index and clears the vector cache for a given entity.
|
|
1483
|
+
* A direct replacement for manually syncing state after a DB transaction.
|
|
1484
|
+
*/
|
|
1485
|
+
async sync(entityId) {
|
|
1486
|
+
await this.rebuildIndex(entityId);
|
|
1487
|
+
this.evictCache(entityId);
|
|
1488
|
+
}
|
|
1489
|
+
/**
|
|
1490
|
+
* Clears the parsed vector cache. Useful for mid-loop flush guarantees
|
|
1491
|
+
* or memory pressure evictions.
|
|
1492
|
+
*/
|
|
1493
|
+
evictCache(entityId) {
|
|
1494
|
+
if (entityId) {
|
|
1495
|
+
this.vectorCache.delete(entityId);
|
|
1506
1496
|
} else {
|
|
1507
|
-
|
|
1497
|
+
this.vectorCache.clear();
|
|
1508
1498
|
}
|
|
1509
1499
|
}
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
function shouldExposeReadMetadata(entityId) {
|
|
1518
|
-
return Array.isArray(entityId);
|
|
1519
|
-
}
|
|
1520
|
-
|
|
1521
|
-
// src/WikiMemory.ts
|
|
1522
|
-
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
1523
|
-
function parseJsonResponse(text) {
|
|
1524
|
-
const firstBrace = text.indexOf("{");
|
|
1525
|
-
const firstBracket = text.indexOf("[");
|
|
1526
|
-
let start;
|
|
1527
|
-
let openChar;
|
|
1528
|
-
let closeChar;
|
|
1529
|
-
if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
|
|
1530
|
-
start = firstBrace;
|
|
1531
|
-
openChar = "{";
|
|
1532
|
-
closeChar = "}";
|
|
1533
|
-
} else if (firstBracket !== -1) {
|
|
1534
|
-
start = firstBracket;
|
|
1535
|
-
openChar = "[";
|
|
1536
|
-
closeChar = "]";
|
|
1537
|
-
} else {
|
|
1538
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
1500
|
+
/**
|
|
1501
|
+
* Fully resets the search service.
|
|
1502
|
+
*/
|
|
1503
|
+
clearAll() {
|
|
1504
|
+
this.vectorCache.clear();
|
|
1505
|
+
this.miniSearch.removeAll();
|
|
1506
|
+
this.miniSearchEntryIdsByEntity.clear();
|
|
1539
1507
|
}
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
const
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1508
|
+
/**
|
|
1509
|
+
* Executes a keyword search against the active MiniSearch index.
|
|
1510
|
+
*/
|
|
1511
|
+
searchKeyword(query, entityIds, limit) {
|
|
1512
|
+
const entityIdSet = new Set(entityIds);
|
|
1513
|
+
const results = this.miniSearch.search(query, {
|
|
1514
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1515
|
+
combineWith: "OR"
|
|
1516
|
+
});
|
|
1517
|
+
return results.slice(0, limit);
|
|
1518
|
+
}
|
|
1519
|
+
/**
|
|
1520
|
+
* Pre-fetches MiniSearch scores for candidate hydration, used during hybrid weighting.
|
|
1521
|
+
*/
|
|
1522
|
+
getMiniSearchScores(query, entityIds, preFilterLimit) {
|
|
1523
|
+
const entityIdSet = new Set(entityIds);
|
|
1524
|
+
let results = this.miniSearch.search(query, {
|
|
1525
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1526
|
+
combineWith: "OR"
|
|
1527
|
+
});
|
|
1528
|
+
if (preFilterLimit !== void 0) {
|
|
1529
|
+
results = results.slice(0, preFilterLimit);
|
|
1549
1530
|
}
|
|
1550
|
-
if (
|
|
1551
|
-
|
|
1552
|
-
|
|
1531
|
+
if (results.length === 0) return /* @__PURE__ */ new Map();
|
|
1532
|
+
const maxMsScore = Math.max(1, results[0]?.score ?? 1);
|
|
1533
|
+
return new Map(results.map((r) => [r.id, r.score / maxMsScore]));
|
|
1534
|
+
}
|
|
1535
|
+
/**
|
|
1536
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
1537
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
1538
|
+
*/
|
|
1539
|
+
async rankSemantic(args) {
|
|
1540
|
+
const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1541
|
+
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
1542
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
1543
|
+
const tooLarge = populateCache && candidateRows.length > _SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1544
|
+
if (tooLarge && entityCache) {
|
|
1545
|
+
this.vectorCache.delete(entityId);
|
|
1546
|
+
entityCache = void 0;
|
|
1553
1547
|
}
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1548
|
+
const canCache = populateCache && !tooLarge;
|
|
1549
|
+
if (canCache && !entityCache) {
|
|
1550
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
1557
1551
|
}
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1552
|
+
const scored = candidateRows.map((row) => {
|
|
1553
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
1554
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
1555
|
+
entityCache.set(row.id, vector);
|
|
1556
|
+
}
|
|
1557
|
+
let score = 0;
|
|
1558
|
+
if (vector && vector.length === queryVec.length) {
|
|
1559
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1560
|
+
if (weight !== void 0) {
|
|
1561
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1562
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1563
|
+
} else {
|
|
1564
|
+
score = cosSim;
|
|
1565
|
+
}
|
|
1566
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
1567
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1568
|
+
score = (1 - weight) * kwScore;
|
|
1569
|
+
} else {
|
|
1570
|
+
score = -2;
|
|
1571
|
+
}
|
|
1572
|
+
return {
|
|
1573
|
+
id: row.id,
|
|
1574
|
+
entity_id: row.entity_id,
|
|
1575
|
+
score,
|
|
1576
|
+
updated_at: row.updated_at,
|
|
1577
|
+
access_count: row.access_count
|
|
1578
|
+
};
|
|
1579
|
+
});
|
|
1580
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
1581
|
+
if (!this.vectorCache.has(entityId)) {
|
|
1582
|
+
if (this.vectorCache.size >= _SearchService.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1583
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
1584
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1585
|
+
}
|
|
1586
|
+
this.vectorCache.set(entityId, entityCache);
|
|
1587
|
+
}
|
|
1562
1588
|
}
|
|
1563
|
-
if (
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1589
|
+
if (!skipSort) {
|
|
1590
|
+
this._tieBreakSort(scored);
|
|
1591
|
+
}
|
|
1592
|
+
return scored.slice(0, limit);
|
|
1593
|
+
}
|
|
1594
|
+
// --- Internal Index Management ---
|
|
1595
|
+
async rebuildIndex(entityId) {
|
|
1596
|
+
if (entityId) {
|
|
1597
|
+
const rows2 = await this.entryRepo.findMiniSearchRows(entityId);
|
|
1598
|
+
const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
|
|
1599
|
+
if (previousIds) {
|
|
1600
|
+
for (const id of previousIds) {
|
|
1601
|
+
this.miniSearch.discard(id);
|
|
1602
|
+
}
|
|
1603
|
+
}
|
|
1604
|
+
const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
|
|
1605
|
+
if (documents2.length > 0) {
|
|
1606
|
+
this.miniSearch.addAll(documents2);
|
|
1568
1607
|
}
|
|
1608
|
+
this.miniSearchEntryIdsByEntity.set(
|
|
1609
|
+
entityId,
|
|
1610
|
+
new Set(documents2.map((document) => document.id))
|
|
1611
|
+
);
|
|
1612
|
+
return;
|
|
1613
|
+
}
|
|
1614
|
+
const rows = await this.entryRepo.findMiniSearchRows();
|
|
1615
|
+
this.miniSearch.removeAll();
|
|
1616
|
+
this.miniSearchEntryIdsByEntity.clear();
|
|
1617
|
+
const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
|
|
1618
|
+
if (documents.length > 0) {
|
|
1619
|
+
this.miniSearch.addAll(documents);
|
|
1620
|
+
}
|
|
1621
|
+
for (const document of documents) {
|
|
1622
|
+
const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
|
|
1623
|
+
ids.add(document.id);
|
|
1624
|
+
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
1569
1625
|
}
|
|
1570
1626
|
}
|
|
1571
|
-
if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
|
|
1572
|
-
return JSON.parse(text.slice(start, end + 1));
|
|
1573
|
-
}
|
|
1574
|
-
function safeSlice(value, start, end) {
|
|
1575
|
-
const length = value.length;
|
|
1576
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
1577
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
1578
|
-
if (safeStart > safeEnd) {
|
|
1579
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
1580
|
-
}
|
|
1581
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
1582
|
-
safeStart--;
|
|
1583
|
-
}
|
|
1584
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
1585
|
-
safeEnd--;
|
|
1586
|
-
}
|
|
1587
|
-
return value.slice(safeStart, safeEnd);
|
|
1588
|
-
}
|
|
1589
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
1590
|
-
const text = input.trim();
|
|
1591
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
1592
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
1593
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
1594
|
-
}
|
|
1595
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
1596
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
1597
|
-
}
|
|
1598
|
-
const chunks = [];
|
|
1599
|
-
let truncated = false;
|
|
1600
|
-
let cursor = 0;
|
|
1601
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
1602
|
-
while (cursor < text.length) {
|
|
1603
|
-
const remaining = text.length - cursor;
|
|
1604
|
-
if (remaining <= maxChunkLength) {
|
|
1605
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
1606
|
-
break;
|
|
1607
|
-
}
|
|
1608
|
-
const windowEnd = cursor + maxChunkLength;
|
|
1609
|
-
const minSplit = cursor + halfMax;
|
|
1610
|
-
let splitPoint = -1;
|
|
1611
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
1612
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
1613
|
-
splitPoint = paraIdx + 2;
|
|
1614
|
-
}
|
|
1615
|
-
if (splitPoint === -1) {
|
|
1616
|
-
let lastTerm = -1;
|
|
1617
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
1618
|
-
const ch = text[i];
|
|
1619
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
1620
|
-
lastTerm = i + 2;
|
|
1621
|
-
}
|
|
1622
|
-
}
|
|
1623
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
1624
|
-
}
|
|
1625
|
-
if (splitPoint === -1) {
|
|
1626
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
1627
|
-
if (/\s/.test(text[i])) {
|
|
1628
|
-
splitPoint = i + 1;
|
|
1629
|
-
break;
|
|
1630
|
-
}
|
|
1631
|
-
}
|
|
1632
|
-
}
|
|
1633
|
-
if (splitPoint === -1) {
|
|
1634
|
-
truncated = true;
|
|
1635
|
-
splitPoint = windowEnd;
|
|
1636
|
-
}
|
|
1637
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
1638
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
1639
|
-
cursor = next;
|
|
1640
|
-
}
|
|
1641
|
-
return { chunks, truncated };
|
|
1642
|
-
}
|
|
1643
|
-
async function withConcurrency(tasks, limit) {
|
|
1644
|
-
const results = new Array(tasks.length);
|
|
1645
|
-
let index = 0;
|
|
1646
|
-
let failed = false;
|
|
1647
|
-
let firstError;
|
|
1648
|
-
async function worker() {
|
|
1649
|
-
while (index < tasks.length && !failed) {
|
|
1650
|
-
const i = index++;
|
|
1651
|
-
try {
|
|
1652
|
-
results[i] = await tasks[i]();
|
|
1653
|
-
} catch (e) {
|
|
1654
|
-
if (!failed) {
|
|
1655
|
-
failed = true;
|
|
1656
|
-
firstError = e;
|
|
1657
|
-
}
|
|
1658
|
-
return;
|
|
1659
|
-
}
|
|
1660
|
-
}
|
|
1661
|
-
}
|
|
1662
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
1663
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
1664
|
-
if (failed) throw firstError;
|
|
1665
|
-
return results;
|
|
1666
|
-
}
|
|
1667
|
-
function clip(value, max) {
|
|
1668
|
-
if (typeof value !== "string") return "";
|
|
1669
|
-
const s = value.trim();
|
|
1670
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
1671
|
-
}
|
|
1672
|
-
function validateTags(tags) {
|
|
1673
|
-
if (!Array.isArray(tags)) return [];
|
|
1674
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
1675
|
-
}
|
|
1676
|
-
function validateFact(fact) {
|
|
1677
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
1678
|
-
const title = clip(fact.title, 80);
|
|
1679
|
-
const body = clip(fact.body, 800);
|
|
1680
|
-
if (!title || !body) return null;
|
|
1681
|
-
let confidence = fact.confidence;
|
|
1682
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
1683
|
-
return {
|
|
1684
|
-
...fact,
|
|
1685
|
-
title,
|
|
1686
|
-
body,
|
|
1687
|
-
confidence,
|
|
1688
|
-
tags: validateTags(fact.tags)
|
|
1689
|
-
};
|
|
1690
|
-
}
|
|
1691
|
-
function validateTask(task) {
|
|
1692
|
-
if (typeof task?.description !== "string") return null;
|
|
1693
|
-
const description = clip(task.description, 200);
|
|
1694
|
-
if (!description) return null;
|
|
1695
|
-
let priority = task.priority;
|
|
1696
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
1697
|
-
return {
|
|
1698
|
-
...task,
|
|
1699
|
-
description,
|
|
1700
|
-
priority
|
|
1701
|
-
};
|
|
1702
|
-
}
|
|
1703
|
-
function normalizeSourceRef(value) {
|
|
1704
|
-
if (typeof value !== "string") return null;
|
|
1705
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
1706
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
1707
|
-
}
|
|
1708
|
-
function normalizeSourceHash(value) {
|
|
1709
|
-
if (typeof value !== "string") return null;
|
|
1710
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
1711
|
-
}
|
|
1712
|
-
function titleTokens(title) {
|
|
1713
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
1714
|
-
}
|
|
1715
|
-
function jaccardScore(a, b) {
|
|
1716
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
1717
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
1718
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
1719
|
-
return intersection.size / union.size;
|
|
1720
|
-
}
|
|
1721
|
-
var FUZZY_THRESHOLD = 0.5;
|
|
1722
|
-
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
1723
|
-
var _WikiMemory = class _WikiMemory {
|
|
1724
|
-
constructor(db, options) {
|
|
1725
|
-
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
1726
|
-
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
1727
|
-
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
1728
|
-
this.miniSearch = new MiniSearch__default.default({
|
|
1729
|
-
fields: ["title", "body", "tags"],
|
|
1730
|
-
storeFields: ["entity_id"],
|
|
1731
|
-
searchOptions: {
|
|
1732
|
-
boost: { title: 2 },
|
|
1733
|
-
fuzzy: 0.2,
|
|
1734
|
-
prefix: true
|
|
1735
|
-
}
|
|
1736
|
-
});
|
|
1737
|
-
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
1738
|
-
this.vectorCache = /* @__PURE__ */ new Map();
|
|
1739
|
-
this.db = db;
|
|
1740
|
-
this.options = options;
|
|
1741
|
-
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
1742
|
-
this.outboxRepo = new OutboxRepository(db, this.prefix);
|
|
1743
|
-
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
1744
|
-
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
1745
|
-
this.eventRepo = new EventRepository(db, this.prefix);
|
|
1746
|
-
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
1747
|
-
}
|
|
1748
1627
|
normalizeMiniSearchRow(row) {
|
|
1749
1628
|
return {
|
|
1750
1629
|
id: row.id,
|
|
@@ -1761,252 +1640,63 @@ var _WikiMemory = class _WikiMemory {
|
|
|
1761
1640
|
})()
|
|
1762
1641
|
};
|
|
1763
1642
|
}
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
const rows2 = await this.entryRepo.findMiniSearchRows(entityId);
|
|
1767
|
-
const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
|
|
1768
|
-
if (previousIds) {
|
|
1769
|
-
for (const id of previousIds) {
|
|
1770
|
-
this.miniSearch.discard(id);
|
|
1771
|
-
}
|
|
1772
|
-
}
|
|
1773
|
-
const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
|
|
1774
|
-
if (documents2.length > 0) {
|
|
1775
|
-
this.miniSearch.addAll(documents2);
|
|
1776
|
-
}
|
|
1777
|
-
this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
|
|
1778
|
-
return;
|
|
1779
|
-
}
|
|
1780
|
-
const rows = await this.entryRepo.findMiniSearchRows();
|
|
1781
|
-
this.miniSearch.removeAll();
|
|
1782
|
-
this.miniSearchEntryIdsByEntity.clear();
|
|
1783
|
-
const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
|
|
1784
|
-
if (documents.length > 0) {
|
|
1785
|
-
this.miniSearch.addAll(documents);
|
|
1786
|
-
}
|
|
1787
|
-
for (const document of documents) {
|
|
1788
|
-
const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
|
|
1789
|
-
ids.add(document.id);
|
|
1790
|
-
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
1791
|
-
}
|
|
1643
|
+
_tieBreakSort(items) {
|
|
1644
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1792
1645
|
}
|
|
1793
|
-
|
|
1794
|
-
const
|
|
1795
|
-
if (
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
|
|
1802
|
-
}
|
|
1803
|
-
} else {
|
|
1804
|
-
await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
|
|
1805
|
-
}
|
|
1646
|
+
_compareScoredRows(a, b) {
|
|
1647
|
+
const scoreDiff = b.score - a.score;
|
|
1648
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
1649
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1650
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1651
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1652
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1653
|
+
return a.id.localeCompare(b.id);
|
|
1806
1654
|
}
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1655
|
+
};
|
|
1656
|
+
/**
|
|
1657
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
1658
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
1659
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
1660
|
+
*/
|
|
1661
|
+
_SearchService.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
1662
|
+
/**
|
|
1663
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
1664
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
1665
|
+
* entities while still maintaining a bounded memory footprint.
|
|
1666
|
+
*/
|
|
1667
|
+
_SearchService.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
1668
|
+
var SearchService = _SearchService;
|
|
1669
|
+
|
|
1670
|
+
// src/types.ts
|
|
1671
|
+
/**
 * Error thrown when an operation cannot start because a conflicting job is
 * already running.
 *
 * Instances carry `operation` and `entityId` as own properties.
 */
var WikiBusyError = class extends Error {
  /**
   * @param {string} operation - Name of the operation that is already running.
   * @param {string} entityId - Entity the running operation belongs to.
   */
  constructor(operation, entityId) {
    const description = `${operation} already running for entity ${entityId}`;
    super(description);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
|
|
1679
|
+
/**
 * Error describing a prune that stopped part-way through.
 *
 * All constructor arguments are exposed as own properties of the instance.
 */
var PrunePartialFailureError = class extends Error {
  /**
   * @param {number} deleted - Value reported as "deleted" in the message.
   * @param {*} failedAt - Value reported as "failed at" in the message.
   * @param {number} remaining - Value reported as "remaining" in the message.
   * @param {*} cause - Underlying failure, stored on `cause`.
   * @param {number} [deletedTasks=0] - Stored on `deletedTasks`.
   * @param {number} [deletedEvents=0] - Stored on `deletedEvents`.
   */
  constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
    const summary = `Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`;
    super(summary);
    Object.assign(this, {
      name: "PrunePartialFailureError",
      deleted,
      failedAt,
      remaining,
      deletedTasks,
      deletedEvents,
      cause,
    });
  }
};
|
|
1691
|
+
// Unique symbol with the description "WikiMemoryHookTimeout".
// NOTE(review): presumably used as a property key to tag hook-timeout errors so
// they can be told apart from other failures — confirm at the use sites.
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
1897
1692
|
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
entityId,
|
|
1906
|
-
factId,
|
|
1907
|
-
vector: vectorCopy
|
|
1908
|
-
});
|
|
1909
|
-
}
|
|
1910
|
-
/**
|
|
1911
|
-
* GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
|
|
1912
|
-
* Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
|
|
1913
|
-
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
1914
|
-
*/
|
|
1915
|
-
async _notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
|
|
1916
|
-
if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
|
|
1917
|
-
if (this.options.forceDeleteIgnoreRankerHook === true) return;
|
|
1918
|
-
const vectorCopy = vector ? vector.slice() : null;
|
|
1919
|
-
const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
|
|
1920
|
-
if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
|
|
1921
|
-
throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
|
|
1922
|
-
}
|
|
1923
|
-
const timeoutMs = rawTimeout;
|
|
1924
|
-
let timeoutHandle;
|
|
1925
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
1926
|
-
timeoutHandle = setTimeout(
|
|
1927
|
-
() => {
|
|
1928
|
-
const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
|
|
1929
|
-
timeoutError[HOOK_TIMEOUT_MARKER] = true;
|
|
1930
|
-
reject(timeoutError);
|
|
1931
|
-
},
|
|
1932
|
-
timeoutMs
|
|
1933
|
-
);
|
|
1934
|
-
});
|
|
1935
|
-
const hookPromise = Promise.resolve(
|
|
1936
|
-
this.options.vectorRanker.onEmbeddingPersisted({
|
|
1937
|
-
entityId,
|
|
1938
|
-
factId,
|
|
1939
|
-
vector: vectorCopy
|
|
1940
|
-
})
|
|
1941
|
-
);
|
|
1942
|
-
try {
|
|
1943
|
-
await Promise.race([hookPromise, timeoutPromise]);
|
|
1944
|
-
} catch (err) {
|
|
1945
|
-
hookPromise.catch(() => {
|
|
1946
|
-
});
|
|
1947
|
-
throw err;
|
|
1948
|
-
} finally {
|
|
1949
|
-
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
1950
|
-
}
|
|
1951
|
-
}
|
|
1952
|
-
async setup() {
|
|
1953
|
-
const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
|
|
1954
|
-
await setupDatabase(this.db, this.prefix);
|
|
1955
|
-
let currentVersion;
|
|
1956
|
-
if (!entriesExistedBeforeSetup) {
|
|
1957
|
-
await this.metadataRepo.setMeta("schema_version", String(CURRENT_SCHEMA_VERSION), this.db);
|
|
1958
|
-
currentVersion = CURRENT_SCHEMA_VERSION;
|
|
1959
|
-
} else {
|
|
1960
|
-
const schemaVersionValue = await this.metadataRepo.getMeta("schema_version");
|
|
1961
|
-
if (schemaVersionValue) {
|
|
1962
|
-
currentVersion = parseInt(schemaVersionValue, 10);
|
|
1963
|
-
if (!Number.isFinite(currentVersion)) currentVersion = 0;
|
|
1964
|
-
} else {
|
|
1965
|
-
const ftsDdl = await this.metadataRepo.getTableDdl(`${this.prefix}entries_fts`);
|
|
1966
|
-
const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsDdl ?? "");
|
|
1967
|
-
currentVersion = hasPorter ? 1 : 0;
|
|
1968
|
-
}
|
|
1969
|
-
}
|
|
1970
|
-
for (const migration of MIGRATIONS) {
|
|
1971
|
-
if (migration.version > currentVersion) {
|
|
1972
|
-
await migration.run(this.db, this.prefix);
|
|
1973
|
-
await this.metadataRepo.setMeta("schema_version", String(migration.version), this.db);
|
|
1974
|
-
currentVersion = migration.version;
|
|
1975
|
-
}
|
|
1976
|
-
}
|
|
1977
|
-
if (entriesExistedBeforeSetup) {
|
|
1978
|
-
const schemaVersionCheck = await this.metadataRepo.getMeta("schema_version");
|
|
1979
|
-
if (!schemaVersionCheck) {
|
|
1980
|
-
await this.metadataRepo.setMeta("schema_version", String(currentVersion), this.db);
|
|
1981
|
-
}
|
|
1982
|
-
}
|
|
1983
|
-
if (entriesExistedBeforeSetup) {
|
|
1984
|
-
await this.assertNoLegacySourceTypes();
|
|
1985
|
-
}
|
|
1986
|
-
const rows = await this.entryRepo.findRowsForSourceRefMigration();
|
|
1987
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
1988
|
-
for (const row of rows) {
|
|
1989
|
-
const normalized = normalizeSourceRef(row.source_ref);
|
|
1990
|
-
if (normalized !== row.source_ref) {
|
|
1991
|
-
await this.entryRepo.updateSourceRefByRowid(row.rowid, normalized, tx);
|
|
1992
|
-
}
|
|
1993
|
-
}
|
|
1994
|
-
});
|
|
1995
|
-
await this.rebuildMiniSearchIndex();
|
|
1996
|
-
}
|
|
1997
|
-
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
1998
|
-
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
1999
|
-
if (!normalizedRef) {
|
|
2000
|
-
throw new Error(`Invalid sourceRef: "${sourceRef}"`);
|
|
2001
|
-
}
|
|
2002
|
-
const normalizedHash = normalizeSourceHash(sourceHash);
|
|
2003
|
-
if (!normalizedHash) {
|
|
2004
|
-
throw new Error(`Invalid sourceHash: must be a 64-character hex string (normalized to lowercase)`);
|
|
2005
|
-
}
|
|
2006
|
-
const storedHash = await this.entryRepo.findLatestSourceHash(entityId, normalizedRef);
|
|
2007
|
-
if (storedHash === null) return true;
|
|
2008
|
-
const normalizedStoredHash = normalizeSourceHash(storedHash);
|
|
2009
|
-
return normalizedStoredHash !== normalizedHash;
|
|
1693
|
+
// src/services/JobManager.ts
|
|
1694
|
+
var JobManager = class {
|
|
1695
|
+
constructor(prefix) {
|
|
1696
|
+
this.prefix = prefix;
|
|
1697
|
+
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
1698
|
+
this.activeIngestJobs = /* @__PURE__ */ new Map();
|
|
1699
|
+
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
2010
1700
|
}
|
|
2011
1701
|
_pruneKey(entityId) {
|
|
2012
1702
|
return `${this.prefix}:${entityId}:prune`;
|
|
@@ -2026,6 +1716,12 @@ After running the migration SQL, restart your application.`
|
|
|
2026
1716
|
_forgetKey(entityId) {
|
|
2027
1717
|
return `${this.prefix}:${entityId}:forget`;
|
|
2028
1718
|
}
|
|
1719
|
+
_librarianKey(entityId) {
|
|
1720
|
+
return `${this.prefix}:${entityId}:librarian`;
|
|
1721
|
+
}
|
|
1722
|
+
_healKey(entityId) {
|
|
1723
|
+
return `${this.prefix}:${entityId}:heal`;
|
|
1724
|
+
}
|
|
2029
1725
|
_isReembedActive(entityId) {
|
|
2030
1726
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
2031
1727
|
}
|
|
@@ -2035,7 +1731,6 @@ After running the migration SQL, restart your application.`
|
|
|
2035
1731
|
_isForgetActiveFor(entityId) {
|
|
2036
1732
|
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
2037
1733
|
}
|
|
2038
|
-
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
2039
1734
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
2040
1735
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
2041
1736
|
for (const k of this.activeMaintenanceJobs) {
|
|
@@ -2043,794 +1738,874 @@ After running the migration SQL, restart your application.`
|
|
|
2043
1738
|
}
|
|
2044
1739
|
return false;
|
|
2045
1740
|
}
|
|
2046
|
-
|
|
1741
|
+
_hasIngestJob(entityId, sourceRef) {
|
|
1742
|
+
return this.activeIngestJobs.get(entityId)?.has(sourceRef ?? "") ?? false;
|
|
1743
|
+
}
|
|
1744
|
+
_addIngestJob(entityId, sourceRef) {
|
|
1745
|
+
const sourceKey = sourceRef ?? "";
|
|
1746
|
+
let refs = this.activeIngestJobs.get(entityId);
|
|
1747
|
+
if (!refs) {
|
|
1748
|
+
refs = /* @__PURE__ */ new Set();
|
|
1749
|
+
this.activeIngestJobs.set(entityId, refs);
|
|
1750
|
+
}
|
|
1751
|
+
refs.add(sourceKey);
|
|
1752
|
+
}
|
|
1753
|
+
_removeIngestJob(entityId, sourceRef) {
|
|
1754
|
+
const sourceKey = sourceRef ?? "";
|
|
1755
|
+
const refs = this.activeIngestJobs.get(entityId);
|
|
1756
|
+
if (!refs) return;
|
|
1757
|
+
refs.delete(sourceKey);
|
|
1758
|
+
if (refs.size === 0) {
|
|
1759
|
+
this.activeIngestJobs.delete(entityId);
|
|
1760
|
+
}
|
|
1761
|
+
}
|
|
2047
1762
|
_isIngestActiveFor(entityId) {
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
1763
|
+
return this.activeIngestJobs.has(entityId);
|
|
1764
|
+
}
|
|
1765
|
+
acquireLock(operation, entityId, sourceRef) {
|
|
1766
|
+
let blockingOperation = null;
|
|
1767
|
+
if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1768
|
+
throw new WikiBusyError("import", "*");
|
|
2051
1769
|
}
|
|
2052
|
-
|
|
1770
|
+
switch (operation) {
|
|
1771
|
+
case "prune":
|
|
1772
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1773
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1774
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1775
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1776
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1777
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1778
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1779
|
+
break;
|
|
1780
|
+
case "librarian":
|
|
1781
|
+
case "heal": {
|
|
1782
|
+
const opKey = operation === "librarian" ? this._librarianKey(entityId) : this._healKey(entityId);
|
|
1783
|
+
if (this.activeMaintenanceJobs.has(opKey)) blockingOperation = operation;
|
|
1784
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1785
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1786
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1787
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1788
|
+
break;
|
|
1789
|
+
}
|
|
1790
|
+
case "reembed":
|
|
1791
|
+
if (this.activeMaintenanceJobs.has(this._reembedKey(entityId))) blockingOperation = "reembed";
|
|
1792
|
+
else if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
|
|
1793
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1794
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1795
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1796
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1797
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1798
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1799
|
+
break;
|
|
1800
|
+
case "global_reembed":
|
|
1801
|
+
if (this.activeMaintenanceJobs.has(this._globalReembedKey())) blockingOperation = "reembed";
|
|
1802
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) blockingOperation = "reembed";
|
|
1803
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":prune")) blockingOperation = "prune";
|
|
1804
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) blockingOperation = "librarian";
|
|
1805
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":heal")) blockingOperation = "heal";
|
|
1806
|
+
else if (this.activeIngestJobs.size > 0) blockingOperation = "ingest";
|
|
1807
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":import")) blockingOperation = "import";
|
|
1808
|
+
else if (this._isAnyMaintenanceActiveWithSuffix(":forget")) blockingOperation = "forget";
|
|
1809
|
+
break;
|
|
1810
|
+
case "import":
|
|
1811
|
+
case "forget": {
|
|
1812
|
+
const selfKey = operation === "import" ? this._importKey(entityId) : this._forgetKey(entityId);
|
|
1813
|
+
if (this.activeMaintenanceJobs.has(selfKey)) blockingOperation = operation;
|
|
1814
|
+
else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) blockingOperation = "librarian";
|
|
1815
|
+
else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) blockingOperation = "heal";
|
|
1816
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1817
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1818
|
+
else if (this._isIngestActiveFor(entityId)) blockingOperation = "ingest";
|
|
1819
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1820
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1821
|
+
break;
|
|
1822
|
+
}
|
|
1823
|
+
case "global_import":
|
|
1824
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) blockingOperation = "import";
|
|
1825
|
+
break;
|
|
1826
|
+
case "ingest": {
|
|
1827
|
+
const sourceKey = sourceRef ?? "";
|
|
1828
|
+
if (this._hasIngestJob(entityId, sourceKey)) blockingOperation = "ingest";
|
|
1829
|
+
else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) blockingOperation = "prune";
|
|
1830
|
+
else if (this._isReembedActive(entityId)) blockingOperation = "reembed";
|
|
1831
|
+
else if (this._isImportActiveFor(entityId)) blockingOperation = "import";
|
|
1832
|
+
else if (this._isForgetActiveFor(entityId)) blockingOperation = "forget";
|
|
1833
|
+
break;
|
|
1834
|
+
}
|
|
1835
|
+
}
|
|
1836
|
+
if (blockingOperation) {
|
|
1837
|
+
throw new WikiBusyError(
|
|
1838
|
+
blockingOperation,
|
|
1839
|
+
operation === "global_reembed" || operation === "global_import" ? "*" : entityId
|
|
1840
|
+
);
|
|
1841
|
+
}
|
|
1842
|
+
if (operation === "ingest") {
|
|
1843
|
+
this._addIngestJob(entityId, sourceRef);
|
|
1844
|
+
} else if (operation === "global_reembed") {
|
|
1845
|
+
this.activeMaintenanceJobs.add(this._globalReembedKey());
|
|
1846
|
+
} else if (operation === "global_import") {
|
|
1847
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1848
|
+
} else {
|
|
1849
|
+
const keyFnName = `_${operation}Key`;
|
|
1850
|
+
const keyFn = this[keyFnName];
|
|
1851
|
+
this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
|
|
1852
|
+
}
|
|
1853
|
+
this._notifyStatusSubscribers(entityId);
|
|
1854
|
+
}
|
|
1855
|
+
releaseLock(operation, entityId, sourceRef) {
|
|
1856
|
+
if (operation === "ingest") {
|
|
1857
|
+
this._removeIngestJob(entityId, sourceRef);
|
|
1858
|
+
} else if (operation === "global_reembed") {
|
|
1859
|
+
this.activeMaintenanceJobs.delete(this._globalReembedKey());
|
|
1860
|
+
} else if (operation === "global_import") {
|
|
1861
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1862
|
+
} else {
|
|
1863
|
+
const keyFnName = `_${operation}Key`;
|
|
1864
|
+
const keyFn = this[keyFnName];
|
|
1865
|
+
this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
|
|
1866
|
+
}
|
|
1867
|
+
this._notifyStatusSubscribers(entityId);
|
|
1868
|
+
}
|
|
1869
|
+
/**
|
|
1870
|
+
* Returns true if acquireLock(operation, entityId) would throw WikiBusyError.
|
|
1871
|
+
* Use for non-throwing conflict checks (e.g. auto-trigger gating in write()).
|
|
1872
|
+
*/
|
|
1873
|
+
isBlocked(operation, entityId) {
|
|
1874
|
+
if (operation !== "global_import" && this.activeMaintenanceJobs.has(this._globalImportKey())) return true;
|
|
1875
|
+
switch (operation) {
|
|
1876
|
+
case "librarian":
|
|
1877
|
+
return this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1878
|
+
case "heal":
|
|
1879
|
+
return this.activeMaintenanceJobs.has(this._healKey(entityId)) || this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this._isReembedActive(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1880
|
+
case "prune":
|
|
1881
|
+
return this.activeMaintenanceJobs.has(this._pruneKey(entityId)) || this.activeMaintenanceJobs.has(this._librarianKey(entityId)) || this.activeMaintenanceJobs.has(this._healKey(entityId)) || this._isReembedActive(entityId) || this._isIngestActiveFor(entityId) || this._isImportActiveFor(entityId) || this._isForgetActiveFor(entityId);
|
|
1882
|
+
default:
|
|
1883
|
+
return false;
|
|
1884
|
+
}
|
|
1885
|
+
}
|
|
1886
|
+
/**
|
|
1887
|
+
* Auto-heal historically only gated on the heal self-key. Keep that behavior
|
|
1888
|
+
* for write() auto-trigger paths while preserving stricter checks in acquireLock().
|
|
1889
|
+
*/
|
|
1890
|
+
tryAcquireAutoHealLock(entityId) {
|
|
1891
|
+
const healKey = this._healKey(entityId);
|
|
1892
|
+
if (this.activeMaintenanceJobs.has(healKey)) return false;
|
|
1893
|
+
this.activeMaintenanceJobs.add(healKey);
|
|
1894
|
+
this._notifyStatusSubscribers(entityId);
|
|
1895
|
+
return true;
|
|
1896
|
+
}
|
|
1897
|
+
/**
|
|
1898
|
+
* Validates then acquires global + per-entity import locks atomically.
|
|
1899
|
+
* Validates all entities before acquiring any lock (same as current importDump semantics).
|
|
1900
|
+
*/
|
|
1901
|
+
acquireImportLocks(entityIds) {
|
|
1902
|
+
for (const entityId of entityIds) {
|
|
1903
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) throw new WikiBusyError("import", entityId);
|
|
1904
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) throw new WikiBusyError("librarian", entityId);
|
|
1905
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) throw new WikiBusyError("heal", entityId);
|
|
1906
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) throw new WikiBusyError("prune", entityId);
|
|
1907
|
+
if (this._isReembedActive(entityId)) throw new WikiBusyError("reembed", entityId);
|
|
1908
|
+
if (this._isIngestActiveFor(entityId)) throw new WikiBusyError("ingest", entityId);
|
|
1909
|
+
if (this._isForgetActiveFor(entityId)) throw new WikiBusyError("forget", entityId);
|
|
1910
|
+
}
|
|
1911
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) throw new WikiBusyError("import", "*");
|
|
1912
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1913
|
+
for (const entityId of entityIds) {
|
|
1914
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1915
|
+
}
|
|
1916
|
+
}
|
|
1917
|
+
releaseImportLocks(entityIds) {
|
|
1918
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1919
|
+
for (const entityId of entityIds) {
|
|
1920
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1921
|
+
}
|
|
1922
|
+
}
|
|
1923
|
+
getEntityStatus(entityId) {
|
|
1924
|
+
return {
|
|
1925
|
+
ingesting: this._isIngestActiveFor(entityId),
|
|
1926
|
+
librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
|
|
1927
|
+
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1928
|
+
};
|
|
1929
|
+
}
|
|
1930
|
+
subscribeEntityStatus(entityId, callback) {
|
|
1931
|
+
const initial = this.getEntityStatus(entityId);
|
|
1932
|
+
let set = this.statusSubscribers.get(entityId);
|
|
1933
|
+
if (!set) {
|
|
1934
|
+
set = /* @__PURE__ */ new Set();
|
|
1935
|
+
this.statusSubscribers.set(entityId, set);
|
|
1936
|
+
}
|
|
1937
|
+
const entry = { callback, last: this._copyEntityStatus(initial) };
|
|
1938
|
+
set.add(entry);
|
|
1939
|
+
try {
|
|
1940
|
+
callback(this._copyEntityStatus(initial));
|
|
1941
|
+
} catch (err) {
|
|
1942
|
+
console.error(`[JobManager] callback error for entityId="${entityId}" during initial emission`, err);
|
|
1943
|
+
}
|
|
1944
|
+
let active = true;
|
|
1945
|
+
return () => {
|
|
1946
|
+
if (!active) return;
|
|
1947
|
+
active = false;
|
|
1948
|
+
const s = this.statusSubscribers.get(entityId);
|
|
1949
|
+
if (!s) return;
|
|
1950
|
+
s.delete(entry);
|
|
1951
|
+
if (s.size === 0) this.statusSubscribers.delete(entityId);
|
|
1952
|
+
};
|
|
2053
1953
|
}
|
|
2054
1954
|
_copyEntityStatus(s) {
|
|
2055
1955
|
return { ingesting: s.ingesting, librarian: s.librarian, heal: s.heal };
|
|
2056
1956
|
}
|
|
2057
1957
|
_notifyStatusSubscribers(entityId) {
|
|
1958
|
+
if (entityId === "*") return;
|
|
2058
1959
|
const set = this.statusSubscribers.get(entityId);
|
|
2059
1960
|
if (!set || set.size === 0) return;
|
|
2060
1961
|
for (const entry of Array.from(set)) {
|
|
2061
1962
|
if (!set.has(entry)) continue;
|
|
2062
1963
|
const next = this.getEntityStatus(entityId);
|
|
2063
|
-
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal)
|
|
1964
|
+
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) {
|
|
1965
|
+
continue;
|
|
1966
|
+
}
|
|
2064
1967
|
entry.last = this._copyEntityStatus(next);
|
|
2065
1968
|
try {
|
|
2066
1969
|
entry.callback(this._copyEntityStatus(next));
|
|
2067
1970
|
} catch (err) {
|
|
2068
|
-
console.error(`[
|
|
1971
|
+
console.error(`[JobManager] callback error for entityId="${entityId}" during transition emission`, err);
|
|
2069
1972
|
}
|
|
2070
1973
|
}
|
|
2071
1974
|
}
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
1975
|
+
};
|
|
1976
|
+
|
|
1977
|
+
// src/utils/pure.ts
|
|
1978
|
+
/**
 * Extracts and parses the first JSON object or array embedded in `text`.
 *
 * Scanning starts at whichever of `{` or `[` appears first and stops at the
 * matching closer, ignoring brackets inside double-quoted strings and
 * honoring backslash escapes within them.
 *
 * @param {string} text - Raw text, possibly with prose around the JSON.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} When no opener exists, the opener is never closed, or
 *   the extracted slice is not valid JSON.
 */
function parseJsonResponse(text) {
  const NOT_FOUND = "No JSON object/array found in LLM response";
  const bracePos = text.indexOf("{");
  const bracketPos = text.indexOf("[");
  if (bracePos === -1 && bracketPos === -1) {
    throw new SyntaxError(NOT_FOUND);
  }
  const objectFirst = bracePos !== -1 && (bracketPos === -1 || bracePos < bracketPos);
  const start = objectFirst ? bracePos : bracketPos;
  const [opener, closer] = objectFirst ? ["{", "}"] : ["[", "]"];

  let nesting = 0;
  let quoted = false;
  let skipNext = false;
  for (let pos = start; pos < text.length; pos += 1) {
    const ch = text[pos];
    if (skipNext) {
      // Character following a backslash inside a string: never structural.
      skipNext = false;
    } else if (quoted) {
      if (ch === "\\") skipNext = true;
      else if (ch === '"') quoted = false;
    } else if (ch === '"') {
      quoted = true;
    } else if (ch === opener) {
      nesting += 1;
    } else if (ch === closer) {
      nesting -= 1;
      if (nesting === 0) {
        return JSON.parse(text.slice(start, pos + 1));
      }
    }
  }
  throw new SyntaxError(NOT_FOUND);
}
|
|
2030
|
+
function sanitizeRankerError(err, sanitizeRankerErrors) {
|
|
2031
|
+
if (sanitizeRankerErrors === false) {
|
|
2032
|
+
return err instanceof Error ? err : new Error(String(err));
|
|
2033
|
+
}
|
|
2034
|
+
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
2035
|
+
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
2036
|
+
const sanitized = new Error(
|
|
2037
|
+
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
2038
|
+
innerCause ? { cause: innerCause } : void 0
|
|
2039
|
+
);
|
|
2040
|
+
sanitized.name = typeName;
|
|
2041
|
+
return sanitized;
|
|
2042
|
+
}
|
|
2043
|
+
/**
 * Slices a string without cutting a UTF-16 surrogate pair in half.
 *
 * Indices are clamped to the string; negative indices count from the end.
 * If `start` ends up after `end` the two are swapped. A boundary that falls
 * between a high and a low surrogate is moved one unit to the left.
 *
 * @param {string} value - String to slice.
 * @param {number} start - Start index (inclusive).
 * @param {number} [end] - End index (exclusive); defaults to the string length.
 * @returns {string}
 */
function safeSlice(value, start, end) {
  const size = value.length;
  const clamp = (idx) => (idx < 0 ? Math.max(size + idx, 0) : Math.min(idx, size));
  const isHighSurrogate = (code) => code >= 55296 && code <= 56319;
  const isLowSurrogate = (code) => code >= 56320 && code <= 57343;
  // True when `idx` sits between the two halves of a surrogate pair.
  const splitsPair = (idx) =>
    idx > 0 &&
    idx < size &&
    isHighSurrogate(value.charCodeAt(idx - 1)) &&
    isLowSurrogate(value.charCodeAt(idx));

  let from = clamp(start);
  let to = end === void 0 ? size : clamp(end);
  if (from > to) {
    const swap = from;
    from = to;
    to = swap;
  }
  if (splitsPair(from)) from -= 1;
  if (splitsPair(to)) to -= 1;
  return value.slice(from, to);
}
|
|
2058
|
+
/**
 * Splits text into chunks of at most `maxChunkLength` UTF-16 units, preferring
 * natural boundaries in the second half of each window.
 *
 * Split preference per window: the last paragraph break ("\n\n"), then the
 * last sentence terminator (. ! ?) followed by whitespace, then the last
 * whitespace character. If none exists the window is cut at its end and the
 * result is flagged as truncated. Each new chunk starts `overlap` units before
 * the previous split point, always advancing by at least one unit.
 *
 * @param {string} input - Text to split; leading/trailing whitespace is trimmed.
 * @param {number} maxChunkLength - Integer >= 2.
 * @param {number} overlap - Integer with 0 <= overlap < maxChunkLength.
 * @returns {{chunks: string[], truncated: boolean}} `truncated` is true when
 *   at least one chunk had to be cut without a natural boundary.
 * @throws {Error} When `maxChunkLength` or `overlap` is out of range (only
 *   checked for non-empty input).
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }
  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);
  while (cursor < text.length) {
    const remaining = text.length - cursor;
    if (remaining <= maxChunkLength) {
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }
    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // 1) Paragraph break. Search from windowEnd - 2 so the break found lies
    //    fully inside the window; searching from windowEnd could return a break
    //    straddling the boundary and hide an earlier usable one.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    // 2) Last sentence terminator followed by whitespace.
    if (splitPoint === -1) {
      for (let i = windowEnd - 2; i >= minSplit; i--) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
          splitPoint = i + 2;
          break;
        }
      }
    }

    // 3) Last whitespace character.
    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (/\s/.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    // 4) No natural boundary: hard cut at the window end.
    if (splitPoint === -1) {
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    // Step back by `overlap`, but always advance by at least one unit.
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
|
|
2112
|
+
/**
 * Runs async task factories with at most `limit` of them in flight and
 * returns their results in task order.
 *
 * After the first failure no further tasks are started; tasks already in
 * flight are allowed to settle, then the first error is rethrown.
 *
 * @param {Array<() => Promise<unknown>>} tasks Zero-argument task factories.
 * @param {number} limit Desired concurrency. Values that are not a number
 *   >= 1 (including `NaN` and `undefined`) fall back to 1 so that every task
 *   still runs; fractional values are floored.
 * @returns {Promise<unknown[]>} Results, index-aligned with `tasks`.
 * @throws The first error raised by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  if (tasks.length === 0) return results;

  // `Math.max(NaN, 1)` is NaN, which previously produced zero workers and a
  // silently empty result. Normalize the limit before sizing the pool.
  const requested = Number(limit);
  const cap = requested >= 1 ? Math.floor(requested) : 1;
  const workerCount = Math.min(cap, tasks.length);

  let nextIndex = 0;
  let failed = false;
  let firstError;

  // Each worker pulls the next unclaimed index until the queue is drained or
  // some worker has recorded a failure. Workers never reject.
  const worker = async () => {
    while (nextIndex < tasks.length && !failed) {
      const i = nextIndex++;
      try {
        results[i] = await tasks[i]();
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
        return;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
|
|
2136
|
+
/**
 * Trims a string and shortens it to at most `max` UTF-16 code units.
 *
 * @param {unknown} value Candidate text; anything that is not a string yields "".
 * @param {number} max Maximum length of the returned string.
 * @returns {string} The trimmed text, cut via `safeSlice` and right-trimmed
 *   again when it was longer than `max`.
 */
function clip(value, max) {
  if (typeof value !== "string") {
    return "";
  }
  const trimmed = value.trim();
  if (trimmed.length <= max) {
    return trimmed;
  }
  const shortened = safeSlice(trimmed, 0, max);
  return shortened.trimEnd();
}
|
|
2141
|
+
/**
 * Normalizes a tag list: keeps string entries only, trims and lower-cases
 * them, drops empty tags and tags longer than 40 characters, and returns at
 * most the first six survivors.
 *
 * @param {unknown} tags Candidate tag list; non-arrays yield [].
 * @returns {string[]} Up to six cleaned tags, in input order.
 */
function validateTags(tags) {
  if (!Array.isArray(tags)) return [];
  const accepted = [];
  for (const raw of tags) {
    if (accepted.length === 6) break;
    if (typeof raw !== "string") continue;
    const tag = raw.trim().toLowerCase();
    if (tag.length > 0 && tag.length <= 40) {
      accepted.push(tag);
    }
  }
  return accepted;
}
|
|
2145
|
+
/**
 * Validates and normalizes one fact object.
 *
 * @param {object|null|undefined} fact Candidate fact; must carry string
 *   `title` and `body` properties.
 * @returns {object|null} A copy of `fact` with `title` clipped to 80 and
 *   `body` to 800 characters, `confidence` coerced to "inferred" unless it is
 *   "certain" or "tentative", and `tags` passed through `validateTags`.
 *   Returns null when title/body are missing, not strings, or empty after
 *   clipping.
 */
function validateFact(fact) {
  const hasStringFields = typeof fact?.title === "string" && typeof fact?.body === "string";
  if (!hasStringFields) return null;

  const title = clip(fact.title, 80);
  const body = clip(fact.body, 800);
  if (!title || !body) return null;

  const declared = fact.confidence;
  const confidence = declared === "certain" || declared === "tentative" ? declared : "inferred";

  return {
    ...fact,
    title,
    body,
    confidence,
    tags: validateTags(fact.tags)
  };
}
|
|
2160
|
+
/**
 * Validates and normalizes one task object.
 *
 * @param {object|null|undefined} task Candidate task; must carry a string
 *   `description`.
 * @returns {object|null} A copy of `task` with `description` clipped to 200
 *   characters and `priority` rounded and clamped to the integer range 0-10
 *   (non-numeric or non-finite priorities become 0). Returns null when the
 *   description is missing, not a string, or empty after clipping.
 */
function validateTask(task) {
  if (typeof task?.description !== "string") return null;

  const description = clip(task.description, 200);
  if (!description) return null;

  const rawPriority = task.priority;
  const usable = typeof rawPriority === "number" && Number.isFinite(rawPriority);
  const rounded = Math.round(usable ? rawPriority : 0);
  const priority = Math.max(0, Math.min(10, rounded));

  return { ...task, description, priority };
}
|
|
2173
|
+
/**
 * Reduces a source reference to a safe identifier.
 *
 * Every character other than ASCII letters, digits, ".", "_", "-" and space
 * is removed; the remainder is trimmed and cut to 255 characters.
 *
 * @param {unknown} value Candidate reference.
 * @returns {string|null} The cleaned reference, or null when `value` is not a
 *   string or nothing is left after cleaning.
 */
function normalizeSourceRef(value) {
  if (typeof value !== "string") {
    return null;
  }
  const stripped = value.replace(/[^A-Za-z0-9._\- ]/g, "");
  const cleaned = stripped.trim().slice(0, 255);
  if (cleaned.length === 0) {
    return null;
  }
  return cleaned;
}
|
|
2178
|
+
/**
 * Accepts a 64-character hexadecimal digest and returns it lower-cased.
 *
 * @param {unknown} value Candidate hash.
 * @returns {string|null} The lower-case hash, or null when `value` is not a
 *   string of exactly 64 hex digits.
 */
function normalizeSourceHash(value) {
  if (typeof value !== "string") {
    return null;
  }
  const isHexDigest = /^[0-9a-f]{64}$/i.test(value);
  if (!isHexDigest) {
    return null;
  }
  return value.toLowerCase();
}
|
|
2182
|
+
/**
 * Tokenizes a title for set-based comparison.
 *
 * The title is lower-cased, every character other than a-z, 0-9 and
 * whitespace is removed, and the result is split on whitespace. Tokens
 * shorter than three characters are discarded.
 *
 * @param {string} title Title text.
 * @returns {Set<string>} Distinct tokens of length >= 3.
 */
function titleTokens(title) {
  const words = title
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, "")
    .split(/\s+/);
  const tokens = /* @__PURE__ */ new Set();
  for (const word of words) {
    if (word.length >= 3) {
      tokens.add(word);
    }
  }
  return tokens;
}
|
|
2185
|
+
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set<unknown>} a First set.
 * @param {Set<unknown>} b Second set.
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardScore(a, b) {
  if (a.size === 0 && b.size === 0) return 0;

  let shared = 0;
  const union = /* @__PURE__ */ new Set();
  for (const item of a) {
    union.add(item);
    if (b.has(item)) shared += 1;
  }
  for (const item of b) {
    union.add(item);
  }
  return shared / union.size;
}
|
|
2191
|
+
|
|
2192
|
+
// src/prompts.ts
|
|
2193
|
+
/**
 * Default system prompt for the librarian pass: asks the model to turn recent
 * events into facts and tasks, returned as raw JSON. Used by
 * PromptService.buildLibrarianPrompt when no override template is supplied.
 */
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
Return ONLY a valid JSON object matching this schema:
{
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
}
Keep facts concise. Do not return markdown, just raw JSON.`;
/**
 * Default system prompt for the heal pass: asks the model for fact IDs to
 * downgrade or delete plus replacement facts, returned as raw JSON. Used by
 * PromptService.buildHealPrompt when no override template is supplied.
 */
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
Return ONLY a valid JSON object matching this schema:
{
"downgraded": ["string (fact IDs)"],
"deleted": ["string (fact IDs)"],
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
}
Do not return markdown, just raw JSON.`;
/**
 * Default system prompt for document ingestion: asks the model to extract
 * facts from one document chunk, returned as raw JSON. Used by
 * PromptService.buildIngestPrompt when no override template is supplied.
 */
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
Return ONLY a valid JSON object matching this schema:
{
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
}
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
2214
|
+
|
|
2215
|
+
// src/services/PromptService.ts
|
|
2216
|
+
/**
 * Builds the system/user prompt pairs sent to the LLM for ingestion,
 * librarian and heal runs.
 *
 * Template resolution order for every builder: the per-call
 * `runtimeOverride`, then the matching field of the constructor's
 * `globalOverrides`, then the built-in default prompt. When the chosen
 * template contains one of the builder's `{{ placeholder }}` names, the data
 * is interpolated into the system prompt and a short fixed user prompt is
 * used; otherwise the template is used verbatim as the system prompt and the
 * data is serialized into the user prompt.
 */
var PromptService = class {
  /**
   * @param {object} [globalOverrides] Optional templates; the fields read are
   *   `ingestSystemPrompt`, `librarianSystemPrompt` and `healSystemPrompt`.
   */
  constructor(globalOverrides) {
    this.globalOverrides = globalOverrides;
  }

  /**
   * Replaces `{{ name }}` placeholders with values from `variables`.
   *
   * Strings are inserted as-is; other values are pretty-printed as JSON.
   * A placeholder is left untouched when `variables` has no own property of
   * that name, when the value is undefined, or when it cannot be serialized
   * to JSON (e.g. a function).
   *
   * @param {string} template Template text.
   * @param {object} variables Placeholder values keyed by name.
   * @returns {string} The hydrated template.
   */
  hydrate(template, variables) {
    return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (placeholder, key) => {
      // Only own properties count: a bare `variables[key]` would resolve
      // names such as "constructor" or "toString" through the prototype chain
      // and splice the text "undefined" into the prompt.
      if (!Object.prototype.hasOwnProperty.call(variables, key)) return placeholder;
      const value = variables[key];
      if (value === void 0) return placeholder;
      if (typeof value === "string") return value;
      const serialized = JSON.stringify(value, null, 2);
      return serialized === void 0 ? placeholder : serialized;
    });
  }

  /**
   * @param {string} documentChunk Text of the chunk to ingest.
   * @param {string} [runtimeOverride] Per-call template.
   * @returns {{ systemPrompt: string, userPrompt: string }}
   */
  buildIngestPrompt(documentChunk, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.ingestSystemPrompt ?? INGEST_SYSTEM_PROMPT;
    if (/\{\{\s*documentChunk\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { documentChunk }),
        userPrompt: "Please extract the facts."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: `Document Chunk:\n${documentChunk}`
    };
  }

  /**
   * @param {unknown} events Events to synthesize (serialized as JSON).
   * @param {unknown} currentFacts Existing facts (serialized as JSON).
   * @param {string} [runtimeOverride] Per-call template.
   * @returns {{ systemPrompt: string, userPrompt: string }}
   */
  buildLibrarianPrompt(events, currentFacts, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.librarianSystemPrompt ?? LIBRARIAN_SYSTEM_PROMPT;
    if (/\{\{\s*events\s*\}\}/.test(template) || /\{\{\s*currentFacts\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { events, currentFacts }),
        userPrompt: "Please synthesize the context."
      };
    }
    return {
      systemPrompt: template,
      userPrompt: `Events:\n${JSON.stringify(events, null, 2)}\n\nCurrent Facts:\n${JSON.stringify(currentFacts, null, 2)}`
    };
  }

  /**
   * @param {unknown} healCandidates Facts the model may downgrade or delete.
   * @param {unknown} documentAnchors Facts supplied for reference only.
   * @param {unknown} allTasks Tasks (serialized as JSON).
   * @param {unknown} recentEvents Events (serialized as JSON).
   * @param {string} [runtimeOverride] Per-call template.
   * @returns {{ systemPrompt: string, userPrompt: string }}
   */
  buildHealPrompt(healCandidates, documentAnchors, allTasks, recentEvents, runtimeOverride) {
    const template = runtimeOverride ?? this.globalOverrides?.healSystemPrompt ?? HEAL_SYSTEM_PROMPT;
    if (/\{\{\s*healCandidates\s*\}\}/.test(template) || /\{\{\s*documentAnchors\s*\}\}/.test(template) || /\{\{\s*allTasks\s*\}\}/.test(template) || /\{\{\s*recentEvents\s*\}\}/.test(template)) {
      return {
        systemPrompt: this.hydrate(template, { healCandidates, documentAnchors, allTasks, recentEvents }),
        userPrompt: "Please heal the memory graph."
      };
    }
    // NOTE(review): the closing sentence says "The following document anchors"
    // although the anchors appear earlier in this prompt — consider rewording.
    // The text is left unchanged here because it is model-facing behavior.
    const userPrompt =
      `Heal Candidates:\n${JSON.stringify(healCandidates, null, 2)}\n` +
      `Document Anchors (DO NOT MODIFY OR DELETE):\n${JSON.stringify(documentAnchors, null, 2)}\n` +
      `All Tasks:\n${JSON.stringify(allTasks, null, 2)}\n` +
      `Recent Events:\n${JSON.stringify(recentEvents, null, 2)}\n` +
      "The following document anchors are provided for contradiction detection only. Do not include them in `downgraded`, `deleted`, or `newFacts`.";
    return {
      systemPrompt: template,
      userPrompt
    };
  }
};
|
|
2280
|
+
|
|
2281
|
+
// src/services/IngestionService.ts
|
|
2282
|
+
/**
 * Ingests a document for an entity: chunks the text, asks the LLM for facts
 * per chunk, and replaces the facts previously stored for the same source
 * reference with the newly extracted ones.
 */
var IngestionService = class {
  /**
   * Stores the collaborators on the instance. When `promptService` is not
   * supplied, a PromptService is created from `options.config?.prompts`.
   */
  constructor(db, prefix, options, entryRepo, searchService, jobManager, embeddingService, promptService) {
    this.db = db;
    this.prefix = prefix;
    this.options = options;
    this.entryRepo = entryRepo;
    this.searchService = searchService;
    this.jobManager = jobManager;
    this.embeddingService = embeddingService;
    this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
  }

  /**
   * @param {string} entityId Entity that owns the facts.
   * @param {object} params
   * @param {string} params.sourceRef Source identifier (cleaned by normalizeSourceRef).
   * @param {string} params.sourceHash 64-character hex digest of the source.
   * @param {string} params.documentChunk Document text to ingest.
   * @param {number} [params.maxChunkLength] Defaults to config, then 12000.
   * @param {number} [params.chunkOverlap] Defaults to config, then 400; capped
   *   at `maxChunkLength - 1`.
   * @param {number} [params.chunkConcurrency] Defaults to config, then 1.
   * @param {string} [params.promptOverride] Per-call ingest template.
   * @returns {Promise<{ truncated: boolean, chunks: number }>}
   * @throws {Error} On an invalid sourceRef/sourceHash or non-string
   *   documentChunk; errors from chunking, the LLM call, the transaction and
   *   embedding propagate.
   */
  async ingestDocument(entityId, params) {
    const sourceRef = normalizeSourceRef(params.sourceRef);
    if (!sourceRef) throw new Error("Invalid sourceRef");
    const sourceHash = normalizeSourceHash(params.sourceHash);
    if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
    const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
    const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
    const chunkOverlap = Math.min(
      Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
      maxChunkLength - 1
    );
    const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
    const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
    if (typeof params.documentChunk !== "string") {
      throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
    }
    this.jobManager.acquireLock("ingest", entityId, sourceRef);
    try {
      const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
      if (chunks.length === 0) return { truncated: false, chunks: 0 };

      // One LLM call per chunk; each task yields that chunk's validated facts.
      const chunkResults = await withConcurrency(
        chunks.map((chunk) => async () => {
          const { systemPrompt, userPrompt } = this.promptService.buildIngestPrompt(chunk, params.promptOverride);
          const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
          const result = parseJsonResponse(responseText);
          return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
        }),
        chunkConcurrency
      );

      // De-duplicate across chunks by case- and whitespace-normalized title;
      // the first occurrence wins.
      const seen = /* @__PURE__ */ new Set();
      const allValidFacts = [];
      for (const facts of chunkResults) {
        for (const fact of facts) {
          const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
          if (!seen.has(normalized)) {
            seen.add(normalized);
            allValidFacts.push(fact);
          }
        }
      }

      const now = Date.now();
      const insertedFacts = [];
      const deletedSourceFactIds = [];
      // Replace the source's facts in one transaction: record the existing
      // IDs, soft-delete them, then insert the new facts.
      await this.db.withTransactionAsync(async (tx) => {
        deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
        await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
        for (const fact of allValidFacts) {
          const id = generateId("fact_");
          const wikiFact = {
            id,
            entity_id: entityId,
            title: fact.title,
            body: fact.body,
            tags: fact.tags,
            confidence: fact.confidence,
            source_type: "immutable_document",
            source_hash: sourceHash,
            source_ref: sourceRef,
            created_at: now,
            updated_at: now,
            last_accessed_at: null,
            access_count: 0,
            deleted_at: null
          };
          await this.entryRepo.upsert(wikiFact, tx);
          insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
        }
      });
      await this.searchService.sync(entityId);

      // Deletion notifications are best-effort: a failing hook is logged and
      // does not abort the ingest.
      const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
      for (const factId of uniqueDeletedSourceFactIds) {
        try {
          await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
        } catch (hookErr) {
          console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
        }
      }

      // The transaction has already committed, so the cache must be evicted
      // even when embedding a fact throws part-way through.
      try {
        for (const fact of insertedFacts) {
          await this.embeddingService.embedFact(fact);
        }
      } finally {
        this.searchService.evictCache(entityId);
      }
      return { truncated, chunks: chunks.length };
    } finally {
      this.jobManager.releaseLock("ingest", entityId, sourceRef);
    }
  }
};
|
|
2380
|
+
|
|
2381
|
+
// src/services/MaintenanceService.ts
|
|
2382
|
+
// NOTE(review): presumably the similarity cutoff applied to jaccardScore()
// results when fuzzy-matching titles — the use site is not visible here; confirm.
var FUZZY_THRESHOLD = 0.5;
// NOTE(review): presumably the minimum number of title tokens required before
// a title takes part in fuzzy matching — the use site is not visible here; confirm.
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
2384
|
+
var MaintenanceService = class {
|
|
2385
|
+
constructor(db, prefix, options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService, promptService) {
|
|
2386
|
+
this.db = db;
|
|
2387
|
+
this.prefix = prefix;
|
|
2388
|
+
this.options = options;
|
|
2389
|
+
this.entryRepo = entryRepo;
|
|
2390
|
+
this.taskRepo = taskRepo;
|
|
2391
|
+
this.eventRepo = eventRepo;
|
|
2392
|
+
this.metadataRepo = metadataRepo;
|
|
2393
|
+
this.searchService = searchService;
|
|
2394
|
+
this.jobManager = jobManager;
|
|
2395
|
+
this.embeddingService = embeddingService;
|
|
2396
|
+
this.promptService = promptService ?? new PromptService(this.options.config?.prompts);
|
|
2397
|
+
}
|
|
2398
|
+
async runPrune(entityId, options) {
|
|
2399
|
+
this.jobManager.acquireLock("prune", entityId);
|
|
2400
|
+
try {
|
|
2401
|
+
const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
|
|
2402
|
+
const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
|
|
2403
|
+
const vacuum = options?.vacuum ?? false;
|
|
2404
|
+
this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
|
|
2405
|
+
this._validatePruneDuration(retainEventsFor, "retainEventsFor");
|
|
2406
|
+
const now = Date.now();
|
|
2407
|
+
let deletedEntries = 0;
|
|
2408
|
+
let deletedTasks = 0;
|
|
2409
|
+
let deletedEvents = 0;
|
|
2410
|
+
if (retainSoftDeletedFor !== null) {
|
|
2411
|
+
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
2412
|
+
const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
|
|
2413
|
+
const succeeded = [];
|
|
2414
|
+
let failure = null;
|
|
2415
|
+
for (const row of entriesToDelete) {
|
|
2416
|
+
try {
|
|
2417
|
+
await this.embeddingService.notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
2418
|
+
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
2419
|
+
} catch (err) {
|
|
2420
|
+
failure = { factId: row.id, cause: err };
|
|
2421
|
+
break;
|
|
2553
2422
|
}
|
|
2554
|
-
this.options.onRetrievalFallback?.(error);
|
|
2555
2423
|
}
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
combineWith: "OR"
|
|
2424
|
+
const succeededIds = succeeded.map((r) => r.id);
|
|
2425
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2426
|
+
if (succeededIds.length > 0) {
|
|
2427
|
+
deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
|
|
2428
|
+
}
|
|
2429
|
+
deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
|
|
2563
2430
|
});
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
2431
|
+
if (failure) {
|
|
2432
|
+
await this.searchService.sync(entityId);
|
|
2433
|
+
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
2434
|
+
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2435
|
+
if (isTimeout) {
|
|
2436
|
+
throw new PrunePartialFailureError(
|
|
2437
|
+
succeeded.length,
|
|
2438
|
+
failure.factId,
|
|
2439
|
+
remaining,
|
|
2440
|
+
new Error("Deletion hook timed out"),
|
|
2441
|
+
deletedTasks,
|
|
2442
|
+
0
|
|
2443
|
+
);
|
|
2578
2444
|
}
|
|
2445
|
+
const errMsg = failure.cause?.message ?? "";
|
|
2446
|
+
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2447
|
+
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
2448
|
+
throw new PrunePartialFailureError(
|
|
2449
|
+
succeeded.length,
|
|
2450
|
+
failure.factId,
|
|
2451
|
+
remaining,
|
|
2452
|
+
sanitizedCause,
|
|
2453
|
+
deletedTasks,
|
|
2454
|
+
0
|
|
2455
|
+
);
|
|
2579
2456
|
}
|
|
2580
2457
|
}
|
|
2581
|
-
if (
|
|
2582
|
-
const
|
|
2583
|
-
const
|
|
2584
|
-
|
|
2458
|
+
if (retainEventsFor !== null) {
|
|
2459
|
+
const cutoff = now - retainEventsFor * 864e5;
|
|
2460
|
+
const eventResult = await this.eventRepo.prune(entityId, cutoff);
|
|
2461
|
+
deletedEvents = eventResult.changes;
|
|
2585
2462
|
}
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
|
|
2593
|
-
]);
|
|
2594
|
-
let factScores;
|
|
2595
|
-
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
2596
|
-
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
2597
|
-
}
|
|
2598
|
-
const bundle = { facts, tasks, events: events.reverse() };
|
|
2599
|
-
if (exposeMetadata) {
|
|
2600
|
-
bundle.metadata = { query, entityIds };
|
|
2601
|
-
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
2602
|
-
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
2463
|
+
if (vacuum) {
|
|
2464
|
+
await this.metadataRepo.vacuum();
|
|
2465
|
+
}
|
|
2466
|
+
await this.searchService.sync(entityId);
|
|
2467
|
+
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
2468
|
+
} finally {
|
|
2469
|
+
this.jobManager.releaseLock("prune", entityId);
|
|
2603
2470
|
}
|
|
2604
|
-
return bundle;
|
|
2605
|
-
}
|
|
2606
|
-
/**
|
|
2607
|
-
* Returns entity IDs that will participate in scored retrieval.
|
|
2608
|
-
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
2609
|
-
*/
|
|
2610
|
-
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
2611
|
-
return entityIds.filter((id) => {
|
|
2612
|
-
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
2613
|
-
return includeZeroWeightEntities === true || w !== 0;
|
|
2614
|
-
});
|
|
2615
|
-
}
|
|
2616
|
-
/**
|
|
2617
|
-
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
2618
|
-
*/
|
|
2619
|
-
_tieBreakSort(items) {
|
|
2620
|
-
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
2621
|
-
}
|
|
2622
|
-
/**
|
|
2623
|
-
* Comparator for score + deterministic tie-break fields.
|
|
2624
|
-
* Negative return means "a ranks ahead of b" for descending score order.
|
|
2625
|
-
*/
|
|
2626
|
-
_compareScoredRows(a, b) {
|
|
2627
|
-
const scoreDiff = b.score - a.score;
|
|
2628
|
-
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
2629
|
-
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
2630
|
-
if (accessCountDiff !== 0) return accessCountDiff;
|
|
2631
|
-
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
2632
|
-
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
2633
|
-
return a.id.localeCompare(b.id);
|
|
2634
2471
|
}
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
}
|
|
2642
|
-
/**
|
|
2643
|
-
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
2644
|
-
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
2645
|
-
* Recursively sanitizes one level of .cause; deeper chains collapse to type only.
|
|
2646
|
-
*/
|
|
2647
|
-
_sanitizeRankerError(err) {
|
|
2648
|
-
if (this.options.sanitizeRankerErrors === false) {
|
|
2649
|
-
return err instanceof Error ? err : new Error(String(err));
|
|
2472
|
+
async runLibrarian(entityId, options) {
|
|
2473
|
+
this.jobManager.acquireLock("librarian", entityId);
|
|
2474
|
+
try {
|
|
2475
|
+
await this.doRunLibrarian(entityId, options?.promptOverride);
|
|
2476
|
+
} finally {
|
|
2477
|
+
this.jobManager.releaseLock("librarian", entityId);
|
|
2650
2478
|
}
|
|
2651
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
2652
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
2653
|
-
const sanitized = new Error(
|
|
2654
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
2655
|
-
innerCause ? { cause: innerCause } : void 0
|
|
2656
|
-
);
|
|
2657
|
-
sanitized.name = typeName;
|
|
2658
|
-
return sanitized;
|
|
2659
2479
|
}
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
2667
|
-
let entityCache = this.vectorCache.get(entityId);
|
|
2668
|
-
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
2669
|
-
if (tooLarge && entityCache) {
|
|
2670
|
-
this.vectorCache.delete(entityId);
|
|
2671
|
-
entityCache = void 0;
|
|
2672
|
-
}
|
|
2673
|
-
const canCache = populateCache && !tooLarge;
|
|
2674
|
-
if (canCache && !entityCache) {
|
|
2675
|
-
entityCache = /* @__PURE__ */ new Map();
|
|
2480
|
+
async runHeal(entityId, options) {
|
|
2481
|
+
this.jobManager.acquireLock("heal", entityId);
|
|
2482
|
+
try {
|
|
2483
|
+
await this.doRunHeal(entityId, options?.promptOverride);
|
|
2484
|
+
} finally {
|
|
2485
|
+
this.jobManager.releaseLock("heal", entityId);
|
|
2676
2486
|
}
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2487
|
+
}
|
|
2488
|
+
async runReembed(entityId, opts) {
|
|
2489
|
+
const embedFn = this.options.llmProvider.embed;
|
|
2490
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
2491
|
+
const op = entityId ? "reembed" : "global_reembed";
|
|
2492
|
+
this.jobManager.acquireLock(op, entityId ?? "*");
|
|
2493
|
+
try {
|
|
2494
|
+
const rows = await this.entryRepo.findAllForReembed(entityId);
|
|
2495
|
+
this.searchService.evictCache(entityId);
|
|
2496
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
2497
|
+
let effectiveSkip = skipExisting;
|
|
2498
|
+
if (skipExisting) {
|
|
2499
|
+
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
2500
|
+
if (mismatchValue) {
|
|
2501
|
+
if (entityId) {
|
|
2502
|
+
const mismatchDim = parseInt(mismatchValue, 10);
|
|
2503
|
+
const staleCount = await this.entryRepo.countStaleForEntity(entityId, mismatchDim);
|
|
2504
|
+
if (staleCount > 0) effectiveSkip = false;
|
|
2505
|
+
} else {
|
|
2506
|
+
effectiveSkip = false;
|
|
2507
|
+
}
|
|
2690
2508
|
}
|
|
2691
|
-
} else if (weight !== void 0 && weight < 1) {
|
|
2692
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
2693
|
-
score = (1 - weight) * kwScore;
|
|
2694
|
-
} else {
|
|
2695
|
-
score = -2;
|
|
2696
2509
|
}
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2510
|
+
let embedded = 0;
|
|
2511
|
+
let skipped = 0;
|
|
2512
|
+
let failed = 0;
|
|
2513
|
+
try {
|
|
2514
|
+
for (const row of rows) {
|
|
2515
|
+
const existingBlob = row.embedding_blob;
|
|
2516
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
2517
|
+
if (effectiveSkip && blobIsValid) {
|
|
2518
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
2519
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
2520
|
+
skipped++;
|
|
2521
|
+
continue;
|
|
2522
|
+
}
|
|
2523
|
+
}
|
|
2524
|
+
const success = await this.embeddingService.embedFact(row);
|
|
2525
|
+
if (success) embedded++;
|
|
2526
|
+
else failed++;
|
|
2710
2527
|
}
|
|
2711
|
-
|
|
2712
|
-
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
}
|
|
2717
|
-
/**
|
|
2718
|
-
* Delegate semantic ranking to the injected VectorRanker.
|
|
2719
|
-
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
2720
|
-
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
2721
|
-
*/
|
|
2722
|
-
async _rankWithVectorRanker(args) {
|
|
2723
|
-
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
2724
|
-
const ranker = this.options.vectorRanker;
|
|
2725
|
-
if (!ranker) {
|
|
2726
|
-
throw new Error("vectorRanker not configured");
|
|
2727
|
-
}
|
|
2728
|
-
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
2729
|
-
const rankerResults = await ranker.rankBySimilarity({
|
|
2730
|
-
entityId,
|
|
2731
|
-
queryVec: queryVecCopy,
|
|
2732
|
-
candidateIds,
|
|
2733
|
-
limit
|
|
2734
|
-
});
|
|
2735
|
-
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
2736
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2737
|
-
const normalized = [];
|
|
2738
|
-
for (const r of rankerResults) {
|
|
2739
|
-
if (normalized.length >= limit) break;
|
|
2740
|
-
if (seen.has(r.id)) continue;
|
|
2741
|
-
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
2742
|
-
if (!Number.isFinite(r.semanticScore)) continue;
|
|
2743
|
-
seen.add(r.id);
|
|
2744
|
-
normalized.push(r);
|
|
2745
|
-
}
|
|
2746
|
-
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
2747
|
-
const scored = normalized.map((r) => {
|
|
2748
|
-
let score = r.semanticScore;
|
|
2749
|
-
if (weight !== void 0) {
|
|
2750
|
-
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
2751
|
-
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
2528
|
+
if (embedded > 0) {
|
|
2529
|
+
await this.embeddingService.reconcileEmbeddingDimension();
|
|
2530
|
+
}
|
|
2531
|
+
} finally {
|
|
2532
|
+
this.searchService.evictCache(entityId);
|
|
2752
2533
|
}
|
|
2753
|
-
return {
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
score
|
|
2758
|
-
};
|
|
2759
|
-
});
|
|
2760
|
-
return scored;
|
|
2761
|
-
}
|
|
2762
|
-
async getMemoryBundle(entityId) {
|
|
2763
|
-
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
2534
|
+
return { embedded, skipped, failed };
|
|
2535
|
+
} finally {
|
|
2536
|
+
this.jobManager.releaseLock(op, entityId ?? "*");
|
|
2537
|
+
}
|
|
2764
2538
|
}
|
|
2765
|
-
async
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
let eventType = event.event_type;
|
|
2769
|
-
if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
|
|
2770
|
-
eventType = "observation";
|
|
2539
|
+
async forget(entityId, params) {
|
|
2540
|
+
if (params.clearAll && (params.entryId !== void 0 || params.taskId !== void 0 || params.sourceRef !== void 0 || params.sourceHash !== void 0)) {
|
|
2541
|
+
throw new Error("forget() clearAll is mutually exclusive with entryId, taskId, sourceRef, and sourceHash");
|
|
2771
2542
|
}
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2543
|
+
this.jobManager.acquireLock("forget", entityId);
|
|
2544
|
+
try {
|
|
2545
|
+
const now = Date.now();
|
|
2546
|
+
let deletedEntries = 0;
|
|
2547
|
+
let deletedTasks = 0;
|
|
2548
|
+
const deletedEntryIds = [];
|
|
2549
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2550
|
+
if (params.clearAll) {
|
|
2551
|
+
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
|
|
2552
|
+
deletedEntries = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2553
|
+
deletedTasks = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2554
|
+
await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
|
|
2555
|
+
} else {
|
|
2556
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
2557
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
2558
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
2559
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
2560
|
+
}
|
|
2561
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
2562
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
2563
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
2564
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2565
|
+
if (params.entryId) {
|
|
2566
|
+
const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
|
|
2567
|
+
if (entryId) deletedEntryIds.push(entryId);
|
|
2568
|
+
}
|
|
2569
|
+
if (sourceRef || sourceHash) {
|
|
2570
|
+
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
|
|
2571
|
+
}
|
|
2572
|
+
const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
2573
|
+
const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
2574
|
+
const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
|
|
2575
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
2576
|
+
entryPromise ?? Promise.resolve(false),
|
|
2577
|
+
taskDeletedPromise ?? Promise.resolve(false),
|
|
2578
|
+
refPromise ?? Promise.resolve(0)
|
|
2579
|
+
]);
|
|
2580
|
+
if (entryResult) deletedEntries++;
|
|
2581
|
+
if (taskResult) deletedTasks++;
|
|
2582
|
+
deletedEntries += refResult;
|
|
2799
2583
|
}
|
|
2800
|
-
}
|
|
2801
|
-
});
|
|
2802
|
-
if (shouldRunLibrarian && librarianJobKey !== null) {
|
|
2803
|
-
this.activeMaintenanceJobs.add(librarianJobKey);
|
|
2804
|
-
this._notifyStatusSubscribers(entityId);
|
|
2805
|
-
this.runLibrarianThenMaybeHeal(entityId, librarianCount).catch(console.error).finally(() => {
|
|
2806
|
-
this.activeMaintenanceJobs.delete(librarianJobKey);
|
|
2807
|
-
this._notifyStatusSubscribers(entityId);
|
|
2808
2584
|
});
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
await this._doRunLibrarian(entityId);
|
|
2813
|
-
const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
|
|
2814
|
-
const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
|
|
2815
|
-
let healCheckpoint = cp.heal ?? 0;
|
|
2816
|
-
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
2817
|
-
const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
|
|
2818
|
-
if (shouldRunHeal) {
|
|
2819
|
-
const healKey = this._healKey(entityId);
|
|
2820
|
-
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
2821
|
-
this.activeMaintenanceJobs.add(healKey);
|
|
2822
|
-
this._notifyStatusSubscribers(entityId);
|
|
2585
|
+
await this.searchService.sync(entityId);
|
|
2586
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2587
|
+
for (const factId of uniqueDeletedIds) {
|
|
2823
2588
|
try {
|
|
2824
|
-
await this.
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2589
|
+
await this.embeddingService.notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
2590
|
+
} catch (hookErr) {
|
|
2591
|
+
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2592
|
+
if (isTimeout) {
|
|
2593
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ${hookErr.message}`);
|
|
2594
|
+
}
|
|
2595
|
+
const errMsg = hookErr?.message ?? "";
|
|
2596
|
+
if (errMsg.startsWith("Invalid deletionHookTimeoutMs")) {
|
|
2597
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ${errMsg}`, { cause: hookErr });
|
|
2598
|
+
}
|
|
2599
|
+
throw new Error(`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`, { cause: this._sanitizeRankerError(hookErr) });
|
|
2829
2600
|
}
|
|
2830
2601
|
}
|
|
2602
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
2603
|
+
} finally {
|
|
2604
|
+
this.jobManager.releaseLock("forget", entityId);
|
|
2831
2605
|
}
|
|
2832
2606
|
}
|
|
2833
|
-
|
|
2607
|
+
/** Core librarian pass (locks handled by {@link runLibrarian}). Package-internal orchestration hook. */
|
|
2608
|
+
async doRunLibrarian(entityId, promptOverride) {
|
|
2834
2609
|
const events = await this.eventRepo.getRecent(entityId, 50);
|
|
2835
2610
|
const currentFactsRows = await this.entryRepo.findRecentByEntityId(entityId, 100);
|
|
2836
2611
|
const currentFacts = currentFactsRows.map((f) => {
|
|
@@ -2840,15 +2615,12 @@ After running the migration SQL, restart your application.`
|
|
|
2840
2615
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
2841
2616
|
};
|
|
2842
2617
|
});
|
|
2843
|
-
const userPrompt =
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2849
|
-
systemPrompt: LIBRARIAN_SYSTEM_PROMPT,
|
|
2850
|
-
userPrompt
|
|
2851
|
-
});
|
|
2618
|
+
const { systemPrompt, userPrompt } = this.promptService.buildLibrarianPrompt(
|
|
2619
|
+
events.reverse(),
|
|
2620
|
+
currentFacts,
|
|
2621
|
+
promptOverride
|
|
2622
|
+
);
|
|
2623
|
+
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2852
2624
|
const result = parseJsonResponse(responseText);
|
|
2853
2625
|
const facts = Array.isArray(result.facts) ? result.facts : [];
|
|
2854
2626
|
const tasks = Array.isArray(result.tasks) ? result.tasks : [];
|
|
@@ -2893,24 +2665,35 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
2893
2665
|
};
|
|
2894
2666
|
await this.entryRepo.upsert(factObj, tx);
|
|
2895
2667
|
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2668
|
+
factsForDedupe.push(factObj);
|
|
2896
2669
|
}
|
|
2897
2670
|
for (const task of validTasks) {
|
|
2898
2671
|
const id = generateId("task_");
|
|
2899
|
-
const taskObj = {
|
|
2672
|
+
const taskObj = {
|
|
2673
|
+
id,
|
|
2674
|
+
entity_id: entityId,
|
|
2675
|
+
description: task.description,
|
|
2676
|
+
status: "pending",
|
|
2677
|
+
priority: task.priority,
|
|
2678
|
+
created_at: now,
|
|
2679
|
+
updated_at: now,
|
|
2680
|
+
resolved_at: null,
|
|
2681
|
+
deleted_at: null
|
|
2682
|
+
};
|
|
2900
2683
|
await this.taskRepo.upsert(taskObj, tx);
|
|
2901
2684
|
}
|
|
2902
2685
|
});
|
|
2903
|
-
await this.
|
|
2904
|
-
this.vectorCache.delete(entityId);
|
|
2686
|
+
await this.searchService.sync(entityId);
|
|
2905
2687
|
for (const fact of insertedFacts) {
|
|
2906
|
-
await this.embedFact(fact);
|
|
2688
|
+
await this.embeddingService.embedFact(fact);
|
|
2907
2689
|
}
|
|
2908
|
-
this.
|
|
2690
|
+
this.searchService.evictCache(entityId);
|
|
2909
2691
|
}
|
|
2910
|
-
|
|
2692
|
+
/** Core heal pass (locks handled by {@link runHeal}). Package-internal orchestration hook. */
|
|
2693
|
+
async doRunHeal(entityId, promptOverride) {
|
|
2911
2694
|
const now = Date.now();
|
|
2912
|
-
const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config
|
|
2913
|
-
const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config
|
|
2695
|
+
const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config?.orphanAfterDays : 30;
|
|
2696
|
+
const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config?.staleInferredAfterDays : 60;
|
|
2914
2697
|
const MS_PER_DAY = 24 * 60 * 60 * 1e3;
|
|
2915
2698
|
if (orphanAfterDays !== null && (typeof orphanAfterDays !== "number" || !Number.isFinite(orphanAfterDays) || orphanAfterDays < 0)) {
|
|
2916
2699
|
throw new Error("Invalid orphanAfterDays: must be a finite number >= 0 or null");
|
|
@@ -2918,55 +2701,71 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
2918
2701
|
if (staleInferredAfterDays !== null && (typeof staleInferredAfterDays !== "number" || !Number.isFinite(staleInferredAfterDays) || staleInferredAfterDays < 0)) {
|
|
2919
2702
|
throw new Error("Invalid staleInferredAfterDays: must be a finite number >= 0 or null");
|
|
2920
2703
|
}
|
|
2704
|
+
const orphanedIds = [];
|
|
2921
2705
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2922
2706
|
if (orphanAfterDays !== null) {
|
|
2923
2707
|
const orphanThreshold = now - orphanAfterDays * MS_PER_DAY;
|
|
2924
|
-
await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx);
|
|
2708
|
+
orphanedIds.push(...await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx));
|
|
2925
2709
|
}
|
|
2926
2710
|
if (staleInferredAfterDays !== null) {
|
|
2927
2711
|
const staleThreshold = now - staleInferredAfterDays * MS_PER_DAY;
|
|
2928
2712
|
await this.entryRepo.downgradeStaleInferred(entityId, staleThreshold, tx);
|
|
2929
2713
|
}
|
|
2930
2714
|
});
|
|
2715
|
+
for (const factId of orphanedIds) {
|
|
2716
|
+
try {
|
|
2717
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
|
|
2718
|
+
} catch (hookErr) {
|
|
2719
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal orphan pass for ${factId}:`, hookErr);
|
|
2720
|
+
}
|
|
2721
|
+
}
|
|
2931
2722
|
const allFactsRows = await this.entryRepo.findAllByEntityId(entityId);
|
|
2932
2723
|
const allTasks = await this.taskRepo.findAllPending([entityId]);
|
|
2933
2724
|
const recentEvents = await this.eventRepo.getRecent(entityId, 20);
|
|
2934
2725
|
const healCandidates = allFactsRows.filter((f) => f.source_type !== "immutable_document");
|
|
2935
2726
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "immutable_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
2936
|
-
const
|
|
2937
|
-
${JSON.stringify(healCandidates.map((f) => {
|
|
2727
|
+
const healCandidatesForPrompt = healCandidates.map((f) => {
|
|
2938
2728
|
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
2939
2729
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
2940
|
-
}), null, 2)}
|
|
2941
|
-
|
|
2942
|
-
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
2943
|
-
${JSON.stringify(documentAnchors, null, 2)}
|
|
2944
|
-
|
|
2945
|
-
All Tasks:
|
|
2946
|
-
${JSON.stringify(allTasks, null, 2)}
|
|
2947
|
-
|
|
2948
|
-
Recent Events:
|
|
2949
|
-
${JSON.stringify(recentEvents, null, 2)}
|
|
2950
|
-
|
|
2951
|
-
The following document anchors are provided for contradiction detection only. Do not include them in \`downgraded\`, \`deleted\`, or \`newFacts\`.`;
|
|
2952
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2953
|
-
systemPrompt: HEAL_SYSTEM_PROMPT,
|
|
2954
|
-
userPrompt
|
|
2955
2730
|
});
|
|
2731
|
+
const { systemPrompt, userPrompt } = this.promptService.buildHealPrompt(
|
|
2732
|
+
healCandidatesForPrompt,
|
|
2733
|
+
documentAnchors,
|
|
2734
|
+
allTasks,
|
|
2735
|
+
recentEvents,
|
|
2736
|
+
promptOverride
|
|
2737
|
+
);
|
|
2738
|
+
const responseText = await this.options.llmProvider.generateText({ systemPrompt, userPrompt });
|
|
2956
2739
|
const result = parseJsonResponse(responseText);
|
|
2957
2740
|
const mutableIds = new Set(healCandidates.map((f) => f.id));
|
|
2958
2741
|
const downgraded = Array.isArray(result.downgraded) ? result.downgraded : [];
|
|
2959
2742
|
const deleted = Array.isArray(result.deleted) ? result.deleted : [];
|
|
2960
2743
|
const newFacts = Array.isArray(result.newFacts) ? result.newFacts : [];
|
|
2961
|
-
const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
|
|
2962
|
-
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
2744
|
+
const safeDowngraded = Array.from(new Set(downgraded.filter((id) => mutableIds.has(id))));
|
|
2745
|
+
const safeDeleted = Array.from(new Set(deleted.filter((id) => mutableIds.has(id))));
|
|
2963
2746
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
2964
2747
|
const insertedFacts = [];
|
|
2965
2748
|
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
2749
|
+
const healFactsForDedupe = [...healCandidates];
|
|
2966
2750
|
await this.db.withTransactionAsync(async (tx) => {
|
|
2967
2751
|
await this.entryRepo.downgradeByIds(safeDowngraded, entityId, tx);
|
|
2968
2752
|
await this.entryRepo.softDeleteByIds(safeDeleted, entityId, tx);
|
|
2969
2753
|
for (const fact of validNewFacts) {
|
|
2754
|
+
const newTokens = titleTokens(fact.title);
|
|
2755
|
+
let skip = false;
|
|
2756
|
+
if (newTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2757
|
+
for (const existing of healFactsForDedupe) {
|
|
2758
|
+
if (existing.source_type !== "librarian_inferred") continue;
|
|
2759
|
+
const existingTokens = titleTokens(existing.title);
|
|
2760
|
+
if (existingTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2761
|
+
if (jaccardScore(newTokens, existingTokens) >= FUZZY_THRESHOLD) {
|
|
2762
|
+
skip = true;
|
|
2763
|
+
break;
|
|
2764
|
+
}
|
|
2765
|
+
}
|
|
2766
|
+
}
|
|
2767
|
+
}
|
|
2768
|
+
if (skip) continue;
|
|
2970
2769
|
const id = generateId("fact_");
|
|
2971
2770
|
const factObj = {
|
|
2972
2771
|
id,
|
|
@@ -2986,241 +2785,94 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
2986
2785
|
};
|
|
2987
2786
|
await this.entryRepo.upsert(factObj, tx);
|
|
2988
2787
|
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2788
|
+
healFactsForDedupe.push(factObj);
|
|
2989
2789
|
}
|
|
2990
2790
|
});
|
|
2991
|
-
this.
|
|
2992
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2791
|
+
await this.searchService.sync(entityId);
|
|
2993
2792
|
for (const factId of uniqueDeletedFactIds) {
|
|
2994
2793
|
try {
|
|
2995
|
-
await this.
|
|
2794
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, factId, null);
|
|
2996
2795
|
} catch (hookErr) {
|
|
2997
2796
|
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
2998
2797
|
}
|
|
2999
2798
|
}
|
|
3000
2799
|
for (const fact of insertedFacts) {
|
|
3001
|
-
await this.embedFact(fact);
|
|
2800
|
+
await this.embeddingService.embedFact(fact);
|
|
3002
2801
|
}
|
|
3003
|
-
this.
|
|
2802
|
+
this.searchService.evictCache(entityId);
|
|
3004
2803
|
}
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
throw new WikiBusyError("librarian", entityId);
|
|
3009
|
-
}
|
|
3010
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3011
|
-
throw new WikiBusyError("prune", entityId);
|
|
3012
|
-
}
|
|
3013
|
-
if (this._isReembedActive(entityId)) {
|
|
3014
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3015
|
-
}
|
|
3016
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3017
|
-
throw new WikiBusyError("import", entityId);
|
|
3018
|
-
}
|
|
3019
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3020
|
-
throw new WikiBusyError("forget", entityId);
|
|
3021
|
-
}
|
|
3022
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
3023
|
-
this._notifyStatusSubscribers(entityId);
|
|
3024
|
-
try {
|
|
3025
|
-
await this._doRunLibrarian(entityId);
|
|
3026
|
-
} finally {
|
|
3027
|
-
this.activeMaintenanceJobs.delete(jobKey);
|
|
3028
|
-
this._notifyStatusSubscribers(entityId);
|
|
2804
|
+
_validatePruneDuration(value, name) {
|
|
2805
|
+
if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
|
|
2806
|
+
throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
|
|
3029
2807
|
}
|
|
3030
2808
|
}
|
|
3031
|
-
|
|
3032
|
-
|
|
3033
|
-
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
3034
|
-
throw new WikiBusyError("heal", entityId);
|
|
3035
|
-
}
|
|
3036
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3037
|
-
throw new WikiBusyError("prune", entityId);
|
|
3038
|
-
}
|
|
3039
|
-
if (this._isReembedActive(entityId)) {
|
|
3040
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3041
|
-
}
|
|
3042
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3043
|
-
throw new WikiBusyError("import", entityId);
|
|
3044
|
-
}
|
|
3045
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3046
|
-
throw new WikiBusyError("forget", entityId);
|
|
3047
|
-
}
|
|
3048
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
3049
|
-
this._notifyStatusSubscribers(entityId);
|
|
3050
|
-
try {
|
|
3051
|
-
await this._doRunHeal(entityId);
|
|
3052
|
-
} finally {
|
|
3053
|
-
this.activeMaintenanceJobs.delete(jobKey);
|
|
3054
|
-
this._notifyStatusSubscribers(entityId);
|
|
3055
|
-
}
|
|
2809
|
+
_sanitizeRankerError(err) {
|
|
2810
|
+
return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
|
|
3056
2811
|
}
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
|
|
3067
|
-
|
|
3068
|
-
|
|
3069
|
-
|
|
3070
|
-
}
|
|
3071
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
3072
|
-
throw new WikiBusyError("librarian", entityId);
|
|
3073
|
-
}
|
|
3074
|
-
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
3075
|
-
throw new WikiBusyError("heal", entityId);
|
|
3076
|
-
}
|
|
3077
|
-
if (this._isIngestActiveFor(entityId)) {
|
|
3078
|
-
throw new WikiBusyError("ingest", entityId);
|
|
3079
|
-
}
|
|
3080
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3081
|
-
throw new WikiBusyError("import", entityId);
|
|
3082
|
-
}
|
|
3083
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3084
|
-
throw new WikiBusyError("forget", entityId);
|
|
3085
|
-
}
|
|
3086
|
-
} else {
|
|
3087
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
3088
|
-
throw new WikiBusyError("reembed", "*");
|
|
3089
|
-
}
|
|
3090
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
|
|
3091
|
-
throw new WikiBusyError("prune", "*");
|
|
3092
|
-
}
|
|
3093
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
|
|
3094
|
-
throw new WikiBusyError("librarian", "*");
|
|
3095
|
-
}
|
|
3096
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
|
|
3097
|
-
throw new WikiBusyError("heal", "*");
|
|
3098
|
-
}
|
|
3099
|
-
if (this.activeIngestJobs.size > 0) {
|
|
3100
|
-
throw new WikiBusyError("ingest", "*");
|
|
3101
|
-
}
|
|
3102
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
3103
|
-
throw new WikiBusyError("import", "*");
|
|
3104
|
-
}
|
|
3105
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
3106
|
-
throw new WikiBusyError("forget", "*");
|
|
3107
|
-
}
|
|
3108
|
-
}
|
|
3109
|
-
this.activeMaintenanceJobs.add(reembedKey);
|
|
3110
|
-
try {
|
|
3111
|
-
const rows = await this.entryRepo.findAllForReembed(entityId);
|
|
3112
|
-
if (entityId) {
|
|
3113
|
-
this.vectorCache.delete(entityId);
|
|
3114
|
-
} else {
|
|
3115
|
-
this.vectorCache.clear();
|
|
3116
|
-
}
|
|
3117
|
-
const skipExisting = opts?.skipExisting ?? false;
|
|
3118
|
-
let effectiveSkip = skipExisting;
|
|
3119
|
-
if (skipExisting) {
|
|
3120
|
-
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
3121
|
-
if (mismatchValue) {
|
|
3122
|
-
if (entityId) {
|
|
3123
|
-
const mismatchDim = parseInt(mismatchValue, 10);
|
|
3124
|
-
const staleCount = await this.entryRepo.countStaleForEntity(entityId, mismatchDim);
|
|
3125
|
-
if (staleCount > 0) effectiveSkip = false;
|
|
3126
|
-
} else {
|
|
3127
|
-
effectiveSkip = false;
|
|
3128
|
-
}
|
|
3129
|
-
}
|
|
3130
|
-
}
|
|
3131
|
-
let embedded = 0;
|
|
3132
|
-
let skipped = 0;
|
|
3133
|
-
let failed = 0;
|
|
3134
|
-
try {
|
|
3135
|
-
for (const row of rows) {
|
|
3136
|
-
const existingBlob = row.embedding_blob;
|
|
3137
|
-
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
3138
|
-
if (effectiveSkip && blobIsValid) {
|
|
3139
|
-
const vec = parseEmbedding(existingBlob, null);
|
|
3140
|
-
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
3141
|
-
skipped++;
|
|
3142
|
-
continue;
|
|
3143
|
-
}
|
|
3144
|
-
}
|
|
3145
|
-
const success = await this.embedFact(row);
|
|
3146
|
-
if (success) embedded++;
|
|
3147
|
-
else failed++;
|
|
3148
|
-
}
|
|
3149
|
-
if (embedded > 0) {
|
|
3150
|
-
await this._reconcileEmbeddingDimension();
|
|
3151
|
-
}
|
|
3152
|
-
} finally {
|
|
3153
|
-
if (entityId) {
|
|
3154
|
-
this.vectorCache.delete(entityId);
|
|
3155
|
-
} else {
|
|
3156
|
-
this.vectorCache.clear();
|
|
3157
|
-
}
|
|
3158
|
-
}
|
|
3159
|
-
return { embedded, skipped, failed };
|
|
3160
|
-
} finally {
|
|
3161
|
-
this.activeMaintenanceJobs.delete(reembedKey);
|
|
3162
|
-
}
|
|
2812
|
+
};
|
|
2813
|
+
|
|
2814
|
+
// src/services/ImportExportService.ts
|
|
2815
|
+
var ImportExportService = class {
|
|
2816
|
+
constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
|
|
2817
|
+
this.db = db;
|
|
2818
|
+
this.entryRepo = entryRepo;
|
|
2819
|
+
this.taskRepo = taskRepo;
|
|
2820
|
+
this.eventRepo = eventRepo;
|
|
2821
|
+
this.metadataRepo = metadataRepo;
|
|
2822
|
+
this.searchService = searchService;
|
|
2823
|
+
this.jobManager = jobManager;
|
|
2824
|
+
this.embeddingService = embeddingService;
|
|
3163
2825
|
}
|
|
3164
|
-
|
|
3165
|
-
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
|
|
3169
|
-
|
|
3170
|
-
break;
|
|
3171
|
-
}
|
|
2826
|
+
async exportDump(entityIds) {
|
|
2827
|
+
let ids;
|
|
2828
|
+
if (entityIds && entityIds.length > 0) {
|
|
2829
|
+
ids = Array.from(new Set(entityIds));
|
|
2830
|
+
} else {
|
|
2831
|
+
ids = await this.metadataRepo.getDistinctEntityIds();
|
|
3172
2832
|
}
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3187
|
-
|
|
3188
|
-
*/
|
|
3189
|
-
subscribeEntityStatus(entityId, callback) {
|
|
3190
|
-
const initial = this.getEntityStatus(entityId);
|
|
3191
|
-
let set = this.statusSubscribers.get(entityId);
|
|
3192
|
-
if (!set) {
|
|
3193
|
-
set = /* @__PURE__ */ new Set();
|
|
3194
|
-
this.statusSubscribers.set(entityId, set);
|
|
2833
|
+
const entities = {};
|
|
2834
|
+
const BATCH = 3;
|
|
2835
|
+
for (let i = 0; i < ids.length; i += BATCH) {
|
|
2836
|
+
const batch = ids.slice(i, i + BATCH);
|
|
2837
|
+
const batchResults = await Promise.all(
|
|
2838
|
+
batch.map(
|
|
2839
|
+
async (id) => [
|
|
2840
|
+
id,
|
|
2841
|
+
await this.getFullBundle(id, { includeBlobs: true })
|
|
2842
|
+
]
|
|
2843
|
+
)
|
|
2844
|
+
);
|
|
2845
|
+
for (const [id, bundle] of batchResults) {
|
|
2846
|
+
entities[id] = bundle;
|
|
2847
|
+
}
|
|
3195
2848
|
}
|
|
3196
|
-
|
|
3197
|
-
|
|
2849
|
+
return { generatedAt: Date.now(), entities };
|
|
2850
|
+
}
|
|
2851
|
+
async importDump(dump, opts) {
|
|
2852
|
+
const merge = opts?.merge ?? false;
|
|
2853
|
+
const entityIds = Object.keys(dump.entities);
|
|
2854
|
+
this.jobManager.acquireImportLocks(entityIds);
|
|
3198
2855
|
try {
|
|
3199
|
-
|
|
3200
|
-
|
|
3201
|
-
|
|
2856
|
+
await this.assertNoLegacySourceTypes();
|
|
2857
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
2858
|
+
await this.doImportEntity(entityId, bundle, merge);
|
|
2859
|
+
}
|
|
2860
|
+
} finally {
|
|
2861
|
+
this.jobManager.releaseImportLocks(entityIds);
|
|
3202
2862
|
}
|
|
3203
|
-
let active = true;
|
|
3204
|
-
return () => {
|
|
3205
|
-
if (!active) return;
|
|
3206
|
-
active = false;
|
|
3207
|
-
const s = this.statusSubscribers.get(entityId);
|
|
3208
|
-
if (!s) return;
|
|
3209
|
-
s.delete(entry);
|
|
3210
|
-
if (s.size === 0) this.statusSubscribers.delete(entityId);
|
|
3211
|
-
};
|
|
3212
|
-
}
|
|
3213
|
-
clearVectorCache() {
|
|
3214
|
-
this.vectorCache.clear();
|
|
3215
2863
|
}
|
|
3216
|
-
async
|
|
2864
|
+
async getFullBundle(entityId, opts) {
|
|
3217
2865
|
const [factsRaw, tasks, events] = await Promise.all([
|
|
3218
|
-
this.entryRepo.findAllByEntityId(entityId),
|
|
2866
|
+
opts?.includeBlobs ? this.entryRepo.findAllByEntityIdWithBlobs(entityId) : this.entryRepo.findAllByEntityId(entityId),
|
|
3219
2867
|
this.taskRepo.findAllByEntityId(entityId),
|
|
3220
2868
|
this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
|
|
3221
2869
|
]);
|
|
3222
2870
|
const facts = factsRaw.map((f) => {
|
|
3223
|
-
const {
|
|
2871
|
+
const {
|
|
2872
|
+
embedding: _embedding,
|
|
2873
|
+
embedding_blob,
|
|
2874
|
+
...rest
|
|
2875
|
+
} = f;
|
|
3224
2876
|
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
3225
2877
|
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
3226
2878
|
new Uint8Array(c).set(embedding_blob);
|
|
@@ -3234,494 +2886,1272 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
3234
2886
|
});
|
|
3235
2887
|
return { facts, tasks, events };
|
|
3236
2888
|
}
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
2889
|
+
/** Single-entity import transaction + post-processing; package-internal hook for tests. */
|
|
2890
|
+
async doImportEntity(entityId, bundle, merge) {
|
|
2891
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
2892
|
+
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
2893
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
2894
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
2895
|
+
const softDeletedFactIds = [];
|
|
2896
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
2897
|
+
if (!merge) {
|
|
2898
|
+
const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
|
|
2899
|
+
entityId,
|
|
2900
|
+
null,
|
|
2901
|
+
null,
|
|
2902
|
+
tx,
|
|
2903
|
+
false
|
|
2904
|
+
);
|
|
2905
|
+
softDeletedFactIds.push(...deletedLiveFactIds);
|
|
2906
|
+
await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2907
|
+
await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
2908
|
+
await this.metadataRepo.deleteCheckpoint(entityId, tx);
|
|
2909
|
+
}
|
|
2910
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
2911
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
2912
|
+
const existingFacts = await this.entryRepo.findExistingMetadataByIds(
|
|
2913
|
+
factIds,
|
|
2914
|
+
tx
|
|
2915
|
+
);
|
|
2916
|
+
for (const existingFact of existingFacts) {
|
|
2917
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
2918
|
+
}
|
|
2919
|
+
for (const fact of bundle.facts) {
|
|
2920
|
+
const sourceType = this._normalizeImportedSourceType(
|
|
2921
|
+
String(fact.source_type),
|
|
2922
|
+
{
|
|
2923
|
+
entityId,
|
|
2924
|
+
factId: fact.id
|
|
2925
|
+
}
|
|
2926
|
+
);
|
|
2927
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
2928
|
+
const existing = existingFactsById.get(fact.id);
|
|
2929
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
2930
|
+
let rawBlob = null;
|
|
2931
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
2932
|
+
rawBlob = rawBlobRaw;
|
|
2933
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
2934
|
+
const obj = rawBlobRaw;
|
|
2935
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
2936
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
2937
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
2938
|
+
const entries = Object.keys(obj);
|
|
2939
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
2940
|
+
const len = entries.length;
|
|
2941
|
+
rawBlob = new Uint8Array(len);
|
|
2942
|
+
for (let i = 0; i < len; i++)
|
|
2943
|
+
rawBlob[i] = obj[String(i)] ?? 0;
|
|
2944
|
+
}
|
|
2945
|
+
}
|
|
2946
|
+
}
|
|
2947
|
+
let blobData = null;
|
|
2948
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
2949
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
2950
|
+
const alignedBlob = new Uint8Array(copy);
|
|
2951
|
+
alignedBlob.set(rawBlob);
|
|
2952
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
2953
|
+
let allFinite = true;
|
|
2954
|
+
for (let i = 0; i < floats.length; i++) {
|
|
2955
|
+
if (!isFinite(floats[i])) {
|
|
2956
|
+
allFinite = false;
|
|
2957
|
+
break;
|
|
2958
|
+
}
|
|
2959
|
+
}
|
|
2960
|
+
if (allFinite) {
|
|
2961
|
+
blobData = alignedBlob;
|
|
2962
|
+
}
|
|
2963
|
+
}
|
|
2964
|
+
if (existing) {
|
|
2965
|
+
if (existing.entity_id !== entityId) {
|
|
2966
|
+
this._warnCrossEntityCollision(
|
|
2967
|
+
"entry",
|
|
2968
|
+
fact.id,
|
|
2969
|
+
existing.entity_id,
|
|
2970
|
+
entityId
|
|
2971
|
+
);
|
|
2972
|
+
continue;
|
|
2973
|
+
}
|
|
2974
|
+
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
2975
|
+
}
|
|
2976
|
+
const factObj = {
|
|
2977
|
+
id: fact.id,
|
|
2978
|
+
entity_id: entityId,
|
|
2979
|
+
title: fact.title,
|
|
2980
|
+
body: fact.body,
|
|
2981
|
+
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
2982
|
+
confidence: fact.confidence,
|
|
2983
|
+
source_type: sourceType,
|
|
2984
|
+
source_hash: fact.source_hash,
|
|
2985
|
+
source_ref: fact.source_ref,
|
|
2986
|
+
created_at: fact.created_at,
|
|
2987
|
+
updated_at: safeUpdatedAt,
|
|
2988
|
+
last_accessed_at: fact.last_accessed_at,
|
|
2989
|
+
access_count: fact.access_count,
|
|
2990
|
+
deleted_at: fact.deleted_at,
|
|
2991
|
+
embedding_blob: blobData ?? void 0
|
|
2992
|
+
};
|
|
2993
|
+
await this.entryRepo.upsertForImport(factObj, tx);
|
|
2994
|
+
if (blobData != null) {
|
|
2995
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
2996
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
2997
|
+
}
|
|
2998
|
+
existingFactsById.set(fact.id, {
|
|
2999
|
+
id: fact.id,
|
|
3000
|
+
entity_id: entityId,
|
|
3001
|
+
updated_at: safeUpdatedAt
|
|
3002
|
+
});
|
|
3003
|
+
upsertedFactIds.add(fact.id);
|
|
3004
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
3005
|
+
}
|
|
3006
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
3007
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
3008
|
+
const existingTasks = await this.taskRepo.findExistingMetadataByIds(
|
|
3009
|
+
taskIds,
|
|
3010
|
+
tx
|
|
3011
|
+
);
|
|
3012
|
+
for (const existingTask of existingTasks) {
|
|
3013
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
3014
|
+
}
|
|
3015
|
+
for (const task of bundle.tasks) {
|
|
3016
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
3017
|
+
const existing = existingTasksById.get(task.id);
|
|
3018
|
+
if (existing) {
|
|
3019
|
+
if (existing.entity_id !== entityId) {
|
|
3020
|
+
this._warnCrossEntityCollision(
|
|
3021
|
+
"task",
|
|
3022
|
+
task.id,
|
|
3023
|
+
existing.entity_id,
|
|
3024
|
+
entityId
|
|
3025
|
+
);
|
|
3026
|
+
continue;
|
|
3027
|
+
}
|
|
3028
|
+
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
3029
|
+
}
|
|
3030
|
+
await this.taskRepo.upsertForImport(
|
|
3031
|
+
{
|
|
3032
|
+
id: task.id,
|
|
3033
|
+
entity_id: entityId,
|
|
3034
|
+
description: task.description,
|
|
3035
|
+
status: task.status,
|
|
3036
|
+
priority: task.priority,
|
|
3037
|
+
created_at: task.created_at,
|
|
3038
|
+
updated_at: safeUpdatedAt,
|
|
3039
|
+
resolved_at: task.resolved_at,
|
|
3040
|
+
deleted_at: task.deleted_at
|
|
3041
|
+
},
|
|
3042
|
+
tx,
|
|
3043
|
+
safeUpdatedAt
|
|
3044
|
+
);
|
|
3045
|
+
existingTasksById.set(task.id, {
|
|
3046
|
+
id: task.id,
|
|
3047
|
+
entity_id: entityId,
|
|
3048
|
+
updated_at: safeUpdatedAt
|
|
3049
|
+
});
|
|
3050
|
+
}
|
|
3051
|
+
for (const event of bundle.events) {
|
|
3052
|
+
await this.eventRepo.addIgnoreDuplicate(
|
|
3053
|
+
{
|
|
3054
|
+
id: event.id,
|
|
3055
|
+
entity_id: entityId,
|
|
3056
|
+
event_type: event.event_type,
|
|
3057
|
+
summary: event.summary,
|
|
3058
|
+
related_entry_id: event.related_entry_id ?? null,
|
|
3059
|
+
created_at: event.created_at
|
|
3060
|
+
},
|
|
3061
|
+
tx
|
|
3062
|
+
);
|
|
3063
|
+
}
|
|
3064
|
+
});
|
|
3065
|
+
await this.searchService.sync(entityId);
|
|
3066
|
+
for (const fact of bundle.facts) {
|
|
3067
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
3068
|
+
const embedded = await this.embeddingService.embedFact({
|
|
3069
|
+
id: fact.id,
|
|
3070
|
+
entity_id: entityId,
|
|
3071
|
+
title: fact.title,
|
|
3072
|
+
body: fact.body,
|
|
3073
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
3074
|
+
});
|
|
3075
|
+
if (!embedded) {
|
|
3076
|
+
await this.embeddingService.notifyEmbeddingPersisted(entityId, fact.id, null);
|
|
3077
|
+
}
|
|
3078
|
+
}
|
|
3243
3079
|
}
|
|
3244
|
-
const
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
|
|
3080
|
+
for (const fact of bundle.facts) {
|
|
3081
|
+
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
3082
|
+
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
3083
|
+
try {
|
|
3084
|
+
const float32Vector = new Float32Array(
|
|
3085
|
+
blobData.buffer,
|
|
3086
|
+
blobData.byteOffset,
|
|
3087
|
+
blobData.byteLength / 4
|
|
3088
|
+
);
|
|
3089
|
+
await this.embeddingService.notifyEmbeddingPersisted(
|
|
3090
|
+
entityId,
|
|
3091
|
+
fact.id,
|
|
3092
|
+
float32Vector
|
|
3093
|
+
);
|
|
3094
|
+
} catch (hookErr) {
|
|
3095
|
+
console.warn(
|
|
3096
|
+
`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`,
|
|
3097
|
+
hookErr
|
|
3098
|
+
);
|
|
3099
|
+
}
|
|
3100
|
+
}
|
|
3101
|
+
}
|
|
3102
|
+
for (const factId of softDeletedFactIds) {
|
|
3103
|
+
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
3104
|
+
try {
|
|
3105
|
+
await this.embeddingService.notifyEmbeddingPersisted(
|
|
3106
|
+
entityId,
|
|
3107
|
+
factId,
|
|
3108
|
+
null
|
|
3109
|
+
);
|
|
3110
|
+
} catch (hookErr) {
|
|
3111
|
+
console.warn(
|
|
3112
|
+
`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`,
|
|
3113
|
+
hookErr
|
|
3114
|
+
);
|
|
3115
|
+
}
|
|
3116
|
+
}
|
|
3117
|
+
}
|
|
3118
|
+
try {
|
|
3119
|
+
const canonicalDimValue = await this.metadataRepo.getMeta(
|
|
3120
|
+
"embedding_dimension"
|
|
3250
3121
|
);
|
|
3251
|
-
|
|
3252
|
-
|
|
3122
|
+
const canonicalDim = canonicalDimValue ? parseInt(canonicalDimValue, 10) : null;
|
|
3123
|
+
if (preservedBlobDims.size === 1) {
|
|
3124
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
3125
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
3126
|
+
await this.embeddingService.storeEmbeddingDimension(preservedDim);
|
|
3127
|
+
const staleMismatchValue = await this.metadataRepo.getMeta(
|
|
3128
|
+
"embedding_dimension_mismatch"
|
|
3129
|
+
);
|
|
3130
|
+
if (staleMismatchValue && parseInt(staleMismatchValue, 10) !== preservedDim) {
|
|
3131
|
+
await this.metadataRepo.setMeta(
|
|
3132
|
+
"embedding_dimension_mismatch",
|
|
3133
|
+
String(preservedDim),
|
|
3134
|
+
this.db
|
|
3135
|
+
);
|
|
3136
|
+
}
|
|
3137
|
+
await this.embeddingService.reconcileEmbeddingDimension();
|
|
3138
|
+
} else {
|
|
3139
|
+
await this.metadataRepo.setMeta(
|
|
3140
|
+
"embedding_dimension_mismatch",
|
|
3141
|
+
String(canonicalDim),
|
|
3142
|
+
this.db
|
|
3143
|
+
);
|
|
3144
|
+
}
|
|
3145
|
+
} else if (preservedBlobDims.size > 1) {
|
|
3146
|
+
if (canonicalDim === null) {
|
|
3147
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort(
|
|
3148
|
+
(a, b) => a - b
|
|
3149
|
+
);
|
|
3150
|
+
await this.embeddingService.storeEmbeddingDimension(
|
|
3151
|
+
sortedPreservedBlobDims[0]
|
|
3152
|
+
);
|
|
3153
|
+
await this.metadataRepo.setMeta(
|
|
3154
|
+
"embedding_dimension_mismatch",
|
|
3155
|
+
String(sortedPreservedBlobDims[0]),
|
|
3156
|
+
this.db
|
|
3157
|
+
);
|
|
3158
|
+
} else {
|
|
3159
|
+
await this.metadataRepo.setMeta(
|
|
3160
|
+
"embedding_dimension_mismatch",
|
|
3161
|
+
String(canonicalDim),
|
|
3162
|
+
this.db
|
|
3163
|
+
);
|
|
3164
|
+
}
|
|
3165
|
+
}
|
|
3166
|
+
} finally {
|
|
3167
|
+
this.searchService.evictCache(entityId);
|
|
3168
|
+
}
|
|
3169
|
+
}
|
|
3170
|
+
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
3171
|
+
console.warn(
|
|
3172
|
+
`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`
|
|
3173
|
+
);
|
|
3174
|
+
}
|
|
3175
|
+
_normalizeImportedSourceType(raw, ctx) {
|
|
3176
|
+
if (raw === "user_document") return "immutable_document";
|
|
3177
|
+
if (raw === "agent_inferred") return "librarian_inferred";
|
|
3178
|
+
const allowed = [
|
|
3179
|
+
"user_stated",
|
|
3180
|
+
"librarian_inferred",
|
|
3181
|
+
"user_confirmed",
|
|
3182
|
+
"immutable_document"
|
|
3183
|
+
];
|
|
3184
|
+
if (allowed.includes(raw))
|
|
3185
|
+
return raw;
|
|
3186
|
+
const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
|
|
3187
|
+
throw new Error(
|
|
3188
|
+
`importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
|
|
3189
|
+
);
|
|
3190
|
+
}
|
|
3191
|
+
async assertNoLegacySourceTypes() {
|
|
3192
|
+
if (!await this.entryRepo.hasLegacySourceTypes()) return;
|
|
3193
|
+
const count = await this.entryRepo.countLegacySourceTypes();
|
|
3194
|
+
throw new Error(
|
|
3195
|
+
`Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
|
|
3196
|
+
|
|
3197
|
+
${this.entryRepo.getLegacyMigrationSQL()}
|
|
3198
|
+
|
|
3199
|
+
After running the migration SQL, restart your application.`
|
|
3200
|
+
);
|
|
3201
|
+
}
|
|
3202
|
+
};
|
|
3203
|
+
|
|
3204
|
+
// src/services/EmbeddingService.ts
|
|
3205
|
+
/**
 * Computes and persists fact embeddings, tracks the embedding dimension in
 * the metadata store, and forwards persisted vectors to the optional
 * `options.vectorRanker.onEmbeddingPersisted` hook.
 */
var EmbeddingService = class {
  /**
   * @param db           Database handle; passed through to metadataRepo.setMeta
   *                     and clearDimensionMismatch.
   * @param options      Service options; this class reads `llmProvider.embed`,
   *                     `vectorRanker`, `forceDeleteIgnoreRankerHook` and
   *                     `deletionHookTimeoutMs`.
   * @param entryRepo    Repository used to store embedding blobs.
   * @param metadataRepo Key/value metadata repository.
   */
  constructor(db, options, entryRepo, metadataRepo) {
    this.db = db;
    this.options = options;
    this.entryRepo = entryRepo;
    this.metadataRepo = metadataRepo;
  }

  /**
   * Records `dim` as the embedding dimension when none is stored yet. When a
   * different dimension is already stored, the stored value is left alone and
   * `dim` is written to "embedding_dimension_mismatch" (with a warning).
   */
  async storeEmbeddingDimension(dim) {
    const existing = await this.metadataRepo.getMeta("embedding_dimension");
    if (!existing) {
      await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
      return;
    }
    const storedDim = Number.parseInt(existing, 10);
    if (storedDim !== dim) {
      console.warn(
        `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
      );
      await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
    }
  }

  /** Promotes embedding_dimension_mismatch to canonical embedding_dimension when safe. */
  async reconcileEmbeddingDimension() {
    const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
    if (!mismatchValue) return;
    const newDim = Number.parseInt(mismatchValue, 10);
    // "Safe" here means entryRepo.countStaleEmbeddings(newDim) reports zero.
    const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
    if (residualCount === 0) {
      await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
      await this.metadataRepo.clearDimensionMismatch(this.db);
    }
  }

  /**
   * Embeds one fact and stores the resulting float32 vector as its blob.
   *
   * The embedded text is `title body tags` (trimmed). `fact.tags` may be an
   * array, a JSON-encoded array string, any other string, or missing;
   * missing (null/undefined) tags contribute nothing to the text.
   *
   * @param fact `{ id, entity_id, title, body, tags }`.
   * @returns true when a vector was stored; false when no embed function is
   *          configured, the vector is empty/non-finite (also after float32
   *          conversion), or embedding/persisting threw. Failures are logged,
   *          never thrown.
   */
  async embedFact(fact) {
    const embedFn = this.options.llmProvider.embed;
    if (!embedFn) return false;

    let tagsStr;
    if (fact.tags === null || fact.tags === void 0) {
      // Without this guard JSON.parse(undefined) throws / JSON.parse(null)
      // yields null, and the words "undefined"/"null" leaked into the text.
      tagsStr = "";
    } else if (Array.isArray(fact.tags)) {
      tagsStr = fact.tags.join(" ");
    } else {
      try {
        const parsed = JSON.parse(fact.tags);
        tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
      } catch {
        // Not JSON: use the raw value as-is.
        tagsStr = fact.tags;
      }
    }

    const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
    try {
      const vector = await embedFn(text);
      if (vector.length === 0 || !vector.every((v) => typeof v === "number" && Number.isFinite(v))) {
        console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
        return false;
      }
      // A finite double can still become +/-Infinity once narrowed to
      // float32, so the converted vector is checked again.
      const float32Vector = new Float32Array(vector);
      let hasNonFinite = false;
      for (let i = 0; i < float32Vector.length; i++) {
        if (!Number.isFinite(float32Vector[i])) {
          hasNonFinite = true;
          break;
        }
      }
      if (hasNonFinite) {
        console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
        return false;
      }
      await this.storeEmbeddingDimension(float32Vector.length);
      const blob = new Uint8Array(float32Vector.buffer);
      await this.entryRepo.updateEmbeddingBlob(fact.id, blob);
      try {
        await this.notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
      } catch (hookErr) {
        // The blob is already stored; a failing hook only produces a warning.
        console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
      }
      return true;
    } catch (err) {
      console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
      return false;
    }
  }

  /**
   * Calls the vector-ranker hook (if configured) with a copy of `vector`
   * (or null). Errors thrown by the hook propagate to the caller.
   */
  async notifyEmbeddingPersisted(entityId, factId, vector) {
    if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
    const vectorCopy = vector ? vector.slice() : null;
    await this.options.vectorRanker.onEmbeddingPersisted({
      entityId,
      factId,
      vector: vectorCopy
    });
  }

  /**
   * Like notifyEmbeddingPersisted, but races the hook against a timeout
   * (`options.deletionHookTimeoutMs`, default 30000 ms) and is skipped
   * entirely when `options.forceDeleteIgnoreRankerHook === true`.
   *
   * @throws Error when the configured timeout is not a positive finite
   *         number, when the hook rejects, or when the timeout fires (that
   *         error is tagged with HOOK_TIMEOUT_MARKER).
   */
  async notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
    if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
    if (this.options.forceDeleteIgnoreRankerHook === true) return;
    const vectorCopy = vector ? vector.slice() : null;
    const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
    if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
      throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
    }
    const timeoutMs = rawTimeout;
    let timeoutHandle;
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => {
        const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
        timeoutError[HOOK_TIMEOUT_MARKER] = true;
        reject(timeoutError);
      }, timeoutMs);
    });
    // Promise.resolve().then(...) turns a synchronous throw from the hook
    // into a rejection of hookPromise.
    const hookPromise = Promise.resolve().then(
      () => this.options.vectorRanker.onEmbeddingPersisted({
        entityId,
        factId,
        vector: vectorCopy
      })
    );
    try {
      await Promise.race([hookPromise, timeoutPromise]);
    } catch (err) {
      // If the timeout won, the hook may still reject later; attach a no-op
      // handler so that late rejection is not reported as unhandled.
      hookPromise.catch(() => {
      });
      throw err;
    } finally {
      if (timeoutHandle) clearTimeout(timeoutHandle);
    }
  }
};
|
|
3328
|
+
|
|
3329
|
+
// src/readOptions.ts
|
|
3330
|
+
/**
 * Turns a single entity id or a list of ids into a list without duplicates,
 * keeping the order in which ids first appear.
 *
 * @param entityId One id, or an array of ids.
 * @returns New array of unique ids.
 */
function normalizeEntityIds(entityId) {
  const candidates = Array.isArray(entityId) ? entityId : [entityId];
  // A Set iterates in insertion order, so first occurrences win.
  return [...new Set(candidates)];
}
|
|
3341
|
+
/**
 * Produces a clean weight per requested entity id.
 *
 * Missing or non-finite weights become 1; finite weights are clamped to be
 * at least 0. Keys of `tierWeights` that are not in `entityIds` are dropped.
 *
 * @param entityIds   Ids to produce weights for.
 * @param tierWeights Caller-supplied id -> weight map, or undefined.
 * @returns undefined when `tierWeights` is undefined, otherwise a
 *          prototype-less object mapping each id to its sanitized weight.
 */
function sanitizeTierWeights(entityIds, tierWeights) {
  if (tierWeights === void 0) {
    return void 0;
  }
  const cleaned = /* @__PURE__ */ Object.create(null);
  for (const id of entityIds) {
    const candidate = tierWeights[id];
    const usable = candidate !== void 0 && Number.isFinite(candidate);
    cleaned[id] = usable ? Math.max(0, candidate) : 1;
  }
  return cleaned;
}
|
|
3354
|
+
/**
 * Scales `score` by the entity's tier weight.
 *
 * @param score                Base score.
 * @param entityId             Entity the score belongs to.
 * @param sanitizedTierWeights Optional id -> weight map; a missing map or
 *                             missing entry counts as weight 1.
 * @returns -Infinity when the weight is exactly 0, otherwise score * weight.
 */
function applyTierWeight(score, entityId, sanitizedTierWeights) {
  const multiplier = sanitizedTierWeights?.[entityId] ?? 1;
  return multiplier === 0 ? -Infinity : score * multiplier;
}
|
|
3359
|
+
/**
 * Tells whether the caller passed an array of entity ids (as opposed to a
 * single id); the result is simply Array.isArray(entityId).
 *
 * @param entityId The `entityId` argument exactly as the caller supplied it.
 * @returns true for arrays (including empty ones), false otherwise.
 */
function shouldExposeReadMetadata(entityId) {
  const calledWithList = Array.isArray(entityId);
  return calledWithList;
}
|
|
3362
|
+
|
|
3363
|
+
// src/services/RetrievalService.ts
|
|
3364
|
+
var RetrievalService = class {
|
|
3365
|
+
constructor(options, entryRepo, taskRepo, eventRepo, metadataRepo, searchService) {
|
|
3366
|
+
this.options = options;
|
|
3367
|
+
this.entryRepo = entryRepo;
|
|
3368
|
+
this.taskRepo = taskRepo;
|
|
3369
|
+
this.eventRepo = eventRepo;
|
|
3370
|
+
this.metadataRepo = metadataRepo;
|
|
3371
|
+
this.searchService = searchService;
|
|
3372
|
+
}
|
|
3373
|
+
async read(entityId, query, options) {
|
|
3374
|
+
const config = this.options.config;
|
|
3375
|
+
const entityIds = normalizeEntityIds(entityId);
|
|
3376
|
+
const sanitizedTierWeights = shouldExposeReadMetadata(entityId) ? sanitizeTierWeights(entityIds, options?.tierWeights) : void 0;
|
|
3377
|
+
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
3378
|
+
if (entityIds.length === 0) {
|
|
3379
|
+
const empty = { facts: [], tasks: [], events: [] };
|
|
3380
|
+
if (exposeMetadata) {
|
|
3381
|
+
empty.metadata = { query, entityIds: [] };
|
|
3382
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
3321
3383
|
}
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3384
|
+
return empty;
|
|
3385
|
+
}
|
|
3386
|
+
const MAX_ENTITY_IDS = 100;
|
|
3387
|
+
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
3388
|
+
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
3389
|
+
}
|
|
3390
|
+
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
3391
|
+
if (nullByteId !== void 0) {
|
|
3392
|
+
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
3393
|
+
}
|
|
3394
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
3395
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
3396
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
3397
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
3398
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
3399
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
3400
|
+
const skipEmbed = weight === 0;
|
|
3401
|
+
const embedFn = this.options.llmProvider.embed;
|
|
3402
|
+
const trimmedQuery = query.trim();
|
|
3403
|
+
let facts = [];
|
|
3404
|
+
let scoreByFactId;
|
|
3405
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
3406
|
+
let usedEmbed = false;
|
|
3407
|
+
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
3408
|
+
if (scoredEntityIds.length === 0) {
|
|
3409
|
+
usedEmbed = true;
|
|
3410
|
+
} else if (!skipEmbed && embedFn) {
|
|
3411
|
+
let rankerShouldRethrow = false;
|
|
3412
|
+
let pendingRankerFallbackError;
|
|
3413
|
+
try {
|
|
3414
|
+
const queryVec = await embedFn(trimmedQuery);
|
|
3415
|
+
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
3416
|
+
throw new Error(
|
|
3417
|
+
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
3418
|
+
);
|
|
3419
|
+
}
|
|
3420
|
+
const storedDimValue = await this.metadataRepo.getMeta("embedding_dimension");
|
|
3421
|
+
if (storedDimValue) {
|
|
3422
|
+
const storedDim = parseInt(storedDimValue, 10);
|
|
3423
|
+
if (storedDim !== queryVec.length) {
|
|
3424
|
+
throw new Error(
|
|
3425
|
+
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
3426
|
+
);
|
|
3427
|
+
}
|
|
3428
|
+
}
|
|
3429
|
+
const mismatchedCount = await this.entryRepo.countDimensionMismatched(scoredEntityIds, queryVec.length);
|
|
3430
|
+
if (mismatchedCount > 0) {
|
|
3431
|
+
throw new Error(
|
|
3432
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
3433
|
+
);
|
|
3434
|
+
}
|
|
3435
|
+
const useRanker = Boolean(this.options.vectorRanker);
|
|
3436
|
+
let candidateRows;
|
|
3437
|
+
let populateCache = entityIds.length === 1;
|
|
3438
|
+
let miniSearchScores;
|
|
3439
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
3440
|
+
populateCache = false;
|
|
3441
|
+
const preResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, Number.MAX_SAFE_INTEGER);
|
|
3442
|
+
if (preResults.length === 0) {
|
|
3443
|
+
candidateRows = null;
|
|
3444
|
+
} else {
|
|
3445
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
3446
|
+
if (topKResults.length === 0) {
|
|
3447
|
+
candidateRows = null;
|
|
3448
|
+
} else {
|
|
3449
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
3450
|
+
if (useRanker) {
|
|
3451
|
+
candidateRows = await this.entryRepo.findMetadataByIds(topKIds);
|
|
3452
|
+
} else {
|
|
3453
|
+
candidateRows = await this.entryRepo.findWithEmbeddingsByIds(topKIds);
|
|
3454
|
+
}
|
|
3455
|
+
if (weight !== void 0 && weight < 1) {
|
|
3456
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
3457
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
3458
|
+
}
|
|
3459
|
+
}
|
|
3460
|
+
}
|
|
3461
|
+
} else {
|
|
3462
|
+
if (useRanker) {
|
|
3463
|
+
candidateRows = await this.entryRepo.findMetadataByEntityIds(scoredEntityIds);
|
|
3464
|
+
} else {
|
|
3465
|
+
candidateRows = await this.entryRepo.findWithEmbeddingsByEntityIds(scoredEntityIds);
|
|
3466
|
+
}
|
|
3467
|
+
if (weight !== void 0 && weight < 1) {
|
|
3468
|
+
miniSearchScores = this.searchService.getMiniSearchScores(trimmedQuery, scoredEntityIds);
|
|
3344
3469
|
}
|
|
3345
3470
|
}
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
|
|
3357
|
-
|
|
3471
|
+
if (candidateRows === null) {
|
|
3472
|
+
usedEmbed = true;
|
|
3473
|
+
} else {
|
|
3474
|
+
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
3475
|
+
let scored;
|
|
3476
|
+
if (useRanker) {
|
|
3477
|
+
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
3478
|
+
for (const row of candidateRows) {
|
|
3479
|
+
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
3480
|
+
rows.push(row);
|
|
3481
|
+
candidateRowsByEntity.set(row.entity_id, rows);
|
|
3482
|
+
}
|
|
3483
|
+
try {
|
|
3484
|
+
const rankerResultsByEntity = await Promise.all(
|
|
3485
|
+
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
3486
|
+
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
3487
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
3488
|
+
const ranked = await this._rankWithVectorRanker({
|
|
3489
|
+
entityId: scopedEntityId,
|
|
3490
|
+
queryVec,
|
|
3491
|
+
candidateIds,
|
|
3492
|
+
candidateRows: rowsForEntity,
|
|
3493
|
+
weight,
|
|
3494
|
+
miniSearchScores,
|
|
3495
|
+
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
3496
|
+
});
|
|
3497
|
+
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
3498
|
+
})
|
|
3499
|
+
);
|
|
3500
|
+
scored = rankerResultsByEntity.flat();
|
|
3501
|
+
const scoredIds = new Set(scored.map((s) => s.id));
|
|
3502
|
+
const metadataById = new Map(
|
|
3503
|
+
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
3504
|
+
);
|
|
3505
|
+
scored = scored.map((row) => {
|
|
3506
|
+
const metadata = metadataById.get(row.id);
|
|
3507
|
+
return {
|
|
3508
|
+
...row,
|
|
3509
|
+
updated_at: metadata?.updated_at ?? null,
|
|
3510
|
+
access_count: metadata?.access_count ?? null
|
|
3511
|
+
};
|
|
3512
|
+
});
|
|
3513
|
+
const isHybrid = weight !== void 0 && weight < 1;
|
|
3514
|
+
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
3515
|
+
if (maxBackfill > 0) {
|
|
3516
|
+
if (isHybrid) {
|
|
3517
|
+
const topK = [];
|
|
3518
|
+
for (const row of candidateRows) {
|
|
3519
|
+
if (scoredIds.has(row.id)) continue;
|
|
3520
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
3521
|
+
const candidate = { row, kwScore };
|
|
3522
|
+
if (topK.length < maxBackfill) {
|
|
3523
|
+
let insertIdx = topK.length;
|
|
3524
|
+
for (let i = 0; i < topK.length; i++) {
|
|
3525
|
+
const cmp = this._compareScoredRows(
|
|
3526
|
+
{
|
|
3527
|
+
id: candidate.row.id,
|
|
3528
|
+
score: candidate.kwScore,
|
|
3529
|
+
updated_at: candidate.row.updated_at,
|
|
3530
|
+
access_count: candidate.row.access_count
|
|
3531
|
+
},
|
|
3532
|
+
{
|
|
3533
|
+
id: topK[i].row.id,
|
|
3534
|
+
score: topK[i].kwScore,
|
|
3535
|
+
updated_at: topK[i].row.updated_at,
|
|
3536
|
+
access_count: topK[i].row.access_count
|
|
3537
|
+
}
|
|
3538
|
+
);
|
|
3539
|
+
if (cmp < 0) {
|
|
3540
|
+
insertIdx = i;
|
|
3541
|
+
break;
|
|
3542
|
+
}
|
|
3543
|
+
}
|
|
3544
|
+
topK.splice(insertIdx, 0, candidate);
|
|
3545
|
+
} else {
|
|
3546
|
+
const cmpWorst = this._compareScoredRows(
|
|
3547
|
+
{
|
|
3548
|
+
id: candidate.row.id,
|
|
3549
|
+
score: candidate.kwScore,
|
|
3550
|
+
updated_at: candidate.row.updated_at,
|
|
3551
|
+
access_count: candidate.row.access_count
|
|
3552
|
+
},
|
|
3553
|
+
{
|
|
3554
|
+
id: topK[maxBackfill - 1].row.id,
|
|
3555
|
+
score: topK[maxBackfill - 1].kwScore,
|
|
3556
|
+
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
3557
|
+
access_count: topK[maxBackfill - 1].row.access_count
|
|
3558
|
+
}
|
|
3559
|
+
);
|
|
3560
|
+
if (cmpWorst < 0) {
|
|
3561
|
+
let insertIdx = maxBackfill - 1;
|
|
3562
|
+
for (let i = 0; i < topK.length; i++) {
|
|
3563
|
+
const cmp = this._compareScoredRows(
|
|
3564
|
+
{
|
|
3565
|
+
id: candidate.row.id,
|
|
3566
|
+
score: candidate.kwScore,
|
|
3567
|
+
updated_at: candidate.row.updated_at,
|
|
3568
|
+
access_count: candidate.row.access_count
|
|
3569
|
+
},
|
|
3570
|
+
{
|
|
3571
|
+
id: topK[i].row.id,
|
|
3572
|
+
score: topK[i].kwScore,
|
|
3573
|
+
updated_at: topK[i].row.updated_at,
|
|
3574
|
+
access_count: topK[i].row.access_count
|
|
3575
|
+
}
|
|
3576
|
+
);
|
|
3577
|
+
if (cmp < 0) {
|
|
3578
|
+
insertIdx = i;
|
|
3579
|
+
break;
|
|
3580
|
+
}
|
|
3581
|
+
}
|
|
3582
|
+
topK.splice(insertIdx, 0, candidate);
|
|
3583
|
+
topK.pop();
|
|
3584
|
+
}
|
|
3585
|
+
}
|
|
3586
|
+
}
|
|
3587
|
+
for (const { row, kwScore } of topK) {
|
|
3588
|
+
scored.push({
|
|
3589
|
+
id: row.id,
|
|
3590
|
+
entity_id: row.entity_id,
|
|
3591
|
+
score: (1 - weight) * kwScore,
|
|
3592
|
+
updated_at: row.updated_at,
|
|
3593
|
+
access_count: row.access_count
|
|
3594
|
+
});
|
|
3595
|
+
}
|
|
3596
|
+
} else {
|
|
3597
|
+
const omitted = [];
|
|
3598
|
+
for (const row of candidateRows) {
|
|
3599
|
+
if (scoredIds.has(row.id)) continue;
|
|
3600
|
+
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
3601
|
+
}
|
|
3602
|
+
if (omitted.length > 0) {
|
|
3603
|
+
this._tieBreakSort(omitted);
|
|
3604
|
+
scored.push(...omitted.slice(0, maxBackfill));
|
|
3605
|
+
}
|
|
3606
|
+
}
|
|
3607
|
+
}
|
|
3608
|
+
} catch (rankerErr) {
|
|
3609
|
+
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
3610
|
+
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
3611
|
+
this.options.onVectorRankerFallback?.({
|
|
3612
|
+
error: this._sanitizeRankerError(rankerError),
|
|
3613
|
+
policy
|
|
3614
|
+
});
|
|
3615
|
+
if (policy === "throw") {
|
|
3616
|
+
rankerShouldRethrow = true;
|
|
3617
|
+
throw rankerError;
|
|
3618
|
+
} else if (policy === "js-cosine") {
|
|
3619
|
+
let fallbackRows = candidateRows;
|
|
3620
|
+
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
3621
|
+
const rowIds = fallbackRows.map((r) => r.id);
|
|
3622
|
+
const embeddingRows = await this.entryRepo.findEmbeddingsByIds(rowIds);
|
|
3623
|
+
const embeddingsMap = new Map(embeddingRows.map((row) => [row.id, row]));
|
|
3624
|
+
fallbackRows = fallbackRows.map((r) => ({
|
|
3625
|
+
...r,
|
|
3626
|
+
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
3627
|
+
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
3628
|
+
}));
|
|
3629
|
+
}
|
|
3630
|
+
scored = await this.searchService.rankSemantic({
|
|
3631
|
+
entityId: entityCacheKey,
|
|
3632
|
+
queryVec,
|
|
3633
|
+
candidateRows: fallbackRows,
|
|
3634
|
+
weight,
|
|
3635
|
+
miniSearchScores,
|
|
3636
|
+
populateCache,
|
|
3637
|
+
limit: fallbackRows.length,
|
|
3638
|
+
skipSort: true
|
|
3639
|
+
// read() re-sorts after applying tier weights
|
|
3640
|
+
});
|
|
3641
|
+
} else if (policy === "keyword") {
|
|
3642
|
+
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
3643
|
+
const topResults = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, keywordOversampledLimit);
|
|
3644
|
+
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
3645
|
+
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
3646
|
+
scored = topResults.map((result) => {
|
|
3647
|
+
const metadata = candidateMap.get(result.id);
|
|
3648
|
+
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
3649
|
+
return {
|
|
3650
|
+
id: result.id,
|
|
3651
|
+
entity_id: entityForScore,
|
|
3652
|
+
score: result.score ?? 0,
|
|
3653
|
+
access_count: metadata?.access_count ?? null,
|
|
3654
|
+
updated_at: metadata?.updated_at ?? null
|
|
3655
|
+
};
|
|
3656
|
+
});
|
|
3657
|
+
} else {
|
|
3658
|
+
scored = [];
|
|
3659
|
+
}
|
|
3660
|
+
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
3661
|
+
const mirrored = new Error("Vector ranker failed, falling back", {
|
|
3662
|
+
cause: this._sanitizeRankerError(rankerErr)
|
|
3663
|
+
});
|
|
3664
|
+
pendingRankerFallbackError = mirrored;
|
|
3665
|
+
}
|
|
3666
|
+
}
|
|
3667
|
+
} else {
|
|
3668
|
+
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
3669
|
+
scored = await this.searchService.rankSemantic({
|
|
3670
|
+
entityId: entityCacheKey,
|
|
3671
|
+
queryVec,
|
|
3672
|
+
candidateRows,
|
|
3673
|
+
weight,
|
|
3674
|
+
miniSearchScores,
|
|
3675
|
+
populateCache,
|
|
3676
|
+
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
3677
|
+
skipSort: jsCosineNeedsTierSort
|
|
3678
|
+
// read() re-sorts after applying tier weights
|
|
3679
|
+
});
|
|
3680
|
+
}
|
|
3681
|
+
if (scored.length > 0) {
|
|
3682
|
+
scored = scored.map((row) => ({
|
|
3683
|
+
...row,
|
|
3684
|
+
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
3685
|
+
}));
|
|
3686
|
+
this._tieBreakSort(scored);
|
|
3687
|
+
const selectedScored = scored.slice(0, maxResults);
|
|
3688
|
+
const topIds = selectedScored.map((s) => s.id);
|
|
3689
|
+
if (exposeMetadata && trimmedQuery) {
|
|
3690
|
+
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
3691
|
+
}
|
|
3692
|
+
if (topIds.length > 0) {
|
|
3693
|
+
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
3694
|
+
if (facts2.length < topIds.length) {
|
|
3695
|
+
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
3696
|
+
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
3697
|
+
const missingCount = missingIds.length;
|
|
3698
|
+
const sample = missingIds.slice(0, 5);
|
|
3699
|
+
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
3700
|
+
const error = new Error(
|
|
3701
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
3702
|
+
);
|
|
3703
|
+
this.options.onRetrievalFallback?.(error);
|
|
3704
|
+
}
|
|
3705
|
+
facts = facts2;
|
|
3706
|
+
}
|
|
3707
|
+
if (pendingRankerFallbackError) {
|
|
3708
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
3709
|
+
pendingRankerFallbackError = void 0;
|
|
3710
|
+
}
|
|
3711
|
+
usedEmbed = true;
|
|
3712
|
+
} else {
|
|
3713
|
+
if (pendingRankerFallbackError) {
|
|
3714
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
3715
|
+
pendingRankerFallbackError = void 0;
|
|
3716
|
+
}
|
|
3717
|
+
usedEmbed = true;
|
|
3358
3718
|
}
|
|
3359
3719
|
}
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
if (existing) {
|
|
3365
|
-
if (existing.entity_id !== entityId) {
|
|
3366
|
-
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
3367
|
-
continue;
|
|
3720
|
+
} catch (err) {
|
|
3721
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
3722
|
+
if (rankerShouldRethrow) {
|
|
3723
|
+
throw error;
|
|
3368
3724
|
}
|
|
3369
|
-
if (
|
|
3370
|
-
|
|
3725
|
+
if (pendingRankerFallbackError) {
|
|
3726
|
+
error.cause = pendingRankerFallbackError;
|
|
3727
|
+
pendingRankerFallbackError = void 0;
|
|
3371
3728
|
}
|
|
3729
|
+
this.options.onRetrievalFallback?.(error);
|
|
3372
3730
|
}
|
|
3373
|
-
const factObj = {
|
|
3374
|
-
id: fact.id,
|
|
3375
|
-
entity_id: entityId,
|
|
3376
|
-
title: fact.title,
|
|
3377
|
-
body: fact.body,
|
|
3378
|
-
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
3379
|
-
confidence: fact.confidence,
|
|
3380
|
-
source_type: sourceType,
|
|
3381
|
-
source_hash: fact.source_hash,
|
|
3382
|
-
source_ref: fact.source_ref,
|
|
3383
|
-
created_at: fact.created_at,
|
|
3384
|
-
updated_at: safeUpdatedAt,
|
|
3385
|
-
last_accessed_at: fact.last_accessed_at,
|
|
3386
|
-
access_count: fact.access_count,
|
|
3387
|
-
deleted_at: fact.deleted_at,
|
|
3388
|
-
embedding_blob: blobData ?? void 0
|
|
3389
|
-
};
|
|
3390
|
-
await this.entryRepo.upsertForImport(factObj, tx);
|
|
3391
|
-
if (blobData != null) {
|
|
3392
|
-
factsWithPreservedBlob.set(fact.id, blobData);
|
|
3393
|
-
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
3394
|
-
}
|
|
3395
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
3396
|
-
upsertedFactIds.add(fact.id);
|
|
3397
|
-
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
3398
|
-
}
|
|
3399
|
-
const taskIds = bundle.tasks.map((task) => task.id);
|
|
3400
|
-
const existingTasksById = /* @__PURE__ */ new Map();
|
|
3401
|
-
const existingTasks = await this.taskRepo.findExistingMetadataByIds(taskIds, tx);
|
|
3402
|
-
for (const existingTask of existingTasks) {
|
|
3403
|
-
existingTasksById.set(existingTask.id, existingTask);
|
|
3404
3731
|
}
|
|
3405
|
-
|
|
3406
|
-
const
|
|
3407
|
-
const
|
|
3408
|
-
|
|
3409
|
-
|
|
3410
|
-
|
|
3411
|
-
|
|
3412
|
-
|
|
3413
|
-
|
|
3414
|
-
|
|
3732
|
+
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
3733
|
+
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
3734
|
+
const results = this.searchService.searchKeyword(trimmedQuery, scoredEntityIds, fallbackOversampledLimit);
|
|
3735
|
+
const candidates = results.map((r) => ({
|
|
3736
|
+
id: r.id,
|
|
3737
|
+
entity_id: r.entity_id,
|
|
3738
|
+
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
3739
|
+
updated_at: null,
|
|
3740
|
+
access_count: null
|
|
3741
|
+
}));
|
|
3742
|
+
this._tieBreakSort(candidates);
|
|
3743
|
+
const topCandidates = candidates.slice(0, maxResults);
|
|
3744
|
+
const topIds = topCandidates.map((c) => c.id);
|
|
3745
|
+
if (topIds.length > 0) {
|
|
3746
|
+
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
3747
|
+
if (exposeMetadata) {
|
|
3748
|
+
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
3415
3749
|
}
|
|
3416
3750
|
}
|
|
3417
|
-
await this.taskRepo.upsertForImport({
|
|
3418
|
-
id: task.id,
|
|
3419
|
-
entity_id: entityId,
|
|
3420
|
-
description: task.description,
|
|
3421
|
-
status: task.status,
|
|
3422
|
-
priority: task.priority,
|
|
3423
|
-
created_at: task.created_at,
|
|
3424
|
-
updated_at: safeUpdatedAt,
|
|
3425
|
-
resolved_at: task.resolved_at,
|
|
3426
|
-
deleted_at: task.deleted_at
|
|
3427
|
-
}, tx, safeUpdatedAt);
|
|
3428
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
3429
3751
|
}
|
|
3430
|
-
|
|
3431
|
-
|
|
3432
|
-
|
|
3433
|
-
|
|
3434
|
-
event_type: event.event_type,
|
|
3435
|
-
summary: event.summary,
|
|
3436
|
-
related_entry_id: event.related_entry_id ?? null,
|
|
3437
|
-
created_at: event.created_at
|
|
3438
|
-
}, tx);
|
|
3752
|
+
if (facts.length > 0) {
|
|
3753
|
+
const ids = facts.map((f) => f.id);
|
|
3754
|
+
const now = Date.now();
|
|
3755
|
+
await this.entryRepo.trackAccess(ids, now);
|
|
3439
3756
|
}
|
|
3757
|
+
} else {
|
|
3758
|
+
facts = await this.entryRepo.findRecentByEntityIds(entityIds, maxResults);
|
|
3759
|
+
}
|
|
3760
|
+
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
3761
|
+
const [tasks, events] = await Promise.all([
|
|
3762
|
+
this.taskRepo.findAllPending(entityIds, entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200)),
|
|
3763
|
+
entityIds.length === 1 ? this.eventRepo.getRecent(entityIds[0], eventsLimit) : this.eventRepo.getRecentForEntities(entityIds, eventsLimit)
|
|
3764
|
+
]);
|
|
3765
|
+
let factScores;
|
|
3766
|
+
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
3767
|
+
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
3768
|
+
}
|
|
3769
|
+
const bundle = { facts, tasks, events: events.reverse() };
|
|
3770
|
+
if (exposeMetadata) {
|
|
3771
|
+
bundle.metadata = { query, entityIds };
|
|
3772
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
3773
|
+
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
3774
|
+
}
|
|
3775
|
+
return bundle;
|
|
3776
|
+
}
|
|
3777
|
+
/**
|
|
3778
|
+
* Returns entity IDs that will participate in scored retrieval.
|
|
3779
|
+
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
3780
|
+
*/
|
|
3781
|
+
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
3782
|
+
return entityIds.filter((id) => {
|
|
3783
|
+
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
3784
|
+
return includeZeroWeightEntities === true || w !== 0;
|
|
3440
3785
|
});
|
|
3441
|
-
|
|
3442
|
-
|
|
3443
|
-
|
|
3444
|
-
|
|
3445
|
-
|
|
3446
|
-
|
|
3447
|
-
|
|
3448
|
-
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3786
|
+
}
|
|
3787
|
+
/**
|
|
3788
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
3789
|
+
*/
|
|
3790
|
+
_tieBreakSort(items) {
|
|
3791
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
3792
|
+
}
|
|
3793
|
+
/**
|
|
3794
|
+
* Comparator for score + deterministic tie-break fields.
|
|
3795
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
3796
|
+
*/
|
|
3797
|
+
_compareScoredRows(a, b) {
|
|
3798
|
+
const scoreDiff = b.score - a.score;
|
|
3799
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
3800
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
3801
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
3802
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
3803
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
3804
|
+
return a.id.localeCompare(b.id);
|
|
3805
|
+
}
|
|
3806
|
+
/**
|
|
3807
|
+
* Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
|
|
3808
|
+
* (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
|
|
3809
|
+
*/
|
|
3810
|
+
async _hydrateFactsByIds(ids, scopedEntityIds, tx) {
|
|
3811
|
+
return this.entryRepo.findByIds(ids, scopedEntityIds, tx);
|
|
3812
|
+
}
|
|
3813
|
+
_sanitizeRankerError(err) {
|
|
3814
|
+
return sanitizeRankerError(err, this.options.sanitizeRankerErrors);
|
|
3815
|
+
}
|
|
3816
|
+
/**
|
|
3817
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
3818
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
3819
|
+
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
3820
|
+
*/
|
|
3821
|
+
async _rankWithVectorRanker(args) {
|
|
3822
|
+
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
3823
|
+
const ranker = this.options.vectorRanker;
|
|
3824
|
+
if (!ranker) {
|
|
3825
|
+
throw new Error("vectorRanker not configured");
|
|
3454
3826
|
}
|
|
3455
|
-
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3827
|
+
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
3828
|
+
const rankerResults = await ranker.rankBySimilarity({
|
|
3829
|
+
entityId,
|
|
3830
|
+
queryVec: queryVecCopy,
|
|
3831
|
+
candidateIds,
|
|
3832
|
+
limit
|
|
3833
|
+
});
|
|
3834
|
+
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
3835
|
+
const seen = /* @__PURE__ */ new Set();
|
|
3836
|
+
const normalized = [];
|
|
3837
|
+
for (const r of rankerResults) {
|
|
3838
|
+
if (normalized.length >= limit) break;
|
|
3839
|
+
if (seen.has(r.id)) continue;
|
|
3840
|
+
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
3841
|
+
if (!Number.isFinite(r.semanticScore)) continue;
|
|
3842
|
+
seen.add(r.id);
|
|
3843
|
+
normalized.push(r);
|
|
3465
3844
|
}
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
}
|
|
3845
|
+
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
3846
|
+
const scored = normalized.map((r) => {
|
|
3847
|
+
let score = r.semanticScore;
|
|
3848
|
+
if (weight !== void 0) {
|
|
3849
|
+
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
3850
|
+
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
3473
3851
|
}
|
|
3852
|
+
return {
|
|
3853
|
+
id: r.id,
|
|
3854
|
+
entity_id: entityIdByCandidateId.get(r.id),
|
|
3855
|
+
// allowedIds filter above guarantees membership
|
|
3856
|
+
score
|
|
3857
|
+
};
|
|
3858
|
+
});
|
|
3859
|
+
return scored;
|
|
3860
|
+
}
|
|
3861
|
+
};
|
|
3862
|
+
|
|
3863
|
+
// src/services/WriteService.ts
|
|
3864
|
+
/**
 * Records events for an entity and triggers background maintenance
 * (librarian, then optionally heal) once enough events have accumulated
 * since the last stored checkpoint.
 */
var WriteService = class {
  /**
   * @param {object} db - Database handle exposing `withTransactionAsync`.
   * @param {object} options - Service options; `options.config` holds the thresholds.
   * @param {object} eventRepo - Event repository (`add`, `count`).
   * @param {object} metadataRepo - Checkpoint store (`getCheckpoint`, `updateCheckpoint`).
   * @param {object} jobManager - Lock manager for maintenance jobs.
   * @param {object} maintenanceService - Provides `doRunLibrarian` and `doRunHeal`.
   */
  constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
    this.db = db;
    this.options = options;
    this.eventRepo = eventRepo;
    this.metadataRepo = metadataRepo;
    this.jobManager = jobManager;
    this.maintenanceService = maintenanceService;
  }
  /**
   * Stores one event for `entityId` and, if the librarian threshold has been
   * reached, starts a librarian run in the background.
   *
   * An unrecognized `event.event_type` is stored as "observation".
   * The threshold is `options.config.autoLibrarianThreshold`, falling back to
   * 20 when that value is missing or falsy.
   *
   * @param {string} entityId
   * @param {{event_type: string, summary: string, related_entry_id?: string|null}} event
   * @returns {Promise<void>}
   */
  async write(entityId, event) {
    const id = generateId("evt_");
    const now = Date.now();
    let eventType = event.event_type;
    if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
      eventType = "observation";
    }
    const newEvent = {
      id,
      entity_id: entityId,
      event_type: eventType,
      summary: event.summary,
      related_entry_id: event.related_entry_id || null,
      created_at: now
    };
    let shouldRunLibrarian = false;
    let librarianCount = 0;
    let prevMemoryCheckpoint = 0;
    await this.db.withTransactionAsync(async (tx) => {
      await this.eventRepo.add(newEvent, tx);
      const threshold = this.options.config?.autoLibrarianThreshold || 20;
      const [count, cp] = await Promise.all([
        this.eventRepo.count(entityId, tx),
        this.metadataRepo.getCheckpoint(entityId, tx)
      ]);
      let memoryCheckpoint = cp.memory ?? 0;
      // A checkpoint above the current event count is treated as stale and reset.
      if (memoryCheckpoint > count) memoryCheckpoint = 0;
      if (count - memoryCheckpoint >= threshold) {
        if (!this.jobManager.isBlocked("librarian", entityId)) {
          shouldRunLibrarian = true;
          librarianCount = count;
          prevMemoryCheckpoint = memoryCheckpoint;
          // Advance the checkpoint inside the same transaction as the insert.
          await this.metadataRepo.updateCheckpoint(entityId, { memory: count }, tx);
        }
      }
    });
    if (shouldRunLibrarian) {
      try {
        this.jobManager.acquireLock("librarian", entityId);
        // Deliberately not awaited: the run continues in the background, failures
        // are logged, and the lock is released whichever way it ends.
        this.runLibrarianThenMaybeHeal(entityId, librarianCount, prevMemoryCheckpoint).catch(console.error).finally(() => {
          this.jobManager.releaseLock("librarian", entityId);
        });
      } catch (e) {
        if (!(e instanceof WikiBusyError)) throw e;
        // The lock could not be acquired, so restore the checkpoint advanced in the transaction.
        await this.metadataRepo.updateCheckpoint(entityId, { memory: prevMemoryCheckpoint }, this.db);
      }
    }
  }
  /**
   * Runs the librarian for `entityId`, then runs heal when the heal threshold
   * (`options.config.autoHealThreshold`, falling back to 100 when missing or
   * falsy) has been reached and the auto-heal lock can be acquired.
   *
   * If the librarian step fails, the memory checkpoint is restored to
   * `prevCheckpoint` and the original error is rethrown. A failure of that
   * restore is logged with `console.error` and does not replace the original
   * error.
   *
   * @param {string} entityId
   * @param {number} currentEventCount - Event count observed when the run was scheduled.
   * @param {number} prevCheckpoint - Memory checkpoint to restore on failure.
   * @returns {Promise<void>}
   */
  async runLibrarianThenMaybeHeal(entityId, currentEventCount, prevCheckpoint) {
    try {
      await this.maintenanceService.doRunLibrarian(entityId);
      await this.metadataRepo.updateCheckpoint(entityId, { memory: currentEventCount }, this.db);
    } catch (e) {
      try {
        await this.metadataRepo.updateCheckpoint(entityId, { memory: prevCheckpoint }, this.db);
      } catch (rollbackErr) {
        // Report the failed rollback, but keep the root-cause error as the one that propagates.
        console.error(rollbackErr);
      }
      throw e;
    }
    const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
    const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
    let healCheckpoint = cp.heal ?? 0;
    // Same stale-checkpoint reset as for the memory checkpoint in write().
    if (healCheckpoint > currentEventCount) healCheckpoint = 0;
    const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
    if (shouldRunHeal && this.jobManager.tryAcquireAutoHealLock(entityId)) {
      try {
        await this.maintenanceService.doRunHeal(entityId);
        await this.metadataRepo.updateCheckpoint(entityId, { heal: currentEventCount }, this.db);
      } finally {
        this.jobManager.releaseLock("heal", entityId);
      }
    }
  }
};
|
|
3944
|
+
|
|
3945
|
+
// src/WikiMemory.ts
|
|
3946
|
+
var _testAccessNonTestEnvWarned;
|
|
3947
|
+
var WikiMemory = class {
|
|
3948
|
+
constructor(db, options) {
|
|
3949
|
+
/** Emits `__testAccess` console warning at most once per instance when NODE_ENV ≠ "test". */
|
|
3950
|
+
__privateAdd(this, _testAccessNonTestEnvWarned, false);
|
|
3951
|
+
this.db = db;
|
|
3952
|
+
this.options = options;
|
|
3953
|
+
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
3954
|
+
this.outboxRepo = new OutboxRepository(db, this.prefix);
|
|
3955
|
+
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
3956
|
+
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
3957
|
+
this.eventRepo = new EventRepository(db, this.prefix);
|
|
3958
|
+
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
3959
|
+
this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
|
|
3960
|
+
this.searchService = new SearchService(this.entryRepo);
|
|
3961
|
+
this.jobManager = new JobManager(this.prefix);
|
|
3962
|
+
this.promptService = new PromptService(options.config?.prompts);
|
|
3963
|
+
this.ingestionService = new IngestionService(
|
|
3964
|
+
this.db,
|
|
3965
|
+
this.prefix,
|
|
3966
|
+
this.options,
|
|
3967
|
+
this.entryRepo,
|
|
3968
|
+
this.searchService,
|
|
3969
|
+
this.jobManager,
|
|
3970
|
+
this.embeddingService,
|
|
3971
|
+
this.promptService
|
|
3610
3972
|
);
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
|
|
3615
|
-
|
|
3616
|
-
|
|
3617
|
-
|
|
3618
|
-
|
|
3973
|
+
this.maintenanceService = new MaintenanceService(
|
|
3974
|
+
this.db,
|
|
3975
|
+
this.prefix,
|
|
3976
|
+
this.options,
|
|
3977
|
+
this.entryRepo,
|
|
3978
|
+
this.taskRepo,
|
|
3979
|
+
this.eventRepo,
|
|
3980
|
+
this.metadataRepo,
|
|
3981
|
+
this.searchService,
|
|
3982
|
+
this.jobManager,
|
|
3983
|
+
this.embeddingService,
|
|
3984
|
+
this.promptService
|
|
3985
|
+
);
|
|
3986
|
+
this.importExportService = new ImportExportService(
|
|
3987
|
+
this.db,
|
|
3988
|
+
this.entryRepo,
|
|
3989
|
+
this.taskRepo,
|
|
3990
|
+
this.eventRepo,
|
|
3991
|
+
this.metadataRepo,
|
|
3992
|
+
this.searchService,
|
|
3993
|
+
this.jobManager,
|
|
3994
|
+
this.embeddingService
|
|
3995
|
+
);
|
|
3996
|
+
this.retrievalService = new RetrievalService(
|
|
3997
|
+
this.options,
|
|
3998
|
+
this.entryRepo,
|
|
3999
|
+
this.taskRepo,
|
|
4000
|
+
this.eventRepo,
|
|
4001
|
+
this.metadataRepo,
|
|
4002
|
+
this.searchService
|
|
4003
|
+
);
|
|
4004
|
+
this.writeService = new WriteService(
|
|
4005
|
+
this.db,
|
|
4006
|
+
this.options,
|
|
4007
|
+
this.eventRepo,
|
|
4008
|
+
this.metadataRepo,
|
|
4009
|
+
this.jobManager,
|
|
4010
|
+
this.maintenanceService
|
|
4011
|
+
);
|
|
4012
|
+
}
|
|
4013
|
+
/**
|
|
4014
|
+
* Explicit escape hatch for test suites: typed access to composed services for mocks/spies.
|
|
4015
|
+
* If `NODE_ENV` is not `"test"`, emits a single `console.warn` per instance (skipped when `process` is undefined).
|
|
4016
|
+
*/
|
|
4017
|
+
get __testAccess() {
|
|
4018
|
+
const processEnv = typeof globalThis !== "undefined" ? globalThis.process?.env : void 0;
|
|
4019
|
+
if (processEnv !== void 0 && processEnv.NODE_ENV !== "test" && !__privateGet(this, _testAccessNonTestEnvWarned)) {
|
|
4020
|
+
__privateSet(this, _testAccessNonTestEnvWarned, true);
|
|
4021
|
+
console.warn('Warning: WikiMemory.__testAccess is intended for tests (NODE_ENV !== "test").');
|
|
3619
4022
|
}
|
|
3620
|
-
|
|
3621
|
-
|
|
4023
|
+
return {
|
|
4024
|
+
embeddingService: this.embeddingService,
|
|
4025
|
+
importExportService: this.importExportService,
|
|
4026
|
+
ingestionService: this.ingestionService,
|
|
4027
|
+
maintenanceService: this.maintenanceService,
|
|
4028
|
+
retrievalService: this.retrievalService,
|
|
4029
|
+
searchService: this.searchService,
|
|
4030
|
+
writeService: this.writeService,
|
|
4031
|
+
promptService: this.promptService,
|
|
4032
|
+
entryRepo: this.entryRepo,
|
|
4033
|
+
metadataRepo: this.metadataRepo,
|
|
4034
|
+
jobManager: this.jobManager
|
|
4035
|
+
};
|
|
4036
|
+
}
|
|
4037
|
+
async setup() {
|
|
4038
|
+
const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
|
|
4039
|
+
await setupDatabase(this.db, this.prefix);
|
|
4040
|
+
let currentVersion;
|
|
4041
|
+
if (!entriesExistedBeforeSetup) {
|
|
4042
|
+
await this.metadataRepo.setMeta("schema_version", String(CURRENT_SCHEMA_VERSION), this.db);
|
|
4043
|
+
currentVersion = CURRENT_SCHEMA_VERSION;
|
|
4044
|
+
} else {
|
|
4045
|
+
const schemaVersionValue = await this.metadataRepo.getMeta("schema_version");
|
|
4046
|
+
if (schemaVersionValue) {
|
|
4047
|
+
currentVersion = parseInt(schemaVersionValue, 10);
|
|
4048
|
+
if (!Number.isFinite(currentVersion)) currentVersion = 0;
|
|
4049
|
+
} else {
|
|
4050
|
+
const ftsDdl = await this.metadataRepo.getTableDdl(`${this.prefix}entries_fts`);
|
|
4051
|
+
const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsDdl ?? "");
|
|
4052
|
+
currentVersion = hasPorter ? 1 : 0;
|
|
4053
|
+
}
|
|
3622
4054
|
}
|
|
3623
|
-
|
|
3624
|
-
|
|
4055
|
+
for (const migration of MIGRATIONS) {
|
|
4056
|
+
if (migration.version > currentVersion) {
|
|
4057
|
+
await migration.run(this.db, this.prefix);
|
|
4058
|
+
await this.metadataRepo.setMeta("schema_version", String(migration.version), this.db);
|
|
4059
|
+
currentVersion = migration.version;
|
|
4060
|
+
}
|
|
3625
4061
|
}
|
|
3626
|
-
if (
|
|
3627
|
-
|
|
4062
|
+
if (entriesExistedBeforeSetup) {
|
|
4063
|
+
const schemaVersionCheck = await this.metadataRepo.getMeta("schema_version");
|
|
4064
|
+
if (!schemaVersionCheck) {
|
|
4065
|
+
await this.metadataRepo.setMeta("schema_version", String(currentVersion), this.db);
|
|
4066
|
+
}
|
|
3628
4067
|
}
|
|
3629
|
-
if (
|
|
3630
|
-
|
|
4068
|
+
if (entriesExistedBeforeSetup) {
|
|
4069
|
+
await this.importExportService.assertNoLegacySourceTypes();
|
|
3631
4070
|
}
|
|
3632
|
-
this.
|
|
3633
|
-
this.
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
|
|
3638
|
-
}
|
|
3639
|
-
const chunkResults = await withConcurrency(
|
|
3640
|
-
chunks.map((chunk) => async () => {
|
|
3641
|
-
const userPrompt = `Document Chunk:
|
|
3642
|
-
${chunk}`;
|
|
3643
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
3644
|
-
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
3645
|
-
userPrompt
|
|
3646
|
-
});
|
|
3647
|
-
const result = parseJsonResponse(responseText);
|
|
3648
|
-
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
3649
|
-
}),
|
|
3650
|
-
chunkConcurrency
|
|
3651
|
-
);
|
|
3652
|
-
const seen = /* @__PURE__ */ new Set();
|
|
3653
|
-
const allValidFacts = [];
|
|
3654
|
-
for (const facts of chunkResults) {
|
|
3655
|
-
for (const fact of facts) {
|
|
3656
|
-
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
3657
|
-
if (!seen.has(normalized)) {
|
|
3658
|
-
seen.add(normalized);
|
|
3659
|
-
allValidFacts.push(fact);
|
|
3660
|
-
}
|
|
3661
|
-
}
|
|
3662
|
-
}
|
|
3663
|
-
const now = Date.now();
|
|
3664
|
-
const insertedFacts = [];
|
|
3665
|
-
const deletedSourceFactIds = [];
|
|
3666
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3667
|
-
deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
|
|
3668
|
-
await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
|
|
3669
|
-
for (const fact of allValidFacts) {
|
|
3670
|
-
const id = generateId("fact_");
|
|
3671
|
-
const wikiFact = {
|
|
3672
|
-
id,
|
|
3673
|
-
entity_id: entityId,
|
|
3674
|
-
title: fact.title,
|
|
3675
|
-
body: fact.body,
|
|
3676
|
-
tags: fact.tags,
|
|
3677
|
-
confidence: fact.confidence,
|
|
3678
|
-
source_type: "immutable_document",
|
|
3679
|
-
source_hash: sourceHash,
|
|
3680
|
-
source_ref: sourceRef,
|
|
3681
|
-
created_at: now,
|
|
3682
|
-
updated_at: now,
|
|
3683
|
-
last_accessed_at: null,
|
|
3684
|
-
access_count: 0,
|
|
3685
|
-
deleted_at: null
|
|
3686
|
-
};
|
|
3687
|
-
await this.entryRepo.upsert(wikiFact, tx);
|
|
3688
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
3689
|
-
}
|
|
3690
|
-
});
|
|
3691
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3692
|
-
this.vectorCache.delete(entityId);
|
|
3693
|
-
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
3694
|
-
for (const factId of uniqueDeletedSourceFactIds) {
|
|
3695
|
-
try {
|
|
3696
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
3697
|
-
} catch (hookErr) {
|
|
3698
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
4071
|
+
const rows = await this.entryRepo.findRowsForSourceRefMigration();
|
|
4072
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
4073
|
+
for (const row of rows) {
|
|
4074
|
+
const normalized = normalizeSourceRef(row.source_ref);
|
|
4075
|
+
if (normalized !== row.source_ref) {
|
|
4076
|
+
await this.entryRepo.updateSourceRefByRowid(row.rowid, normalized, tx);
|
|
3699
4077
|
}
|
|
3700
4078
|
}
|
|
3701
|
-
|
|
3702
|
-
|
|
3703
|
-
|
|
3704
|
-
|
|
3705
|
-
|
|
3706
|
-
|
|
3707
|
-
|
|
3708
|
-
|
|
4079
|
+
});
|
|
4080
|
+
await this.searchService.sync();
|
|
4081
|
+
}
|
|
4082
|
+
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
4083
|
+
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
4084
|
+
if (!normalizedRef) {
|
|
4085
|
+
throw new Error(`Invalid sourceRef: "${sourceRef}"`);
|
|
4086
|
+
}
|
|
4087
|
+
const normalizedHash = normalizeSourceHash(sourceHash);
|
|
4088
|
+
if (!normalizedHash) {
|
|
4089
|
+
throw new Error(`Invalid sourceHash: must be a 64-character hex string (normalized to lowercase)`);
|
|
3709
4090
|
}
|
|
4091
|
+
const storedHash = await this.entryRepo.findLatestSourceHash(entityId, normalizedRef);
|
|
4092
|
+
if (storedHash === null) return true;
|
|
4093
|
+
const normalizedStoredHash = normalizeSourceHash(storedHash);
|
|
4094
|
+
return normalizedStoredHash !== normalizedHash;
|
|
4095
|
+
}
|
|
4096
|
+
async runPrune(entityId, options) {
|
|
4097
|
+
return this.maintenanceService.runPrune(entityId, options);
|
|
4098
|
+
}
|
|
4099
|
+
async read(entityId, query, options) {
|
|
4100
|
+
return this.retrievalService.read(entityId, query, options);
|
|
4101
|
+
}
|
|
4102
|
+
async getMemoryBundle(entityId) {
|
|
4103
|
+
return this.importExportService.getFullBundle(entityId, { maxEvents: 10 });
|
|
4104
|
+
}
|
|
4105
|
+
async write(entityId, event) {
|
|
4106
|
+
return this.writeService.write(entityId, event);
|
|
4107
|
+
}
|
|
4108
|
+
/**
|
|
4109
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
4110
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
4111
|
+
* set `options.config.prompts.librarianSystemPrompt` at WikiMemory construction time.
|
|
4112
|
+
*/
|
|
4113
|
+
async runLibrarian(entityId, options) {
|
|
4114
|
+
return this.maintenanceService.runLibrarian(entityId, options);
|
|
4115
|
+
}
|
|
4116
|
+
/**
|
|
4117
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
4118
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
4119
|
+
* set `options.config.prompts.healSystemPrompt` at WikiMemory construction time.
|
|
4120
|
+
*/
|
|
4121
|
+
async runHeal(entityId, options) {
|
|
4122
|
+
return this.maintenanceService.runHeal(entityId, options);
|
|
4123
|
+
}
|
|
4124
|
+
async runReembed(entityId, opts) {
|
|
4125
|
+
return this.maintenanceService.runReembed(entityId, opts);
|
|
4126
|
+
}
|
|
4127
|
+
getEntityStatus(entityId) {
|
|
4128
|
+
return this.jobManager.getEntityStatus(entityId);
|
|
4129
|
+
}
|
|
4130
|
+
subscribeEntityStatus(entityId, callback) {
|
|
4131
|
+
return this.jobManager.subscribeEntityStatus(entityId, callback);
|
|
4132
|
+
}
|
|
4133
|
+
clearVectorCache() {
|
|
4134
|
+
this.searchService.evictCache();
|
|
4135
|
+
}
|
|
4136
|
+
async exportDump(entityIds) {
|
|
4137
|
+
return this.importExportService.exportDump(entityIds);
|
|
4138
|
+
}
|
|
4139
|
+
async importDump(dump, opts) {
|
|
4140
|
+
return this.importExportService.importDump(dump, opts);
|
|
4141
|
+
}
|
|
4142
|
+
async forget(entityId, params) {
|
|
4143
|
+
return this.maintenanceService.forget(entityId, params);
|
|
4144
|
+
}
|
|
4145
|
+
/**
|
|
4146
|
+
* @param params.promptOverride - Overrides the system prompt for this ingest call only.
|
|
4147
|
+
* For persistent customization, set `options.config.prompts.ingestSystemPrompt` at
|
|
4148
|
+
* WikiMemory construction time.
|
|
4149
|
+
*/
|
|
4150
|
+
async ingestDocument(entityId, params) {
|
|
4151
|
+
return this.ingestionService.ingestDocument(entityId, params);
|
|
3710
4152
|
}
|
|
3711
4153
|
};
|
|
3712
|
-
|
|
3713
|
-
* Maximum number of entities whose parsed embedding vectors are held in
|
|
3714
|
-
* memory. This cap is intentionally conservative so the cache remains safe
|
|
3715
|
-
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
3716
|
-
*/
|
|
3717
|
-
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
3718
|
-
/**
|
|
3719
|
-
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
3720
|
-
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
3721
|
-
* entities while still maintaining a bounded memory footprint.
|
|
3722
|
-
*/
|
|
3723
|
-
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
3724
|
-
var WikiMemory = _WikiMemory;
|
|
4154
|
+
_testAccessNonTestEnvWarned = new WeakMap();
|
|
3725
4155
|
|
|
3726
4156
|
// src/utils/formatContext.ts
|
|
3727
4157
|
function validateMaxOption(value, name) {
|
|
@@ -3989,6 +4419,8 @@ function createWiki(db, options) {
|
|
|
3989
4419
|
}
|
|
3990
4420
|
|
|
3991
4421
|
exports.DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT = DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT;
|
|
4422
|
+
exports.HOOK_TIMEOUT_MARKER = HOOK_TIMEOUT_MARKER;
|
|
4423
|
+
exports.PromptService = PromptService;
|
|
3992
4424
|
exports.PrunePartialFailureError = PrunePartialFailureError;
|
|
3993
4425
|
exports.WikiBusyError = WikiBusyError;
|
|
3994
4426
|
exports.WikiMemory = WikiMemory;
|