@equationalapplications/core-llm-wiki 4.7.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/dist/chunk-6FWG2DG4.mjs +2547 -0
- package/dist/chunk-6FWG2DG4.mjs.map +1 -0
- package/dist/index.d.mts +4 -530
- package/dist/index.d.ts +4 -530
- package/dist/index.js +2489 -2020
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +200 -2274
- package/dist/index.mjs.map +1 -1
- package/dist/testing-CDIDE4Jd.d.mts +1141 -0
- package/dist/testing-CDIDE4Jd.d.ts +1141 -0
- package/dist/testing.d.mts +2 -0
- package/dist/testing.d.ts +2 -0
- package/dist/testing.js +2552 -0
- package/dist/testing.js.map +1 -0
- package/dist/testing.mjs +3 -0
- package/dist/testing.mjs.map +1 -0
- package/package.json +6 -1
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { __privateAdd, EmbeddingService, SearchService, JobManager, PromptService, IngestionService, MaintenanceService, ImportExportService, RetrievalService, WriteService, __privateGet, __privateSet, normalizeSourceRef, normalizeSourceHash, generateId } from './chunk-6FWG2DG4.mjs';
|
|
2
|
+
export { HOOK_TIMEOUT_MARKER, PromptService, PrunePartialFailureError, WikiBusyError, parseEmbedding } from './chunk-6FWG2DG4.mjs';
|
|
2
3
|
|
|
3
4
|
// src/db/schema.ts
|
|
4
5
|
async function setupDatabase(db, prefix) {
|
|
@@ -75,6 +76,9 @@ async function setupDatabase(db, prefix) {
|
|
|
75
76
|
|
|
76
77
|
CREATE INDEX IF NOT EXISTS ${prefix}outbox_entity_id_created_at
|
|
77
78
|
ON ${prefix}outbox (entity_id, created_at);
|
|
79
|
+
|
|
80
|
+
CREATE INDEX IF NOT EXISTS ${prefix}outbox_created_at
|
|
81
|
+
ON ${prefix}outbox (created_at);
|
|
78
82
|
`);
|
|
79
83
|
}
|
|
80
84
|
|
|
@@ -149,28 +153,6 @@ for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
|
149
153
|
}
|
|
150
154
|
var CURRENT_SCHEMA_VERSION = MIGRATIONS.length > 0 ? MIGRATIONS[MIGRATIONS.length - 1].version : 0;
|
|
151
155
|
|
|
152
|
-
// src/types.ts
|
|
153
|
-
var WikiBusyError = class extends Error {
|
|
154
|
-
constructor(operation, entityId) {
|
|
155
|
-
super(`${operation} already running for entity ${entityId}`);
|
|
156
|
-
this.name = "WikiBusyError";
|
|
157
|
-
this.operation = operation;
|
|
158
|
-
this.entityId = entityId;
|
|
159
|
-
}
|
|
160
|
-
};
|
|
161
|
-
var PrunePartialFailureError = class extends Error {
|
|
162
|
-
constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
|
|
163
|
-
super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
|
|
164
|
-
this.name = "PrunePartialFailureError";
|
|
165
|
-
this.deleted = deleted;
|
|
166
|
-
this.failedAt = failedAt;
|
|
167
|
-
this.remaining = remaining;
|
|
168
|
-
this.deletedTasks = deletedTasks;
|
|
169
|
-
this.deletedEvents = deletedEvents;
|
|
170
|
-
this.cause = cause;
|
|
171
|
-
}
|
|
172
|
-
};
|
|
173
|
-
|
|
174
156
|
// src/repositories/BaseRepository.ts
|
|
175
157
|
var BaseRepository = class {
|
|
176
158
|
constructor(db, prefix) {
|
|
@@ -214,6 +196,28 @@ function mapRowToFact(row) {
|
|
|
214
196
|
access_count: Number(row.access_count ?? 0)
|
|
215
197
|
};
|
|
216
198
|
}
|
|
199
|
+
function normalizeEmbeddingBlobValue(blob) {
|
|
200
|
+
if (blob instanceof Uint8Array) return blob;
|
|
201
|
+
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
202
|
+
const obj = blob;
|
|
203
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
204
|
+
return new Uint8Array(obj["data"]);
|
|
205
|
+
}
|
|
206
|
+
const entries = Object.keys(obj);
|
|
207
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
208
|
+
const len = entries.length;
|
|
209
|
+
const arr = new Uint8Array(len);
|
|
210
|
+
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
211
|
+
return arr;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
return null;
|
|
215
|
+
}
|
|
216
|
+
function mapRowToFactWithBlobs(row) {
|
|
217
|
+
const base = mapRowToFact(row);
|
|
218
|
+
const embeddingBlob = normalizeEmbeddingBlobValue(row.embedding_blob);
|
|
219
|
+
return embeddingBlob ? { ...base, embedding_blob: embeddingBlob } : base;
|
|
220
|
+
}
|
|
217
221
|
var EntryRepository = class extends BaseRepository {
|
|
218
222
|
constructor(db, prefix, outbox) {
|
|
219
223
|
super(db, prefix);
|
|
@@ -309,21 +313,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
309
313
|
* Normalize an embedding blob value to Uint8Array or null.
|
|
310
314
|
*/
|
|
311
315
|
normalizeEmbeddingBlob(blob) {
|
|
312
|
-
|
|
313
|
-
if (blob !== null && blob !== void 0 && typeof blob === "object") {
|
|
314
|
-
const obj = blob;
|
|
315
|
-
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
316
|
-
return new Uint8Array(obj["data"]);
|
|
317
|
-
}
|
|
318
|
-
const entries = Object.keys(obj);
|
|
319
|
-
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
320
|
-
const len = entries.length;
|
|
321
|
-
const arr = new Uint8Array(len);
|
|
322
|
-
for (let i = 0; i < len; i++) arr[i] = obj[String(i)] ?? 0;
|
|
323
|
-
return arr;
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
return null;
|
|
316
|
+
return normalizeEmbeddingBlobValue(blob);
|
|
327
317
|
}
|
|
328
318
|
/**
|
|
329
319
|
* Fetch existing rows by IDs and return id/entity_id/updated_at for import collision resolution.
|
|
@@ -503,7 +493,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
503
493
|
}
|
|
504
494
|
/**
|
|
505
495
|
* Fetch recent non-deleted entries for an entity (limited), ordered by updated_at DESC.
|
|
506
|
-
* Used by
|
|
496
|
+
* Used by MaintenanceService.doRunLibrarian().
|
|
507
497
|
*/
|
|
508
498
|
async findRecentByEntityId(entityId, limit, tx) {
|
|
509
499
|
const executor = this.getExecutor(tx);
|
|
@@ -513,6 +503,18 @@ var EntryRepository = class extends BaseRepository {
|
|
|
513
503
|
);
|
|
514
504
|
return rows.map(mapRowToFact);
|
|
515
505
|
}
|
|
506
|
+
/**
|
|
507
|
+
* Fetch all non-deleted entries for an entity with embedding blobs preserved.
|
|
508
|
+
* Used by ImportExportService for export/import round-tripping.
|
|
509
|
+
*/
|
|
510
|
+
async findAllByEntityIdWithBlobs(entityId, tx) {
|
|
511
|
+
const executor = this.getExecutor(tx);
|
|
512
|
+
const rows = await executor.getAllAsync(
|
|
513
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
514
|
+
[entityId]
|
|
515
|
+
);
|
|
516
|
+
return rows.map(mapRowToFactWithBlobs);
|
|
517
|
+
}
|
|
516
518
|
/**
|
|
517
519
|
* Count non-deleted entries for the given entities whose embedding_blob dimension
|
|
518
520
|
* doesn't match queryVecLength. Used by read() to detect model-switch mismatches.
|
|
@@ -599,24 +601,19 @@ var EntryRepository = class extends BaseRepository {
|
|
|
599
601
|
}
|
|
600
602
|
/**
|
|
601
603
|
* Mark orphaned entries (never accessed, old) as deleted.
|
|
602
|
-
* Used by
|
|
604
|
+
* Used by MaintenanceService.doRunHeal().
|
|
603
605
|
*/
|
|
604
606
|
async markOrphaned(entityId, orphanThreshold, tx) {
|
|
605
607
|
const executor = this.getExecutor(tx);
|
|
606
608
|
const now = Date.now();
|
|
607
|
-
const
|
|
608
|
-
`SELECT id FROM ${this.prefix}entries
|
|
609
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL`,
|
|
610
|
-
[entityId, orphanThreshold]
|
|
611
|
-
);
|
|
612
|
-
if (orphanedRows.length === 0) return 0;
|
|
613
|
-
const result = await executor.runAsync(
|
|
609
|
+
const updatedRows = await executor.getAllAsync(
|
|
614
610
|
`UPDATE ${this.prefix}entries
|
|
615
611
|
SET deleted_at = ?, updated_at = ?
|
|
616
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
612
|
+
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
613
|
+
RETURNING id`,
|
|
617
614
|
[now, now, entityId, orphanThreshold]
|
|
618
615
|
);
|
|
619
|
-
for (const row of
|
|
616
|
+
for (const row of updatedRows) {
|
|
620
617
|
await this.outbox.push({
|
|
621
618
|
entityId,
|
|
622
619
|
tableName: "entries",
|
|
@@ -625,11 +622,11 @@ var EntryRepository = class extends BaseRepository {
|
|
|
625
622
|
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
626
623
|
}, tx);
|
|
627
624
|
}
|
|
628
|
-
return
|
|
625
|
+
return updatedRows.map((r) => r.id);
|
|
629
626
|
}
|
|
630
627
|
/**
|
|
631
628
|
* Downgrade stale inferred entries to 'tentative'.
|
|
632
|
-
* Used by
|
|
629
|
+
* Used by MaintenanceService.doRunHeal().
|
|
633
630
|
*/
|
|
634
631
|
async downgradeStaleInferred(entityId, staleThreshold, tx) {
|
|
635
632
|
const executor = this.getExecutor(tx);
|
|
@@ -661,7 +658,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
661
658
|
}
|
|
662
659
|
/**
|
|
663
660
|
* Downgrade specific entries to 'tentative' by IDs.
|
|
664
|
-
* Used by
|
|
661
|
+
* Used by MaintenanceService.doRunHeal().
|
|
665
662
|
*/
|
|
666
663
|
async downgradeByIds(ids, entityId, tx) {
|
|
667
664
|
if (ids.length === 0) return;
|
|
@@ -684,7 +681,7 @@ var EntryRepository = class extends BaseRepository {
|
|
|
684
681
|
}
|
|
685
682
|
/**
|
|
686
683
|
* Soft-delete specific entries by IDs.
|
|
687
|
-
* Used by
|
|
684
|
+
* Used by MaintenanceService.doRunHeal().
|
|
688
685
|
*/
|
|
689
686
|
async softDeleteByIds(ids, entityId, tx) {
|
|
690
687
|
if (ids.length === 0) return;
|
|
@@ -905,27 +902,20 @@ var EntryRepository = class extends BaseRepository {
|
|
|
905
902
|
}
|
|
906
903
|
};
|
|
907
904
|
|
|
908
|
-
// src/utils/ids.ts
|
|
909
|
-
function generateId(prefix = "") {
|
|
910
|
-
if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
|
|
911
|
-
return prefix + crypto.randomUUID().replace(/-/g, "").substring(0, 24);
|
|
912
|
-
}
|
|
913
|
-
if (typeof crypto !== "undefined" && typeof crypto.getRandomValues === "function") {
|
|
914
|
-
const bytes = new Uint8Array(16);
|
|
915
|
-
crypto.getRandomValues(bytes);
|
|
916
|
-
return prefix + Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("").substring(0, 24);
|
|
917
|
-
}
|
|
918
|
-
return prefix + Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
|
|
919
|
-
}
|
|
920
|
-
|
|
921
905
|
// src/repositories/OutboxRepository.ts
|
|
922
906
|
var OutboxRepository = class extends BaseRepository {
|
|
907
|
+
constructor(db, prefix, enableOutbox = false) {
|
|
908
|
+
super(db, prefix);
|
|
909
|
+
this.enableOutbox = enableOutbox;
|
|
910
|
+
}
|
|
923
911
|
/**
|
|
924
912
|
* Insert a new outbox event within the provided transaction.
|
|
913
|
+
* No-op when enableOutbox is false.
|
|
925
914
|
* `tx` is required — callers must always pass the active transaction
|
|
926
915
|
* so the write is atomic with the main table mutation.
|
|
927
916
|
*/
|
|
928
917
|
async push(params, tx) {
|
|
918
|
+
if (!this.enableOutbox) return;
|
|
929
919
|
const executor = this.getExecutor(tx);
|
|
930
920
|
const id = generateId("out_");
|
|
931
921
|
const now = Date.now();
|
|
@@ -936,12 +926,12 @@ var OutboxRepository = class extends BaseRepository {
|
|
|
936
926
|
);
|
|
937
927
|
}
|
|
938
928
|
/**
|
|
939
|
-
* Fetch pending outbox rows ordered by created_at ASC.
|
|
929
|
+
* Fetch pending outbox rows ordered by created_at ASC, rowid ASC.
|
|
940
930
|
* Reads directly from `this.db` (not a transaction).
|
|
941
931
|
*/
|
|
942
932
|
async fetchPending(limit = 50) {
|
|
943
933
|
return this.db.getAllAsync(
|
|
944
|
-
`SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC LIMIT ?`,
|
|
934
|
+
`SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC, rowid ASC LIMIT ?`,
|
|
945
935
|
[limit]
|
|
946
936
|
);
|
|
947
937
|
}
|
|
@@ -952,11 +942,15 @@ var OutboxRepository = class extends BaseRepository {
|
|
|
952
942
|
*/
|
|
953
943
|
async acknowledge(ids) {
|
|
954
944
|
if (ids.length === 0) return;
|
|
955
|
-
const
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
945
|
+
const chunkSize = 500;
|
|
946
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
947
|
+
const chunk = ids.slice(i, i + chunkSize);
|
|
948
|
+
const placeholders = chunk.map(() => "?").join(", ");
|
|
949
|
+
await this.db.runAsync(
|
|
950
|
+
`DELETE FROM ${this.prefix}outbox WHERE id IN (${placeholders})`,
|
|
951
|
+
chunk
|
|
952
|
+
);
|
|
953
|
+
}
|
|
960
954
|
}
|
|
961
955
|
};
|
|
962
956
|
|
|
@@ -1414,534 +1408,97 @@ var MetadataRepository = class extends BaseRepository {
|
|
|
1414
1408
|
}
|
|
1415
1409
|
};
|
|
1416
1410
|
|
|
1417
|
-
// src/prompts.ts
|
|
1418
|
-
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
1419
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1420
|
-
{
|
|
1421
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
1422
|
-
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
1423
|
-
}
|
|
1424
|
-
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
1425
|
-
var HEAL_SYSTEM_PROMPT = `You are a memory grooming agent. Your job is to review a full dump of facts and recent events to resolve contradictions, downgrade stale claims, and flag obsolete facts for deletion.
|
|
1426
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1427
|
-
{
|
|
1428
|
-
"downgraded": ["string (fact IDs)"],
|
|
1429
|
-
"deleted": ["string (fact IDs)"],
|
|
1430
|
-
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1431
|
-
}
|
|
1432
|
-
Do not return markdown, just raw JSON.`;
|
|
1433
|
-
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
1434
|
-
Return ONLY a valid JSON object matching this schema:
|
|
1435
|
-
{
|
|
1436
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
1437
|
-
}
|
|
1438
|
-
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
1439
|
-
|
|
1440
|
-
// src/utils/cosine.ts
|
|
1441
|
-
function cosineSimilarity(a, b) {
|
|
1442
|
-
let dot = 0, normA = 0, normB = 0;
|
|
1443
|
-
const len = Math.min(a.length, b.length);
|
|
1444
|
-
for (let i = 0; i < len; i++) {
|
|
1445
|
-
dot += a[i] * b[i];
|
|
1446
|
-
normA += a[i] * a[i];
|
|
1447
|
-
normB += b[i] * b[i];
|
|
1448
|
-
}
|
|
1449
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
1450
|
-
return denom === 0 ? 0 : dot / denom;
|
|
1451
|
-
}
|
|
1452
|
-
|
|
1453
|
-
// src/utils/embedding.ts
|
|
1454
|
-
function parseEmbedding(blob, text) {
|
|
1455
|
-
if (blob && blob.byteLength > 0) {
|
|
1456
|
-
if (blob.byteLength % 4 !== 0) return null;
|
|
1457
|
-
const copy = new ArrayBuffer(blob.byteLength);
|
|
1458
|
-
new Uint8Array(copy).set(blob);
|
|
1459
|
-
const vector = new Float32Array(copy);
|
|
1460
|
-
for (const value of vector) {
|
|
1461
|
-
if (!Number.isFinite(value)) return null;
|
|
1462
|
-
}
|
|
1463
|
-
return vector;
|
|
1464
|
-
}
|
|
1465
|
-
if (text) {
|
|
1466
|
-
try {
|
|
1467
|
-
const arr = JSON.parse(text);
|
|
1468
|
-
if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && isFinite(v))) return null;
|
|
1469
|
-
const vector = new Float32Array(arr);
|
|
1470
|
-
for (const value of vector) {
|
|
1471
|
-
if (!Number.isFinite(value)) return null;
|
|
1472
|
-
}
|
|
1473
|
-
return vector;
|
|
1474
|
-
} catch {
|
|
1475
|
-
return null;
|
|
1476
|
-
}
|
|
1477
|
-
}
|
|
1478
|
-
return null;
|
|
1479
|
-
}
|
|
1480
|
-
|
|
1481
|
-
// src/readOptions.ts
|
|
1482
|
-
function normalizeEntityIds(entityId) {
|
|
1483
|
-
const input = Array.isArray(entityId) ? entityId : [entityId];
|
|
1484
|
-
const seen = /* @__PURE__ */ new Set();
|
|
1485
|
-
const normalized = [];
|
|
1486
|
-
for (const id of input) {
|
|
1487
|
-
if (seen.has(id)) continue;
|
|
1488
|
-
seen.add(id);
|
|
1489
|
-
normalized.push(id);
|
|
1490
|
-
}
|
|
1491
|
-
return normalized;
|
|
1492
|
-
}
|
|
1493
|
-
function sanitizeTierWeights(entityIds, tierWeights) {
|
|
1494
|
-
if (tierWeights === void 0) return void 0;
|
|
1495
|
-
const sanitized = /* @__PURE__ */ Object.create(null);
|
|
1496
|
-
for (const entityId of entityIds) {
|
|
1497
|
-
const raw = tierWeights[entityId];
|
|
1498
|
-
if (raw === void 0 || !Number.isFinite(raw)) {
|
|
1499
|
-
sanitized[entityId] = 1;
|
|
1500
|
-
} else {
|
|
1501
|
-
sanitized[entityId] = Math.max(0, raw);
|
|
1502
|
-
}
|
|
1503
|
-
}
|
|
1504
|
-
return sanitized;
|
|
1505
|
-
}
|
|
1506
|
-
function applyTierWeight(score, entityId, sanitizedTierWeights) {
|
|
1507
|
-
const weight = sanitizedTierWeights?.[entityId] ?? 1;
|
|
1508
|
-
if (weight === 0) return -Infinity;
|
|
1509
|
-
return score * weight;
|
|
1510
|
-
}
|
|
1511
|
-
function shouldExposeReadMetadata(entityId) {
|
|
1512
|
-
return Array.isArray(entityId);
|
|
1513
|
-
}
|
|
1514
|
-
|
|
1515
1411
|
// src/WikiMemory.ts
|
|
1516
|
-
var
|
|
1517
|
-
|
|
1518
|
-
const firstBrace = text.indexOf("{");
|
|
1519
|
-
const firstBracket = text.indexOf("[");
|
|
1520
|
-
let start;
|
|
1521
|
-
let openChar;
|
|
1522
|
-
let closeChar;
|
|
1523
|
-
if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
|
|
1524
|
-
start = firstBrace;
|
|
1525
|
-
openChar = "{";
|
|
1526
|
-
closeChar = "}";
|
|
1527
|
-
} else if (firstBracket !== -1) {
|
|
1528
|
-
start = firstBracket;
|
|
1529
|
-
openChar = "[";
|
|
1530
|
-
closeChar = "]";
|
|
1531
|
-
} else {
|
|
1532
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
1533
|
-
}
|
|
1534
|
-
let depth = 0;
|
|
1535
|
-
let inString = false;
|
|
1536
|
-
let escape = false;
|
|
1537
|
-
let end = -1;
|
|
1538
|
-
for (let i = start; i < text.length; i++) {
|
|
1539
|
-
const ch = text[i];
|
|
1540
|
-
if (escape) {
|
|
1541
|
-
escape = false;
|
|
1542
|
-
continue;
|
|
1543
|
-
}
|
|
1544
|
-
if (ch === "\\" && inString) {
|
|
1545
|
-
escape = true;
|
|
1546
|
-
continue;
|
|
1547
|
-
}
|
|
1548
|
-
if (ch === '"') {
|
|
1549
|
-
inString = !inString;
|
|
1550
|
-
continue;
|
|
1551
|
-
}
|
|
1552
|
-
if (inString) continue;
|
|
1553
|
-
if (ch === openChar) {
|
|
1554
|
-
depth++;
|
|
1555
|
-
continue;
|
|
1556
|
-
}
|
|
1557
|
-
if (ch === closeChar) {
|
|
1558
|
-
depth--;
|
|
1559
|
-
if (depth === 0) {
|
|
1560
|
-
end = i;
|
|
1561
|
-
break;
|
|
1562
|
-
}
|
|
1563
|
-
}
|
|
1564
|
-
}
|
|
1565
|
-
if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
|
|
1566
|
-
return JSON.parse(text.slice(start, end + 1));
|
|
1567
|
-
}
|
|
1568
|
-
function safeSlice(value, start, end) {
|
|
1569
|
-
const length = value.length;
|
|
1570
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
1571
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
1572
|
-
if (safeStart > safeEnd) {
|
|
1573
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
1574
|
-
}
|
|
1575
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
1576
|
-
safeStart--;
|
|
1577
|
-
}
|
|
1578
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
1579
|
-
safeEnd--;
|
|
1580
|
-
}
|
|
1581
|
-
return value.slice(safeStart, safeEnd);
|
|
1582
|
-
}
|
|
1583
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
1584
|
-
const text = input.trim();
|
|
1585
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
1586
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
1587
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
1588
|
-
}
|
|
1589
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
1590
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
1591
|
-
}
|
|
1592
|
-
const chunks = [];
|
|
1593
|
-
let truncated = false;
|
|
1594
|
-
let cursor = 0;
|
|
1595
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
1596
|
-
while (cursor < text.length) {
|
|
1597
|
-
const remaining = text.length - cursor;
|
|
1598
|
-
if (remaining <= maxChunkLength) {
|
|
1599
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
1600
|
-
break;
|
|
1601
|
-
}
|
|
1602
|
-
const windowEnd = cursor + maxChunkLength;
|
|
1603
|
-
const minSplit = cursor + halfMax;
|
|
1604
|
-
let splitPoint = -1;
|
|
1605
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
1606
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
1607
|
-
splitPoint = paraIdx + 2;
|
|
1608
|
-
}
|
|
1609
|
-
if (splitPoint === -1) {
|
|
1610
|
-
let lastTerm = -1;
|
|
1611
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
1612
|
-
const ch = text[i];
|
|
1613
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
1614
|
-
lastTerm = i + 2;
|
|
1615
|
-
}
|
|
1616
|
-
}
|
|
1617
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
1618
|
-
}
|
|
1619
|
-
if (splitPoint === -1) {
|
|
1620
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
1621
|
-
if (/\s/.test(text[i])) {
|
|
1622
|
-
splitPoint = i + 1;
|
|
1623
|
-
break;
|
|
1624
|
-
}
|
|
1625
|
-
}
|
|
1626
|
-
}
|
|
1627
|
-
if (splitPoint === -1) {
|
|
1628
|
-
truncated = true;
|
|
1629
|
-
splitPoint = windowEnd;
|
|
1630
|
-
}
|
|
1631
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
1632
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
1633
|
-
cursor = next;
|
|
1634
|
-
}
|
|
1635
|
-
return { chunks, truncated };
|
|
1636
|
-
}
|
|
1637
|
-
async function withConcurrency(tasks, limit) {
|
|
1638
|
-
const results = new Array(tasks.length);
|
|
1639
|
-
let index = 0;
|
|
1640
|
-
let failed = false;
|
|
1641
|
-
let firstError;
|
|
1642
|
-
async function worker() {
|
|
1643
|
-
while (index < tasks.length && !failed) {
|
|
1644
|
-
const i = index++;
|
|
1645
|
-
try {
|
|
1646
|
-
results[i] = await tasks[i]();
|
|
1647
|
-
} catch (e) {
|
|
1648
|
-
if (!failed) {
|
|
1649
|
-
failed = true;
|
|
1650
|
-
firstError = e;
|
|
1651
|
-
}
|
|
1652
|
-
return;
|
|
1653
|
-
}
|
|
1654
|
-
}
|
|
1655
|
-
}
|
|
1656
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
1657
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
1658
|
-
if (failed) throw firstError;
|
|
1659
|
-
return results;
|
|
1660
|
-
}
|
|
1661
|
-
function clip(value, max) {
|
|
1662
|
-
if (typeof value !== "string") return "";
|
|
1663
|
-
const s = value.trim();
|
|
1664
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
1665
|
-
}
|
|
1666
|
-
function validateTags(tags) {
|
|
1667
|
-
if (!Array.isArray(tags)) return [];
|
|
1668
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
1669
|
-
}
|
|
1670
|
-
function validateFact(fact) {
|
|
1671
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
1672
|
-
const title = clip(fact.title, 80);
|
|
1673
|
-
const body = clip(fact.body, 800);
|
|
1674
|
-
if (!title || !body) return null;
|
|
1675
|
-
let confidence = fact.confidence;
|
|
1676
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
1677
|
-
return {
|
|
1678
|
-
...fact,
|
|
1679
|
-
title,
|
|
1680
|
-
body,
|
|
1681
|
-
confidence,
|
|
1682
|
-
tags: validateTags(fact.tags)
|
|
1683
|
-
};
|
|
1684
|
-
}
|
|
1685
|
-
function validateTask(task) {
|
|
1686
|
-
if (typeof task?.description !== "string") return null;
|
|
1687
|
-
const description = clip(task.description, 200);
|
|
1688
|
-
if (!description) return null;
|
|
1689
|
-
let priority = task.priority;
|
|
1690
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
1691
|
-
return {
|
|
1692
|
-
...task,
|
|
1693
|
-
description,
|
|
1694
|
-
priority
|
|
1695
|
-
};
|
|
1696
|
-
}
|
|
1697
|
-
function normalizeSourceRef(value) {
|
|
1698
|
-
if (typeof value !== "string") return null;
|
|
1699
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
1700
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
1701
|
-
}
|
|
1702
|
-
function normalizeSourceHash(value) {
|
|
1703
|
-
if (typeof value !== "string") return null;
|
|
1704
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
1705
|
-
}
|
|
1706
|
-
function titleTokens(title) {
|
|
1707
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
1708
|
-
}
|
|
1709
|
-
function jaccardScore(a, b) {
|
|
1710
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
1711
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
1712
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
1713
|
-
return intersection.size / union.size;
|
|
1714
|
-
}
|
|
1715
|
-
var FUZZY_THRESHOLD = 0.5;
|
|
1716
|
-
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
1717
|
-
var _WikiMemory = class _WikiMemory {
|
|
1412
|
+
var _testAccessNonTestEnvWarned;
|
|
1413
|
+
var WikiMemory = class {
|
|
1718
1414
|
constructor(db, options) {
|
|
1719
|
-
|
|
1720
|
-
this
|
|
1721
|
-
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
1722
|
-
this.miniSearch = new MiniSearch({
|
|
1723
|
-
fields: ["title", "body", "tags"],
|
|
1724
|
-
storeFields: ["entity_id"],
|
|
1725
|
-
searchOptions: {
|
|
1726
|
-
boost: { title: 2 },
|
|
1727
|
-
fuzzy: 0.2,
|
|
1728
|
-
prefix: true
|
|
1729
|
-
}
|
|
1730
|
-
});
|
|
1731
|
-
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
1732
|
-
this.vectorCache = /* @__PURE__ */ new Map();
|
|
1415
|
+
/** Emits `__testAccess` console warning at most once per instance when NODE_ENV ≠ "test". */
|
|
1416
|
+
__privateAdd(this, _testAccessNonTestEnvWarned, false);
|
|
1733
1417
|
this.db = db;
|
|
1734
1418
|
this.options = options;
|
|
1735
1419
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
1736
|
-
this.outboxRepo = new OutboxRepository(db, this.prefix);
|
|
1420
|
+
this.outboxRepo = new OutboxRepository(db, this.prefix, !!options.config?.enableOutbox);
|
|
1737
1421
|
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
1738
1422
|
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
1739
1423
|
this.eventRepo = new EventRepository(db, this.prefix);
|
|
1740
1424
|
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
this.
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
this.
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
this.
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
`[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
|
|
1794
|
-
);
|
|
1795
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(dim), this.db);
|
|
1796
|
-
}
|
|
1797
|
-
} else {
|
|
1798
|
-
await this.metadataRepo.setMeta("embedding_dimension", String(dim), this.db);
|
|
1799
|
-
}
|
|
1800
|
-
}
|
|
1801
|
-
/**
|
|
1802
|
-
* After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
|
|
1803
|
-
* value to the canonical `embedding_dimension` key and clear the mismatch flag.
|
|
1804
|
-
* This ensures future read() calls use embedding-based retrieval rather than staying
|
|
1805
|
-
* stuck on the MiniSearch fallback.
|
|
1806
|
-
*/
|
|
1807
|
-
async _reconcileEmbeddingDimension() {
|
|
1808
|
-
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
1809
|
-
if (!mismatchValue) return;
|
|
1810
|
-
const newDim = parseInt(mismatchValue, 10);
|
|
1811
|
-
const residualCount = await this.entryRepo.countStaleEmbeddings(newDim);
|
|
1812
|
-
if (residualCount === 0) {
|
|
1813
|
-
await this.metadataRepo.setMeta("embedding_dimension", mismatchValue, this.db);
|
|
1814
|
-
await this.metadataRepo.clearDimensionMismatch(this.db);
|
|
1815
|
-
}
|
|
1816
|
-
}
|
|
1817
|
-
async embedFact(fact) {
|
|
1818
|
-
const embedFn = this.options.llmProvider.embed;
|
|
1819
|
-
if (!embedFn) return false;
|
|
1820
|
-
let tagsStr;
|
|
1821
|
-
if (Array.isArray(fact.tags)) {
|
|
1822
|
-
tagsStr = fact.tags.join(" ");
|
|
1823
|
-
} else {
|
|
1824
|
-
try {
|
|
1825
|
-
const parsed = JSON.parse(fact.tags);
|
|
1826
|
-
tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
|
|
1827
|
-
} catch {
|
|
1828
|
-
tagsStr = fact.tags;
|
|
1829
|
-
}
|
|
1830
|
-
}
|
|
1831
|
-
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
1832
|
-
try {
|
|
1833
|
-
const vector = await embedFn(text);
|
|
1834
|
-
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
1835
|
-
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
1836
|
-
return false;
|
|
1837
|
-
}
|
|
1838
|
-
const float32Vector = new Float32Array(vector);
|
|
1839
|
-
let hasNonFinite = false;
|
|
1840
|
-
for (let i = 0; i < float32Vector.length; i++) {
|
|
1841
|
-
if (!isFinite(float32Vector[i])) {
|
|
1842
|
-
hasNonFinite = true;
|
|
1843
|
-
break;
|
|
1844
|
-
}
|
|
1845
|
-
}
|
|
1846
|
-
if (hasNonFinite) {
|
|
1847
|
-
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
1848
|
-
return false;
|
|
1849
|
-
}
|
|
1850
|
-
await this.storeEmbeddingDimension(float32Vector.length);
|
|
1851
|
-
const blob = new Uint8Array(float32Vector.buffer);
|
|
1852
|
-
await this.entryRepo.updateEmbeddingBlob(fact.id, blob);
|
|
1853
|
-
try {
|
|
1854
|
-
await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
|
|
1855
|
-
} catch (hookErr) {
|
|
1856
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
|
|
1857
|
-
}
|
|
1858
|
-
return true;
|
|
1859
|
-
} catch (err) {
|
|
1860
|
-
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
1861
|
-
return false;
|
|
1862
|
-
}
|
|
1863
|
-
}
|
|
1864
|
-
_librarianKey(entityId) {
|
|
1865
|
-
return `${this.prefix}:${entityId}:librarian`;
|
|
1866
|
-
}
|
|
1867
|
-
_healKey(entityId) {
|
|
1868
|
-
return `${this.prefix}:${entityId}:heal`;
|
|
1869
|
-
}
|
|
1870
|
-
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
1871
|
-
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
1872
|
-
}
|
|
1873
|
-
/** Maps pre-rename enum strings from older dumps to current source_type values. */
|
|
1874
|
-
_normalizeImportedSourceType(raw, ctx) {
|
|
1875
|
-
if (raw === "user_document") return "immutable_document";
|
|
1876
|
-
if (raw === "agent_inferred") return "librarian_inferred";
|
|
1877
|
-
const allowed = ["user_stated", "librarian_inferred", "user_confirmed", "immutable_document"];
|
|
1878
|
-
if (allowed.includes(raw)) return raw;
|
|
1879
|
-
const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
|
|
1880
|
-
throw new Error(
|
|
1881
|
-
`importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
|
|
1425
|
+
this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
|
|
1426
|
+
this.searchService = new SearchService(this.entryRepo);
|
|
1427
|
+
this.jobManager = new JobManager(this.prefix);
|
|
1428
|
+
this.promptService = new PromptService(options.config?.prompts);
|
|
1429
|
+
this.ingestionService = new IngestionService(
|
|
1430
|
+
this.db,
|
|
1431
|
+
this.prefix,
|
|
1432
|
+
this.options,
|
|
1433
|
+
this.entryRepo,
|
|
1434
|
+
this.searchService,
|
|
1435
|
+
this.jobManager,
|
|
1436
|
+
this.embeddingService,
|
|
1437
|
+
this.promptService
|
|
1438
|
+
);
|
|
1439
|
+
this.maintenanceService = new MaintenanceService(
|
|
1440
|
+
this.db,
|
|
1441
|
+
this.prefix,
|
|
1442
|
+
this.options,
|
|
1443
|
+
this.entryRepo,
|
|
1444
|
+
this.taskRepo,
|
|
1445
|
+
this.eventRepo,
|
|
1446
|
+
this.metadataRepo,
|
|
1447
|
+
this.searchService,
|
|
1448
|
+
this.jobManager,
|
|
1449
|
+
this.embeddingService,
|
|
1450
|
+
this.promptService
|
|
1451
|
+
);
|
|
1452
|
+
this.importExportService = new ImportExportService(
|
|
1453
|
+
this.db,
|
|
1454
|
+
this.entryRepo,
|
|
1455
|
+
this.taskRepo,
|
|
1456
|
+
this.eventRepo,
|
|
1457
|
+
this.metadataRepo,
|
|
1458
|
+
this.searchService,
|
|
1459
|
+
this.jobManager,
|
|
1460
|
+
this.embeddingService
|
|
1461
|
+
);
|
|
1462
|
+
this.retrievalService = new RetrievalService(
|
|
1463
|
+
this.options,
|
|
1464
|
+
this.entryRepo,
|
|
1465
|
+
this.taskRepo,
|
|
1466
|
+
this.eventRepo,
|
|
1467
|
+
this.metadataRepo,
|
|
1468
|
+
this.searchService
|
|
1469
|
+
);
|
|
1470
|
+
this.writeService = new WriteService(
|
|
1471
|
+
this.db,
|
|
1472
|
+
this.options,
|
|
1473
|
+
this.eventRepo,
|
|
1474
|
+
this.metadataRepo,
|
|
1475
|
+
this.jobManager,
|
|
1476
|
+
this.maintenanceService
|
|
1882
1477
|
);
|
|
1883
1478
|
}
|
|
1884
|
-
async assertNoLegacySourceTypes() {
|
|
1885
|
-
if (!await this.entryRepo.hasLegacySourceTypes()) return;
|
|
1886
|
-
const count = await this.entryRepo.countLegacySourceTypes();
|
|
1887
|
-
throw new Error(
|
|
1888
|
-
`Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
|
|
1889
|
-
|
|
1890
|
-
${this.entryRepo.getLegacyMigrationSQL()}
|
|
1891
|
-
|
|
1892
|
-
After running the migration SQL, restart your application.`
|
|
1893
|
-
);
|
|
1894
|
-
}
|
|
1895
|
-
async _notifyEmbeddingPersisted(entityId, factId, vector) {
|
|
1896
|
-
if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
|
|
1897
|
-
const vectorCopy = vector ? vector.slice() : null;
|
|
1898
|
-
await this.options.vectorRanker.onEmbeddingPersisted({
|
|
1899
|
-
entityId,
|
|
1900
|
-
factId,
|
|
1901
|
-
vector: vectorCopy
|
|
1902
|
-
});
|
|
1903
|
-
}
|
|
1904
1479
|
/**
|
|
1905
|
-
*
|
|
1906
|
-
*
|
|
1907
|
-
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
1480
|
+
* Explicit escape hatch for test suites: typed access to composed services for mocks/spies.
|
|
1481
|
+
* If `NODE_ENV` is not `"test"`, emits a single `console.warn` per instance (skipped when `process` is undefined).
|
|
1908
1482
|
*/
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
if (
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
|
|
1915
|
-
throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
|
|
1916
|
-
}
|
|
1917
|
-
const timeoutMs = rawTimeout;
|
|
1918
|
-
let timeoutHandle;
|
|
1919
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
1920
|
-
timeoutHandle = setTimeout(
|
|
1921
|
-
() => {
|
|
1922
|
-
const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
|
|
1923
|
-
timeoutError[HOOK_TIMEOUT_MARKER] = true;
|
|
1924
|
-
reject(timeoutError);
|
|
1925
|
-
},
|
|
1926
|
-
timeoutMs
|
|
1927
|
-
);
|
|
1928
|
-
});
|
|
1929
|
-
const hookPromise = Promise.resolve(
|
|
1930
|
-
this.options.vectorRanker.onEmbeddingPersisted({
|
|
1931
|
-
entityId,
|
|
1932
|
-
factId,
|
|
1933
|
-
vector: vectorCopy
|
|
1934
|
-
})
|
|
1935
|
-
);
|
|
1936
|
-
try {
|
|
1937
|
-
await Promise.race([hookPromise, timeoutPromise]);
|
|
1938
|
-
} catch (err) {
|
|
1939
|
-
hookPromise.catch(() => {
|
|
1940
|
-
});
|
|
1941
|
-
throw err;
|
|
1942
|
-
} finally {
|
|
1943
|
-
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
1483
|
+
get __testAccess() {
|
|
1484
|
+
const processEnv = typeof globalThis !== "undefined" ? globalThis.process?.env : void 0;
|
|
1485
|
+
if (processEnv !== void 0 && processEnv.NODE_ENV !== "test" && !__privateGet(this, _testAccessNonTestEnvWarned)) {
|
|
1486
|
+
__privateSet(this, _testAccessNonTestEnvWarned, true);
|
|
1487
|
+
console.warn('Warning: WikiMemory.__testAccess is intended for tests (NODE_ENV !== "test").');
|
|
1944
1488
|
}
|
|
1489
|
+
return {
|
|
1490
|
+
embeddingService: this.embeddingService,
|
|
1491
|
+
importExportService: this.importExportService,
|
|
1492
|
+
ingestionService: this.ingestionService,
|
|
1493
|
+
maintenanceService: this.maintenanceService,
|
|
1494
|
+
retrievalService: this.retrievalService,
|
|
1495
|
+
searchService: this.searchService,
|
|
1496
|
+
writeService: this.writeService,
|
|
1497
|
+
promptService: this.promptService,
|
|
1498
|
+
entryRepo: this.entryRepo,
|
|
1499
|
+
metadataRepo: this.metadataRepo,
|
|
1500
|
+
jobManager: this.jobManager
|
|
1501
|
+
};
|
|
1945
1502
|
}
|
|
1946
1503
|
async setup() {
|
|
1947
1504
|
const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
|
|
@@ -1975,7 +1532,7 @@ After running the migration SQL, restart your application.`
|
|
|
1975
1532
|
}
|
|
1976
1533
|
}
|
|
1977
1534
|
if (entriesExistedBeforeSetup) {
|
|
1978
|
-
await this.assertNoLegacySourceTypes();
|
|
1535
|
+
await this.importExportService.assertNoLegacySourceTypes();
|
|
1979
1536
|
}
|
|
1980
1537
|
const rows = await this.entryRepo.findRowsForSourceRefMigration();
|
|
1981
1538
|
await this.db.withTransactionAsync(async (tx) => {
|
|
@@ -1986,7 +1543,7 @@ After running the migration SQL, restart your application.`
|
|
|
1986
1543
|
}
|
|
1987
1544
|
}
|
|
1988
1545
|
});
|
|
1989
|
-
await this.
|
|
1546
|
+
await this.searchService.sync();
|
|
1990
1547
|
}
|
|
1991
1548
|
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
1992
1549
|
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
@@ -2002,1720 +1559,89 @@ After running the migration SQL, restart your application.`
|
|
|
2002
1559
|
const normalizedStoredHash = normalizeSourceHash(storedHash);
|
|
2003
1560
|
return normalizedStoredHash !== normalizedHash;
|
|
2004
1561
|
}
|
|
2005
|
-
_pruneKey(entityId) {
|
|
2006
|
-
return `${this.prefix}:${entityId}:prune`;
|
|
2007
|
-
}
|
|
2008
|
-
_reembedKey(entityId) {
|
|
2009
|
-
return `${this.prefix}:${entityId}:reembed`;
|
|
2010
|
-
}
|
|
2011
|
-
_globalReembedKey() {
|
|
2012
|
-
return `${this.prefix}:reembed`;
|
|
2013
|
-
}
|
|
2014
|
-
_importKey(entityId) {
|
|
2015
|
-
return `${this.prefix}:${entityId}:import`;
|
|
2016
|
-
}
|
|
2017
|
-
_globalImportKey() {
|
|
2018
|
-
return `${this.prefix}:import`;
|
|
2019
|
-
}
|
|
2020
|
-
_forgetKey(entityId) {
|
|
2021
|
-
return `${this.prefix}:${entityId}:forget`;
|
|
2022
|
-
}
|
|
2023
|
-
_isReembedActive(entityId) {
|
|
2024
|
-
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
2025
|
-
}
|
|
2026
|
-
_isImportActiveFor(entityId) {
|
|
2027
|
-
return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
|
|
2028
|
-
}
|
|
2029
|
-
_isForgetActiveFor(entityId) {
|
|
2030
|
-
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
2031
|
-
}
|
|
2032
|
-
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
2033
|
-
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
2034
|
-
const entityKeyPrefix = `${this.prefix}:`;
|
|
2035
|
-
for (const k of this.activeMaintenanceJobs) {
|
|
2036
|
-
if (k.startsWith(entityKeyPrefix) && k.endsWith(suffix)) return true;
|
|
2037
|
-
}
|
|
2038
|
-
return false;
|
|
2039
|
-
}
|
|
2040
|
-
/** Returns true if any ingest job is active for the given entity. */
|
|
2041
|
-
_isIngestActiveFor(entityId) {
|
|
2042
|
-
const entityKeyPrefix = `${this.prefix}:${entityId}:`;
|
|
2043
|
-
for (const k of this.activeIngestJobs) {
|
|
2044
|
-
if (k.startsWith(entityKeyPrefix)) return true;
|
|
2045
|
-
}
|
|
2046
|
-
return false;
|
|
2047
|
-
}
|
|
2048
|
-
_copyEntityStatus(s) {
|
|
2049
|
-
return { ingesting: s.ingesting, librarian: s.librarian, heal: s.heal };
|
|
2050
|
-
}
|
|
2051
|
-
_notifyStatusSubscribers(entityId) {
|
|
2052
|
-
const set = this.statusSubscribers.get(entityId);
|
|
2053
|
-
if (!set || set.size === 0) return;
|
|
2054
|
-
for (const entry of Array.from(set)) {
|
|
2055
|
-
if (!set.has(entry)) continue;
|
|
2056
|
-
const next = this.getEntityStatus(entityId);
|
|
2057
|
-
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) continue;
|
|
2058
|
-
entry.last = this._copyEntityStatus(next);
|
|
2059
|
-
try {
|
|
2060
|
-
entry.callback(this._copyEntityStatus(next));
|
|
2061
|
-
} catch (err) {
|
|
2062
|
-
console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during transition emission`, err);
|
|
2063
|
-
}
|
|
2064
|
-
}
|
|
2065
|
-
}
|
|
2066
|
-
_validatePruneDuration(value, name) {
|
|
2067
|
-
if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
|
|
2068
|
-
throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
|
|
2069
|
-
}
|
|
2070
|
-
}
|
|
2071
1562
|
async runPrune(entityId, options) {
|
|
2072
|
-
|
|
2073
|
-
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
2074
|
-
let isIngestRunning = false;
|
|
2075
|
-
for (const k of this.activeIngestJobs) {
|
|
2076
|
-
if (k.startsWith(ingestPrefix)) {
|
|
2077
|
-
isIngestRunning = true;
|
|
2078
|
-
break;
|
|
2079
|
-
}
|
|
2080
|
-
}
|
|
2081
|
-
let blockingOperation = null;
|
|
2082
|
-
if (this.activeMaintenanceJobs.has(pruneKey)) {
|
|
2083
|
-
blockingOperation = "prune";
|
|
2084
|
-
} else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
2085
|
-
blockingOperation = "librarian";
|
|
2086
|
-
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
2087
|
-
blockingOperation = "heal";
|
|
2088
|
-
} else if (this._isReembedActive(entityId)) {
|
|
2089
|
-
blockingOperation = "reembed";
|
|
2090
|
-
} else if (isIngestRunning) {
|
|
2091
|
-
blockingOperation = "ingest";
|
|
2092
|
-
} else if (this._isImportActiveFor(entityId)) {
|
|
2093
|
-
blockingOperation = "import";
|
|
2094
|
-
} else if (this._isForgetActiveFor(entityId)) {
|
|
2095
|
-
blockingOperation = "forget";
|
|
2096
|
-
}
|
|
2097
|
-
if (blockingOperation !== null) {
|
|
2098
|
-
throw new WikiBusyError(blockingOperation, entityId);
|
|
2099
|
-
}
|
|
2100
|
-
this.activeMaintenanceJobs.add(pruneKey);
|
|
2101
|
-
try {
|
|
2102
|
-
const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
|
|
2103
|
-
const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
|
|
2104
|
-
const vacuum = options?.vacuum ?? false;
|
|
2105
|
-
this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
|
|
2106
|
-
this._validatePruneDuration(retainEventsFor, "retainEventsFor");
|
|
2107
|
-
const now = Date.now();
|
|
2108
|
-
let deletedEntries = 0;
|
|
2109
|
-
let deletedTasks = 0;
|
|
2110
|
-
let deletedEvents = 0;
|
|
2111
|
-
if (retainSoftDeletedFor !== null) {
|
|
2112
|
-
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
2113
|
-
const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
|
|
2114
|
-
const succeeded = [];
|
|
2115
|
-
let failure = null;
|
|
2116
|
-
for (const row of entriesToDelete) {
|
|
2117
|
-
try {
|
|
2118
|
-
await this._notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
2119
|
-
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
2120
|
-
} catch (err) {
|
|
2121
|
-
failure = { factId: row.id, cause: err };
|
|
2122
|
-
break;
|
|
2123
|
-
}
|
|
2124
|
-
}
|
|
2125
|
-
const succeededIds = succeeded.map((r) => r.id);
|
|
2126
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2127
|
-
if (succeededIds.length > 0) {
|
|
2128
|
-
deletedEntries = await this.entryRepo.bulkDeletePruned(entityId, cutoff, succeededIds, tx);
|
|
2129
|
-
}
|
|
2130
|
-
deletedTasks = await this.taskRepo.bulkDeletePruned(entityId, cutoff, tx);
|
|
2131
|
-
});
|
|
2132
|
-
if (failure) {
|
|
2133
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2134
|
-
this.vectorCache.delete(entityId);
|
|
2135
|
-
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
2136
|
-
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2137
|
-
if (isTimeout) {
|
|
2138
|
-
throw new PrunePartialFailureError(
|
|
2139
|
-
succeeded.length,
|
|
2140
|
-
failure.factId,
|
|
2141
|
-
remaining,
|
|
2142
|
-
new Error("Deletion hook timed out"),
|
|
2143
|
-
deletedTasks,
|
|
2144
|
-
0
|
|
2145
|
-
// events not yet deleted at this point
|
|
2146
|
-
);
|
|
2147
|
-
}
|
|
2148
|
-
const errMsg = failure.cause?.message ?? "";
|
|
2149
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2150
|
-
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
2151
|
-
throw new PrunePartialFailureError(
|
|
2152
|
-
succeeded.length,
|
|
2153
|
-
failure.factId,
|
|
2154
|
-
remaining,
|
|
2155
|
-
sanitizedCause,
|
|
2156
|
-
deletedTasks,
|
|
2157
|
-
0
|
|
2158
|
-
// events not yet deleted at this point
|
|
2159
|
-
);
|
|
2160
|
-
}
|
|
2161
|
-
}
|
|
2162
|
-
if (retainEventsFor !== null) {
|
|
2163
|
-
const cutoff = now - retainEventsFor * 864e5;
|
|
2164
|
-
const eventResult = await this.eventRepo.prune(entityId, cutoff);
|
|
2165
|
-
deletedEvents = eventResult.changes;
|
|
2166
|
-
}
|
|
2167
|
-
if (vacuum) {
|
|
2168
|
-
await this.metadataRepo.vacuum();
|
|
2169
|
-
}
|
|
2170
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2171
|
-
this.vectorCache.delete(entityId);
|
|
2172
|
-
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
2173
|
-
} finally {
|
|
2174
|
-
this.activeMaintenanceJobs.delete(pruneKey);
|
|
2175
|
-
}
|
|
1563
|
+
return this.maintenanceService.runPrune(entityId, options);
|
|
2176
1564
|
}
|
|
2177
1565
|
async read(entityId, query, options) {
|
|
2178
|
-
|
|
2179
|
-
const entityIds = normalizeEntityIds(entityId);
|
|
2180
|
-
const sanitizedTierWeights = shouldExposeReadMetadata(entityId) ? sanitizeTierWeights(entityIds, options?.tierWeights) : void 0;
|
|
2181
|
-
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
2182
|
-
if (entityIds.length === 0) {
|
|
2183
|
-
const empty = { facts: [], tasks: [], events: [] };
|
|
2184
|
-
if (exposeMetadata) {
|
|
2185
|
-
empty.metadata = { query, entityIds: [] };
|
|
2186
|
-
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
2187
|
-
}
|
|
2188
|
-
return empty;
|
|
2189
|
-
}
|
|
2190
|
-
const MAX_ENTITY_IDS = 100;
|
|
2191
|
-
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
2192
|
-
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
2193
|
-
}
|
|
2194
|
-
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
2195
|
-
if (nullByteId !== void 0) {
|
|
2196
|
-
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
2197
|
-
}
|
|
2198
|
-
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
2199
|
-
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
2200
|
-
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
2201
|
-
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
2202
|
-
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
2203
|
-
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
2204
|
-
const skipEmbed = weight === 0;
|
|
2205
|
-
const embedFn = this.options.llmProvider.embed;
|
|
2206
|
-
const trimmedQuery = query.trim();
|
|
2207
|
-
let facts = [];
|
|
2208
|
-
let scoreByFactId;
|
|
2209
|
-
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
2210
|
-
let usedEmbed = false;
|
|
2211
|
-
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
2212
|
-
if (scoredEntityIds.length === 0) {
|
|
2213
|
-
usedEmbed = true;
|
|
2214
|
-
} else if (!skipEmbed && embedFn) {
|
|
2215
|
-
let rankerShouldRethrow = false;
|
|
2216
|
-
let pendingRankerFallbackError;
|
|
2217
|
-
try {
|
|
2218
|
-
const queryVec = await embedFn(trimmedQuery);
|
|
2219
|
-
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
2220
|
-
throw new Error(
|
|
2221
|
-
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
2222
|
-
);
|
|
2223
|
-
}
|
|
2224
|
-
const storedDimValue = await this.metadataRepo.getMeta("embedding_dimension");
|
|
2225
|
-
if (storedDimValue) {
|
|
2226
|
-
const storedDim = parseInt(storedDimValue, 10);
|
|
2227
|
-
if (storedDim !== queryVec.length) {
|
|
2228
|
-
throw new Error(
|
|
2229
|
-
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
2230
|
-
);
|
|
2231
|
-
}
|
|
2232
|
-
}
|
|
2233
|
-
const mismatchedCount = await this.entryRepo.countDimensionMismatched(scoredEntityIds, queryVec.length);
|
|
2234
|
-
if (mismatchedCount > 0) {
|
|
2235
|
-
throw new Error(
|
|
2236
|
-
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
2237
|
-
);
|
|
2238
|
-
}
|
|
2239
|
-
const useRanker = Boolean(this.options.vectorRanker);
|
|
2240
|
-
let candidateRows;
|
|
2241
|
-
let populateCache = entityIds.length === 1;
|
|
2242
|
-
let miniSearchScores;
|
|
2243
|
-
if (effectivePreFilterLimit !== void 0) {
|
|
2244
|
-
populateCache = false;
|
|
2245
|
-
const entityIdSet = new Set(scoredEntityIds);
|
|
2246
|
-
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
2247
|
-
filter: (r) => entityIdSet.has(r.entity_id),
|
|
2248
|
-
combineWith: "OR"
|
|
2249
|
-
});
|
|
2250
|
-
if (preResults.length === 0) {
|
|
2251
|
-
candidateRows = null;
|
|
2252
|
-
} else {
|
|
2253
|
-
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
2254
|
-
if (topKResults.length === 0) {
|
|
2255
|
-
candidateRows = null;
|
|
2256
|
-
} else {
|
|
2257
|
-
const topKIds = topKResults.map((r) => r.id);
|
|
2258
|
-
if (useRanker) {
|
|
2259
|
-
candidateRows = await this.entryRepo.findMetadataByIds(topKIds);
|
|
2260
|
-
} else {
|
|
2261
|
-
candidateRows = await this.entryRepo.findWithEmbeddingsByIds(topKIds);
|
|
2262
|
-
}
|
|
2263
|
-
if (weight !== void 0 && weight < 1) {
|
|
2264
|
-
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
2265
|
-
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
2266
|
-
}
|
|
2267
|
-
}
|
|
2268
|
-
}
|
|
2269
|
-
} else {
|
|
2270
|
-
if (useRanker) {
|
|
2271
|
-
candidateRows = await this.entryRepo.findMetadataByEntityIds(scoredEntityIds);
|
|
2272
|
-
} else {
|
|
2273
|
-
candidateRows = await this.entryRepo.findWithEmbeddingsByEntityIds(scoredEntityIds);
|
|
2274
|
-
}
|
|
2275
|
-
if (weight !== void 0 && weight < 1) {
|
|
2276
|
-
const entityIdSet = new Set(scoredEntityIds);
|
|
2277
|
-
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
2278
|
-
filter: (r) => entityIdSet.has(r.entity_id),
|
|
2279
|
-
combineWith: "OR"
|
|
2280
|
-
});
|
|
2281
|
-
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
2282
|
-
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
2283
|
-
}
|
|
2284
|
-
}
|
|
2285
|
-
if (candidateRows === null) {
|
|
2286
|
-
usedEmbed = true;
|
|
2287
|
-
} else {
|
|
2288
|
-
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
2289
|
-
let scored;
|
|
2290
|
-
if (useRanker) {
|
|
2291
|
-
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
2292
|
-
for (const row of candidateRows) {
|
|
2293
|
-
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
2294
|
-
rows.push(row);
|
|
2295
|
-
candidateRowsByEntity.set(row.entity_id, rows);
|
|
2296
|
-
}
|
|
2297
|
-
try {
|
|
2298
|
-
const rankerResultsByEntity = await Promise.all(
|
|
2299
|
-
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
2300
|
-
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
2301
|
-
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
2302
|
-
const ranked = await this._rankWithVectorRanker({
|
|
2303
|
-
entityId: scopedEntityId,
|
|
2304
|
-
queryVec,
|
|
2305
|
-
candidateIds,
|
|
2306
|
-
candidateRows: rowsForEntity,
|
|
2307
|
-
weight,
|
|
2308
|
-
miniSearchScores,
|
|
2309
|
-
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
2310
|
-
});
|
|
2311
|
-
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
2312
|
-
})
|
|
2313
|
-
);
|
|
2314
|
-
scored = rankerResultsByEntity.flat();
|
|
2315
|
-
const scoredIds = new Set(scored.map((s) => s.id));
|
|
2316
|
-
const metadataById = new Map(
|
|
2317
|
-
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
2318
|
-
);
|
|
2319
|
-
scored = scored.map((row) => {
|
|
2320
|
-
const metadata = metadataById.get(row.id);
|
|
2321
|
-
return {
|
|
2322
|
-
...row,
|
|
2323
|
-
updated_at: metadata?.updated_at ?? null,
|
|
2324
|
-
access_count: metadata?.access_count ?? null
|
|
2325
|
-
};
|
|
2326
|
-
});
|
|
2327
|
-
const isHybrid = weight !== void 0 && weight < 1;
|
|
2328
|
-
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
2329
|
-
if (maxBackfill > 0) {
|
|
2330
|
-
if (isHybrid) {
|
|
2331
|
-
const topK = [];
|
|
2332
|
-
for (const row of candidateRows) {
|
|
2333
|
-
if (scoredIds.has(row.id)) continue;
|
|
2334
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
2335
|
-
const candidate = { row, kwScore };
|
|
2336
|
-
if (topK.length < maxBackfill) {
|
|
2337
|
-
let insertIdx = topK.length;
|
|
2338
|
-
for (let i = 0; i < topK.length; i++) {
|
|
2339
|
-
const cmp = this._compareScoredRows(
|
|
2340
|
-
{
|
|
2341
|
-
id: candidate.row.id,
|
|
2342
|
-
score: candidate.kwScore,
|
|
2343
|
-
updated_at: candidate.row.updated_at,
|
|
2344
|
-
access_count: candidate.row.access_count
|
|
2345
|
-
},
|
|
2346
|
-
{
|
|
2347
|
-
id: topK[i].row.id,
|
|
2348
|
-
score: topK[i].kwScore,
|
|
2349
|
-
updated_at: topK[i].row.updated_at,
|
|
2350
|
-
access_count: topK[i].row.access_count
|
|
2351
|
-
}
|
|
2352
|
-
);
|
|
2353
|
-
if (cmp < 0) {
|
|
2354
|
-
insertIdx = i;
|
|
2355
|
-
break;
|
|
2356
|
-
}
|
|
2357
|
-
}
|
|
2358
|
-
topK.splice(insertIdx, 0, candidate);
|
|
2359
|
-
} else {
|
|
2360
|
-
const cmpWorst = this._compareScoredRows(
|
|
2361
|
-
{
|
|
2362
|
-
id: candidate.row.id,
|
|
2363
|
-
score: candidate.kwScore,
|
|
2364
|
-
updated_at: candidate.row.updated_at,
|
|
2365
|
-
access_count: candidate.row.access_count
|
|
2366
|
-
},
|
|
2367
|
-
{
|
|
2368
|
-
id: topK[maxBackfill - 1].row.id,
|
|
2369
|
-
score: topK[maxBackfill - 1].kwScore,
|
|
2370
|
-
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
2371
|
-
access_count: topK[maxBackfill - 1].row.access_count
|
|
2372
|
-
}
|
|
2373
|
-
);
|
|
2374
|
-
if (cmpWorst < 0) {
|
|
2375
|
-
let insertIdx = maxBackfill - 1;
|
|
2376
|
-
for (let i = 0; i < topK.length; i++) {
|
|
2377
|
-
const cmp = this._compareScoredRows(
|
|
2378
|
-
{
|
|
2379
|
-
id: candidate.row.id,
|
|
2380
|
-
score: candidate.kwScore,
|
|
2381
|
-
updated_at: candidate.row.updated_at,
|
|
2382
|
-
access_count: candidate.row.access_count
|
|
2383
|
-
},
|
|
2384
|
-
{
|
|
2385
|
-
id: topK[i].row.id,
|
|
2386
|
-
score: topK[i].kwScore,
|
|
2387
|
-
updated_at: topK[i].row.updated_at,
|
|
2388
|
-
access_count: topK[i].row.access_count
|
|
2389
|
-
}
|
|
2390
|
-
);
|
|
2391
|
-
if (cmp < 0) {
|
|
2392
|
-
insertIdx = i;
|
|
2393
|
-
break;
|
|
2394
|
-
}
|
|
2395
|
-
}
|
|
2396
|
-
topK.splice(insertIdx, 0, candidate);
|
|
2397
|
-
topK.pop();
|
|
2398
|
-
}
|
|
2399
|
-
}
|
|
2400
|
-
}
|
|
2401
|
-
for (const { row, kwScore } of topK) {
|
|
2402
|
-
scored.push({
|
|
2403
|
-
id: row.id,
|
|
2404
|
-
entity_id: row.entity_id,
|
|
2405
|
-
score: (1 - weight) * kwScore,
|
|
2406
|
-
updated_at: row.updated_at,
|
|
2407
|
-
access_count: row.access_count
|
|
2408
|
-
});
|
|
2409
|
-
}
|
|
2410
|
-
} else {
|
|
2411
|
-
const omitted = [];
|
|
2412
|
-
for (const row of candidateRows) {
|
|
2413
|
-
if (scoredIds.has(row.id)) continue;
|
|
2414
|
-
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
2415
|
-
}
|
|
2416
|
-
if (omitted.length > 0) {
|
|
2417
|
-
this._tieBreakSort(omitted);
|
|
2418
|
-
scored.push(...omitted.slice(0, maxBackfill));
|
|
2419
|
-
}
|
|
2420
|
-
}
|
|
2421
|
-
}
|
|
2422
|
-
} catch (rankerErr) {
|
|
2423
|
-
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
2424
|
-
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
2425
|
-
this.options.onVectorRankerFallback?.({
|
|
2426
|
-
error: this._sanitizeRankerError(rankerError),
|
|
2427
|
-
policy
|
|
2428
|
-
});
|
|
2429
|
-
if (policy === "throw") {
|
|
2430
|
-
rankerShouldRethrow = true;
|
|
2431
|
-
throw rankerError;
|
|
2432
|
-
} else if (policy === "js-cosine") {
|
|
2433
|
-
let fallbackRows = candidateRows;
|
|
2434
|
-
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
2435
|
-
const rowIds = fallbackRows.map((r) => r.id);
|
|
2436
|
-
const embeddingRows = await this.entryRepo.findEmbeddingsByIds(rowIds);
|
|
2437
|
-
const embeddingsMap = new Map(embeddingRows.map((row) => [row.id, row]));
|
|
2438
|
-
fallbackRows = fallbackRows.map((r) => ({
|
|
2439
|
-
...r,
|
|
2440
|
-
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
2441
|
-
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
2442
|
-
}));
|
|
2443
|
-
}
|
|
2444
|
-
scored = await this._rankWithJsCosine({
|
|
2445
|
-
entityId: entityCacheKey,
|
|
2446
|
-
queryVec,
|
|
2447
|
-
candidateRows: fallbackRows,
|
|
2448
|
-
weight,
|
|
2449
|
-
miniSearchScores,
|
|
2450
|
-
populateCache,
|
|
2451
|
-
limit: fallbackRows.length,
|
|
2452
|
-
skipSort: true
|
|
2453
|
-
// read() re-sorts after applying tier weights
|
|
2454
|
-
});
|
|
2455
|
-
} else if (policy === "keyword") {
|
|
2456
|
-
const scoredEntityIdSet = new Set(scoredEntityIds);
|
|
2457
|
-
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
2458
|
-
filter: (r) => scoredEntityIdSet.has(r.entity_id),
|
|
2459
|
-
combineWith: "OR"
|
|
2460
|
-
});
|
|
2461
|
-
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
2462
|
-
const topResults = msResults.slice(0, keywordOversampledLimit);
|
|
2463
|
-
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
2464
|
-
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
2465
|
-
scored = topResults.map((result) => {
|
|
2466
|
-
const metadata = candidateMap.get(result.id);
|
|
2467
|
-
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
2468
|
-
return {
|
|
2469
|
-
id: result.id,
|
|
2470
|
-
entity_id: entityForScore,
|
|
2471
|
-
score: result.score ?? 0,
|
|
2472
|
-
access_count: metadata?.access_count ?? null,
|
|
2473
|
-
updated_at: metadata?.updated_at ?? null
|
|
2474
|
-
};
|
|
2475
|
-
});
|
|
2476
|
-
} else {
|
|
2477
|
-
scored = [];
|
|
2478
|
-
}
|
|
2479
|
-
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
2480
|
-
const mirrored = new Error("Vector ranker failed, falling back", {
|
|
2481
|
-
cause: this._sanitizeRankerError(rankerErr)
|
|
2482
|
-
});
|
|
2483
|
-
pendingRankerFallbackError = mirrored;
|
|
2484
|
-
}
|
|
2485
|
-
}
|
|
2486
|
-
} else {
|
|
2487
|
-
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
2488
|
-
scored = await this._rankWithJsCosine({
|
|
2489
|
-
entityId: entityCacheKey,
|
|
2490
|
-
queryVec,
|
|
2491
|
-
candidateRows,
|
|
2492
|
-
weight,
|
|
2493
|
-
miniSearchScores,
|
|
2494
|
-
populateCache,
|
|
2495
|
-
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
2496
|
-
skipSort: jsCosineNeedsTierSort
|
|
2497
|
-
// read() re-sorts after applying tier weights
|
|
2498
|
-
});
|
|
2499
|
-
}
|
|
2500
|
-
if (scored.length > 0) {
|
|
2501
|
-
scored = scored.map((row) => ({
|
|
2502
|
-
...row,
|
|
2503
|
-
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
2504
|
-
}));
|
|
2505
|
-
this._tieBreakSort(scored);
|
|
2506
|
-
const selectedScored = scored.slice(0, maxResults);
|
|
2507
|
-
const topIds = selectedScored.map((s) => s.id);
|
|
2508
|
-
if (exposeMetadata && trimmedQuery) {
|
|
2509
|
-
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
2510
|
-
}
|
|
2511
|
-
if (topIds.length > 0) {
|
|
2512
|
-
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
2513
|
-
if (facts2.length < topIds.length) {
|
|
2514
|
-
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
2515
|
-
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
2516
|
-
const missingCount = missingIds.length;
|
|
2517
|
-
const sample = missingIds.slice(0, 5);
|
|
2518
|
-
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
2519
|
-
const error = new Error(
|
|
2520
|
-
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
2521
|
-
);
|
|
2522
|
-
this.options.onRetrievalFallback?.(error);
|
|
2523
|
-
}
|
|
2524
|
-
facts = facts2;
|
|
2525
|
-
}
|
|
2526
|
-
if (pendingRankerFallbackError) {
|
|
2527
|
-
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
2528
|
-
pendingRankerFallbackError = void 0;
|
|
2529
|
-
}
|
|
2530
|
-
usedEmbed = true;
|
|
2531
|
-
} else {
|
|
2532
|
-
if (pendingRankerFallbackError) {
|
|
2533
|
-
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
2534
|
-
pendingRankerFallbackError = void 0;
|
|
2535
|
-
}
|
|
2536
|
-
usedEmbed = true;
|
|
2537
|
-
}
|
|
2538
|
-
}
|
|
2539
|
-
} catch (err) {
|
|
2540
|
-
const error = err instanceof Error ? err : new Error(String(err));
|
|
2541
|
-
if (rankerShouldRethrow) {
|
|
2542
|
-
throw error;
|
|
2543
|
-
}
|
|
2544
|
-
if (pendingRankerFallbackError) {
|
|
2545
|
-
error.cause = pendingRankerFallbackError;
|
|
2546
|
-
pendingRankerFallbackError = void 0;
|
|
2547
|
-
}
|
|
2548
|
-
this.options.onRetrievalFallback?.(error);
|
|
2549
|
-
}
|
|
2550
|
-
}
|
|
2551
|
-
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
2552
|
-
const fallbackEntityIdSet = new Set(scoredEntityIds);
|
|
2553
|
-
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
2554
|
-
const results = this.miniSearch.search(trimmedQuery, {
|
|
2555
|
-
filter: (r) => fallbackEntityIdSet.has(r.entity_id),
|
|
2556
|
-
combineWith: "OR"
|
|
2557
|
-
});
|
|
2558
|
-
const candidates = results.slice(0, fallbackOversampledLimit).map((r) => ({
|
|
2559
|
-
id: r.id,
|
|
2560
|
-
entity_id: r.entity_id,
|
|
2561
|
-
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
2562
|
-
updated_at: null,
|
|
2563
|
-
access_count: null
|
|
2564
|
-
}));
|
|
2565
|
-
this._tieBreakSort(candidates);
|
|
2566
|
-
const topCandidates = candidates.slice(0, maxResults);
|
|
2567
|
-
const topIds = topCandidates.map((c) => c.id);
|
|
2568
|
-
if (topIds.length > 0) {
|
|
2569
|
-
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
2570
|
-
if (exposeMetadata) {
|
|
2571
|
-
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
2572
|
-
}
|
|
2573
|
-
}
|
|
2574
|
-
}
|
|
2575
|
-
if (facts.length > 0) {
|
|
2576
|
-
const ids = facts.map((f) => f.id);
|
|
2577
|
-
const now = Date.now();
|
|
2578
|
-
await this.entryRepo.trackAccess(ids, now);
|
|
2579
|
-
}
|
|
2580
|
-
} else {
|
|
2581
|
-
facts = await this.entryRepo.findRecentByEntityIds(entityIds, maxResults);
|
|
2582
|
-
}
|
|
2583
|
-
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
2584
|
-
const [tasks, events] = await Promise.all([
|
|
2585
|
-
this.taskRepo.findAllPending(entityIds, entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200)),
|
|
2586
|
-
entityIds.length === 1 ? this.eventRepo.getRecent(entityIds[0], eventsLimit) : this.eventRepo.getRecentForEntities(entityIds, eventsLimit)
|
|
2587
|
-
]);
|
|
2588
|
-
let factScores;
|
|
2589
|
-
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
2590
|
-
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
2591
|
-
}
|
|
2592
|
-
const bundle = { facts, tasks, events: events.reverse() };
|
|
2593
|
-
if (exposeMetadata) {
|
|
2594
|
-
bundle.metadata = { query, entityIds };
|
|
2595
|
-
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
2596
|
-
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
2597
|
-
}
|
|
2598
|
-
return bundle;
|
|
2599
|
-
}
|
|
2600
|
-
/**
|
|
2601
|
-
* Returns entity IDs that will participate in scored retrieval.
|
|
2602
|
-
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
2603
|
-
*/
|
|
2604
|
-
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
2605
|
-
return entityIds.filter((id) => {
|
|
2606
|
-
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
2607
|
-
return includeZeroWeightEntities === true || w !== 0;
|
|
2608
|
-
});
|
|
2609
|
-
}
|
|
2610
|
-
/**
|
|
2611
|
-
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
2612
|
-
*/
|
|
2613
|
-
_tieBreakSort(items) {
|
|
2614
|
-
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1566
|
+
return this.retrievalService.read(entityId, query, options);
|
|
2615
1567
|
}
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
* Negative return means "a ranks ahead of b" for descending score order.
|
|
2619
|
-
*/
|
|
2620
|
-
_compareScoredRows(a, b) {
|
|
2621
|
-
const scoreDiff = b.score - a.score;
|
|
2622
|
-
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
2623
|
-
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
2624
|
-
if (accessCountDiff !== 0) return accessCountDiff;
|
|
2625
|
-
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
2626
|
-
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
2627
|
-
return a.id.localeCompare(b.id);
|
|
1568
|
+
async getMemoryBundle(entityId) {
|
|
1569
|
+
return this.importExportService.getFullBundle(entityId, { maxEvents: 10 });
|
|
2628
1570
|
}
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
* (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
|
|
2632
|
-
*/
|
|
2633
|
-
async _hydrateFactsByIds(ids, scopedEntityIds, tx) {
|
|
2634
|
-
return this.entryRepo.findByIds(ids, scopedEntityIds, tx);
|
|
1571
|
+
async write(entityId, event) {
|
|
1572
|
+
return this.writeService.write(entityId, event);
|
|
2635
1573
|
}
|
|
2636
1574
|
/**
|
|
2637
|
-
*
|
|
2638
|
-
*
|
|
2639
|
-
*
|
|
1575
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
1576
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
1577
|
+
* set `options.config.prompts.librarianSystemPrompt` at WikiMemory construction time.
|
|
2640
1578
|
*/
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
return err instanceof Error ? err : new Error(String(err));
|
|
2644
|
-
}
|
|
2645
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
2646
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
2647
|
-
const sanitized = new Error(
|
|
2648
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
2649
|
-
innerCause ? { cause: innerCause } : void 0
|
|
2650
|
-
);
|
|
2651
|
-
sanitized.name = typeName;
|
|
2652
|
-
return sanitized;
|
|
1579
|
+
async runLibrarian(entityId, options) {
|
|
1580
|
+
return this.maintenanceService.runLibrarian(entityId, options);
|
|
2653
1581
|
}
|
|
2654
1582
|
/**
|
|
2655
|
-
*
|
|
2656
|
-
*
|
|
1583
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
1584
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
1585
|
+
* set `options.config.prompts.healSystemPrompt` at WikiMemory construction time.
|
|
2657
1586
|
*/
|
|
2658
|
-
async
|
|
2659
|
-
|
|
2660
|
-
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
2661
|
-
let entityCache = this.vectorCache.get(entityId);
|
|
2662
|
-
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
2663
|
-
if (tooLarge && entityCache) {
|
|
2664
|
-
this.vectorCache.delete(entityId);
|
|
2665
|
-
entityCache = void 0;
|
|
2666
|
-
}
|
|
2667
|
-
const canCache = populateCache && !tooLarge;
|
|
2668
|
-
if (canCache && !entityCache) {
|
|
2669
|
-
entityCache = /* @__PURE__ */ new Map();
|
|
2670
|
-
}
|
|
2671
|
-
const scored = candidateRows.map((row) => {
|
|
2672
|
-
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
2673
|
-
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
2674
|
-
entityCache.set(row.id, vector);
|
|
2675
|
-
}
|
|
2676
|
-
let score = 0;
|
|
2677
|
-
if (vector && vector.length === queryVec.length) {
|
|
2678
|
-
const cosSim = cosineSimilarity(queryVec, vector);
|
|
2679
|
-
if (weight !== void 0) {
|
|
2680
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
2681
|
-
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
2682
|
-
} else {
|
|
2683
|
-
score = cosSim;
|
|
2684
|
-
}
|
|
2685
|
-
} else if (weight !== void 0 && weight < 1) {
|
|
2686
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
2687
|
-
score = (1 - weight) * kwScore;
|
|
2688
|
-
} else {
|
|
2689
|
-
score = -2;
|
|
2690
|
-
}
|
|
2691
|
-
return {
|
|
2692
|
-
id: row.id,
|
|
2693
|
-
entity_id: row.entity_id,
|
|
2694
|
-
score,
|
|
2695
|
-
updated_at: row.updated_at,
|
|
2696
|
-
access_count: row.access_count
|
|
2697
|
-
};
|
|
2698
|
-
});
|
|
2699
|
-
if (canCache && entityCache && entityCache.size > 0) {
|
|
2700
|
-
if (!this.vectorCache.has(entityId)) {
|
|
2701
|
-
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
2702
|
-
const oldestKey = this.vectorCache.keys().next().value;
|
|
2703
|
-
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
2704
|
-
}
|
|
2705
|
-
this.vectorCache.set(entityId, entityCache);
|
|
2706
|
-
}
|
|
2707
|
-
}
|
|
2708
|
-
if (!skipSort) this._tieBreakSort(scored);
|
|
2709
|
-
return scored.slice(0, limit);
|
|
2710
|
-
}
|
|
2711
|
-
/**
|
|
2712
|
-
* Delegate semantic ranking to the injected VectorRanker.
|
|
2713
|
-
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
2714
|
-
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
2715
|
-
*/
|
|
2716
|
-
async _rankWithVectorRanker(args) {
|
|
2717
|
-
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
2718
|
-
const ranker = this.options.vectorRanker;
|
|
2719
|
-
if (!ranker) {
|
|
2720
|
-
throw new Error("vectorRanker not configured");
|
|
2721
|
-
}
|
|
2722
|
-
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
2723
|
-
const rankerResults = await ranker.rankBySimilarity({
|
|
2724
|
-
entityId,
|
|
2725
|
-
queryVec: queryVecCopy,
|
|
2726
|
-
candidateIds,
|
|
2727
|
-
limit
|
|
2728
|
-
});
|
|
2729
|
-
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
2730
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2731
|
-
const normalized = [];
|
|
2732
|
-
for (const r of rankerResults) {
|
|
2733
|
-
if (normalized.length >= limit) break;
|
|
2734
|
-
if (seen.has(r.id)) continue;
|
|
2735
|
-
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
2736
|
-
if (!Number.isFinite(r.semanticScore)) continue;
|
|
2737
|
-
seen.add(r.id);
|
|
2738
|
-
normalized.push(r);
|
|
2739
|
-
}
|
|
2740
|
-
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
2741
|
-
const scored = normalized.map((r) => {
|
|
2742
|
-
let score = r.semanticScore;
|
|
2743
|
-
if (weight !== void 0) {
|
|
2744
|
-
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
2745
|
-
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
2746
|
-
}
|
|
2747
|
-
return {
|
|
2748
|
-
id: r.id,
|
|
2749
|
-
entity_id: entityIdByCandidateId.get(r.id),
|
|
2750
|
-
// allowedIds filter above guarantees membership
|
|
2751
|
-
score
|
|
2752
|
-
};
|
|
2753
|
-
});
|
|
2754
|
-
return scored;
|
|
2755
|
-
}
|
|
2756
|
-
async getMemoryBundle(entityId) {
|
|
2757
|
-
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
2758
|
-
}
|
|
2759
|
-
async write(entityId, event) {
|
|
2760
|
-
const id = generateId("evt_");
|
|
2761
|
-
const now = Date.now();
|
|
2762
|
-
let eventType = event.event_type;
|
|
2763
|
-
if (!["observation", "decision", "action", "outcome"].includes(eventType)) {
|
|
2764
|
-
eventType = "observation";
|
|
2765
|
-
}
|
|
2766
|
-
const newEvent = {
|
|
2767
|
-
id,
|
|
2768
|
-
entity_id: entityId,
|
|
2769
|
-
event_type: eventType,
|
|
2770
|
-
summary: event.summary,
|
|
2771
|
-
related_entry_id: event.related_entry_id || null,
|
|
2772
|
-
created_at: now
|
|
2773
|
-
};
|
|
2774
|
-
let shouldRunLibrarian = false;
|
|
2775
|
-
let librarianCount = 0;
|
|
2776
|
-
let librarianJobKey = null;
|
|
2777
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2778
|
-
await this.eventRepo.add(newEvent, tx);
|
|
2779
|
-
const threshold = this.options.config?.autoLibrarianThreshold || 20;
|
|
2780
|
-
const [count, cp] = await Promise.all([
|
|
2781
|
-
this.eventRepo.count(entityId, tx),
|
|
2782
|
-
this.metadataRepo.getCheckpoint(entityId, tx)
|
|
2783
|
-
]);
|
|
2784
|
-
let memoryCheckpoint = cp.memory ?? 0;
|
|
2785
|
-
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
2786
|
-
if (count - memoryCheckpoint >= threshold) {
|
|
2787
|
-
const jobKey = this._librarianKey(entityId);
|
|
2788
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
2789
|
-
shouldRunLibrarian = true;
|
|
2790
|
-
librarianCount = count;
|
|
2791
|
-
librarianJobKey = jobKey;
|
|
2792
|
-
await this.metadataRepo.updateCheckpoint(entityId, { memory: count }, tx);
|
|
2793
|
-
}
|
|
2794
|
-
}
|
|
2795
|
-
});
|
|
2796
|
-
if (shouldRunLibrarian && librarianJobKey !== null) {
|
|
2797
|
-
this.activeMaintenanceJobs.add(librarianJobKey);
|
|
2798
|
-
this._notifyStatusSubscribers(entityId);
|
|
2799
|
-
this.runLibrarianThenMaybeHeal(entityId, librarianCount).catch(console.error).finally(() => {
|
|
2800
|
-
this.activeMaintenanceJobs.delete(librarianJobKey);
|
|
2801
|
-
this._notifyStatusSubscribers(entityId);
|
|
2802
|
-
});
|
|
2803
|
-
}
|
|
2804
|
-
}
|
|
2805
|
-
async runLibrarianThenMaybeHeal(entityId, currentEventCount) {
|
|
2806
|
-
await this._doRunLibrarian(entityId);
|
|
2807
|
-
const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
|
|
2808
|
-
const cp = await this.metadataRepo.getCheckpoint(entityId, this.db);
|
|
2809
|
-
let healCheckpoint = cp.heal ?? 0;
|
|
2810
|
-
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
2811
|
-
const shouldRunHeal = currentEventCount - healCheckpoint >= autoHealThreshold;
|
|
2812
|
-
if (shouldRunHeal) {
|
|
2813
|
-
const healKey = this._healKey(entityId);
|
|
2814
|
-
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
2815
|
-
this.activeMaintenanceJobs.add(healKey);
|
|
2816
|
-
this._notifyStatusSubscribers(entityId);
|
|
2817
|
-
try {
|
|
2818
|
-
await this._doRunHeal(entityId);
|
|
2819
|
-
await this.metadataRepo.updateCheckpoint(entityId, { heal: currentEventCount }, this.db);
|
|
2820
|
-
} finally {
|
|
2821
|
-
this.activeMaintenanceJobs.delete(healKey);
|
|
2822
|
-
this._notifyStatusSubscribers(entityId);
|
|
2823
|
-
}
|
|
2824
|
-
}
|
|
2825
|
-
}
|
|
2826
|
-
}
|
|
2827
|
-
async _doRunLibrarian(entityId) {
|
|
2828
|
-
const events = await this.eventRepo.getRecent(entityId, 50);
|
|
2829
|
-
const currentFactsRows = await this.entryRepo.findRecentByEntityId(entityId, 100);
|
|
2830
|
-
const currentFacts = currentFactsRows.map((f) => {
|
|
2831
|
-
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
2832
|
-
return {
|
|
2833
|
-
...rest,
|
|
2834
|
-
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
2835
|
-
};
|
|
2836
|
-
});
|
|
2837
|
-
const userPrompt = `Events:
|
|
2838
|
-
${JSON.stringify(events.reverse(), null, 2)}
|
|
2839
|
-
|
|
2840
|
-
Current Facts:
|
|
2841
|
-
${JSON.stringify(currentFacts, null, 2)}`;
|
|
2842
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2843
|
-
systemPrompt: LIBRARIAN_SYSTEM_PROMPT,
|
|
2844
|
-
userPrompt
|
|
2845
|
-
});
|
|
2846
|
-
const result = parseJsonResponse(responseText);
|
|
2847
|
-
const facts = Array.isArray(result.facts) ? result.facts : [];
|
|
2848
|
-
const tasks = Array.isArray(result.tasks) ? result.tasks : [];
|
|
2849
|
-
const validFacts = facts.map(validateFact).filter((f) => f !== null);
|
|
2850
|
-
const validTasks = tasks.map(validateTask).filter((t) => t !== null);
|
|
2851
|
-
const now = Date.now();
|
|
2852
|
-
const insertedFacts = [];
|
|
2853
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2854
|
-
const factsForDedupe = await this.entryRepo.findRecentByEntityId(entityId, 100, tx);
|
|
2855
|
-
for (const fact of validFacts) {
|
|
2856
|
-
const newTokens = titleTokens(fact.title);
|
|
2857
|
-
let skip = false;
|
|
2858
|
-
if (newTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2859
|
-
for (const existing of factsForDedupe) {
|
|
2860
|
-
if (existing.source_type !== "librarian_inferred") continue;
|
|
2861
|
-
const existingTokens = titleTokens(existing.title);
|
|
2862
|
-
if (existingTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
2863
|
-
if (jaccardScore(newTokens, existingTokens) >= FUZZY_THRESHOLD) {
|
|
2864
|
-
skip = true;
|
|
2865
|
-
break;
|
|
2866
|
-
}
|
|
2867
|
-
}
|
|
2868
|
-
}
|
|
2869
|
-
}
|
|
2870
|
-
if (skip) continue;
|
|
2871
|
-
const id = generateId("fact_");
|
|
2872
|
-
const factObj = {
|
|
2873
|
-
id,
|
|
2874
|
-
entity_id: entityId,
|
|
2875
|
-
title: fact.title,
|
|
2876
|
-
body: fact.body,
|
|
2877
|
-
tags: fact.tags,
|
|
2878
|
-
confidence: fact.confidence,
|
|
2879
|
-
source_type: "librarian_inferred",
|
|
2880
|
-
source_hash: null,
|
|
2881
|
-
source_ref: null,
|
|
2882
|
-
created_at: now,
|
|
2883
|
-
updated_at: now,
|
|
2884
|
-
last_accessed_at: null,
|
|
2885
|
-
access_count: 0,
|
|
2886
|
-
deleted_at: null
|
|
2887
|
-
};
|
|
2888
|
-
await this.entryRepo.upsert(factObj, tx);
|
|
2889
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2890
|
-
}
|
|
2891
|
-
for (const task of validTasks) {
|
|
2892
|
-
const id = generateId("task_");
|
|
2893
|
-
const taskObj = { id, entity_id: entityId, description: task.description, status: "pending", priority: task.priority, created_at: now, updated_at: now, resolved_at: null, deleted_at: null };
|
|
2894
|
-
await this.taskRepo.upsert(taskObj, tx);
|
|
2895
|
-
}
|
|
2896
|
-
});
|
|
2897
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2898
|
-
this.vectorCache.delete(entityId);
|
|
2899
|
-
for (const fact of insertedFacts) {
|
|
2900
|
-
await this.embedFact(fact);
|
|
2901
|
-
}
|
|
2902
|
-
this.vectorCache.delete(entityId);
|
|
2903
|
-
}
|
|
2904
|
-
async _doRunHeal(entityId) {
|
|
2905
|
-
const now = Date.now();
|
|
2906
|
-
const orphanAfterDays = this.options.config?.orphanAfterDays !== void 0 ? this.options.config.orphanAfterDays : 30;
|
|
2907
|
-
const staleInferredAfterDays = this.options.config?.staleInferredAfterDays !== void 0 ? this.options.config.staleInferredAfterDays : 60;
|
|
2908
|
-
const MS_PER_DAY = 24 * 60 * 60 * 1e3;
|
|
2909
|
-
if (orphanAfterDays !== null && (typeof orphanAfterDays !== "number" || !Number.isFinite(orphanAfterDays) || orphanAfterDays < 0)) {
|
|
2910
|
-
throw new Error("Invalid orphanAfterDays: must be a finite number >= 0 or null");
|
|
2911
|
-
}
|
|
2912
|
-
if (staleInferredAfterDays !== null && (typeof staleInferredAfterDays !== "number" || !Number.isFinite(staleInferredAfterDays) || staleInferredAfterDays < 0)) {
|
|
2913
|
-
throw new Error("Invalid staleInferredAfterDays: must be a finite number >= 0 or null");
|
|
2914
|
-
}
|
|
2915
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2916
|
-
if (orphanAfterDays !== null) {
|
|
2917
|
-
const orphanThreshold = now - orphanAfterDays * MS_PER_DAY;
|
|
2918
|
-
await this.entryRepo.markOrphaned(entityId, orphanThreshold, tx);
|
|
2919
|
-
}
|
|
2920
|
-
if (staleInferredAfterDays !== null) {
|
|
2921
|
-
const staleThreshold = now - staleInferredAfterDays * MS_PER_DAY;
|
|
2922
|
-
await this.entryRepo.downgradeStaleInferred(entityId, staleThreshold, tx);
|
|
2923
|
-
}
|
|
2924
|
-
});
|
|
2925
|
-
const allFactsRows = await this.entryRepo.findAllByEntityId(entityId);
|
|
2926
|
-
const allTasks = await this.taskRepo.findAllPending([entityId]);
|
|
2927
|
-
const recentEvents = await this.eventRepo.getRecent(entityId, 20);
|
|
2928
|
-
const healCandidates = allFactsRows.filter((f) => f.source_type !== "immutable_document");
|
|
2929
|
-
const documentAnchors = allFactsRows.filter((f) => f.source_type === "immutable_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
2930
|
-
const userPrompt = `Heal Candidates:
|
|
2931
|
-
${JSON.stringify(healCandidates.map((f) => {
|
|
2932
|
-
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
2933
|
-
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
2934
|
-
}), null, 2)}
|
|
2935
|
-
|
|
2936
|
-
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
2937
|
-
${JSON.stringify(documentAnchors, null, 2)}
|
|
2938
|
-
|
|
2939
|
-
All Tasks:
|
|
2940
|
-
${JSON.stringify(allTasks, null, 2)}
|
|
2941
|
-
|
|
2942
|
-
Recent Events:
|
|
2943
|
-
${JSON.stringify(recentEvents, null, 2)}
|
|
2944
|
-
|
|
2945
|
-
The following document anchors are provided for contradiction detection only. Do not include them in \`downgraded\`, \`deleted\`, or \`newFacts\`.`;
|
|
2946
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2947
|
-
systemPrompt: HEAL_SYSTEM_PROMPT,
|
|
2948
|
-
userPrompt
|
|
2949
|
-
});
|
|
2950
|
-
const result = parseJsonResponse(responseText);
|
|
2951
|
-
const mutableIds = new Set(healCandidates.map((f) => f.id));
|
|
2952
|
-
const downgraded = Array.isArray(result.downgraded) ? result.downgraded : [];
|
|
2953
|
-
const deleted = Array.isArray(result.deleted) ? result.deleted : [];
|
|
2954
|
-
const newFacts = Array.isArray(result.newFacts) ? result.newFacts : [];
|
|
2955
|
-
const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
|
|
2956
|
-
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
2957
|
-
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
2958
|
-
const insertedFacts = [];
|
|
2959
|
-
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
2960
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
2961
|
-
await this.entryRepo.downgradeByIds(safeDowngraded, entityId, tx);
|
|
2962
|
-
await this.entryRepo.softDeleteByIds(safeDeleted, entityId, tx);
|
|
2963
|
-
for (const fact of validNewFacts) {
|
|
2964
|
-
const id = generateId("fact_");
|
|
2965
|
-
const factObj = {
|
|
2966
|
-
id,
|
|
2967
|
-
entity_id: entityId,
|
|
2968
|
-
title: fact.title,
|
|
2969
|
-
body: fact.body,
|
|
2970
|
-
tags: fact.tags,
|
|
2971
|
-
confidence: fact.confidence,
|
|
2972
|
-
source_type: "librarian_inferred",
|
|
2973
|
-
source_hash: null,
|
|
2974
|
-
source_ref: null,
|
|
2975
|
-
created_at: now,
|
|
2976
|
-
updated_at: now,
|
|
2977
|
-
last_accessed_at: null,
|
|
2978
|
-
access_count: 0,
|
|
2979
|
-
deleted_at: null
|
|
2980
|
-
};
|
|
2981
|
-
await this.entryRepo.upsert(factObj, tx);
|
|
2982
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2983
|
-
}
|
|
2984
|
-
});
|
|
2985
|
-
this.vectorCache.delete(entityId);
|
|
2986
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2987
|
-
for (const factId of uniqueDeletedFactIds) {
|
|
2988
|
-
try {
|
|
2989
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2990
|
-
} catch (hookErr) {
|
|
2991
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
2992
|
-
}
|
|
2993
|
-
}
|
|
2994
|
-
for (const fact of insertedFacts) {
|
|
2995
|
-
await this.embedFact(fact);
|
|
2996
|
-
}
|
|
2997
|
-
this.vectorCache.delete(entityId);
|
|
2998
|
-
}
|
|
2999
|
-
async runLibrarian(entityId) {
|
|
3000
|
-
const jobKey = this._librarianKey(entityId);
|
|
3001
|
-
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
3002
|
-
throw new WikiBusyError("librarian", entityId);
|
|
3003
|
-
}
|
|
3004
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3005
|
-
throw new WikiBusyError("prune", entityId);
|
|
3006
|
-
}
|
|
3007
|
-
if (this._isReembedActive(entityId)) {
|
|
3008
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3009
|
-
}
|
|
3010
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3011
|
-
throw new WikiBusyError("import", entityId);
|
|
3012
|
-
}
|
|
3013
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3014
|
-
throw new WikiBusyError("forget", entityId);
|
|
3015
|
-
}
|
|
3016
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
3017
|
-
this._notifyStatusSubscribers(entityId);
|
|
3018
|
-
try {
|
|
3019
|
-
await this._doRunLibrarian(entityId);
|
|
3020
|
-
} finally {
|
|
3021
|
-
this.activeMaintenanceJobs.delete(jobKey);
|
|
3022
|
-
this._notifyStatusSubscribers(entityId);
|
|
3023
|
-
}
|
|
3024
|
-
}
|
|
3025
|
-
async runHeal(entityId) {
|
|
3026
|
-
const jobKey = this._healKey(entityId);
|
|
3027
|
-
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
3028
|
-
throw new WikiBusyError("heal", entityId);
|
|
3029
|
-
}
|
|
3030
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3031
|
-
throw new WikiBusyError("prune", entityId);
|
|
3032
|
-
}
|
|
3033
|
-
if (this._isReembedActive(entityId)) {
|
|
3034
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3035
|
-
}
|
|
3036
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3037
|
-
throw new WikiBusyError("import", entityId);
|
|
3038
|
-
}
|
|
3039
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3040
|
-
throw new WikiBusyError("forget", entityId);
|
|
3041
|
-
}
|
|
3042
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
3043
|
-
this._notifyStatusSubscribers(entityId);
|
|
3044
|
-
try {
|
|
3045
|
-
await this._doRunHeal(entityId);
|
|
3046
|
-
} finally {
|
|
3047
|
-
this.activeMaintenanceJobs.delete(jobKey);
|
|
3048
|
-
this._notifyStatusSubscribers(entityId);
|
|
3049
|
-
}
|
|
1587
|
+
async runHeal(entityId, options) {
|
|
1588
|
+
return this.maintenanceService.runHeal(entityId, options);
|
|
3050
1589
|
}
|
|
3051
1590
|
async runReembed(entityId, opts) {
|
|
3052
|
-
|
|
3053
|
-
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
3054
|
-
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
3055
|
-
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
3056
|
-
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
3057
|
-
}
|
|
3058
|
-
if (entityId) {
|
|
3059
|
-
if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
|
|
3060
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3061
|
-
}
|
|
3062
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3063
|
-
throw new WikiBusyError("prune", entityId);
|
|
3064
|
-
}
|
|
3065
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
3066
|
-
throw new WikiBusyError("librarian", entityId);
|
|
3067
|
-
}
|
|
3068
|
-
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
3069
|
-
throw new WikiBusyError("heal", entityId);
|
|
3070
|
-
}
|
|
3071
|
-
if (this._isIngestActiveFor(entityId)) {
|
|
3072
|
-
throw new WikiBusyError("ingest", entityId);
|
|
3073
|
-
}
|
|
3074
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3075
|
-
throw new WikiBusyError("import", entityId);
|
|
3076
|
-
}
|
|
3077
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3078
|
-
throw new WikiBusyError("forget", entityId);
|
|
3079
|
-
}
|
|
3080
|
-
} else {
|
|
3081
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
3082
|
-
throw new WikiBusyError("reembed", "*");
|
|
3083
|
-
}
|
|
3084
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
|
|
3085
|
-
throw new WikiBusyError("prune", "*");
|
|
3086
|
-
}
|
|
3087
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
|
|
3088
|
-
throw new WikiBusyError("librarian", "*");
|
|
3089
|
-
}
|
|
3090
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
|
|
3091
|
-
throw new WikiBusyError("heal", "*");
|
|
3092
|
-
}
|
|
3093
|
-
if (this.activeIngestJobs.size > 0) {
|
|
3094
|
-
throw new WikiBusyError("ingest", "*");
|
|
3095
|
-
}
|
|
3096
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
3097
|
-
throw new WikiBusyError("import", "*");
|
|
3098
|
-
}
|
|
3099
|
-
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
3100
|
-
throw new WikiBusyError("forget", "*");
|
|
3101
|
-
}
|
|
3102
|
-
}
|
|
3103
|
-
this.activeMaintenanceJobs.add(reembedKey);
|
|
3104
|
-
try {
|
|
3105
|
-
const rows = await this.entryRepo.findAllForReembed(entityId);
|
|
3106
|
-
if (entityId) {
|
|
3107
|
-
this.vectorCache.delete(entityId);
|
|
3108
|
-
} else {
|
|
3109
|
-
this.vectorCache.clear();
|
|
3110
|
-
}
|
|
3111
|
-
const skipExisting = opts?.skipExisting ?? false;
|
|
3112
|
-
let effectiveSkip = skipExisting;
|
|
3113
|
-
if (skipExisting) {
|
|
3114
|
-
const mismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
3115
|
-
if (mismatchValue) {
|
|
3116
|
-
if (entityId) {
|
|
3117
|
-
const mismatchDim = parseInt(mismatchValue, 10);
|
|
3118
|
-
const staleCount = await this.entryRepo.countStaleForEntity(entityId, mismatchDim);
|
|
3119
|
-
if (staleCount > 0) effectiveSkip = false;
|
|
3120
|
-
} else {
|
|
3121
|
-
effectiveSkip = false;
|
|
3122
|
-
}
|
|
3123
|
-
}
|
|
3124
|
-
}
|
|
3125
|
-
let embedded = 0;
|
|
3126
|
-
let skipped = 0;
|
|
3127
|
-
let failed = 0;
|
|
3128
|
-
try {
|
|
3129
|
-
for (const row of rows) {
|
|
3130
|
-
const existingBlob = row.embedding_blob;
|
|
3131
|
-
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
3132
|
-
if (effectiveSkip && blobIsValid) {
|
|
3133
|
-
const vec = parseEmbedding(existingBlob, null);
|
|
3134
|
-
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
3135
|
-
skipped++;
|
|
3136
|
-
continue;
|
|
3137
|
-
}
|
|
3138
|
-
}
|
|
3139
|
-
const success = await this.embedFact(row);
|
|
3140
|
-
if (success) embedded++;
|
|
3141
|
-
else failed++;
|
|
3142
|
-
}
|
|
3143
|
-
if (embedded > 0) {
|
|
3144
|
-
await this._reconcileEmbeddingDimension();
|
|
3145
|
-
}
|
|
3146
|
-
} finally {
|
|
3147
|
-
if (entityId) {
|
|
3148
|
-
this.vectorCache.delete(entityId);
|
|
3149
|
-
} else {
|
|
3150
|
-
this.vectorCache.clear();
|
|
3151
|
-
}
|
|
3152
|
-
}
|
|
3153
|
-
return { embedded, skipped, failed };
|
|
3154
|
-
} finally {
|
|
3155
|
-
this.activeMaintenanceJobs.delete(reembedKey);
|
|
3156
|
-
}
|
|
1591
|
+
return this.maintenanceService.runReembed(entityId, opts);
|
|
3157
1592
|
}
|
|
3158
1593
|
getEntityStatus(entityId) {
|
|
3159
|
-
|
|
3160
|
-
let ingesting = false;
|
|
3161
|
-
for (const k of this.activeIngestJobs) {
|
|
3162
|
-
if (k.startsWith(ingestPrefix)) {
|
|
3163
|
-
ingesting = true;
|
|
3164
|
-
break;
|
|
3165
|
-
}
|
|
3166
|
-
}
|
|
3167
|
-
return {
|
|
3168
|
-
ingesting,
|
|
3169
|
-
librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
|
|
3170
|
-
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
3171
|
-
};
|
|
1594
|
+
return this.jobManager.getEntityStatus(entityId);
|
|
3172
1595
|
}
|
|
3173
|
-
/**
|
|
3174
|
-
* Subscribe to {@link EntityStatus} changes for a single entity. The callback
|
|
3175
|
-
* is invoked synchronously once with the current status before this method
|
|
3176
|
-
* returns, then again on every transition where any of `ingesting`,
|
|
3177
|
-
* `librarian`, or `heal` flips. No polling, no duplicate snapshots.
|
|
3178
|
-
*
|
|
3179
|
-
* Returns an idempotent unsubscribe function.
|
|
3180
|
-
*
|
|
3181
|
-
* See also {@link getEntityStatus} for a synchronous point-in-time read.
|
|
3182
|
-
*/
|
|
3183
1596
|
subscribeEntityStatus(entityId, callback) {
|
|
3184
|
-
|
|
3185
|
-
let set = this.statusSubscribers.get(entityId);
|
|
3186
|
-
if (!set) {
|
|
3187
|
-
set = /* @__PURE__ */ new Set();
|
|
3188
|
-
this.statusSubscribers.set(entityId, set);
|
|
3189
|
-
}
|
|
3190
|
-
const entry = { callback, last: this._copyEntityStatus(initial) };
|
|
3191
|
-
set.add(entry);
|
|
3192
|
-
try {
|
|
3193
|
-
callback(this._copyEntityStatus(initial));
|
|
3194
|
-
} catch (err) {
|
|
3195
|
-
console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during initial emission`, err);
|
|
3196
|
-
}
|
|
3197
|
-
let active = true;
|
|
3198
|
-
return () => {
|
|
3199
|
-
if (!active) return;
|
|
3200
|
-
active = false;
|
|
3201
|
-
const s = this.statusSubscribers.get(entityId);
|
|
3202
|
-
if (!s) return;
|
|
3203
|
-
s.delete(entry);
|
|
3204
|
-
if (s.size === 0) this.statusSubscribers.delete(entityId);
|
|
3205
|
-
};
|
|
1597
|
+
return this.jobManager.subscribeEntityStatus(entityId, callback);
|
|
3206
1598
|
}
|
|
3207
1599
|
clearVectorCache() {
|
|
3208
|
-
this.
|
|
3209
|
-
}
|
|
3210
|
-
async _getFullBundle(entityId, opts) {
|
|
3211
|
-
const [factsRaw, tasks, events] = await Promise.all([
|
|
3212
|
-
this.entryRepo.findAllByEntityId(entityId),
|
|
3213
|
-
this.taskRepo.findAllByEntityId(entityId),
|
|
3214
|
-
this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
|
|
3215
|
-
]);
|
|
3216
|
-
const facts = factsRaw.map((f) => {
|
|
3217
|
-
const { embedding: _embedding, embedding_blob, ...rest } = f;
|
|
3218
|
-
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
3219
|
-
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
3220
|
-
new Uint8Array(c).set(embedding_blob);
|
|
3221
|
-
return new Uint8Array(c);
|
|
3222
|
-
})() : void 0;
|
|
3223
|
-
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
3224
|
-
return {
|
|
3225
|
-
...factBase,
|
|
3226
|
-
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
3227
|
-
};
|
|
3228
|
-
});
|
|
3229
|
-
return { facts, tasks, events };
|
|
1600
|
+
this.searchService.evictCache();
|
|
3230
1601
|
}
|
|
3231
1602
|
async exportDump(entityIds) {
|
|
3232
|
-
|
|
3233
|
-
if (entityIds && entityIds.length > 0) {
|
|
3234
|
-
ids = Array.from(new Set(entityIds));
|
|
3235
|
-
} else {
|
|
3236
|
-
ids = await this.metadataRepo.getDistinctEntityIds();
|
|
3237
|
-
}
|
|
3238
|
-
const entities = {};
|
|
3239
|
-
const BATCH = 3;
|
|
3240
|
-
for (let i = 0; i < ids.length; i += BATCH) {
|
|
3241
|
-
const batch = ids.slice(i, i + BATCH);
|
|
3242
|
-
const batchResults = await Promise.all(
|
|
3243
|
-
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
3244
|
-
);
|
|
3245
|
-
for (const [id, bundle] of batchResults) {
|
|
3246
|
-
entities[id] = bundle;
|
|
3247
|
-
}
|
|
3248
|
-
}
|
|
3249
|
-
return { generatedAt: Date.now(), entities };
|
|
1603
|
+
return this.importExportService.exportDump(entityIds);
|
|
3250
1604
|
}
|
|
3251
1605
|
async importDump(dump, opts) {
|
|
3252
|
-
|
|
3253
|
-
const entityIds = Object.keys(dump.entities);
|
|
3254
|
-
for (const entityId of entityIds) {
|
|
3255
|
-
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
|
|
3256
|
-
throw new WikiBusyError("import", entityId);
|
|
3257
|
-
}
|
|
3258
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
3259
|
-
throw new WikiBusyError("librarian", entityId);
|
|
3260
|
-
}
|
|
3261
|
-
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
3262
|
-
throw new WikiBusyError("heal", entityId);
|
|
3263
|
-
}
|
|
3264
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3265
|
-
throw new WikiBusyError("prune", entityId);
|
|
3266
|
-
}
|
|
3267
|
-
if (this._isReembedActive(entityId)) {
|
|
3268
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3269
|
-
}
|
|
3270
|
-
if (this._isIngestActiveFor(entityId)) {
|
|
3271
|
-
throw new WikiBusyError("ingest", entityId);
|
|
3272
|
-
}
|
|
3273
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3274
|
-
throw new WikiBusyError("forget", entityId);
|
|
3275
|
-
}
|
|
3276
|
-
}
|
|
3277
|
-
if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
3278
|
-
throw new WikiBusyError("import", "*");
|
|
3279
|
-
}
|
|
3280
|
-
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
3281
|
-
for (const entityId of entityIds) {
|
|
3282
|
-
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
3283
|
-
}
|
|
3284
|
-
try {
|
|
3285
|
-
await this.assertNoLegacySourceTypes();
|
|
3286
|
-
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
3287
|
-
await this._doImportEntity(entityId, bundle, merge);
|
|
3288
|
-
}
|
|
3289
|
-
} finally {
|
|
3290
|
-
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
3291
|
-
for (const entityId of entityIds) {
|
|
3292
|
-
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
3293
|
-
}
|
|
3294
|
-
}
|
|
3295
|
-
}
|
|
3296
|
-
async _doImportEntity(entityId, bundle, merge) {
|
|
3297
|
-
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
3298
|
-
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
3299
|
-
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
3300
|
-
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
3301
|
-
const softDeletedFactIds = [];
|
|
3302
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3303
|
-
if (!merge) {
|
|
3304
|
-
const deletedLiveFactIds = await this.entryRepo.findIdsBySource(entityId, null, null, tx, false);
|
|
3305
|
-
softDeletedFactIds.push(...deletedLiveFactIds);
|
|
3306
|
-
await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3307
|
-
await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3308
|
-
await this.metadataRepo.deleteCheckpoint(entityId, tx);
|
|
3309
|
-
}
|
|
3310
|
-
const factIds = bundle.facts.map((fact) => fact.id);
|
|
3311
|
-
const existingFactsById = /* @__PURE__ */ new Map();
|
|
3312
|
-
const existingFacts = await this.entryRepo.findExistingMetadataByIds(factIds, tx);
|
|
3313
|
-
for (const existingFact of existingFacts) {
|
|
3314
|
-
existingFactsById.set(existingFact.id, existingFact);
|
|
3315
|
-
}
|
|
3316
|
-
for (const fact of bundle.facts) {
|
|
3317
|
-
const sourceType = this._normalizeImportedSourceType(String(fact.source_type), {
|
|
3318
|
-
entityId,
|
|
3319
|
-
factId: fact.id
|
|
3320
|
-
});
|
|
3321
|
-
JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
3322
|
-
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
3323
|
-
const existing = existingFactsById.get(fact.id);
|
|
3324
|
-
const rawBlobRaw = fact.embedding_blob;
|
|
3325
|
-
let rawBlob = null;
|
|
3326
|
-
if (rawBlobRaw instanceof Uint8Array) {
|
|
3327
|
-
rawBlob = rawBlobRaw;
|
|
3328
|
-
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
3329
|
-
const obj = rawBlobRaw;
|
|
3330
|
-
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
3331
|
-
rawBlob = new Uint8Array(obj["data"]);
|
|
3332
|
-
} else if (!Array.isArray(rawBlobRaw)) {
|
|
3333
|
-
const entries = Object.keys(obj);
|
|
3334
|
-
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
3335
|
-
const len = entries.length;
|
|
3336
|
-
rawBlob = new Uint8Array(len);
|
|
3337
|
-
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
3338
|
-
}
|
|
3339
|
-
}
|
|
3340
|
-
}
|
|
3341
|
-
let blobData = null;
|
|
3342
|
-
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
3343
|
-
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
3344
|
-
const alignedBlob = new Uint8Array(copy);
|
|
3345
|
-
alignedBlob.set(rawBlob);
|
|
3346
|
-
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
3347
|
-
let allFinite = true;
|
|
3348
|
-
for (let i = 0; i < floats.length; i++) {
|
|
3349
|
-
if (!isFinite(floats[i])) {
|
|
3350
|
-
allFinite = false;
|
|
3351
|
-
break;
|
|
3352
|
-
}
|
|
3353
|
-
}
|
|
3354
|
-
if (allFinite) {
|
|
3355
|
-
blobData = alignedBlob;
|
|
3356
|
-
}
|
|
3357
|
-
}
|
|
3358
|
-
if (existing) {
|
|
3359
|
-
if (existing.entity_id !== entityId) {
|
|
3360
|
-
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
3361
|
-
continue;
|
|
3362
|
-
}
|
|
3363
|
-
if (merge) {
|
|
3364
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
3365
|
-
}
|
|
3366
|
-
}
|
|
3367
|
-
const factObj = {
|
|
3368
|
-
id: fact.id,
|
|
3369
|
-
entity_id: entityId,
|
|
3370
|
-
title: fact.title,
|
|
3371
|
-
body: fact.body,
|
|
3372
|
-
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
3373
|
-
confidence: fact.confidence,
|
|
3374
|
-
source_type: sourceType,
|
|
3375
|
-
source_hash: fact.source_hash,
|
|
3376
|
-
source_ref: fact.source_ref,
|
|
3377
|
-
created_at: fact.created_at,
|
|
3378
|
-
updated_at: safeUpdatedAt,
|
|
3379
|
-
last_accessed_at: fact.last_accessed_at,
|
|
3380
|
-
access_count: fact.access_count,
|
|
3381
|
-
deleted_at: fact.deleted_at,
|
|
3382
|
-
embedding_blob: blobData ?? void 0
|
|
3383
|
-
};
|
|
3384
|
-
await this.entryRepo.upsertForImport(factObj, tx);
|
|
3385
|
-
if (blobData != null) {
|
|
3386
|
-
factsWithPreservedBlob.set(fact.id, blobData);
|
|
3387
|
-
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
3388
|
-
}
|
|
3389
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
3390
|
-
upsertedFactIds.add(fact.id);
|
|
3391
|
-
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
3392
|
-
}
|
|
3393
|
-
const taskIds = bundle.tasks.map((task) => task.id);
|
|
3394
|
-
const existingTasksById = /* @__PURE__ */ new Map();
|
|
3395
|
-
const existingTasks = await this.taskRepo.findExistingMetadataByIds(taskIds, tx);
|
|
3396
|
-
for (const existingTask of existingTasks) {
|
|
3397
|
-
existingTasksById.set(existingTask.id, existingTask);
|
|
3398
|
-
}
|
|
3399
|
-
for (const task of bundle.tasks) {
|
|
3400
|
-
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
3401
|
-
const existing = existingTasksById.get(task.id);
|
|
3402
|
-
if (existing) {
|
|
3403
|
-
if (existing.entity_id !== entityId) {
|
|
3404
|
-
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
3405
|
-
continue;
|
|
3406
|
-
}
|
|
3407
|
-
if (merge) {
|
|
3408
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
3409
|
-
}
|
|
3410
|
-
}
|
|
3411
|
-
await this.taskRepo.upsertForImport({
|
|
3412
|
-
id: task.id,
|
|
3413
|
-
entity_id: entityId,
|
|
3414
|
-
description: task.description,
|
|
3415
|
-
status: task.status,
|
|
3416
|
-
priority: task.priority,
|
|
3417
|
-
created_at: task.created_at,
|
|
3418
|
-
updated_at: safeUpdatedAt,
|
|
3419
|
-
resolved_at: task.resolved_at,
|
|
3420
|
-
deleted_at: task.deleted_at
|
|
3421
|
-
}, tx, safeUpdatedAt);
|
|
3422
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
3423
|
-
}
|
|
3424
|
-
for (const event of bundle.events) {
|
|
3425
|
-
await this.eventRepo.addIgnoreDuplicate({
|
|
3426
|
-
id: event.id,
|
|
3427
|
-
entity_id: entityId,
|
|
3428
|
-
event_type: event.event_type,
|
|
3429
|
-
summary: event.summary,
|
|
3430
|
-
related_entry_id: event.related_entry_id ?? null,
|
|
3431
|
-
created_at: event.created_at
|
|
3432
|
-
}, tx);
|
|
3433
|
-
}
|
|
3434
|
-
});
|
|
3435
|
-
this.vectorCache.delete(entityId);
|
|
3436
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3437
|
-
for (const fact of bundle.facts) {
|
|
3438
|
-
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
3439
|
-
await this.embedFact({
|
|
3440
|
-
id: fact.id,
|
|
3441
|
-
entity_id: entityId,
|
|
3442
|
-
// Use authoritative entityId from dump key, not fact.entity_id
|
|
3443
|
-
title: fact.title,
|
|
3444
|
-
body: fact.body,
|
|
3445
|
-
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
3446
|
-
});
|
|
3447
|
-
}
|
|
3448
|
-
}
|
|
3449
|
-
for (const fact of bundle.facts) {
|
|
3450
|
-
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
3451
|
-
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
3452
|
-
try {
|
|
3453
|
-
const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
|
|
3454
|
-
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
3455
|
-
} catch (hookErr) {
|
|
3456
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
3457
|
-
}
|
|
3458
|
-
}
|
|
3459
|
-
}
|
|
3460
|
-
for (const factId of softDeletedFactIds) {
|
|
3461
|
-
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
3462
|
-
try {
|
|
3463
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
3464
|
-
} catch (hookErr) {
|
|
3465
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
|
|
3466
|
-
}
|
|
3467
|
-
}
|
|
3468
|
-
}
|
|
3469
|
-
try {
|
|
3470
|
-
const canonicalDimValue = await this.metadataRepo.getMeta("embedding_dimension");
|
|
3471
|
-
const canonicalDim = canonicalDimValue ? parseInt(canonicalDimValue, 10) : null;
|
|
3472
|
-
if (preservedBlobDims.size === 1) {
|
|
3473
|
-
const preservedDim = [...preservedBlobDims][0];
|
|
3474
|
-
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
3475
|
-
await this.storeEmbeddingDimension(preservedDim);
|
|
3476
|
-
const staleMismatchValue = await this.metadataRepo.getMeta("embedding_dimension_mismatch");
|
|
3477
|
-
if (staleMismatchValue && parseInt(staleMismatchValue, 10) !== preservedDim) {
|
|
3478
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(preservedDim), this.db);
|
|
3479
|
-
}
|
|
3480
|
-
await this._reconcileEmbeddingDimension();
|
|
3481
|
-
} else {
|
|
3482
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
|
|
3483
|
-
}
|
|
3484
|
-
} else if (preservedBlobDims.size > 1) {
|
|
3485
|
-
if (canonicalDim === null) {
|
|
3486
|
-
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
3487
|
-
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
3488
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(sortedPreservedBlobDims[0]), this.db);
|
|
3489
|
-
} else {
|
|
3490
|
-
await this.metadataRepo.setMeta("embedding_dimension_mismatch", String(canonicalDim), this.db);
|
|
3491
|
-
}
|
|
3492
|
-
}
|
|
3493
|
-
} finally {
|
|
3494
|
-
this.vectorCache.delete(entityId);
|
|
3495
|
-
}
|
|
1606
|
+
return this.importExportService.importDump(dump, opts);
|
|
3496
1607
|
}
|
|
3497
1608
|
async forget(entityId, params) {
|
|
3498
|
-
|
|
3499
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
3500
|
-
blockingOperation = "librarian";
|
|
3501
|
-
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
3502
|
-
blockingOperation = "heal";
|
|
3503
|
-
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3504
|
-
blockingOperation = "prune";
|
|
3505
|
-
} else if (this._isReembedActive(entityId)) {
|
|
3506
|
-
blockingOperation = "reembed";
|
|
3507
|
-
} else if (this._isIngestActiveFor(entityId)) {
|
|
3508
|
-
blockingOperation = "ingest";
|
|
3509
|
-
} else if (this._isImportActiveFor(entityId)) {
|
|
3510
|
-
blockingOperation = "import";
|
|
3511
|
-
} else if (this._isForgetActiveFor(entityId)) {
|
|
3512
|
-
blockingOperation = "forget";
|
|
3513
|
-
}
|
|
3514
|
-
if (blockingOperation !== null) {
|
|
3515
|
-
throw new WikiBusyError(blockingOperation, entityId);
|
|
3516
|
-
}
|
|
3517
|
-
const forgetKey = this._forgetKey(entityId);
|
|
3518
|
-
this.activeMaintenanceJobs.add(forgetKey);
|
|
3519
|
-
try {
|
|
3520
|
-
const now = Date.now();
|
|
3521
|
-
let deletedEntries = 0;
|
|
3522
|
-
let deletedTasks = 0;
|
|
3523
|
-
const deletedEntryIds = [];
|
|
3524
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3525
|
-
if (params.clearAll) {
|
|
3526
|
-
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, null, null, tx, true));
|
|
3527
|
-
const entriesRes = await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3528
|
-
const tasksRes = await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
3529
|
-
await this.metadataRepo.updateCheckpoint(entityId, { memory: 0, heal: 0 }, tx);
|
|
3530
|
-
deletedEntries = entriesRes;
|
|
3531
|
-
deletedTasks = tasksRes;
|
|
3532
|
-
} else {
|
|
3533
|
-
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
3534
|
-
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
3535
|
-
if (hasIdSelectors && hasSourceSelectors) {
|
|
3536
|
-
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
3537
|
-
}
|
|
3538
|
-
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
3539
|
-
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
3540
|
-
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
3541
|
-
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
3542
|
-
if (params.entryId) {
|
|
3543
|
-
const entryId = await this.entryRepo.findIdById(params.entryId, entityId, tx);
|
|
3544
|
-
if (entryId) deletedEntryIds.push(entryId);
|
|
3545
|
-
}
|
|
3546
|
-
if (sourceRef || sourceHash) {
|
|
3547
|
-
deletedEntryIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, sourceHash, tx, true));
|
|
3548
|
-
}
|
|
3549
|
-
const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
3550
|
-
const taskDeletedPromise = params.taskId ? this.taskRepo.softDeleteById(params.taskId, entityId, tx).then((r) => r.changes > 0) : null;
|
|
3551
|
-
const refPromise = sourceRef || sourceHash ? this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, sourceHash) : null;
|
|
3552
|
-
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
3553
|
-
entryPromise ?? Promise.resolve(false),
|
|
3554
|
-
taskDeletedPromise ?? Promise.resolve(false),
|
|
3555
|
-
refPromise ?? Promise.resolve(0)
|
|
3556
|
-
]);
|
|
3557
|
-
if (entryResult) deletedEntries++;
|
|
3558
|
-
if (taskResult) deletedTasks++;
|
|
3559
|
-
deletedEntries += refResult;
|
|
3560
|
-
}
|
|
3561
|
-
});
|
|
3562
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3563
|
-
this.vectorCache.delete(entityId);
|
|
3564
|
-
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
3565
|
-
for (const factId of uniqueDeletedIds) {
|
|
3566
|
-
try {
|
|
3567
|
-
await this._notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
3568
|
-
} catch (hookErr) {
|
|
3569
|
-
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
3570
|
-
if (isTimeout) {
|
|
3571
|
-
throw new Error(
|
|
3572
|
-
`forget(${entityId}/${factId}) failed: ${hookErr.message}`
|
|
3573
|
-
);
|
|
3574
|
-
}
|
|
3575
|
-
const errMsg = hookErr?.message ?? "";
|
|
3576
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
3577
|
-
if (isValidationError) {
|
|
3578
|
-
throw new Error(
|
|
3579
|
-
`forget(${entityId}/${factId}) failed: ${errMsg}`,
|
|
3580
|
-
{ cause: hookErr }
|
|
3581
|
-
);
|
|
3582
|
-
}
|
|
3583
|
-
throw new Error(
|
|
3584
|
-
`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`,
|
|
3585
|
-
{ cause: this._sanitizeRankerError(hookErr) }
|
|
3586
|
-
);
|
|
3587
|
-
}
|
|
3588
|
-
}
|
|
3589
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
3590
|
-
} finally {
|
|
3591
|
-
this.activeMaintenanceJobs.delete(forgetKey);
|
|
3592
|
-
}
|
|
1609
|
+
return this.maintenanceService.forget(entityId, params);
|
|
3593
1610
|
}
|
|
1611
|
+
/**
|
|
1612
|
+
* @param params.promptOverride - Overrides the system prompt for this ingest call only.
|
|
1613
|
+
* For persistent customization, set `options.config.prompts.ingestSystemPrompt` at
|
|
1614
|
+
* WikiMemory construction time.
|
|
1615
|
+
*/
|
|
3594
1616
|
async ingestDocument(entityId, params) {
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
|
|
3598
|
-
|
|
3599
|
-
|
|
3600
|
-
|
|
3601
|
-
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
);
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
|
|
3611
|
-
if (this.activeIngestJobs.has(jobKey)) {
|
|
3612
|
-
throw new WikiBusyError("ingest", entityId);
|
|
3613
|
-
}
|
|
3614
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
3615
|
-
throw new WikiBusyError("prune", entityId);
|
|
3616
|
-
}
|
|
3617
|
-
if (this._isReembedActive(entityId)) {
|
|
3618
|
-
throw new WikiBusyError("reembed", entityId);
|
|
3619
|
-
}
|
|
3620
|
-
if (this._isImportActiveFor(entityId)) {
|
|
3621
|
-
throw new WikiBusyError("import", entityId);
|
|
3622
|
-
}
|
|
3623
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
3624
|
-
throw new WikiBusyError("forget", entityId);
|
|
3625
|
-
}
|
|
3626
|
-
this.activeIngestJobs.add(jobKey);
|
|
3627
|
-
this._notifyStatusSubscribers(entityId);
|
|
3628
|
-
try {
|
|
3629
|
-
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
3630
|
-
if (chunks.length === 0) {
|
|
3631
|
-
return { truncated: false, chunks: 0 };
|
|
3632
|
-
}
|
|
3633
|
-
const chunkResults = await withConcurrency(
|
|
3634
|
-
chunks.map((chunk) => async () => {
|
|
3635
|
-
const userPrompt = `Document Chunk:
|
|
3636
|
-
${chunk}`;
|
|
3637
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
3638
|
-
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
3639
|
-
userPrompt
|
|
3640
|
-
});
|
|
3641
|
-
const result = parseJsonResponse(responseText);
|
|
3642
|
-
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
3643
|
-
}),
|
|
3644
|
-
chunkConcurrency
|
|
3645
|
-
);
|
|
3646
|
-
const seen = /* @__PURE__ */ new Set();
|
|
3647
|
-
const allValidFacts = [];
|
|
3648
|
-
for (const facts of chunkResults) {
|
|
3649
|
-
for (const fact of facts) {
|
|
3650
|
-
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
3651
|
-
if (!seen.has(normalized)) {
|
|
3652
|
-
seen.add(normalized);
|
|
3653
|
-
allValidFacts.push(fact);
|
|
3654
|
-
}
|
|
3655
|
-
}
|
|
3656
|
-
}
|
|
3657
|
-
const now = Date.now();
|
|
3658
|
-
const insertedFacts = [];
|
|
3659
|
-
const deletedSourceFactIds = [];
|
|
3660
|
-
await this.db.withTransactionAsync(async (tx) => {
|
|
3661
|
-
deletedSourceFactIds.push(...await this.entryRepo.findIdsBySource(entityId, sourceRef, null, tx, false));
|
|
3662
|
-
await this.entryRepo.softDeleteBySource(entityId, tx, sourceRef, null);
|
|
3663
|
-
for (const fact of allValidFacts) {
|
|
3664
|
-
const id = generateId("fact_");
|
|
3665
|
-
const wikiFact = {
|
|
3666
|
-
id,
|
|
3667
|
-
entity_id: entityId,
|
|
3668
|
-
title: fact.title,
|
|
3669
|
-
body: fact.body,
|
|
3670
|
-
tags: fact.tags,
|
|
3671
|
-
confidence: fact.confidence,
|
|
3672
|
-
source_type: "immutable_document",
|
|
3673
|
-
source_hash: sourceHash,
|
|
3674
|
-
source_ref: sourceRef,
|
|
3675
|
-
created_at: now,
|
|
3676
|
-
updated_at: now,
|
|
3677
|
-
last_accessed_at: null,
|
|
3678
|
-
access_count: 0,
|
|
3679
|
-
deleted_at: null
|
|
3680
|
-
};
|
|
3681
|
-
await this.entryRepo.upsert(wikiFact, tx);
|
|
3682
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
3683
|
-
}
|
|
3684
|
-
});
|
|
3685
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
3686
|
-
this.vectorCache.delete(entityId);
|
|
3687
|
-
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
3688
|
-
for (const factId of uniqueDeletedSourceFactIds) {
|
|
3689
|
-
try {
|
|
3690
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
3691
|
-
} catch (hookErr) {
|
|
3692
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
3693
|
-
}
|
|
3694
|
-
}
|
|
3695
|
-
for (const fact of insertedFacts) {
|
|
3696
|
-
await this.embedFact(fact);
|
|
1617
|
+
return this.ingestionService.ingestDocument(entityId, params);
|
|
1618
|
+
}
|
|
1619
|
+
/**
|
|
1620
|
+
* Returns up to `limit` unprocessed outbox events, oldest first.
|
|
1621
|
+
* Works regardless of enableOutbox value — allows draining after disabling.
|
|
1622
|
+
*/
|
|
1623
|
+
async getUnprocessedOutboxEvents(limit = 100) {
|
|
1624
|
+
if (Number.isFinite(limit) && limit <= 0) return [];
|
|
1625
|
+
const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.trunc(limit) : 100;
|
|
1626
|
+
const rows = await this.outboxRepo.fetchPending(safeLimit);
|
|
1627
|
+
return rows.map((row) => {
|
|
1628
|
+
let payload = null;
|
|
1629
|
+
try {
|
|
1630
|
+
payload = JSON.parse(row.payload);
|
|
1631
|
+
} catch {
|
|
3697
1632
|
}
|
|
3698
|
-
|
|
3699
|
-
|
|
3700
|
-
|
|
3701
|
-
|
|
3702
|
-
|
|
3703
|
-
|
|
1633
|
+
return { ...row, payload };
|
|
1634
|
+
});
|
|
1635
|
+
}
|
|
1636
|
+
/**
|
|
1637
|
+
* Deletes the given event IDs from the outbox table.
|
|
1638
|
+
* Call after successfully committing events to the external system.
|
|
1639
|
+
*/
|
|
1640
|
+
async markOutboxEventsProcessed(eventIds) {
|
|
1641
|
+
await this.outboxRepo.acknowledge(eventIds);
|
|
3704
1642
|
}
|
|
3705
1643
|
};
|
|
3706
|
-
|
|
3707
|
-
* Maximum number of entities whose parsed embedding vectors are held in
|
|
3708
|
-
* memory. This cap is intentionally conservative so the cache remains safe
|
|
3709
|
-
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
3710
|
-
*/
|
|
3711
|
-
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
3712
|
-
/**
|
|
3713
|
-
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
3714
|
-
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
3715
|
-
* entities while still maintaining a bounded memory footprint.
|
|
3716
|
-
*/
|
|
3717
|
-
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
3718
|
-
var WikiMemory = _WikiMemory;
|
|
1644
|
+
_testAccessNonTestEnvWarned = new WeakMap();
|
|
3719
1645
|
|
|
3720
1646
|
// src/utils/formatContext.ts
|
|
3721
1647
|
function validateMaxOption(value, name) {
|
|
@@ -3982,6 +1908,6 @@ function createWiki(db, options) {
|
|
|
3982
1908
|
return new WikiMemory(db, options);
|
|
3983
1909
|
}
|
|
3984
1910
|
|
|
3985
|
-
export { DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT,
|
|
1911
|
+
export { DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT, WikiMemory, createWiki, formatContext, formatMemoryDump, hydrateLibrarianPrompt, mapLibrarianOptionsToReadOptions, validateLibrarianPromptTemplate };
|
|
3986
1912
|
//# sourceMappingURL=index.mjs.map
|
|
3987
1913
|
//# sourceMappingURL=index.mjs.map
|