@equationalapplications/core-llm-wiki 4.6.1 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -0
- package/dist/chunk-2FGDZKC2.mjs +2547 -0
- package/dist/chunk-2FGDZKC2.mjs.map +1 -0
- package/dist/index.d.mts +4 -530
- package/dist/index.d.ts +4 -530
- package/dist/index.js +3720 -2489
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1237 -2549
- package/dist/index.mjs.map +1 -1
- package/dist/testing-hfpeX01Q.d.mts +1112 -0
- package/dist/testing-hfpeX01Q.d.ts +1112 -0
- package/dist/testing.d.mts +2 -0
- package/dist/testing.d.ts +2 -0
- package/dist/testing.js +2552 -0
- package/dist/testing.js.map +1 -0
- package/dist/testing.mjs +3 -0
- package/dist/testing.mjs.map +1 -0
- package/package.json +6 -1
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { __privateAdd, EmbeddingService, SearchService, JobManager, PromptService, IngestionService, MaintenanceService, ImportExportService, RetrievalService, WriteService, __privateGet, __privateSet, normalizeSourceRef, normalizeSourceHash, generateId } from './chunk-2FGDZKC2.mjs';
|
|
2
|
+
export { HOOK_TIMEOUT_MARKER, PromptService, PrunePartialFailureError, WikiBusyError, parseEmbedding } from './chunk-2FGDZKC2.mjs';
|
|
2
3
|
|
|
3
4
|
// src/db/schema.ts
|
|
4
5
|
async function setupDatabase(db, prefix) {
|
|
@@ -62,6 +63,19 @@ async function setupDatabase(db, prefix) {
|
|
|
62
63
|
key TEXT PRIMARY KEY,
|
|
63
64
|
value TEXT NOT NULL
|
|
64
65
|
);
|
|
66
|
+
|
|
67
|
+
CREATE TABLE IF NOT EXISTS ${prefix}outbox (
|
|
68
|
+
id TEXT PRIMARY KEY,
|
|
69
|
+
entity_id TEXT NOT NULL,
|
|
70
|
+
table_name TEXT NOT NULL,
|
|
71
|
+
record_id TEXT NOT NULL,
|
|
72
|
+
operation TEXT NOT NULL,
|
|
73
|
+
payload TEXT NOT NULL,
|
|
74
|
+
created_at INTEGER NOT NULL
|
|
75
|
+
);
|
|
76
|
+
|
|
77
|
+
CREATE INDEX IF NOT EXISTS ${prefix}outbox_entity_id_created_at
|
|
78
|
+
ON ${prefix}outbox (entity_id, created_at);
|
|
65
79
|
`);
|
|
66
80
|
}
|
|
67
81
|
|
|
@@ -77,8 +91,8 @@ var MIGRATIONS = [
|
|
|
77
91
|
version: 2,
|
|
78
92
|
description: "Remove FTS5; add embedding column for semantic retrieval",
|
|
79
93
|
run: async (db, prefix) => {
|
|
80
|
-
await db.withTransactionAsync(async () => {
|
|
81
|
-
await
|
|
94
|
+
await db.withTransactionAsync(async (tx) => {
|
|
95
|
+
await tx.execAsync(`
|
|
82
96
|
DROP TRIGGER IF EXISTS ${prefix}entries_ai;
|
|
83
97
|
DROP TRIGGER IF EXISTS ${prefix}entries_ad;
|
|
84
98
|
DROP TRIGGER IF EXISTS ${prefix}entries_au;
|
|
@@ -106,6 +120,25 @@ var MIGRATIONS = [
|
|
|
106
120
|
);
|
|
107
121
|
}
|
|
108
122
|
}
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
version: 4,
|
|
126
|
+
description: "Create outbox table for change data capture",
|
|
127
|
+
run: async (db, prefix) => {
|
|
128
|
+
await db.execAsync(`
|
|
129
|
+
CREATE TABLE IF NOT EXISTS ${prefix}outbox (
|
|
130
|
+
id TEXT PRIMARY KEY,
|
|
131
|
+
entity_id TEXT NOT NULL,
|
|
132
|
+
table_name TEXT NOT NULL,
|
|
133
|
+
record_id TEXT NOT NULL,
|
|
134
|
+
operation TEXT NOT NULL,
|
|
135
|
+
payload TEXT NOT NULL,
|
|
136
|
+
created_at INTEGER NOT NULL
|
|
137
|
+
);
|
|
138
|
+
CREATE INDEX IF NOT EXISTS ${prefix}outbox_entity_id_created_at
|
|
139
|
+
ON ${prefix}outbox (entity_id, created_at);
|
|
140
|
+
`);
|
|
141
|
+
}
|
|
109
142
|
}
|
|
110
143
|
];
|
|
111
144
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -117,28 +150,6 @@ for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
|
117
150
|
}
|
|
118
151
|
var CURRENT_SCHEMA_VERSION = MIGRATIONS.length > 0 ? MIGRATIONS[MIGRATIONS.length - 1].version : 0;
|
|
119
152
|
|
|
120
|
-
// src/types.ts
|
|
121
|
-
var WikiBusyError = class extends Error {
|
|
122
|
-
constructor(operation, entityId) {
|
|
123
|
-
super(`${operation} already running for entity ${entityId}`);
|
|
124
|
-
this.name = "WikiBusyError";
|
|
125
|
-
this.operation = operation;
|
|
126
|
-
this.entityId = entityId;
|
|
127
|
-
}
|
|
128
|
-
};
|
|
129
|
-
var PrunePartialFailureError = class extends Error {
|
|
130
|
-
constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
|
|
131
|
-
super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
|
|
132
|
-
this.name = "PrunePartialFailureError";
|
|
133
|
-
this.deleted = deleted;
|
|
134
|
-
this.failedAt = failedAt;
|
|
135
|
-
this.remaining = remaining;
|
|
136
|
-
this.deletedTasks = deletedTasks;
|
|
137
|
-
this.deletedEvents = deletedEvents;
|
|
138
|
-
this.cause = cause;
|
|
139
|
-
}
|
|
140
|
-
};
|
|
141
|
-
|
|
142
153
|
// src/repositories/BaseRepository.ts
|
|
143
154
|
var BaseRepository = class {
|
|
144
155
|
constructor(db, prefix) {
|
|
@@ -182,9 +193,32 @@ function mapRowToFact(row) {
|
|
|
182
193
|
access_count: Number(row.access_count ?? 0)
|
|
183
194
|
};
|
|
184
195
|
}
|
|
196
|
+
/**
 * Coerce an `embedding_blob` value into a `Uint8Array` of raw bytes.
 *
 * Accepted shapes:
 *  - `Uint8Array` (including subclasses): returned as-is, not copied.
 *  - Any other ArrayBuffer view (`Float32Array`, `DataView`, ...): re-viewed as
 *    bytes over the same underlying memory. Without this branch such values
 *    would fall into the numeric-key path below and have each *element*
 *    truncated to a single byte, corrupting the vector.
 *  - `ArrayBuffer`: wrapped in a `Uint8Array` (no copy).
 *  - `{ type: "Buffer", data: number[] }`: the `data` array is copied into bytes.
 *  - A plain object whose own enumerable keys are all decimal digit strings
 *    (e.g. `{ "0": 12, "1": 255 }`): copied index by index; an index that is
 *    missing or nullish is written as 0.
 *
 * @param {unknown} blob - Value to normalize.
 * @returns {Uint8Array | null} The bytes, or `null` when `blob` is nullish,
 *   not an object, or has none of the shapes above.
 */
function normalizeEmbeddingBlobValue(blob) {
  if (blob instanceof Uint8Array) return blob;
  if (blob === null || blob === void 0 || typeof blob !== "object") return null;

  // Non-Uint8Array views must be reinterpreted byte-wise, never element-wise.
  if (ArrayBuffer.isView(blob)) {
    return new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength);
  }
  // An ArrayBuffer has no enumerable keys, so it needs its own branch.
  if (blob instanceof ArrayBuffer) {
    return new Uint8Array(blob);
  }

  if (blob["type"] === "Buffer" && Array.isArray(blob["data"])) {
    return new Uint8Array(blob["data"]);
  }

  const keys = Object.keys(blob);
  if (keys.length === 0 || !keys.every((k) => /^\d+$/.test(k))) return null;

  // The length comes from the key count, so indices are assumed dense from 0.
  const bytes = new Uint8Array(keys.length);
  for (let i = 0; i < keys.length; i++) bytes[i] = blob[String(i)] ?? 0;
  return bytes;
}
|
|
213
|
+
/**
 * Map a raw `entries` row to a fact via mapRowToFact(), additionally carrying
 * the row's `embedding_blob` when it normalizes to a byte array.
 *
 * @param {object} row - Row object as returned by the database executor.
 * @returns {object} The mapped fact; `embedding_blob` is present only when
 *   normalizeEmbeddingBlobValue() returned a truthy value for the row's blob.
 */
function mapRowToFactWithBlobs(row) {
  const fact = mapRowToFact(row);
  const bytes = normalizeEmbeddingBlobValue(row.embedding_blob);
  if (!bytes) {
    return fact;
  }
  return { ...fact, embedding_blob: bytes };
}
|
|
185
218
|
var EntryRepository = class extends BaseRepository {
|
|
186
|
-
constructor() {
|
|
187
|
-
super(
|
|
219
|
+
constructor(db, prefix, outbox) {
|
|
220
|
+
super(db, prefix);
|
|
221
|
+
this.outbox = outbox;
|
|
188
222
|
this.chunkSize = 500;
|
|
189
223
|
}
|
|
190
224
|
/**
|
|
@@ -211,19 +245,19 @@ var EntryRepository = class extends BaseRepository {
|
|
|
211
245
|
/**
|
|
212
246
|
* Upsert a WikiFact. Nullable fields set to null when fact value is null.
|
|
213
247
|
* Returns { changes, lastInsertRowId }.
|
|
248
|
+
* `tx` is REQUIRED to ensure atomic outbox staging.
|
|
214
249
|
*/
|
|
215
250
|
async upsert(fact, tx) {
|
|
216
251
|
const executor = this.getExecutor(tx);
|
|
217
252
|
const now = Date.now();
|
|
218
253
|
const tagsJson = JSON.stringify(fact.tags);
|
|
219
|
-
const embeddingBlob =
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
return executor.runAsync(
|
|
254
|
+
const embeddingBlob = this.normalizeEmbeddingBlob(fact.embedding_blob);
|
|
255
|
+
const existingRow = await executor.getFirstAsync(
|
|
256
|
+
`SELECT id FROM ${this.prefix}entries WHERE id = ?`,
|
|
257
|
+
[fact.id]
|
|
258
|
+
);
|
|
259
|
+
const operation = fact.deleted_at ? "DELETE" : existingRow ? "UPDATE" : "INSERT";
|
|
260
|
+
const result = await executor.runAsync(
|
|
227
261
|
`INSERT INTO ${this.prefix}entries (
|
|
228
262
|
id, entity_id, title, body, tags, confidence, source_type,
|
|
229
263
|
source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count,
|
|
@@ -256,7 +290,6 @@ var EntryRepository = class extends BaseRepository {
|
|
|
256
290
|
fact.source_ref,
|
|
257
291
|
fact.created_at,
|
|
258
292
|
now,
|
|
259
|
-
// updated_at set by repo
|
|
260
293
|
fact.last_accessed_at === null ? null : fact.last_accessed_at,
|
|
261
294
|
fact.access_count,
|
|
262
295
|
fact.deleted_at ?? null,
|
|
@@ -264,17 +297,172 @@ var EntryRepository = class extends BaseRepository {
|
|
|
264
297
|
null
|
|
265
298
|
]
|
|
266
299
|
);
|
|
300
|
+
await this.outbox.push({
|
|
301
|
+
entityId: fact.entity_id,
|
|
302
|
+
tableName: "entries",
|
|
303
|
+
recordId: fact.id,
|
|
304
|
+
operation,
|
|
305
|
+
payload: fact
|
|
306
|
+
}, tx);
|
|
307
|
+
return result;
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Normalize an embedding blob value to Uint8Array or null.
|
|
311
|
+
*/
|
|
312
|
+
normalizeEmbeddingBlob(blob) {
|
|
313
|
+
return normalizeEmbeddingBlobValue(blob);
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* Fetch existing rows by IDs and return id/entity_id/updated_at for import collision resolution.
|
|
317
|
+
*/
|
|
318
|
+
async findExistingMetadataByIds(ids, tx) {
|
|
319
|
+
const executor = this.getExecutor(tx);
|
|
320
|
+
const rows = [];
|
|
321
|
+
for (let i = 0; i < ids.length; i += this.chunkSize) {
|
|
322
|
+
const chunk = ids.slice(i, i + this.chunkSize);
|
|
323
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
324
|
+
const chunkRows = await executor.getAllAsync(
|
|
325
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
326
|
+
chunk
|
|
327
|
+
);
|
|
328
|
+
rows.push(...chunkRows.map((row) => ({ id: row.id, entity_id: row.entity_id, updated_at: Number(row.updated_at) })));
|
|
329
|
+
}
|
|
330
|
+
return rows;
|
|
331
|
+
}
|
|
332
|
+
async findIdById(id, entityId, tx) {
|
|
333
|
+
const executor = this.getExecutor(tx);
|
|
334
|
+
const row = await executor.getFirstAsync(
|
|
335
|
+
`SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ?`,
|
|
336
|
+
[id, entityId]
|
|
337
|
+
);
|
|
338
|
+
return row?.id ?? null;
|
|
339
|
+
}
|
|
340
|
+
async findIdsBySource(entityId, sourceRef, sourceHash, tx, includeDeleted = false) {
|
|
341
|
+
const executor = this.getExecutor(tx);
|
|
342
|
+
let sql = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ?`;
|
|
343
|
+
const args = [entityId];
|
|
344
|
+
if (sourceRef !== null) {
|
|
345
|
+
sql += ` AND source_ref = ?`;
|
|
346
|
+
args.push(sourceRef);
|
|
347
|
+
}
|
|
348
|
+
if (sourceHash !== null) {
|
|
349
|
+
sql += ` AND source_hash = ?`;
|
|
350
|
+
args.push(sourceHash);
|
|
351
|
+
}
|
|
352
|
+
if (!includeDeleted) {
|
|
353
|
+
sql += ` AND deleted_at IS NULL`;
|
|
354
|
+
}
|
|
355
|
+
const rows = await executor.getAllAsync(sql, args);
|
|
356
|
+
return rows.map((row) => row.id);
|
|
357
|
+
}
|
|
358
|
+
async upsertForImport(fact, tx) {
|
|
359
|
+
const executor = this.getExecutor(tx);
|
|
360
|
+
const tagsJson = JSON.stringify(fact.tags);
|
|
361
|
+
const embeddingBlob = this.normalizeEmbeddingBlob(fact.embedding_blob);
|
|
362
|
+
const result = await executor.runAsync(
|
|
363
|
+
`INSERT INTO ${this.prefix}entries (
|
|
364
|
+
id, entity_id, title, body, tags, confidence, source_type,
|
|
365
|
+
source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count,
|
|
366
|
+
deleted_at, embedding_blob, embedding
|
|
367
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
368
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
369
|
+
entity_id = excluded.entity_id,
|
|
370
|
+
title = excluded.title,
|
|
371
|
+
body = excluded.body,
|
|
372
|
+
tags = excluded.tags,
|
|
373
|
+
confidence = excluded.confidence,
|
|
374
|
+
source_type = excluded.source_type,
|
|
375
|
+
source_hash = excluded.source_hash,
|
|
376
|
+
source_ref = excluded.source_ref,
|
|
377
|
+
created_at = excluded.created_at,
|
|
378
|
+
updated_at = excluded.updated_at,
|
|
379
|
+
last_accessed_at = excluded.last_accessed_at,
|
|
380
|
+
access_count = excluded.access_count,
|
|
381
|
+
deleted_at = excluded.deleted_at,
|
|
382
|
+
embedding_blob = excluded.embedding_blob,
|
|
383
|
+
embedding = NULL`,
|
|
384
|
+
[
|
|
385
|
+
fact.id,
|
|
386
|
+
fact.entity_id,
|
|
387
|
+
fact.title,
|
|
388
|
+
fact.body,
|
|
389
|
+
tagsJson,
|
|
390
|
+
fact.confidence,
|
|
391
|
+
fact.source_type,
|
|
392
|
+
fact.source_hash,
|
|
393
|
+
fact.source_ref,
|
|
394
|
+
fact.created_at,
|
|
395
|
+
fact.updated_at,
|
|
396
|
+
fact.last_accessed_at === null ? null : fact.last_accessed_at,
|
|
397
|
+
fact.access_count,
|
|
398
|
+
fact.deleted_at ?? null,
|
|
399
|
+
embeddingBlob ?? null,
|
|
400
|
+
null
|
|
401
|
+
]
|
|
402
|
+
);
|
|
403
|
+
return result;
|
|
267
404
|
}
|
|
268
405
|
/**
|
|
269
406
|
* Soft-delete a single entry by ID scoped to entityId. Sets deleted_at + updated_at.
|
|
407
|
+
* `tx` is REQUIRED to ensure atomic outbox staging.
|
|
270
408
|
*/
|
|
271
409
|
async softDelete(entryId, entityId, tx) {
|
|
272
410
|
const executor = this.getExecutor(tx);
|
|
273
411
|
const now = Date.now();
|
|
274
|
-
|
|
412
|
+
const result = await executor.runAsync(
|
|
275
413
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
276
414
|
[now, now, entryId, entityId]
|
|
277
415
|
);
|
|
416
|
+
await this.outbox.push({
|
|
417
|
+
entityId,
|
|
418
|
+
tableName: "entries",
|
|
419
|
+
recordId: entryId,
|
|
420
|
+
operation: "DELETE",
|
|
421
|
+
payload: { id: entryId, entity_id: entityId, deleted_at: now }
|
|
422
|
+
}, tx);
|
|
423
|
+
return result;
|
|
424
|
+
}
|
|
425
|
+
/**
|
|
426
|
+
* Soft-delete entries by source_ref and/or source_hash within a transaction.
|
|
427
|
+
* Stages a DELETE outbox entry for each row in the same transaction.
|
|
428
|
+
* `tx` is REQUIRED.
|
|
429
|
+
* Returns the number of rows deleted.
|
|
430
|
+
*/
|
|
431
|
+
async softDeleteBySource(entityId, tx, sourceRef, sourceHash) {
|
|
432
|
+
const executor = this.getExecutor(tx);
|
|
433
|
+
const now = Date.now();
|
|
434
|
+
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
435
|
+
const args = [now, now, entityId];
|
|
436
|
+
if (sourceRef) {
|
|
437
|
+
q += ` AND source_ref = ?`;
|
|
438
|
+
args.push(sourceRef);
|
|
439
|
+
}
|
|
440
|
+
if (sourceHash) {
|
|
441
|
+
q += ` AND source_hash = ?`;
|
|
442
|
+
args.push(sourceHash);
|
|
443
|
+
}
|
|
444
|
+
let selectQ = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
445
|
+
const selectArgs = [entityId];
|
|
446
|
+
if (sourceRef) {
|
|
447
|
+
selectQ += ` AND source_ref = ?`;
|
|
448
|
+
selectArgs.push(sourceRef);
|
|
449
|
+
}
|
|
450
|
+
if (sourceHash) {
|
|
451
|
+
selectQ += ` AND source_hash = ?`;
|
|
452
|
+
selectArgs.push(sourceHash);
|
|
453
|
+
}
|
|
454
|
+
const idsToDelete = await executor.getAllAsync(selectQ, selectArgs);
|
|
455
|
+
const result = await executor.runAsync(q, args);
|
|
456
|
+
for (const row of idsToDelete) {
|
|
457
|
+
await this.outbox.push({
|
|
458
|
+
entityId,
|
|
459
|
+
tableName: "entries",
|
|
460
|
+
recordId: row.id,
|
|
461
|
+
operation: "DELETE",
|
|
462
|
+
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
463
|
+
}, tx);
|
|
464
|
+
}
|
|
465
|
+
return result.changes;
|
|
278
466
|
}
|
|
279
467
|
/**
|
|
280
468
|
* Fetch IDs + entity_ids of soft-deleted rows older than cutoff for a given entity.
|
|
@@ -288,2635 +476,1135 @@ var EntryRepository = class extends BaseRepository {
|
|
|
288
476
|
[entityId, cutoff]
|
|
289
477
|
);
|
|
290
478
|
}
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
Return ONLY a valid JSON object matching this schema:
|
|
303
|
-
{
|
|
304
|
-
"downgraded": ["string (fact IDs)"],
|
|
305
|
-
"deleted": ["string (fact IDs)"],
|
|
306
|
-
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
307
|
-
}
|
|
308
|
-
Do not return markdown, just raw JSON.`;
|
|
309
|
-
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
310
|
-
Return ONLY a valid JSON object matching this schema:
|
|
311
|
-
{
|
|
312
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
313
|
-
}
|
|
314
|
-
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
315
|
-
|
|
316
|
-
// src/utils/cosine.ts
|
|
317
|
-
function cosineSimilarity(a, b) {
|
|
318
|
-
let dot = 0, normA = 0, normB = 0;
|
|
319
|
-
const len = Math.min(a.length, b.length);
|
|
320
|
-
for (let i = 0; i < len; i++) {
|
|
321
|
-
dot += a[i] * b[i];
|
|
322
|
-
normA += a[i] * a[i];
|
|
323
|
-
normB += b[i] * b[i];
|
|
324
|
-
}
|
|
325
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
326
|
-
return denom === 0 ? 0 : dot / denom;
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// src/utils/embedding.ts
|
|
330
|
-
function parseEmbedding(blob, text) {
|
|
331
|
-
if (blob && blob.byteLength > 0) {
|
|
332
|
-
if (blob.byteLength % 4 !== 0) return null;
|
|
333
|
-
const copy = new ArrayBuffer(blob.byteLength);
|
|
334
|
-
new Uint8Array(copy).set(blob);
|
|
335
|
-
const vector = new Float32Array(copy);
|
|
336
|
-
for (const value of vector) {
|
|
337
|
-
if (!Number.isFinite(value)) return null;
|
|
338
|
-
}
|
|
339
|
-
return vector;
|
|
340
|
-
}
|
|
341
|
-
if (text) {
|
|
342
|
-
try {
|
|
343
|
-
const arr = JSON.parse(text);
|
|
344
|
-
if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && isFinite(v))) return null;
|
|
345
|
-
const vector = new Float32Array(arr);
|
|
346
|
-
for (const value of vector) {
|
|
347
|
-
if (!Number.isFinite(value)) return null;
|
|
348
|
-
}
|
|
349
|
-
return vector;
|
|
350
|
-
} catch {
|
|
351
|
-
return null;
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
return null;
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
// src/readOptions.ts
|
|
358
|
-
function normalizeEntityIds(entityId) {
|
|
359
|
-
const input = Array.isArray(entityId) ? entityId : [entityId];
|
|
360
|
-
const seen = /* @__PURE__ */ new Set();
|
|
361
|
-
const normalized = [];
|
|
362
|
-
for (const id of input) {
|
|
363
|
-
if (seen.has(id)) continue;
|
|
364
|
-
seen.add(id);
|
|
365
|
-
normalized.push(id);
|
|
366
|
-
}
|
|
367
|
-
return normalized;
|
|
368
|
-
}
|
|
369
|
-
function sanitizeTierWeights(entityIds, tierWeights) {
|
|
370
|
-
if (tierWeights === void 0) return void 0;
|
|
371
|
-
const sanitized = /* @__PURE__ */ Object.create(null);
|
|
372
|
-
for (const entityId of entityIds) {
|
|
373
|
-
const raw = tierWeights[entityId];
|
|
374
|
-
if (raw === void 0 || !Number.isFinite(raw)) {
|
|
375
|
-
sanitized[entityId] = 1;
|
|
376
|
-
} else {
|
|
377
|
-
sanitized[entityId] = Math.max(0, raw);
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
return sanitized;
|
|
381
|
-
}
|
|
382
|
-
function applyTierWeight(score, entityId, sanitizedTierWeights) {
|
|
383
|
-
const weight = sanitizedTierWeights?.[entityId] ?? 1;
|
|
384
|
-
if (weight === 0) return -Infinity;
|
|
385
|
-
return score * weight;
|
|
386
|
-
}
|
|
387
|
-
function shouldExposeReadMetadata(entityId) {
|
|
388
|
-
return Array.isArray(entityId);
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// src/WikiMemory.ts
|
|
392
|
-
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
393
|
-
function parseJsonResponse(text) {
|
|
394
|
-
const firstBrace = text.indexOf("{");
|
|
395
|
-
const firstBracket = text.indexOf("[");
|
|
396
|
-
let start;
|
|
397
|
-
let openChar;
|
|
398
|
-
let closeChar;
|
|
399
|
-
if (firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket)) {
|
|
400
|
-
start = firstBrace;
|
|
401
|
-
openChar = "{";
|
|
402
|
-
closeChar = "}";
|
|
403
|
-
} else if (firstBracket !== -1) {
|
|
404
|
-
start = firstBracket;
|
|
405
|
-
openChar = "[";
|
|
406
|
-
closeChar = "]";
|
|
407
|
-
} else {
|
|
408
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
409
|
-
}
|
|
410
|
-
let depth = 0;
|
|
411
|
-
let inString = false;
|
|
412
|
-
let escape = false;
|
|
413
|
-
let end = -1;
|
|
414
|
-
for (let i = start; i < text.length; i++) {
|
|
415
|
-
const ch = text[i];
|
|
416
|
-
if (escape) {
|
|
417
|
-
escape = false;
|
|
418
|
-
continue;
|
|
419
|
-
}
|
|
420
|
-
if (ch === "\\" && inString) {
|
|
421
|
-
escape = true;
|
|
422
|
-
continue;
|
|
423
|
-
}
|
|
424
|
-
if (ch === '"') {
|
|
425
|
-
inString = !inString;
|
|
426
|
-
continue;
|
|
427
|
-
}
|
|
428
|
-
if (inString) continue;
|
|
429
|
-
if (ch === openChar) {
|
|
430
|
-
depth++;
|
|
431
|
-
continue;
|
|
432
|
-
}
|
|
433
|
-
if (ch === closeChar) {
|
|
434
|
-
depth--;
|
|
435
|
-
if (depth === 0) {
|
|
436
|
-
end = i;
|
|
437
|
-
break;
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
|
|
442
|
-
return JSON.parse(text.slice(start, end + 1));
|
|
443
|
-
}
|
|
444
|
-
function generateId(prefix = "") {
|
|
445
|
-
return prefix + Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
|
|
446
|
-
}
|
|
447
|
-
function safeSlice(value, start, end) {
|
|
448
|
-
const length = value.length;
|
|
449
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
450
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
451
|
-
if (safeStart > safeEnd) {
|
|
452
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
453
|
-
}
|
|
454
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
455
|
-
safeStart--;
|
|
456
|
-
}
|
|
457
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
458
|
-
safeEnd--;
|
|
459
|
-
}
|
|
460
|
-
return value.slice(safeStart, safeEnd);
|
|
461
|
-
}
|
|
462
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
463
|
-
const text = input.trim();
|
|
464
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
465
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
466
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
467
|
-
}
|
|
468
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
469
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
470
|
-
}
|
|
471
|
-
const chunks = [];
|
|
472
|
-
let truncated = false;
|
|
473
|
-
let cursor = 0;
|
|
474
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
475
|
-
while (cursor < text.length) {
|
|
476
|
-
const remaining = text.length - cursor;
|
|
477
|
-
if (remaining <= maxChunkLength) {
|
|
478
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
479
|
-
break;
|
|
480
|
-
}
|
|
481
|
-
const windowEnd = cursor + maxChunkLength;
|
|
482
|
-
const minSplit = cursor + halfMax;
|
|
483
|
-
let splitPoint = -1;
|
|
484
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
485
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
486
|
-
splitPoint = paraIdx + 2;
|
|
487
|
-
}
|
|
488
|
-
if (splitPoint === -1) {
|
|
489
|
-
let lastTerm = -1;
|
|
490
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
491
|
-
const ch = text[i];
|
|
492
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
493
|
-
lastTerm = i + 2;
|
|
494
|
-
}
|
|
495
|
-
}
|
|
496
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
497
|
-
}
|
|
498
|
-
if (splitPoint === -1) {
|
|
499
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
500
|
-
if (/\s/.test(text[i])) {
|
|
501
|
-
splitPoint = i + 1;
|
|
502
|
-
break;
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
if (splitPoint === -1) {
|
|
507
|
-
truncated = true;
|
|
508
|
-
splitPoint = windowEnd;
|
|
509
|
-
}
|
|
510
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
511
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
512
|
-
cursor = next;
|
|
513
|
-
}
|
|
514
|
-
return { chunks, truncated };
|
|
515
|
-
}
|
|
516
|
-
async function withConcurrency(tasks, limit) {
|
|
517
|
-
const results = new Array(tasks.length);
|
|
518
|
-
let index = 0;
|
|
519
|
-
let failed = false;
|
|
520
|
-
let firstError;
|
|
521
|
-
async function worker() {
|
|
522
|
-
while (index < tasks.length && !failed) {
|
|
523
|
-
const i = index++;
|
|
524
|
-
try {
|
|
525
|
-
results[i] = await tasks[i]();
|
|
526
|
-
} catch (e) {
|
|
527
|
-
if (!failed) {
|
|
528
|
-
failed = true;
|
|
529
|
-
firstError = e;
|
|
530
|
-
}
|
|
531
|
-
return;
|
|
532
|
-
}
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
536
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
537
|
-
if (failed) throw firstError;
|
|
538
|
-
return results;
|
|
539
|
-
}
|
|
540
|
-
function clip(value, max) {
|
|
541
|
-
if (typeof value !== "string") return "";
|
|
542
|
-
const s = value.trim();
|
|
543
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
544
|
-
}
|
|
545
|
-
function validateTags(tags) {
|
|
546
|
-
if (!Array.isArray(tags)) return [];
|
|
547
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
548
|
-
}
|
|
549
|
-
function validateFact(fact) {
|
|
550
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
551
|
-
const title = clip(fact.title, 80);
|
|
552
|
-
const body = clip(fact.body, 800);
|
|
553
|
-
if (!title || !body) return null;
|
|
554
|
-
let confidence = fact.confidence;
|
|
555
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
556
|
-
return {
|
|
557
|
-
...fact,
|
|
558
|
-
title,
|
|
559
|
-
body,
|
|
560
|
-
confidence,
|
|
561
|
-
tags: validateTags(fact.tags)
|
|
562
|
-
};
|
|
563
|
-
}
|
|
564
|
-
function validateTask(task) {
|
|
565
|
-
if (typeof task?.description !== "string") return null;
|
|
566
|
-
const description = clip(task.description, 200);
|
|
567
|
-
if (!description) return null;
|
|
568
|
-
let priority = task.priority;
|
|
569
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
570
|
-
return {
|
|
571
|
-
...task,
|
|
572
|
-
description,
|
|
573
|
-
priority
|
|
574
|
-
};
|
|
575
|
-
}
|
|
576
|
-
function normalizeSourceRef(value) {
|
|
577
|
-
if (typeof value !== "string") return null;
|
|
578
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
579
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
580
|
-
}
|
|
581
|
-
function normalizeSourceHash(value) {
|
|
582
|
-
if (typeof value !== "string") return null;
|
|
583
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
584
|
-
}
|
|
585
|
-
function titleTokens(title) {
|
|
586
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
587
|
-
}
|
|
588
|
-
function jaccardScore(a, b) {
|
|
589
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
590
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
591
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
592
|
-
return intersection.size / union.size;
|
|
593
|
-
}
|
|
594
|
-
var FUZZY_THRESHOLD = 0.5;
|
|
595
|
-
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
596
|
-
var _WikiMemory = class _WikiMemory {
|
|
597
|
-
constructor(db, options) {
|
|
598
|
-
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
599
|
-
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
600
|
-
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
601
|
-
this.miniSearch = new MiniSearch({
|
|
602
|
-
fields: ["title", "body", "tags"],
|
|
603
|
-
storeFields: ["entity_id"],
|
|
604
|
-
searchOptions: {
|
|
605
|
-
boost: { title: 2 },
|
|
606
|
-
fuzzy: 0.2,
|
|
607
|
-
prefix: true
|
|
608
|
-
}
|
|
609
|
-
});
|
|
610
|
-
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
611
|
-
this.vectorCache = /* @__PURE__ */ new Map();
|
|
612
|
-
this.db = db;
|
|
613
|
-
this.options = options;
|
|
614
|
-
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
615
|
-
this.entryRepo = new EntryRepository(db, this.prefix);
|
|
479
|
+
/**
|
|
480
|
+
* Fetch all non-deleted entries for an entity, ordered by updated_at DESC.
|
|
481
|
+
* Used by _getFullBundle().
|
|
482
|
+
*/
|
|
483
|
+
async findAllByEntityId(entityId, tx) {
|
|
484
|
+
const executor = this.getExecutor(tx);
|
|
485
|
+
const rows = await executor.getAllAsync(
|
|
486
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
487
|
+
[entityId]
|
|
488
|
+
);
|
|
489
|
+
return rows.map(mapRowToFact);
|
|
616
490
|
}
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
return row.tags;
|
|
629
|
-
}
|
|
630
|
-
})()
|
|
631
|
-
};
|
|
491
|
+
/**
|
|
492
|
+
* Fetch recent non-deleted entries for an entity (limited), ordered by updated_at DESC.
|
|
493
|
+
* Used by MaintenanceService.doRunLibrarian().
|
|
494
|
+
*/
|
|
495
|
+
async findRecentByEntityId(entityId, limit, tx) {
|
|
496
|
+
const executor = this.getExecutor(tx);
|
|
497
|
+
const rows = await executor.getAllAsync(
|
|
498
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC LIMIT ?`,
|
|
499
|
+
[entityId, limit]
|
|
500
|
+
);
|
|
501
|
+
return rows.map(mapRowToFact);
|
|
632
502
|
}
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
}
|
|
645
|
-
const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
|
|
646
|
-
if (documents2.length > 0) {
|
|
647
|
-
this.miniSearch.addAll(documents2);
|
|
648
|
-
}
|
|
649
|
-
this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
|
|
650
|
-
return;
|
|
651
|
-
}
|
|
652
|
-
const rows = await this.db.getAllAsync(`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`);
|
|
653
|
-
this.miniSearch.removeAll();
|
|
654
|
-
this.miniSearchEntryIdsByEntity.clear();
|
|
655
|
-
const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
|
|
656
|
-
if (documents.length > 0) {
|
|
657
|
-
this.miniSearch.addAll(documents);
|
|
658
|
-
}
|
|
659
|
-
for (const document of documents) {
|
|
660
|
-
const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
|
|
661
|
-
ids.add(document.id);
|
|
662
|
-
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
663
|
-
}
|
|
503
|
+
/**
|
|
504
|
+
* Fetch all non-deleted entries for an entity with embedding blobs preserved.
|
|
505
|
+
* Used by ImportExportService for export/import round-tripping.
|
|
506
|
+
*/
|
|
507
|
+
async findAllByEntityIdWithBlobs(entityId, tx) {
|
|
508
|
+
const executor = this.getExecutor(tx);
|
|
509
|
+
const rows = await executor.getAllAsync(
|
|
510
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
511
|
+
[entityId]
|
|
512
|
+
);
|
|
513
|
+
return rows.map(mapRowToFactWithBlobs);
|
|
664
514
|
}
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
515
|
+
/**
|
|
516
|
+
* Count non-deleted entries for the given entities whose embedding_blob dimension
|
|
517
|
+
* doesn't match queryVecLength. Used by read() to detect model-switch mismatches.
|
|
518
|
+
*/
|
|
519
|
+
async countDimensionMismatched(entityIds, queryVecLength, tx) {
|
|
520
|
+
if (entityIds.length === 0) return 0;
|
|
521
|
+
const executor = this.getExecutor(tx);
|
|
522
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
523
|
+
const row = await executor.getFirstAsync(
|
|
524
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
525
|
+
WHERE entity_id IN (${placeholders}) AND deleted_at IS NULL
|
|
526
|
+
AND embedding_blob IS NOT NULL
|
|
527
|
+
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
528
|
+
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
529
|
+
[...entityIds, queryVecLength]
|
|
668
530
|
);
|
|
669
|
-
|
|
670
|
-
const storedDim = parseInt(existing.value, 10);
|
|
671
|
-
if (storedDim !== dim) {
|
|
672
|
-
console.warn(
|
|
673
|
-
`[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
|
|
674
|
-
);
|
|
675
|
-
await this.db.runAsync(
|
|
676
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
677
|
-
[String(dim)]
|
|
678
|
-
);
|
|
679
|
-
}
|
|
680
|
-
} else {
|
|
681
|
-
await this.db.runAsync(
|
|
682
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
683
|
-
[String(dim)]
|
|
684
|
-
);
|
|
685
|
-
}
|
|
531
|
+
return row?.cnt ?? 0;
|
|
686
532
|
}
|
|
687
533
|
/**
|
|
688
|
-
*
|
|
689
|
-
*
|
|
690
|
-
* This ensures future read() calls use embedding-based retrieval rather than staying
|
|
691
|
-
* stuck on the MiniSearch fallback.
|
|
534
|
+
* Count non-deleted entries for entityId that are stale relative to targetDim
|
|
535
|
+
* (either no blob or wrong dimension). Used by runReembed() per-entity skip logic.
|
|
692
536
|
*/
|
|
693
|
-
async
|
|
694
|
-
const
|
|
695
|
-
|
|
537
|
+
async countStaleForEntity(entityId, targetDim, tx) {
|
|
538
|
+
const executor = this.getExecutor(tx);
|
|
539
|
+
const row = await executor.getFirstAsync(
|
|
540
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
541
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
542
|
+
AND (
|
|
543
|
+
embedding_blob IS NULL
|
|
544
|
+
OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
|
|
545
|
+
)`,
|
|
546
|
+
[entityId, targetDim]
|
|
696
547
|
);
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
548
|
+
return row?.cnt ?? 0;
|
|
549
|
+
}
|
|
550
|
+
/**
|
|
551
|
+
* Count non-deleted entries with stale or unconverted embeddings relative to `dim`.
|
|
552
|
+
* Used by _reconcileEmbeddingDimension() to decide when to promote the pending
|
|
553
|
+
* embedding_dimension value.
|
|
554
|
+
*/
|
|
555
|
+
async countStaleEmbeddings(dim, tx) {
|
|
556
|
+
const executor = this.getExecutor(tx);
|
|
557
|
+
const row = await executor.getFirstAsync(
|
|
700
558
|
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
701
559
|
WHERE deleted_at IS NULL
|
|
702
560
|
AND (
|
|
703
561
|
(embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
|
|
704
562
|
OR (embedding_blob IS NULL AND embedding IS NOT NULL)
|
|
705
563
|
)`,
|
|
706
|
-
[
|
|
564
|
+
[dim]
|
|
707
565
|
);
|
|
708
|
-
|
|
709
|
-
await this.db.runAsync(
|
|
710
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
711
|
-
[mismatch.value]
|
|
712
|
-
);
|
|
713
|
-
await this.db.runAsync(
|
|
714
|
-
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
715
|
-
);
|
|
716
|
-
}
|
|
566
|
+
return row?.cnt ?? 0;
|
|
717
567
|
}
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
try {
|
|
734
|
-
const vector = await embedFn(text);
|
|
735
|
-
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
736
|
-
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
737
|
-
return false;
|
|
738
|
-
}
|
|
739
|
-
const float32Vector = new Float32Array(vector);
|
|
740
|
-
let hasNonFinite = false;
|
|
741
|
-
for (let i = 0; i < float32Vector.length; i++) {
|
|
742
|
-
if (!isFinite(float32Vector[i])) {
|
|
743
|
-
hasNonFinite = true;
|
|
744
|
-
break;
|
|
745
|
-
}
|
|
746
|
-
}
|
|
747
|
-
if (hasNonFinite) {
|
|
748
|
-
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
749
|
-
return false;
|
|
750
|
-
}
|
|
751
|
-
await this.storeEmbeddingDimension(float32Vector.length);
|
|
752
|
-
const blob = new Uint8Array(float32Vector.buffer);
|
|
753
|
-
await this.db.runAsync(
|
|
754
|
-
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
755
|
-
[blob, fact.id]
|
|
568
|
+
/**
|
|
569
|
+
* Bulk delete pruned entries (already soft-deleted) by IDs.
|
|
570
|
+
* Used by runPrune(). Returns total number of deleted rows.
|
|
571
|
+
* `tx` is REQUIRED so outbox deletion events are staged atomically.
|
|
572
|
+
*/
|
|
573
|
+
async bulkDeletePruned(entityId, cutoff, ids, tx) {
|
|
574
|
+
const executor = this.getExecutor(tx);
|
|
575
|
+
let totalChanges = 0;
|
|
576
|
+
const chunkSize = 500;
|
|
577
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
578
|
+
const chunk = ids.slice(i, i + chunkSize);
|
|
579
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
580
|
+
const result = await executor.runAsync(
|
|
581
|
+
`DELETE FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at <= ? AND id IN (${placeholders})`,
|
|
582
|
+
[entityId, cutoff, ...chunk]
|
|
756
583
|
);
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
584
|
+
totalChanges += result.changes;
|
|
585
|
+
if (result.changes > 0) {
|
|
586
|
+
for (const id of chunk) {
|
|
587
|
+
await this.outbox.push({
|
|
588
|
+
entityId,
|
|
589
|
+
tableName: "entries",
|
|
590
|
+
recordId: id,
|
|
591
|
+
operation: "DELETE",
|
|
592
|
+
payload: { id, entity_id: entityId, deleted_at: cutoff }
|
|
593
|
+
}, tx);
|
|
594
|
+
}
|
|
761
595
|
}
|
|
762
|
-
return true;
|
|
763
|
-
} catch (err) {
|
|
764
|
-
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
765
|
-
return false;
|
|
766
596
|
}
|
|
597
|
+
return totalChanges;
|
|
767
598
|
}
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
const allowed = ["user_stated", "librarian_inferred", "user_confirmed", "immutable_document"];
|
|
782
|
-
if (allowed.includes(raw)) return raw;
|
|
783
|
-
const where = ctx !== void 0 ? ` for entity "${ctx.entityId}" fact "${ctx.factId}"` : "";
|
|
784
|
-
throw new Error(
|
|
785
|
-
`importDump: invalid source_type "${raw}"${where} (expected one of: ${allowed.join(", ")}, or legacy aliases user_document / agent_inferred)`
|
|
599
|
+
/**
|
|
600
|
+
* Mark orphaned entries (never accessed, old) as deleted.
|
|
601
|
+
* Used by MaintenanceService.doRunHeal().
|
|
602
|
+
*/
|
|
603
|
+
async markOrphaned(entityId, orphanThreshold, tx) {
|
|
604
|
+
const executor = this.getExecutor(tx);
|
|
605
|
+
const now = Date.now();
|
|
606
|
+
const updatedRows = await executor.getAllAsync(
|
|
607
|
+
`UPDATE ${this.prefix}entries
|
|
608
|
+
SET deleted_at = ?, updated_at = ?
|
|
609
|
+
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
610
|
+
RETURNING id`,
|
|
611
|
+
[now, now, entityId, orphanThreshold]
|
|
786
612
|
);
|
|
613
|
+
for (const row of updatedRows) {
|
|
614
|
+
await this.outbox.push({
|
|
615
|
+
entityId,
|
|
616
|
+
tableName: "entries",
|
|
617
|
+
recordId: row.id,
|
|
618
|
+
operation: "DELETE",
|
|
619
|
+
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
620
|
+
}, tx);
|
|
621
|
+
}
|
|
622
|
+
return updatedRows.map((r) => r.id);
|
|
787
623
|
}
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
);
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
624
|
+
/**
|
|
625
|
+
* Downgrade stale inferred entries to 'tentative'.
|
|
626
|
+
* Used by MaintenanceService.doRunHeal().
|
|
627
|
+
*/
|
|
628
|
+
async downgradeStaleInferred(entityId, staleThreshold, tx) {
|
|
629
|
+
const executor = this.getExecutor(tx);
|
|
630
|
+
const now = Date.now();
|
|
631
|
+
const eligibleRows = await executor.getAllAsync(
|
|
632
|
+
`SELECT id FROM ${this.prefix}entries
|
|
633
|
+
WHERE entity_id = ? AND confidence = 'inferred'
|
|
634
|
+
AND (last_accessed_at <= ? OR (last_accessed_at IS NULL AND created_at <= ?))
|
|
635
|
+
AND source_type != 'immutable_document' AND deleted_at IS NULL`,
|
|
636
|
+
[entityId, staleThreshold, staleThreshold]
|
|
800
637
|
);
|
|
801
|
-
|
|
802
|
-
const
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
throw new Error(
|
|
808
|
-
`Database contains ${count} entries with legacy source_type values ('user_document' or 'agent_inferred'). These enum values were renamed in this release. Running without migration would allow legacy 'user_document' facts to bypass immutability guards, causing data corruption.
|
|
809
|
-
|
|
810
|
-
${migrationSQL}
|
|
811
|
-
|
|
812
|
-
After running the migration SQL, restart your application.`
|
|
638
|
+
if (eligibleRows.length === 0) return 0;
|
|
639
|
+
const result = await executor.runAsync(
|
|
640
|
+
`UPDATE ${this.prefix}entries
|
|
641
|
+
SET confidence = 'tentative', updated_at = ?
|
|
642
|
+
WHERE entity_id = ? AND confidence = 'inferred' AND (last_accessed_at <= ? OR (last_accessed_at IS NULL AND created_at <= ?)) AND source_type != 'immutable_document' AND deleted_at IS NULL`,
|
|
643
|
+
[now, entityId, staleThreshold, staleThreshold]
|
|
813
644
|
);
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
}
|
|
645
|
+
for (const row of eligibleRows) {
|
|
646
|
+
await this.outbox.push({
|
|
647
|
+
entityId,
|
|
648
|
+
tableName: "entries",
|
|
649
|
+
recordId: row.id,
|
|
650
|
+
operation: "UPDATE",
|
|
651
|
+
payload: { id: row.id, entity_id: entityId, confidence: "tentative", updated_at: now }
|
|
652
|
+
}, tx);
|
|
653
|
+
}
|
|
654
|
+
return result.changes;
|
|
823
655
|
}
|
|
824
656
|
/**
|
|
825
|
-
*
|
|
826
|
-
*
|
|
827
|
-
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
657
|
+
* Downgrade specific entries to 'tentative' by IDs.
|
|
658
|
+
* Used by MaintenanceService.doRunHeal().
|
|
828
659
|
*/
|
|
829
|
-
async
|
|
830
|
-
if (
|
|
831
|
-
|
|
832
|
-
const
|
|
833
|
-
const
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
const timeoutMs = rawTimeout;
|
|
838
|
-
let timeoutHandle;
|
|
839
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
840
|
-
timeoutHandle = setTimeout(
|
|
841
|
-
() => {
|
|
842
|
-
const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
|
|
843
|
-
timeoutError[HOOK_TIMEOUT_MARKER] = true;
|
|
844
|
-
reject(timeoutError);
|
|
845
|
-
},
|
|
846
|
-
timeoutMs
|
|
847
|
-
);
|
|
848
|
-
});
|
|
849
|
-
const hookPromise = Promise.resolve(
|
|
850
|
-
this.options.vectorRanker.onEmbeddingPersisted({
|
|
851
|
-
entityId,
|
|
852
|
-
factId,
|
|
853
|
-
vector: vectorCopy
|
|
854
|
-
})
|
|
660
|
+
async downgradeByIds(ids, entityId, tx) {
|
|
661
|
+
if (ids.length === 0) return;
|
|
662
|
+
const executor = this.getExecutor(tx);
|
|
663
|
+
const now = Date.now();
|
|
664
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
665
|
+
await executor.runAsync(
|
|
666
|
+
`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id IN (${placeholders}) AND entity_id = ?`,
|
|
667
|
+
[now, ...ids, entityId]
|
|
855
668
|
);
|
|
856
|
-
|
|
857
|
-
await
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
669
|
+
for (const id of ids) {
|
|
670
|
+
await this.outbox.push({
|
|
671
|
+
entityId,
|
|
672
|
+
tableName: "entries",
|
|
673
|
+
recordId: id,
|
|
674
|
+
operation: "UPDATE",
|
|
675
|
+
payload: { id, entity_id: entityId, confidence: "tentative", updated_at: now }
|
|
676
|
+
}, tx);
|
|
864
677
|
}
|
|
865
678
|
}
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
679
|
+
/**
|
|
680
|
+
* Soft-delete specific entries by IDs.
|
|
681
|
+
* Used by MaintenanceService.doRunHeal().
|
|
682
|
+
*/
|
|
683
|
+
async softDeleteByIds(ids, entityId, tx) {
|
|
684
|
+
if (ids.length === 0) return;
|
|
685
|
+
const executor = this.getExecutor(tx);
|
|
686
|
+
const now = Date.now();
|
|
687
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
688
|
+
await executor.runAsync(
|
|
689
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id IN (${placeholders}) AND entity_id = ?`,
|
|
690
|
+
[now, now, ...ids, entityId]
|
|
870
691
|
);
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
} else {
|
|
880
|
-
const metaRow = await this.db.getFirstAsync(
|
|
881
|
-
`SELECT value FROM ${this.prefix}meta WHERE key = 'schema_version'`
|
|
882
|
-
);
|
|
883
|
-
if (metaRow) {
|
|
884
|
-
currentVersion = parseInt(metaRow.value, 10);
|
|
885
|
-
if (!Number.isFinite(currentVersion)) currentVersion = 0;
|
|
886
|
-
} else {
|
|
887
|
-
const ftsMeta = await this.db.getFirstAsync(
|
|
888
|
-
`SELECT sql FROM sqlite_master WHERE type='table' AND name=?`,
|
|
889
|
-
[`${this.prefix}entries_fts`]
|
|
890
|
-
);
|
|
891
|
-
const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsMeta?.sql ?? "");
|
|
892
|
-
currentVersion = hasPorter ? 1 : 0;
|
|
893
|
-
}
|
|
894
|
-
}
|
|
895
|
-
for (const migration of MIGRATIONS) {
|
|
896
|
-
if (migration.version > currentVersion) {
|
|
897
|
-
await migration.run(this.db, this.prefix);
|
|
898
|
-
await this.db.runAsync(
|
|
899
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('schema_version', ?)`,
|
|
900
|
-
[String(migration.version)]
|
|
901
|
-
);
|
|
902
|
-
currentVersion = migration.version;
|
|
903
|
-
}
|
|
904
|
-
}
|
|
905
|
-
if (entriesExistedBeforeSetup) {
|
|
906
|
-
const metaCheck = await this.db.getFirstAsync(
|
|
907
|
-
`SELECT value FROM ${this.prefix}meta WHERE key = 'schema_version'`
|
|
908
|
-
);
|
|
909
|
-
if (!metaCheck) {
|
|
910
|
-
await this.db.runAsync(
|
|
911
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('schema_version', ?)`,
|
|
912
|
-
[String(currentVersion)]
|
|
913
|
-
);
|
|
914
|
-
}
|
|
915
|
-
}
|
|
916
|
-
if (entriesExistedBeforeSetup) {
|
|
917
|
-
await this.assertNoLegacySourceTypes();
|
|
692
|
+
for (const id of ids) {
|
|
693
|
+
await this.outbox.push({
|
|
694
|
+
entityId,
|
|
695
|
+
tableName: "entries",
|
|
696
|
+
recordId: id,
|
|
697
|
+
operation: "DELETE",
|
|
698
|
+
payload: { id, entity_id: entityId, deleted_at: now }
|
|
699
|
+
}, tx);
|
|
918
700
|
}
|
|
919
|
-
const rows = await this.db.getAllAsync(`
|
|
920
|
-
SELECT rowid, source_ref FROM ${this.prefix}entries
|
|
921
|
-
WHERE source_ref IS NOT NULL
|
|
922
|
-
AND (
|
|
923
|
-
TRIM(source_ref) != source_ref
|
|
924
|
-
OR INSTR(source_ref, '/') > 0
|
|
925
|
-
OR INSTR(source_ref, '\\') > 0
|
|
926
|
-
OR INSTR(source_ref, CHAR(0)) > 0
|
|
927
|
-
OR source_ref GLOB '*[^-A-Za-z0-9._ ]*'
|
|
928
|
-
)
|
|
929
|
-
`);
|
|
930
|
-
await this.db.withTransactionAsync(async () => {
|
|
931
|
-
for (const row of rows) {
|
|
932
|
-
const normalized = normalizeSourceRef(row.source_ref);
|
|
933
|
-
if (normalized !== row.source_ref) {
|
|
934
|
-
await this.db.runAsync(
|
|
935
|
-
`UPDATE ${this.prefix}entries SET source_ref = ? WHERE rowid = ?`,
|
|
936
|
-
[normalized, row.rowid]
|
|
937
|
-
);
|
|
938
|
-
}
|
|
939
|
-
}
|
|
940
|
-
});
|
|
941
|
-
await this.rebuildMiniSearchIndex();
|
|
942
701
|
}
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
702
|
+
/**
|
|
703
|
+
* Bulk soft-delete all entries for an entity.
|
|
704
|
+
* Stages DELETE outbox entries for each row in the same transaction.
|
|
705
|
+
* `tx` is REQUIRED.
|
|
706
|
+
*/
|
|
707
|
+
async bulkSoftDeleteByEntityId(entityId, tx) {
|
|
708
|
+
const executor = this.getExecutor(tx);
|
|
709
|
+
const now = Date.now();
|
|
710
|
+
const idsToDelete = await executor.getAllAsync(
|
|
711
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
712
|
+
[entityId]
|
|
713
|
+
);
|
|
714
|
+
const result = await executor.runAsync(
|
|
715
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
716
|
+
[now, now, entityId]
|
|
717
|
+
);
|
|
718
|
+
for (const row of idsToDelete) {
|
|
719
|
+
await this.outbox.push({
|
|
720
|
+
entityId,
|
|
721
|
+
tableName: "entries",
|
|
722
|
+
recordId: row.id,
|
|
723
|
+
operation: "DELETE",
|
|
724
|
+
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
725
|
+
}, tx);
|
|
947
726
|
}
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
727
|
+
return result.changes;
|
|
728
|
+
}
|
|
729
|
+
async findMiniSearchRows(entityId, tx) {
|
|
730
|
+
const executor = this.getExecutor(tx);
|
|
731
|
+
if (entityId !== void 0) {
|
|
732
|
+
return executor.getAllAsync(
|
|
733
|
+
`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL AND entity_id = ?`,
|
|
734
|
+
[entityId]
|
|
735
|
+
);
|
|
951
736
|
}
|
|
952
|
-
|
|
953
|
-
`SELECT
|
|
954
|
-
WHERE entity_id = ? AND source_ref = ? AND deleted_at IS NULL
|
|
955
|
-
ORDER BY updated_at DESC
|
|
956
|
-
LIMIT 1`,
|
|
957
|
-
[entityId, normalizedRef]
|
|
737
|
+
return executor.getAllAsync(
|
|
738
|
+
`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`
|
|
958
739
|
);
|
|
959
|
-
if (!row) return true;
|
|
960
|
-
const normalizedStoredHash = row.source_hash ? normalizeSourceHash(row.source_hash) : null;
|
|
961
|
-
return normalizedStoredHash !== normalizedHash;
|
|
962
|
-
}
|
|
963
|
-
_pruneKey(entityId) {
|
|
964
|
-
return `${this.prefix}:${entityId}:prune`;
|
|
965
|
-
}
|
|
966
|
-
_reembedKey(entityId) {
|
|
967
|
-
return `${this.prefix}:${entityId}:reembed`;
|
|
968
740
|
}
|
|
969
|
-
|
|
970
|
-
|
|
741
|
+
async updateEmbeddingBlob(id, blob, tx) {
|
|
742
|
+
const executor = this.getExecutor(tx);
|
|
743
|
+
await executor.runAsync(
|
|
744
|
+
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
745
|
+
[blob, id]
|
|
746
|
+
);
|
|
971
747
|
}
|
|
972
|
-
|
|
973
|
-
|
|
748
|
+
async hasLegacySourceTypes(tx) {
|
|
749
|
+
const executor = this.getExecutor(tx);
|
|
750
|
+
const row = await executor.getFirstAsync(
|
|
751
|
+
`SELECT 1 AS one FROM ${this.prefix}entries WHERE source_type IN ('user_document', 'agent_inferred') LIMIT 1`,
|
|
752
|
+
[]
|
|
753
|
+
);
|
|
754
|
+
return row != null;
|
|
974
755
|
}
|
|
975
|
-
|
|
976
|
-
|
|
756
|
+
async countLegacySourceTypes(tx) {
|
|
757
|
+
const executor = this.getExecutor(tx);
|
|
758
|
+
const row = await executor.getFirstAsync(
|
|
759
|
+
`SELECT COUNT(*) as count FROM ${this.prefix}entries WHERE source_type IN ('user_document', 'agent_inferred')`,
|
|
760
|
+
[]
|
|
761
|
+
);
|
|
762
|
+
return row?.count ?? 0;
|
|
977
763
|
}
|
|
978
|
-
|
|
979
|
-
|
|
764
|
+
async findAllForReembed(entityId, tx) {
|
|
765
|
+
const executor = this.getExecutor(tx);
|
|
766
|
+
if (entityId !== void 0) {
|
|
767
|
+
return executor.getAllAsync(
|
|
768
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
769
|
+
[entityId]
|
|
770
|
+
);
|
|
771
|
+
}
|
|
772
|
+
return executor.getAllAsync(
|
|
773
|
+
`SELECT * FROM ${this.prefix}entries WHERE deleted_at IS NULL`
|
|
774
|
+
);
|
|
980
775
|
}
|
|
981
|
-
|
|
982
|
-
|
|
776
|
+
async findRowsForSourceRefMigration(tx) {
|
|
777
|
+
const executor = this.getExecutor(tx);
|
|
778
|
+
return executor.getAllAsync(
|
|
779
|
+
`SELECT rowid, source_ref FROM ${this.prefix}entries
|
|
780
|
+
WHERE source_ref IS NOT NULL
|
|
781
|
+
AND (
|
|
782
|
+
TRIM(source_ref) != source_ref
|
|
783
|
+
OR INSTR(source_ref, '/') > 0
|
|
784
|
+
OR INSTR(source_ref, '\\') > 0
|
|
785
|
+
OR INSTR(source_ref, CHAR(0)) > 0
|
|
786
|
+
OR source_ref GLOB '*[^-A-Za-z0-9._ ]*'
|
|
787
|
+
)`
|
|
788
|
+
);
|
|
983
789
|
}
|
|
984
|
-
|
|
985
|
-
|
|
790
|
+
async updateSourceRefByRowid(rowid, sourceRef, tx) {
|
|
791
|
+
const executor = this.getExecutor(tx);
|
|
792
|
+
await executor.runAsync(
|
|
793
|
+
`UPDATE ${this.prefix}entries SET source_ref = ? WHERE rowid = ?`,
|
|
794
|
+
[sourceRef, rowid]
|
|
795
|
+
);
|
|
986
796
|
}
|
|
987
|
-
|
|
988
|
-
|
|
797
|
+
async findLatestSourceHash(entityId, sourceRef, tx) {
|
|
798
|
+
const executor = this.getExecutor(tx);
|
|
799
|
+
const row = await executor.getFirstAsync(
|
|
800
|
+
`SELECT source_hash FROM ${this.prefix}entries
|
|
801
|
+
WHERE entity_id = ? AND source_ref = ? AND deleted_at IS NULL
|
|
802
|
+
ORDER BY updated_at DESC
|
|
803
|
+
LIMIT 1`,
|
|
804
|
+
[entityId, sourceRef]
|
|
805
|
+
);
|
|
806
|
+
return row?.source_hash ?? null;
|
|
989
807
|
}
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
const
|
|
993
|
-
|
|
994
|
-
|
|
808
|
+
async findMetadataByIds(ids, tx) {
|
|
809
|
+
if (ids.length === 0) return [];
|
|
810
|
+
const executor = this.getExecutor(tx);
|
|
811
|
+
const rows = [];
|
|
812
|
+
for (let i = 0; i < ids.length; i += this.chunkSize) {
|
|
813
|
+
const chunk = ids.slice(i, i + this.chunkSize);
|
|
814
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
815
|
+
const chunkRows = await executor.getAllAsync(
|
|
816
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
817
|
+
chunk
|
|
818
|
+
);
|
|
819
|
+
rows.push(...chunkRows);
|
|
995
820
|
}
|
|
996
|
-
return
|
|
821
|
+
return rows;
|
|
997
822
|
}
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
const
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
const set = this.statusSubscribers.get(entityId);
|
|
1011
|
-
if (!set || set.size === 0) return;
|
|
1012
|
-
for (const entry of Array.from(set)) {
|
|
1013
|
-
if (!set.has(entry)) continue;
|
|
1014
|
-
const next = this.getEntityStatus(entityId);
|
|
1015
|
-
if (entry.last.ingesting === next.ingesting && entry.last.librarian === next.librarian && entry.last.heal === next.heal) continue;
|
|
1016
|
-
entry.last = this._copyEntityStatus(next);
|
|
1017
|
-
try {
|
|
1018
|
-
entry.callback(this._copyEntityStatus(next));
|
|
1019
|
-
} catch (err) {
|
|
1020
|
-
console.error(`[WikiMemory.subscribeEntityStatus] callback error for entityId="${entityId}" during transition emission`, err);
|
|
1021
|
-
}
|
|
823
|
+
async findWithEmbeddingsByIds(ids, tx) {
|
|
824
|
+
if (ids.length === 0) return [];
|
|
825
|
+
const executor = this.getExecutor(tx);
|
|
826
|
+
const rows = [];
|
|
827
|
+
for (let i = 0; i < ids.length; i += this.chunkSize) {
|
|
828
|
+
const chunk = ids.slice(i, i + this.chunkSize);
|
|
829
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
830
|
+
const chunkRows = await executor.getAllAsync(
|
|
831
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
832
|
+
chunk
|
|
833
|
+
);
|
|
834
|
+
rows.push(...chunkRows);
|
|
1022
835
|
}
|
|
836
|
+
return rows;
|
|
1023
837
|
}
|
|
1024
|
-
|
|
1025
|
-
if (
|
|
1026
|
-
|
|
1027
|
-
|
|
838
|
+
async findMetadataByEntityIds(entityIds, tx) {
|
|
839
|
+
if (entityIds.length === 0) return [];
|
|
840
|
+
const executor = this.getExecutor(tx);
|
|
841
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
842
|
+
return executor.getAllAsync(
|
|
843
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
844
|
+
[...entityIds]
|
|
845
|
+
);
|
|
1028
846
|
}
|
|
1029
|
-
async
|
|
1030
|
-
|
|
1031
|
-
const
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
}
|
|
1038
|
-
}
|
|
1039
|
-
let blockingOperation = null;
|
|
1040
|
-
if (this.activeMaintenanceJobs.has(pruneKey)) {
|
|
1041
|
-
blockingOperation = "prune";
|
|
1042
|
-
} else if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1043
|
-
blockingOperation = "librarian";
|
|
1044
|
-
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1045
|
-
blockingOperation = "heal";
|
|
1046
|
-
} else if (this._isReembedActive(entityId)) {
|
|
1047
|
-
blockingOperation = "reembed";
|
|
1048
|
-
} else if (isIngestRunning) {
|
|
1049
|
-
blockingOperation = "ingest";
|
|
1050
|
-
} else if (this._isImportActiveFor(entityId)) {
|
|
1051
|
-
blockingOperation = "import";
|
|
1052
|
-
} else if (this._isForgetActiveFor(entityId)) {
|
|
1053
|
-
blockingOperation = "forget";
|
|
1054
|
-
}
|
|
1055
|
-
if (blockingOperation !== null) {
|
|
1056
|
-
throw new WikiBusyError(blockingOperation, entityId);
|
|
1057
|
-
}
|
|
1058
|
-
this.activeMaintenanceJobs.add(pruneKey);
|
|
1059
|
-
try {
|
|
1060
|
-
const retainSoftDeletedFor = options?.retainSoftDeletedFor !== void 0 ? options.retainSoftDeletedFor : this.options.config?.pruneRetainSoftDeletedFor ?? 7;
|
|
1061
|
-
const retainEventsFor = options?.retainEventsFor !== void 0 ? options.retainEventsFor : this.options.config?.pruneEventsAfter ?? 30;
|
|
1062
|
-
const vacuum = options?.vacuum ?? false;
|
|
1063
|
-
this._validatePruneDuration(retainSoftDeletedFor, "retainSoftDeletedFor");
|
|
1064
|
-
this._validatePruneDuration(retainEventsFor, "retainEventsFor");
|
|
1065
|
-
const now = Date.now();
|
|
1066
|
-
let deletedEntries = 0;
|
|
1067
|
-
let deletedTasks = 0;
|
|
1068
|
-
let deletedEvents = 0;
|
|
1069
|
-
if (retainSoftDeletedFor !== null) {
|
|
1070
|
-
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
1071
|
-
const entriesToDelete = await this.entryRepo.getPrunableMetadata(entityId, cutoff);
|
|
1072
|
-
const succeeded = [];
|
|
1073
|
-
let failure = null;
|
|
1074
|
-
for (const row of entriesToDelete) {
|
|
1075
|
-
try {
|
|
1076
|
-
await this._notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
1077
|
-
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
1078
|
-
} catch (err) {
|
|
1079
|
-
failure = { factId: row.id, cause: err };
|
|
1080
|
-
break;
|
|
1081
|
-
}
|
|
1082
|
-
}
|
|
1083
|
-
if (succeeded.length > 0) {
|
|
1084
|
-
const chunkSize = 500;
|
|
1085
|
-
for (let i = 0; i < succeeded.length; i += chunkSize) {
|
|
1086
|
-
const chunk = succeeded.slice(i, i + chunkSize);
|
|
1087
|
-
const placeholders = chunk.map(() => "?").join(",");
|
|
1088
|
-
const entryResult = await this.db.runAsync(
|
|
1089
|
-
`DELETE FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at <= ? AND id IN (${placeholders})`,
|
|
1090
|
-
[entityId, cutoff, ...chunk.map((r) => r.id)]
|
|
1091
|
-
);
|
|
1092
|
-
deletedEntries += entryResult.changes;
|
|
1093
|
-
}
|
|
1094
|
-
}
|
|
1095
|
-
const taskResult = await this.db.runAsync(
|
|
1096
|
-
`DELETE FROM ${this.prefix}tasks
|
|
1097
|
-
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at <= ?`,
|
|
1098
|
-
[entityId, cutoff]
|
|
1099
|
-
);
|
|
1100
|
-
deletedTasks = taskResult.changes;
|
|
1101
|
-
if (failure) {
|
|
1102
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1103
|
-
this.vectorCache.delete(entityId);
|
|
1104
|
-
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
1105
|
-
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
1106
|
-
if (isTimeout) {
|
|
1107
|
-
throw new PrunePartialFailureError(
|
|
1108
|
-
succeeded.length,
|
|
1109
|
-
failure.factId,
|
|
1110
|
-
remaining,
|
|
1111
|
-
new Error("Deletion hook timed out"),
|
|
1112
|
-
deletedTasks,
|
|
1113
|
-
0
|
|
1114
|
-
// events not yet deleted at this point
|
|
1115
|
-
);
|
|
1116
|
-
}
|
|
1117
|
-
const errMsg = failure.cause?.message ?? "";
|
|
1118
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
1119
|
-
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
1120
|
-
throw new PrunePartialFailureError(
|
|
1121
|
-
succeeded.length,
|
|
1122
|
-
failure.factId,
|
|
1123
|
-
remaining,
|
|
1124
|
-
sanitizedCause,
|
|
1125
|
-
deletedTasks,
|
|
1126
|
-
0
|
|
1127
|
-
// events not yet deleted at this point
|
|
1128
|
-
);
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
if (retainEventsFor !== null) {
|
|
1132
|
-
const cutoff = now - retainEventsFor * 864e5;
|
|
1133
|
-
const eventResult = await this.db.runAsync(
|
|
1134
|
-
`DELETE FROM ${this.prefix}events
|
|
1135
|
-
WHERE entity_id = ? AND created_at <= ?`,
|
|
1136
|
-
[entityId, cutoff]
|
|
1137
|
-
);
|
|
1138
|
-
deletedEvents = eventResult.changes;
|
|
1139
|
-
}
|
|
1140
|
-
if (vacuum) {
|
|
1141
|
-
await this.db.execAsync(`PRAGMA wal_checkpoint(TRUNCATE)`);
|
|
1142
|
-
await this.db.execAsync(`VACUUM`);
|
|
1143
|
-
}
|
|
1144
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1145
|
-
this.vectorCache.delete(entityId);
|
|
1146
|
-
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
1147
|
-
} finally {
|
|
1148
|
-
this.activeMaintenanceJobs.delete(pruneKey);
|
|
1149
|
-
}
|
|
847
|
+
async findWithEmbeddingsByEntityIds(entityIds, tx) {
|
|
848
|
+
if (entityIds.length === 0) return [];
|
|
849
|
+
const executor = this.getExecutor(tx);
|
|
850
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
851
|
+
return executor.getAllAsync(
|
|
852
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
853
|
+
[...entityIds]
|
|
854
|
+
);
|
|
1150
855
|
}
|
|
1151
|
-
async
|
|
1152
|
-
|
|
1153
|
-
const
|
|
1154
|
-
const
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
const
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
}
|
|
1162
|
-
return empty;
|
|
1163
|
-
}
|
|
1164
|
-
const MAX_ENTITY_IDS = 100;
|
|
1165
|
-
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
1166
|
-
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
1167
|
-
}
|
|
1168
|
-
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
1169
|
-
if (nullByteId !== void 0) {
|
|
1170
|
-
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
1171
|
-
}
|
|
1172
|
-
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
1173
|
-
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
1174
|
-
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
1175
|
-
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
1176
|
-
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
1177
|
-
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
1178
|
-
const skipEmbed = weight === 0;
|
|
1179
|
-
const embedFn = this.options.llmProvider.embed;
|
|
1180
|
-
const trimmedQuery = query.trim();
|
|
1181
|
-
let facts = [];
|
|
1182
|
-
let scoreByFactId;
|
|
1183
|
-
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
1184
|
-
let usedEmbed = false;
|
|
1185
|
-
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
1186
|
-
if (scoredEntityIds.length === 0) {
|
|
1187
|
-
usedEmbed = true;
|
|
1188
|
-
} else if (!skipEmbed && embedFn) {
|
|
1189
|
-
let rankerShouldRethrow = false;
|
|
1190
|
-
let pendingRankerFallbackError;
|
|
1191
|
-
try {
|
|
1192
|
-
const queryVec = await embedFn(trimmedQuery);
|
|
1193
|
-
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
1194
|
-
throw new Error(
|
|
1195
|
-
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
1196
|
-
);
|
|
1197
|
-
}
|
|
1198
|
-
const storedDimRow = await this.db.getFirstAsync(
|
|
1199
|
-
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
1200
|
-
);
|
|
1201
|
-
if (storedDimRow) {
|
|
1202
|
-
const storedDim = parseInt(storedDimRow.value, 10);
|
|
1203
|
-
if (storedDim !== queryVec.length) {
|
|
1204
|
-
throw new Error(
|
|
1205
|
-
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
1206
|
-
);
|
|
1207
|
-
}
|
|
1208
|
-
}
|
|
1209
|
-
const mismatchScope = this._entityInClause(scoredEntityIds);
|
|
1210
|
-
const mismatchedCount = await this.db.getFirstAsync(
|
|
1211
|
-
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1212
|
-
WHERE ${mismatchScope.clause} AND deleted_at IS NULL
|
|
1213
|
-
AND embedding_blob IS NOT NULL
|
|
1214
|
-
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
1215
|
-
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
1216
|
-
[...mismatchScope.params, queryVec.length]
|
|
1217
|
-
);
|
|
1218
|
-
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
1219
|
-
throw new Error(
|
|
1220
|
-
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
1221
|
-
);
|
|
1222
|
-
}
|
|
1223
|
-
const useRanker = Boolean(this.options.vectorRanker);
|
|
1224
|
-
let candidateRows;
|
|
1225
|
-
let populateCache = entityIds.length === 1;
|
|
1226
|
-
let miniSearchScores;
|
|
1227
|
-
if (effectivePreFilterLimit !== void 0) {
|
|
1228
|
-
populateCache = false;
|
|
1229
|
-
const entityIdSet = new Set(scoredEntityIds);
|
|
1230
|
-
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
1231
|
-
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1232
|
-
combineWith: "OR"
|
|
1233
|
-
});
|
|
1234
|
-
if (preResults.length === 0) {
|
|
1235
|
-
candidateRows = null;
|
|
1236
|
-
} else {
|
|
1237
|
-
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
1238
|
-
if (topKResults.length === 0) {
|
|
1239
|
-
candidateRows = null;
|
|
1240
|
-
} else {
|
|
1241
|
-
const topKIds = topKResults.map((r) => r.id);
|
|
1242
|
-
const inClauseChunkSize = 500;
|
|
1243
|
-
if (useRanker) {
|
|
1244
|
-
const rows = [];
|
|
1245
|
-
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
1246
|
-
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1247
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1248
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1249
|
-
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1250
|
-
idChunk
|
|
1251
|
-
);
|
|
1252
|
-
rows.push(...chunkRows);
|
|
1253
|
-
}
|
|
1254
|
-
candidateRows = rows;
|
|
1255
|
-
} else {
|
|
1256
|
-
const rows = [];
|
|
1257
|
-
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
1258
|
-
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1259
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1260
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1261
|
-
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1262
|
-
idChunk
|
|
1263
|
-
);
|
|
1264
|
-
rows.push(...chunkRows);
|
|
1265
|
-
}
|
|
1266
|
-
candidateRows = rows;
|
|
1267
|
-
}
|
|
1268
|
-
if (weight !== void 0 && weight < 1) {
|
|
1269
|
-
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
1270
|
-
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
1271
|
-
}
|
|
1272
|
-
}
|
|
1273
|
-
}
|
|
1274
|
-
} else {
|
|
1275
|
-
if (useRanker) {
|
|
1276
|
-
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1277
|
-
candidateRows = await this.db.getAllAsync(
|
|
1278
|
-
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1279
|
-
entityScope.params
|
|
1280
|
-
);
|
|
1281
|
-
} else {
|
|
1282
|
-
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1283
|
-
candidateRows = await this.db.getAllAsync(
|
|
1284
|
-
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1285
|
-
entityScope.params
|
|
1286
|
-
);
|
|
1287
|
-
}
|
|
1288
|
-
if (weight !== void 0 && weight < 1) {
|
|
1289
|
-
const entityIdSet = new Set(scoredEntityIds);
|
|
1290
|
-
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1291
|
-
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1292
|
-
combineWith: "OR"
|
|
1293
|
-
});
|
|
1294
|
-
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
1295
|
-
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
1296
|
-
}
|
|
1297
|
-
}
|
|
1298
|
-
if (candidateRows === null) {
|
|
1299
|
-
usedEmbed = true;
|
|
1300
|
-
} else {
|
|
1301
|
-
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
1302
|
-
let scored;
|
|
1303
|
-
if (useRanker) {
|
|
1304
|
-
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
1305
|
-
for (const row of candidateRows) {
|
|
1306
|
-
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
1307
|
-
rows.push(row);
|
|
1308
|
-
candidateRowsByEntity.set(row.entity_id, rows);
|
|
1309
|
-
}
|
|
1310
|
-
try {
|
|
1311
|
-
const rankerResultsByEntity = await Promise.all(
|
|
1312
|
-
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
1313
|
-
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
1314
|
-
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
1315
|
-
const ranked = await this._rankWithVectorRanker({
|
|
1316
|
-
entityId: scopedEntityId,
|
|
1317
|
-
queryVec,
|
|
1318
|
-
candidateIds,
|
|
1319
|
-
candidateRows: rowsForEntity,
|
|
1320
|
-
weight,
|
|
1321
|
-
miniSearchScores,
|
|
1322
|
-
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
1323
|
-
});
|
|
1324
|
-
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
1325
|
-
})
|
|
1326
|
-
);
|
|
1327
|
-
scored = rankerResultsByEntity.flat();
|
|
1328
|
-
const scoredIds = new Set(scored.map((s) => s.id));
|
|
1329
|
-
const metadataById = new Map(
|
|
1330
|
-
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
1331
|
-
);
|
|
1332
|
-
scored = scored.map((row) => {
|
|
1333
|
-
const metadata = metadataById.get(row.id);
|
|
1334
|
-
return {
|
|
1335
|
-
...row,
|
|
1336
|
-
updated_at: metadata?.updated_at ?? null,
|
|
1337
|
-
access_count: metadata?.access_count ?? null
|
|
1338
|
-
};
|
|
1339
|
-
});
|
|
1340
|
-
const isHybrid = weight !== void 0 && weight < 1;
|
|
1341
|
-
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
1342
|
-
if (maxBackfill > 0) {
|
|
1343
|
-
if (isHybrid) {
|
|
1344
|
-
const topK = [];
|
|
1345
|
-
for (const row of candidateRows) {
|
|
1346
|
-
if (scoredIds.has(row.id)) continue;
|
|
1347
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1348
|
-
const candidate = { row, kwScore };
|
|
1349
|
-
if (topK.length < maxBackfill) {
|
|
1350
|
-
let insertIdx = topK.length;
|
|
1351
|
-
for (let i = 0; i < topK.length; i++) {
|
|
1352
|
-
const cmp = this._compareScoredRows(
|
|
1353
|
-
{
|
|
1354
|
-
id: candidate.row.id,
|
|
1355
|
-
score: candidate.kwScore,
|
|
1356
|
-
updated_at: candidate.row.updated_at,
|
|
1357
|
-
access_count: candidate.row.access_count
|
|
1358
|
-
},
|
|
1359
|
-
{
|
|
1360
|
-
id: topK[i].row.id,
|
|
1361
|
-
score: topK[i].kwScore,
|
|
1362
|
-
updated_at: topK[i].row.updated_at,
|
|
1363
|
-
access_count: topK[i].row.access_count
|
|
1364
|
-
}
|
|
1365
|
-
);
|
|
1366
|
-
if (cmp < 0) {
|
|
1367
|
-
insertIdx = i;
|
|
1368
|
-
break;
|
|
1369
|
-
}
|
|
1370
|
-
}
|
|
1371
|
-
topK.splice(insertIdx, 0, candidate);
|
|
1372
|
-
} else {
|
|
1373
|
-
const cmpWorst = this._compareScoredRows(
|
|
1374
|
-
{
|
|
1375
|
-
id: candidate.row.id,
|
|
1376
|
-
score: candidate.kwScore,
|
|
1377
|
-
updated_at: candidate.row.updated_at,
|
|
1378
|
-
access_count: candidate.row.access_count
|
|
1379
|
-
},
|
|
1380
|
-
{
|
|
1381
|
-
id: topK[maxBackfill - 1].row.id,
|
|
1382
|
-
score: topK[maxBackfill - 1].kwScore,
|
|
1383
|
-
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
1384
|
-
access_count: topK[maxBackfill - 1].row.access_count
|
|
1385
|
-
}
|
|
1386
|
-
);
|
|
1387
|
-
if (cmpWorst < 0) {
|
|
1388
|
-
let insertIdx = maxBackfill - 1;
|
|
1389
|
-
for (let i = 0; i < topK.length; i++) {
|
|
1390
|
-
const cmp = this._compareScoredRows(
|
|
1391
|
-
{
|
|
1392
|
-
id: candidate.row.id,
|
|
1393
|
-
score: candidate.kwScore,
|
|
1394
|
-
updated_at: candidate.row.updated_at,
|
|
1395
|
-
access_count: candidate.row.access_count
|
|
1396
|
-
},
|
|
1397
|
-
{
|
|
1398
|
-
id: topK[i].row.id,
|
|
1399
|
-
score: topK[i].kwScore,
|
|
1400
|
-
updated_at: topK[i].row.updated_at,
|
|
1401
|
-
access_count: topK[i].row.access_count
|
|
1402
|
-
}
|
|
1403
|
-
);
|
|
1404
|
-
if (cmp < 0) {
|
|
1405
|
-
insertIdx = i;
|
|
1406
|
-
break;
|
|
1407
|
-
}
|
|
1408
|
-
}
|
|
1409
|
-
topK.splice(insertIdx, 0, candidate);
|
|
1410
|
-
topK.pop();
|
|
1411
|
-
}
|
|
1412
|
-
}
|
|
1413
|
-
}
|
|
1414
|
-
for (const { row, kwScore } of topK) {
|
|
1415
|
-
scored.push({
|
|
1416
|
-
id: row.id,
|
|
1417
|
-
entity_id: row.entity_id,
|
|
1418
|
-
score: (1 - weight) * kwScore,
|
|
1419
|
-
updated_at: row.updated_at,
|
|
1420
|
-
access_count: row.access_count
|
|
1421
|
-
});
|
|
1422
|
-
}
|
|
1423
|
-
} else {
|
|
1424
|
-
const omitted = [];
|
|
1425
|
-
for (const row of candidateRows) {
|
|
1426
|
-
if (scoredIds.has(row.id)) continue;
|
|
1427
|
-
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1428
|
-
}
|
|
1429
|
-
if (omitted.length > 0) {
|
|
1430
|
-
this._tieBreakSort(omitted);
|
|
1431
|
-
scored.push(...omitted.slice(0, maxBackfill));
|
|
1432
|
-
}
|
|
1433
|
-
}
|
|
1434
|
-
}
|
|
1435
|
-
} catch (rankerErr) {
|
|
1436
|
-
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
1437
|
-
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
1438
|
-
this.options.onVectorRankerFallback?.({
|
|
1439
|
-
error: this._sanitizeRankerError(rankerError),
|
|
1440
|
-
policy
|
|
1441
|
-
});
|
|
1442
|
-
if (policy === "throw") {
|
|
1443
|
-
rankerShouldRethrow = true;
|
|
1444
|
-
throw rankerError;
|
|
1445
|
-
} else if (policy === "js-cosine") {
|
|
1446
|
-
let fallbackRows = candidateRows;
|
|
1447
|
-
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
1448
|
-
const rowIds = fallbackRows.map((r) => r.id);
|
|
1449
|
-
const embeddingsMap = /* @__PURE__ */ new Map();
|
|
1450
|
-
const chunkSize = 500;
|
|
1451
|
-
for (let i = 0; i < rowIds.length; i += chunkSize) {
|
|
1452
|
-
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1453
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1454
|
-
const embeddingRows = await this.db.getAllAsync(
|
|
1455
|
-
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1456
|
-
idChunk
|
|
1457
|
-
);
|
|
1458
|
-
for (const row of embeddingRows) {
|
|
1459
|
-
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
1460
|
-
}
|
|
1461
|
-
}
|
|
1462
|
-
fallbackRows = fallbackRows.map((r) => ({
|
|
1463
|
-
...r,
|
|
1464
|
-
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
1465
|
-
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
1466
|
-
}));
|
|
1467
|
-
}
|
|
1468
|
-
scored = await this._rankWithJsCosine({
|
|
1469
|
-
entityId: entityCacheKey,
|
|
1470
|
-
queryVec,
|
|
1471
|
-
candidateRows: fallbackRows,
|
|
1472
|
-
weight,
|
|
1473
|
-
miniSearchScores,
|
|
1474
|
-
populateCache,
|
|
1475
|
-
limit: fallbackRows.length,
|
|
1476
|
-
skipSort: true
|
|
1477
|
-
// read() re-sorts after applying tier weights
|
|
1478
|
-
});
|
|
1479
|
-
} else if (policy === "keyword") {
|
|
1480
|
-
const scoredEntityIdSet = new Set(scoredEntityIds);
|
|
1481
|
-
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1482
|
-
filter: (r) => scoredEntityIdSet.has(r.entity_id),
|
|
1483
|
-
combineWith: "OR"
|
|
1484
|
-
});
|
|
1485
|
-
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1486
|
-
const topResults = msResults.slice(0, keywordOversampledLimit);
|
|
1487
|
-
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
1488
|
-
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
1489
|
-
scored = topResults.map((result) => {
|
|
1490
|
-
const metadata = candidateMap.get(result.id);
|
|
1491
|
-
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
1492
|
-
return {
|
|
1493
|
-
id: result.id,
|
|
1494
|
-
entity_id: entityForScore,
|
|
1495
|
-
score: result.score ?? 0,
|
|
1496
|
-
access_count: metadata?.access_count ?? null,
|
|
1497
|
-
updated_at: metadata?.updated_at ?? null
|
|
1498
|
-
};
|
|
1499
|
-
});
|
|
1500
|
-
} else {
|
|
1501
|
-
scored = [];
|
|
1502
|
-
}
|
|
1503
|
-
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
1504
|
-
const mirrored = new Error("Vector ranker failed, falling back", {
|
|
1505
|
-
cause: this._sanitizeRankerError(rankerErr)
|
|
1506
|
-
});
|
|
1507
|
-
pendingRankerFallbackError = mirrored;
|
|
1508
|
-
}
|
|
1509
|
-
}
|
|
1510
|
-
} else {
|
|
1511
|
-
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
1512
|
-
scored = await this._rankWithJsCosine({
|
|
1513
|
-
entityId: entityCacheKey,
|
|
1514
|
-
queryVec,
|
|
1515
|
-
candidateRows,
|
|
1516
|
-
weight,
|
|
1517
|
-
miniSearchScores,
|
|
1518
|
-
populateCache,
|
|
1519
|
-
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
1520
|
-
skipSort: jsCosineNeedsTierSort
|
|
1521
|
-
// read() re-sorts after applying tier weights
|
|
1522
|
-
});
|
|
1523
|
-
}
|
|
1524
|
-
if (scored.length > 0) {
|
|
1525
|
-
scored = scored.map((row) => ({
|
|
1526
|
-
...row,
|
|
1527
|
-
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
1528
|
-
}));
|
|
1529
|
-
this._tieBreakSort(scored);
|
|
1530
|
-
const selectedScored = scored.slice(0, maxResults);
|
|
1531
|
-
const topIds = selectedScored.map((s) => s.id);
|
|
1532
|
-
if (exposeMetadata && trimmedQuery) {
|
|
1533
|
-
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
1534
|
-
}
|
|
1535
|
-
if (topIds.length > 0) {
|
|
1536
|
-
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1537
|
-
if (facts2.length < topIds.length) {
|
|
1538
|
-
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
1539
|
-
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
1540
|
-
const missingCount = missingIds.length;
|
|
1541
|
-
const sample = missingIds.slice(0, 5);
|
|
1542
|
-
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1543
|
-
const error = new Error(
|
|
1544
|
-
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
1545
|
-
);
|
|
1546
|
-
this.options.onRetrievalFallback?.(error);
|
|
1547
|
-
}
|
|
1548
|
-
facts = facts2;
|
|
1549
|
-
}
|
|
1550
|
-
if (pendingRankerFallbackError) {
|
|
1551
|
-
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1552
|
-
pendingRankerFallbackError = void 0;
|
|
1553
|
-
}
|
|
1554
|
-
usedEmbed = true;
|
|
1555
|
-
} else {
|
|
1556
|
-
if (pendingRankerFallbackError) {
|
|
1557
|
-
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1558
|
-
pendingRankerFallbackError = void 0;
|
|
1559
|
-
}
|
|
1560
|
-
usedEmbed = true;
|
|
1561
|
-
}
|
|
1562
|
-
}
|
|
1563
|
-
} catch (err) {
|
|
1564
|
-
const error = err instanceof Error ? err : new Error(String(err));
|
|
1565
|
-
if (rankerShouldRethrow) {
|
|
1566
|
-
throw error;
|
|
1567
|
-
}
|
|
1568
|
-
if (pendingRankerFallbackError) {
|
|
1569
|
-
error.cause = pendingRankerFallbackError;
|
|
1570
|
-
pendingRankerFallbackError = void 0;
|
|
1571
|
-
}
|
|
1572
|
-
this.options.onRetrievalFallback?.(error);
|
|
1573
|
-
}
|
|
1574
|
-
}
|
|
1575
|
-
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
1576
|
-
const fallbackEntityIdSet = new Set(scoredEntityIds);
|
|
1577
|
-
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1578
|
-
const results = this.miniSearch.search(trimmedQuery, {
|
|
1579
|
-
filter: (r) => fallbackEntityIdSet.has(r.entity_id),
|
|
1580
|
-
combineWith: "OR"
|
|
1581
|
-
});
|
|
1582
|
-
const candidates = results.slice(0, fallbackOversampledLimit).map((r) => ({
|
|
1583
|
-
id: r.id,
|
|
1584
|
-
entity_id: r.entity_id,
|
|
1585
|
-
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
1586
|
-
updated_at: null,
|
|
1587
|
-
access_count: null
|
|
1588
|
-
}));
|
|
1589
|
-
this._tieBreakSort(candidates);
|
|
1590
|
-
const topCandidates = candidates.slice(0, maxResults);
|
|
1591
|
-
const topIds = topCandidates.map((c) => c.id);
|
|
1592
|
-
if (topIds.length > 0) {
|
|
1593
|
-
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1594
|
-
if (exposeMetadata) {
|
|
1595
|
-
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
1596
|
-
}
|
|
1597
|
-
}
|
|
1598
|
-
}
|
|
1599
|
-
if (facts.length > 0) {
|
|
1600
|
-
const ids = facts.map((f) => f.id);
|
|
1601
|
-
const now = Date.now();
|
|
1602
|
-
const accessChunkSize = 500;
|
|
1603
|
-
for (let i = 0; i < ids.length; i += accessChunkSize) {
|
|
1604
|
-
const idChunk = ids.slice(i, i + accessChunkSize);
|
|
1605
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1606
|
-
await this.db.runAsync(
|
|
1607
|
-
`UPDATE ${this.prefix}entries
|
|
1608
|
-
SET access_count = access_count + 1, last_accessed_at = ?
|
|
1609
|
-
WHERE id IN (${placeholders})`,
|
|
1610
|
-
[now, ...idChunk]
|
|
1611
|
-
);
|
|
1612
|
-
}
|
|
1613
|
-
}
|
|
1614
|
-
} else {
|
|
1615
|
-
const entityScope = this._entityInClause(entityIds);
|
|
1616
|
-
const rawFacts = await this.db.getAllAsync(
|
|
1617
|
-
`SELECT * FROM ${this.prefix}entries
|
|
1618
|
-
WHERE ${entityScope.clause} AND deleted_at IS NULL
|
|
1619
|
-
ORDER BY updated_at DESC
|
|
1620
|
-
LIMIT ?`,
|
|
1621
|
-
[...entityScope.params, maxResults]
|
|
856
|
+
async findEmbeddingsByIds(ids, tx) {
|
|
857
|
+
if (ids.length === 0) return [];
|
|
858
|
+
const executor = this.getExecutor(tx);
|
|
859
|
+
const rows = [];
|
|
860
|
+
for (let i = 0; i < ids.length; i += this.chunkSize) {
|
|
861
|
+
const chunk = ids.slice(i, i + this.chunkSize);
|
|
862
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
863
|
+
const chunkRows = await executor.getAllAsync(
|
|
864
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
865
|
+
chunk
|
|
1622
866
|
);
|
|
1623
|
-
|
|
1624
|
-
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1625
|
-
return {
|
|
1626
|
-
...rest,
|
|
1627
|
-
tags: (() => {
|
|
1628
|
-
if (Array.isArray(rest.tags)) return rest.tags;
|
|
1629
|
-
try {
|
|
1630
|
-
const p = JSON.parse(rest.tags);
|
|
1631
|
-
return Array.isArray(p) ? p : [];
|
|
1632
|
-
} catch {
|
|
1633
|
-
return [];
|
|
1634
|
-
}
|
|
1635
|
-
})()
|
|
1636
|
-
};
|
|
1637
|
-
});
|
|
1638
|
-
}
|
|
1639
|
-
const [tasks, events] = await Promise.all([
|
|
1640
|
-
(async () => {
|
|
1641
|
-
const entityScope = this._entityInClause(entityIds);
|
|
1642
|
-
const tasksLimit = entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200);
|
|
1643
|
-
return this.db.getAllAsync(
|
|
1644
|
-
`SELECT * FROM ${this.prefix}tasks
|
|
1645
|
-
WHERE ${entityScope.clause} AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
|
|
1646
|
-
ORDER BY priority DESC, created_at ASC${tasksLimit !== void 0 ? "\n LIMIT ?" : ""}`,
|
|
1647
|
-
tasksLimit !== void 0 ? [...entityScope.params, tasksLimit] : entityScope.params
|
|
1648
|
-
);
|
|
1649
|
-
})(),
|
|
1650
|
-
(async () => {
|
|
1651
|
-
const entityScope = this._entityInClause(entityIds);
|
|
1652
|
-
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
1653
|
-
return this.db.getAllAsync(
|
|
1654
|
-
`SELECT * FROM ${this.prefix}events
|
|
1655
|
-
WHERE ${entityScope.clause}
|
|
1656
|
-
ORDER BY created_at DESC
|
|
1657
|
-
LIMIT ?`,
|
|
1658
|
-
[...entityScope.params, eventsLimit]
|
|
1659
|
-
);
|
|
1660
|
-
})()
|
|
1661
|
-
]);
|
|
1662
|
-
let factScores;
|
|
1663
|
-
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
1664
|
-
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
867
|
+
rows.push(...chunkRows);
|
|
1665
868
|
}
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
869
|
+
return rows;
|
|
870
|
+
}
|
|
871
|
+
/**
 * Bump access statistics for the given entry IDs.
 *
 * For every slice of `ids`, runs one UPDATE on `${prefix}entries` that adds 1 to
 * `access_count` and sets `last_accessed_at` to `now` for the rows whose id is in
 * the slice. The UPDATE carries no `deleted_at` predicate. Does nothing when
 * `ids` is empty and resolves without a value.
 *
 * @param ids - entry IDs to mark as accessed
 * @param now - value bound to `last_accessed_at` (presumably epoch milliseconds — confirm with callers)
 * @param tx - forwarded to `this.getExecutor` to choose the executor the statements run on
 */
async trackAccess(ids, now, tx) {
|
|
872
|
+
if (ids.length === 0) return;
|
|
873
|
+
const executor = this.getExecutor(tx);
|
|
874
|
+
// Slice width is this.chunkSize, which is set outside this view — assumed to be a positive integer (TODO confirm).
for (let i = 0; i < ids.length; i += this.chunkSize) {
|
|
875
|
+
const chunk = ids.slice(i, i + this.chunkSize);
|
|
876
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
877
|
+
await executor.runAsync(
|
|
878
|
+
`UPDATE ${this.prefix}entries SET access_count = access_count + 1, last_accessed_at = ? WHERE id IN (${placeholders})`,
|
|
879
|
+
[now, ...chunk]
|
|
880
|
+
);
|
|
1671
881
|
}
|
|
1672
|
-
// The row below is marked "-" (removed side of the diff); it is not part of trackAccess.
return bundle;
|
|
1673
882
|
}
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
1681
|
-
return includeZeroWeightEntities === true || w !== 0;
|
|
1682
|
-
});
|
|
883
|
+
getLegacyMigrationSQL() {
|
|
884
|
+
return [
|
|
885
|
+
`-- Migrate legacy source_type values (targets your WikiMemory prefix: ${this.prefix})`,
|
|
886
|
+
`UPDATE ${this.prefix}entries SET source_type = 'immutable_document' WHERE source_type = 'user_document';`,
|
|
887
|
+
`UPDATE ${this.prefix}entries SET source_type = 'librarian_inferred' WHERE source_type = 'agent_inferred';`
|
|
888
|
+
].join("\n");
|
|
1683
889
|
}
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
890
|
+
async findRecentByEntityIds(entityIds, limit, tx) {
|
|
891
|
+
if (entityIds.length === 0) return [];
|
|
892
|
+
const executor = this.getExecutor(tx);
|
|
893
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
894
|
+
const rows = await executor.getAllAsync(
|
|
895
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id IN (${placeholders}) AND deleted_at IS NULL ORDER BY updated_at DESC LIMIT ?`,
|
|
896
|
+
[...entityIds, limit]
|
|
897
|
+
);
|
|
898
|
+
return rows.map(mapRowToFact);
|
|
1689
899
|
}
|
|
900
|
+
};
|
|
901
|
+
|
|
902
|
+
// src/repositories/OutboxRepository.ts
|
|
903
|
+
var OutboxRepository = class extends BaseRepository {
|
|
1690
904
|
/**
|
|
1691
|
-
*
|
|
1692
|
-
*
|
|
905
|
+
* Insert a new outbox event within the provided transaction.
|
|
906
|
+
* `tx` is required — callers must always pass the active transaction
|
|
907
|
+
* so the write is atomic with the main table mutation.
|
|
1693
908
|
*/
|
|
1694
|
-
|
|
1695
|
-
const
|
|
1696
|
-
|
|
1697
|
-
const
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
909
|
+
async push(params, tx) {
|
|
910
|
+
const executor = this.getExecutor(tx);
|
|
911
|
+
const id = generateId("out_");
|
|
912
|
+
const now = Date.now();
|
|
913
|
+
await executor.runAsync(
|
|
914
|
+
`INSERT INTO ${this.prefix}outbox (id, entity_id, table_name, record_id, operation, payload, created_at)
|
|
915
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
|
916
|
+
[id, params.entityId, params.tableName, params.recordId, params.operation, JSON.stringify(params.payload), now]
|
|
917
|
+
);
|
|
1702
918
|
}
|
|
1703
919
|
/**
|
|
1704
|
-
*
|
|
920
|
+
* Fetch pending outbox rows ordered by created_at ASC.
|
|
921
|
+
* Reads directly from `this.db` (not a transaction).
|
|
1705
922
|
*/
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
923
|
+
async fetchPending(limit = 50) {
|
|
924
|
+
return this.db.getAllAsync(
|
|
925
|
+
`SELECT * FROM ${this.prefix}outbox ORDER BY created_at ASC LIMIT ?`,
|
|
926
|
+
[limit]
|
|
927
|
+
);
|
|
1710
928
|
}
|
|
1711
929
|
/**
|
|
1712
|
-
*
|
|
1713
|
-
*
|
|
930
|
+
* Delete acknowledged outbox rows by their IDs.
|
|
931
|
+
* No-op when `ids` is empty.
|
|
932
|
+
* Deletes directly from `this.db` (not a transaction).
|
|
1714
933
|
*/
|
|
1715
|
-
async
|
|
1716
|
-
|
|
934
|
+
async acknowledge(ids) {
|
|
935
|
+
if (ids.length === 0) return;
|
|
936
|
+
const placeholders = ids.map(() => "?").join(", ");
|
|
937
|
+
await this.db.runAsync(
|
|
938
|
+
`DELETE FROM ${this.prefix}outbox WHERE id IN (${placeholders})`,
|
|
939
|
+
ids
|
|
940
|
+
);
|
|
941
|
+
}
|
|
942
|
+
};
|
|
943
|
+
|
|
944
|
+
// src/repositories/TaskRepository.ts
|
|
945
|
+
/**
 * Convert a raw `tasks` table row into a task object.
 *
 * `id`, `entity_id`, `description` and `status` are copied as-is. `priority`,
 * `created_at` and `updated_at` are coerced with `Number()`. `resolved_at` and
 * `deleted_at` are coerced with `Number()` unless they are null/undefined, in
 * which case they become `null`. Any other columns on the row are dropped.
 *
 * @param row - raw database row
 * @returns the normalised task object
 */
function mapRowToTask(row) {
  // null and undefined both collapse to null; everything else is coerced.
  const toNullableNumber = (value) => (value == null ? null : Number(value));
  const { id, entity_id, description, status } = row;
  return {
    id,
    entity_id,
    description,
    status,
    priority: Number(row.priority),
    created_at: Number(row.created_at),
    updated_at: Number(row.updated_at),
    resolved_at: toNullableNumber(row.resolved_at),
    deleted_at: toNullableNumber(row.deleted_at)
  };
}
|
|
958
|
+
var TaskRepository = class extends BaseRepository {
|
|
959
|
+
constructor(db, prefix, outbox) {
|
|
960
|
+
super(db, prefix);
|
|
961
|
+
this.outbox = outbox;
|
|
1717
962
|
}
|
|
1718
963
|
/**
|
|
1719
|
-
*
|
|
1720
|
-
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
1721
|
-
* Recursively sanitizes one level of .cause; deeper chains collapse to type only.
|
|
964
|
+
* Fetch a single task by ID. Returns null if not found or soft-deleted.
|
|
1722
965
|
*/
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
1728
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
1729
|
-
const sanitized = new Error(
|
|
1730
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
1731
|
-
innerCause ? { cause: innerCause } : void 0
|
|
966
|
+
// Looks up one row of `${prefix}tasks` by id through this.db (this method takes no
// transaction argument); the `deleted_at IS NULL` predicate hides soft-deleted tasks.
async findById(id) {
|
|
967
|
+
const row = await this.db.getFirstAsync(
|
|
968
|
+
`SELECT * FROM ${this.prefix}tasks WHERE id = ? AND deleted_at IS NULL`,
|
|
969
|
+
[id]
|
|
1732
970
|
);
|
|
1733
|
-
|
|
1734
|
-
// The row below is marked "-" (removed side of the diff); it is not part of findById.
return sanitized;
|
|
971
|
+
// A falsy lookup result yields null; otherwise the row is normalised by mapRowToTask.
return row ? mapRowToTask(row) : null;
|
|
1735
972
|
}
|
|
1736
973
|
/**
|
|
1737
|
-
*
|
|
1738
|
-
*
|
|
974
|
+
* Fetch all pending/in_progress tasks for the given entity IDs.
|
|
975
|
+
* Returns empty array when entityIds is empty.
|
|
1739
976
|
*/
|
|
1740
|
-
async
|
|
1741
|
-
|
|
1742
|
-
const
|
|
1743
|
-
|
|
1744
|
-
const
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
const
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1761
|
-
if (weight !== void 0) {
|
|
1762
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1763
|
-
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1764
|
-
} else {
|
|
1765
|
-
score = cosSim;
|
|
1766
|
-
}
|
|
1767
|
-
} else if (weight !== void 0 && weight < 1) {
|
|
1768
|
-
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1769
|
-
score = (1 - weight) * kwScore;
|
|
1770
|
-
} else {
|
|
1771
|
-
score = -2;
|
|
1772
|
-
}
|
|
1773
|
-
return {
|
|
1774
|
-
id: row.id,
|
|
1775
|
-
entity_id: row.entity_id,
|
|
1776
|
-
score,
|
|
1777
|
-
updated_at: row.updated_at,
|
|
1778
|
-
access_count: row.access_count
|
|
1779
|
-
};
|
|
1780
|
-
});
|
|
1781
|
-
if (canCache && entityCache && entityCache.size > 0) {
|
|
1782
|
-
if (!this.vectorCache.has(entityId)) {
|
|
1783
|
-
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1784
|
-
const oldestKey = this.vectorCache.keys().next().value;
|
|
1785
|
-
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1786
|
-
}
|
|
1787
|
-
this.vectorCache.set(entityId, entityCache);
|
|
1788
|
-
}
|
|
977
|
+
async findAllPending(entityIds, limit) {
|
|
978
|
+
if (entityIds.length === 0) return [];
|
|
979
|
+
const placeholders = entityIds.map(() => "?").join(", ");
|
|
980
|
+
const sql = `SELECT * FROM ${this.prefix}tasks WHERE entity_id IN (${placeholders}) AND status IN ('pending', 'in_progress') AND deleted_at IS NULL ORDER BY priority DESC, created_at ASC` + (limit != null ? ` LIMIT ?` : "");
|
|
981
|
+
const params = limit != null ? [...entityIds, limit] : [...entityIds];
|
|
982
|
+
const rows = await this.db.getAllAsync(sql, params);
|
|
983
|
+
return rows.map(mapRowToTask);
|
|
984
|
+
}
|
|
985
|
+
/**
 * Fetch `{ id, entity_id, updated_at }` for every row of `${prefix}tasks` whose
 * id is in `ids`, querying in slices of 500 ids per statement.
 *
 * The SELECT has no `deleted_at` predicate, so soft-deleted rows are returned
 * too. `updated_at` is coerced with `Number()`. An empty `ids` list runs no
 * query and yields `[]`.
 *
 * @param ids - task IDs to look up
 * @param tx - forwarded to `this.getExecutor` to choose the executor
 * @returns the rows found, concatenated in slice order
 */
async findExistingMetadataByIds(ids, tx) {
|
|
986
|
+
const executor = this.getExecutor(tx);
|
|
987
|
+
const rows = [];
|
|
988
|
+
// Bounds how many ids are bound into a single IN (...) list.
const chunkSize = 500;
|
|
989
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
990
|
+
const chunk = ids.slice(i, i + chunkSize);
|
|
991
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
992
|
+
const chunkRows = await executor.getAllAsync(
|
|
993
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
994
|
+
chunk
|
|
995
|
+
);
|
|
996
|
+
rows.push(...chunkRows.map((row) => ({ id: row.id, entity_id: row.entity_id, updated_at: Number(row.updated_at) })));
|
|
1789
997
|
}
|
|
1790
|
-
|
|
1791
|
-
// The row below is marked "-" (removed side of the diff); it is not part of this method.
return scored.slice(0, limit);
|
|
998
|
+
return rows;
|
|
1792
999
|
}
|
|
1793
1000
|
/**
|
|
1794
|
-
*
|
|
1795
|
-
*
|
|
1796
|
-
*
|
|
1001
|
+
* Upsert a WikiTask within the provided transaction.
|
|
1002
|
+
* Uses ON CONFLICT(id) DO UPDATE (not INSERT OR REPLACE).
|
|
1003
|
+
* Stages an outbox entry in the same transaction.
|
|
1004
|
+
* `tx` is REQUIRED.
|
|
1797
1005
|
*/
|
|
1798
|
-
async
|
|
1799
|
-
const
|
|
1800
|
-
const
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
const
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1006
|
+
async upsert(task, tx, updatedAt) {
|
|
1007
|
+
const executor = this.getExecutor(tx);
|
|
1008
|
+
const now = Number.isFinite(updatedAt) ? updatedAt : Date.now();
|
|
1009
|
+
const existingRow = await executor.getFirstAsync(
|
|
1010
|
+
`SELECT id FROM ${this.prefix}tasks WHERE id = ?`,
|
|
1011
|
+
[task.id]
|
|
1012
|
+
);
|
|
1013
|
+
const operation = task.deleted_at != null ? "DELETE" : existingRow ? "UPDATE" : "INSERT";
|
|
1014
|
+
await executor.runAsync(
|
|
1015
|
+
`INSERT INTO ${this.prefix}tasks (
|
|
1016
|
+
id, entity_id, description, status, priority,
|
|
1017
|
+
created_at, updated_at, resolved_at, deleted_at
|
|
1018
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1019
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
1020
|
+
entity_id = excluded.entity_id,
|
|
1021
|
+
description = excluded.description,
|
|
1022
|
+
status = excluded.status,
|
|
1023
|
+
priority = excluded.priority,
|
|
1024
|
+
updated_at = excluded.updated_at,
|
|
1025
|
+
resolved_at = excluded.resolved_at,
|
|
1026
|
+
deleted_at = excluded.deleted_at`,
|
|
1027
|
+
[
|
|
1028
|
+
task.id,
|
|
1029
|
+
task.entity_id,
|
|
1030
|
+
task.description,
|
|
1031
|
+
task.status,
|
|
1032
|
+
task.priority,
|
|
1033
|
+
task.created_at,
|
|
1034
|
+
now,
|
|
1035
|
+
// updated_at set by repo or import override
|
|
1036
|
+
task.resolved_at ?? null,
|
|
1037
|
+
task.deleted_at ?? null
|
|
1038
|
+
]
|
|
1039
|
+
);
|
|
1040
|
+
await this.outbox.push(
|
|
1041
|
+
{
|
|
1042
|
+
entityId: task.entity_id,
|
|
1043
|
+
tableName: "tasks",
|
|
1044
|
+
recordId: task.id,
|
|
1045
|
+
operation,
|
|
1046
|
+
payload: task
|
|
1047
|
+
},
|
|
1048
|
+
tx
|
|
1049
|
+
);
|
|
1837
1050
|
}
|
|
1838
|
-
async
|
|
1839
|
-
|
|
1051
|
+
async upsertForImport(task, tx, updatedAt) {
|
|
1052
|
+
const executor = this.getExecutor(tx);
|
|
1053
|
+
const now = Number.isFinite(updatedAt) ? updatedAt : Date.now();
|
|
1054
|
+
await executor.runAsync(
|
|
1055
|
+
`INSERT INTO ${this.prefix}tasks (
|
|
1056
|
+
id, entity_id, description, status, priority,
|
|
1057
|
+
created_at, updated_at, resolved_at, deleted_at
|
|
1058
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1059
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
1060
|
+
entity_id = excluded.entity_id,
|
|
1061
|
+
description = excluded.description,
|
|
1062
|
+
status = excluded.status,
|
|
1063
|
+
priority = excluded.priority,
|
|
1064
|
+
updated_at = excluded.updated_at,
|
|
1065
|
+
resolved_at = excluded.resolved_at,
|
|
1066
|
+
deleted_at = excluded.deleted_at`,
|
|
1067
|
+
[
|
|
1068
|
+
task.id,
|
|
1069
|
+
task.entity_id,
|
|
1070
|
+
task.description,
|
|
1071
|
+
task.status,
|
|
1072
|
+
task.priority,
|
|
1073
|
+
task.created_at,
|
|
1074
|
+
now,
|
|
1075
|
+
task.resolved_at ?? null,
|
|
1076
|
+
task.deleted_at ?? null
|
|
1077
|
+
]
|
|
1078
|
+
);
|
|
1840
1079
|
}
|
|
1841
|
-
|
|
1842
|
-
|
|
1080
|
+
/**
|
|
1081
|
+
* Soft-delete a task by ID. Sets deleted_at and updated_at.
|
|
1082
|
+
* Stages a DELETE outbox entry in the same transaction.
|
|
1083
|
+
* `tx` is REQUIRED.
|
|
1084
|
+
*/
|
|
1085
|
+
async softDelete(id, entityId, tx) {
|
|
1086
|
+
const executor = this.getExecutor(tx);
|
|
1843
1087
|
const now = Date.now();
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
await this.
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
let memoryCheckpoint = cp?.memory_checkpoint || 0;
|
|
1859
|
-
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
1860
|
-
if (count - memoryCheckpoint >= threshold) {
|
|
1861
|
-
const jobKey = this._librarianKey(entityId);
|
|
1862
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
1863
|
-
this.activeMaintenanceJobs.add(jobKey);
|
|
1864
|
-
this._notifyStatusSubscribers(entityId);
|
|
1865
|
-
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => {
|
|
1866
|
-
this.activeMaintenanceJobs.delete(jobKey);
|
|
1867
|
-
this._notifyStatusSubscribers(entityId);
|
|
1868
|
-
});
|
|
1869
|
-
}
|
|
1870
|
-
}
|
|
1871
|
-
}
|
|
1872
|
-
async runLibrarianThenMaybeHeal(entityId, currentEventCount) {
|
|
1873
|
-
await this._doRunLibrarian(entityId);
|
|
1874
|
-
await this.db.runAsync(`
|
|
1875
|
-
INSERT INTO ${this.prefix}checkpoints (entity_id, memory_checkpoint)
|
|
1876
|
-
VALUES (?, ?)
|
|
1877
|
-
ON CONFLICT(entity_id) DO UPDATE SET memory_checkpoint = ?
|
|
1878
|
-
`, [entityId, currentEventCount, currentEventCount]);
|
|
1879
|
-
const autoHealThreshold = this.options.config?.autoHealThreshold || 100;
|
|
1880
|
-
const cp = await this.db.getFirstAsync(`SELECT * FROM ${this.prefix}checkpoints WHERE entity_id = ?`, [entityId]);
|
|
1881
|
-
let healCheckpoint = cp?.heal_checkpoint || 0;
|
|
1882
|
-
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
1883
|
-
if (currentEventCount - healCheckpoint >= autoHealThreshold) {
|
|
1884
|
-
const healKey = this._healKey(entityId);
|
|
1885
|
-
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
1886
|
-
this.activeMaintenanceJobs.add(healKey);
|
|
1887
|
-
this._notifyStatusSubscribers(entityId);
|
|
1888
|
-
try {
|
|
1889
|
-
await this._doRunHeal(entityId);
|
|
1890
|
-
await this.db.runAsync(`
|
|
1891
|
-
INSERT INTO ${this.prefix}checkpoints (entity_id, heal_checkpoint)
|
|
1892
|
-
VALUES (?, ?)
|
|
1893
|
-
ON CONFLICT(entity_id) DO UPDATE SET heal_checkpoint = ?
|
|
1894
|
-
`, [entityId, currentEventCount, currentEventCount]);
|
|
1895
|
-
} finally {
|
|
1896
|
-
this.activeMaintenanceJobs.delete(healKey);
|
|
1897
|
-
this._notifyStatusSubscribers(entityId);
|
|
1898
|
-
}
|
|
1899
|
-
}
|
|
1900
|
-
}
|
|
1088
|
+
await executor.runAsync(
|
|
1089
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1090
|
+
[now, now, id, entityId]
|
|
1091
|
+
);
|
|
1092
|
+
await this.outbox.push(
|
|
1093
|
+
{
|
|
1094
|
+
entityId,
|
|
1095
|
+
tableName: "tasks",
|
|
1096
|
+
recordId: id,
|
|
1097
|
+
operation: "DELETE",
|
|
1098
|
+
payload: { id, entity_id: entityId, deleted_at: now }
|
|
1099
|
+
},
|
|
1100
|
+
tx
|
|
1101
|
+
);
|
|
1901
1102
|
}
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
${
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
const newTokens = titleTokens(fact.title);
|
|
1941
|
-
let skip = false;
|
|
1942
|
-
if (newTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
1943
|
-
for (const existing of currentFactsRows) {
|
|
1944
|
-
if (existing.source_type !== "librarian_inferred") continue;
|
|
1945
|
-
const existingTokens = titleTokens(existing.title);
|
|
1946
|
-
if (existingTokens.size >= MIN_TOKENS_TO_QUALIFY) {
|
|
1947
|
-
if (jaccardScore(newTokens, existingTokens) >= FUZZY_THRESHOLD) {
|
|
1948
|
-
skip = true;
|
|
1949
|
-
break;
|
|
1950
|
-
}
|
|
1951
|
-
}
|
|
1952
|
-
}
|
|
1953
|
-
}
|
|
1954
|
-
if (skip) continue;
|
|
1955
|
-
const id = generateId("fact_");
|
|
1956
|
-
await this.db.runAsync(`
|
|
1957
|
-
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
1958
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1959
|
-
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "librarian_inferred", now, now]);
|
|
1960
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1961
|
-
}
|
|
1962
|
-
for (const task of validTasks) {
|
|
1963
|
-
const id = generateId("task_");
|
|
1964
|
-
await this.db.runAsync(`
|
|
1965
|
-
INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at)
|
|
1966
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
1967
|
-
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
1968
|
-
}
|
|
1969
|
-
});
|
|
1970
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1971
|
-
this.vectorCache.delete(entityId);
|
|
1972
|
-
for (const fact of insertedFacts) {
|
|
1973
|
-
await this.embedFact(fact);
|
|
1103
|
+
/**
|
|
1104
|
+
* Fetch all non-deleted tasks for an entity, ordered by priority DESC, created_at ASC.
|
|
1105
|
+
* Used by _getFullBundle().
|
|
1106
|
+
*/
|
|
1107
|
+
async findAllByEntityId(entityId, tx) {
|
|
1108
|
+
const executor = this.getExecutor(tx);
|
|
1109
|
+
const rows = await executor.getAllAsync(
|
|
1110
|
+
`SELECT * FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NULL ORDER BY priority DESC, created_at ASC`,
|
|
1111
|
+
[entityId]
|
|
1112
|
+
);
|
|
1113
|
+
return rows.map(mapRowToTask);
|
|
1114
|
+
}
|
|
1115
|
+
/**
|
|
1116
|
+
* Bulk delete pruned tasks (already soft-deleted) by cutoff date.
|
|
1117
|
+
* Used by runPrune(). Returns number of deleted rows.
|
|
1118
|
+
*/
|
|
1119
|
+
async bulkDeletePruned(entityId, cutoff, tx) {
|
|
1120
|
+
const executor = this.getExecutor(tx);
|
|
1121
|
+
const rowsToDelete = await executor.getAllAsync(
|
|
1122
|
+
`SELECT id, deleted_at FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at <= ?`,
|
|
1123
|
+
[entityId, cutoff]
|
|
1124
|
+
);
|
|
1125
|
+
if (rowsToDelete.length === 0) return 0;
|
|
1126
|
+
const result = await executor.runAsync(
|
|
1127
|
+
`DELETE FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at <= ?`,
|
|
1128
|
+
[entityId, cutoff]
|
|
1129
|
+
);
|
|
1130
|
+
for (const row of rowsToDelete) {
|
|
1131
|
+
await this.outbox.push(
|
|
1132
|
+
{
|
|
1133
|
+
entityId,
|
|
1134
|
+
tableName: "tasks",
|
|
1135
|
+
recordId: row.id,
|
|
1136
|
+
operation: "DELETE",
|
|
1137
|
+
payload: { id: row.id, entity_id: entityId, deleted_at: row.deleted_at }
|
|
1138
|
+
},
|
|
1139
|
+
tx
|
|
1140
|
+
);
|
|
1974
1141
|
}
|
|
1975
|
-
|
|
1142
|
+
return result.changes;
|
|
1976
1143
|
}
|
|
1977
|
-
|
|
1144
|
+
/**
|
|
1145
|
+
* Soft-delete a task by ID within a transaction.
|
|
1146
|
+
* Stages a DELETE outbox entry in the same transaction.
|
|
1147
|
+
* `tx` is REQUIRED.
|
|
1148
|
+
*/
|
|
1149
|
+
async softDeleteById(id, entityId, tx) {
|
|
1150
|
+
const executor = this.getExecutor(tx);
|
|
1978
1151
|
const now = Date.now();
|
|
1979
|
-
const
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1152
|
+
const result = await executor.runAsync(
|
|
1153
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1154
|
+
[now, now, id, entityId]
|
|
1155
|
+
);
|
|
1156
|
+
if (result.changes > 0) {
|
|
1157
|
+
await this.outbox.push(
|
|
1158
|
+
{
|
|
1159
|
+
entityId,
|
|
1160
|
+
tableName: "tasks",
|
|
1161
|
+
recordId: id,
|
|
1162
|
+
operation: "DELETE",
|
|
1163
|
+
payload: { id, entity_id: entityId, deleted_at: now }
|
|
1164
|
+
},
|
|
1165
|
+
tx
|
|
1166
|
+
);
|
|
1984
1167
|
}
|
|
1985
|
-
|
|
1986
|
-
|
|
1168
|
+
return result;
|
|
1169
|
+
}
|
|
1170
|
+
/**
|
|
1171
|
+
* Bulk soft-delete all tasks for an entity.
|
|
1172
|
+
* Stages DELETE outbox entries for each row in the same transaction.
|
|
1173
|
+
* `tx` is REQUIRED.
|
|
1174
|
+
*/
|
|
1175
|
+
async bulkSoftDeleteByEntityId(entityId, tx) {
|
|
1176
|
+
const executor = this.getExecutor(tx);
|
|
1177
|
+
const now = Date.now();
|
|
1178
|
+
const idsToDelete = await executor.getAllAsync(
|
|
1179
|
+
`SELECT id FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1180
|
+
[entityId]
|
|
1181
|
+
);
|
|
1182
|
+
const result = await executor.runAsync(
|
|
1183
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1184
|
+
[now, now, entityId]
|
|
1185
|
+
);
|
|
1186
|
+
for (const row of idsToDelete) {
|
|
1187
|
+
await this.outbox.push({
|
|
1188
|
+
entityId,
|
|
1189
|
+
tableName: "tasks",
|
|
1190
|
+
recordId: row.id,
|
|
1191
|
+
operation: "DELETE",
|
|
1192
|
+
payload: { id: row.id, entity_id: entityId, deleted_at: now }
|
|
1193
|
+
}, tx);
|
|
1987
1194
|
}
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
await this.db.runAsync(`
|
|
1992
|
-
UPDATE ${this.prefix}entries
|
|
1993
|
-
SET deleted_at = ?, updated_at = ?
|
|
1994
|
-
WHERE entity_id = ? AND access_count = 0 AND created_at <= ? AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
1995
|
-
`, [now, now, entityId, orphanThreshold]);
|
|
1996
|
-
}
|
|
1997
|
-
if (staleInferredAfterDays !== null) {
|
|
1998
|
-
const staleThreshold = now - staleInferredAfterDays * MS_PER_DAY;
|
|
1999
|
-
await this.db.runAsync(`
|
|
2000
|
-
UPDATE ${this.prefix}entries
|
|
2001
|
-
SET confidence = 'tentative', updated_at = ?
|
|
2002
|
-
WHERE entity_id = ? AND confidence = 'inferred' AND (last_accessed_at <= ? OR (last_accessed_at IS NULL AND created_at <= ?)) AND source_type != 'immutable_document' AND deleted_at IS NULL
|
|
2003
|
-
`, [now, entityId, staleThreshold, staleThreshold]);
|
|
2004
|
-
}
|
|
2005
|
-
});
|
|
2006
|
-
const allFactsRows = await this.db.getAllAsync(`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`, [entityId]);
|
|
2007
|
-
const allTasks = await this.db.getAllAsync(`SELECT * FROM ${this.prefix}tasks WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL`, [entityId]);
|
|
2008
|
-
const recentEvents = await this.db.getAllAsync(`SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT 20`, [entityId]);
|
|
2009
|
-
const healCandidates = allFactsRows.filter((f) => f.source_type !== "immutable_document");
|
|
2010
|
-
const documentAnchors = allFactsRows.filter((f) => f.source_type === "immutable_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
2011
|
-
const userPrompt = `Heal Candidates:
|
|
2012
|
-
${JSON.stringify(healCandidates.map((f) => {
|
|
2013
|
-
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
2014
|
-
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
2015
|
-
}), null, 2)}
|
|
2016
|
-
|
|
2017
|
-
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
2018
|
-
${JSON.stringify(documentAnchors, null, 2)}
|
|
2019
|
-
|
|
2020
|
-
All Tasks:
|
|
2021
|
-
${JSON.stringify(allTasks, null, 2)}
|
|
2022
|
-
|
|
2023
|
-
Recent Events:
|
|
2024
|
-
${JSON.stringify(recentEvents, null, 2)}
|
|
1195
|
+
return result.changes;
|
|
1196
|
+
}
|
|
1197
|
+
};
|
|
2025
1198
|
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
const
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
await this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ?`, [now, now, id, entityId]);
|
|
2047
|
-
}
|
|
2048
|
-
for (const fact of validNewFacts) {
|
|
2049
|
-
const id = generateId("fact_");
|
|
2050
|
-
await this.db.runAsync(`
|
|
2051
|
-
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
2052
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2053
|
-
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "librarian_inferred", now, now]);
|
|
2054
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2055
|
-
}
|
|
2056
|
-
});
|
|
2057
|
-
this.vectorCache.delete(entityId);
|
|
2058
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2059
|
-
for (const factId of uniqueDeletedFactIds) {
|
|
2060
|
-
try {
|
|
2061
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2062
|
-
} catch (hookErr) {
|
|
2063
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
2064
|
-
}
|
|
2065
|
-
}
|
|
2066
|
-
for (const fact of insertedFacts) {
|
|
2067
|
-
await this.embedFact(fact);
|
|
2068
|
-
}
|
|
2069
|
-
this.vectorCache.delete(entityId);
|
|
1199
|
+
// src/repositories/EventRepository.ts
|
|
1200
|
+
var EventRepository = class extends BaseRepository {
  /**
   * Append one event row with a plain INSERT.
   * With `tx` the statement runs on that caller-owned transaction; without it,
   * getExecutor() supplies the default executor.
   * A missing related_entry_id is stored as NULL.
   */
  async add(event, tx) {
    const executor = this.getExecutor(tx);
    const sql = [
      `INSERT INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)`,
      "VALUES (?, ?, ?, ?, ?, ?)"
    ].join("\n");
    const { id, entity_id, event_type, summary, related_entry_id, created_at } = event;
    await executor.runAsync(sql, [
      id,
      entity_id,
      event_type,
      summary,
      related_entry_id ?? null,
      created_at
    ]);
  }
  /**
   * Same as add(), except the statement is INSERT OR IGNORE, so a conflicting
   * row is left untouched instead of raising an error.
   */
  async addIgnoreDuplicate(event, tx) {
    const executor = this.getExecutor(tx);
    const sql = [
      `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)`,
      "VALUES (?, ?, ?, ?, ?, ?)"
    ].join("\n");
    const { id, entity_id, event_type, summary, related_entry_id, created_at } = event;
    await executor.runAsync(sql, [
      id,
      entity_id,
      event_type,
      summary,
      related_entry_id ?? null,
      created_at
    ]);
  }
  /**
   * Fetch up to `limit` events of one entity, ordered by created_at DESC (newest first).
   * `limit` defaults to 50. Always runs on this.db.
   */
  async getRecent(entityId, limit = 50) {
    const sql = `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?`;
    return this.db.getAllAsync(sql, [entityId, limit]);
  }
  /**
   * Fetch up to `limit` events across several entities, ordered by created_at DESC.
   * The limit applies to the combined result, not per entity. `limit` defaults to 50.
   * An empty id list short-circuits to [] without touching the database.
   */
  async getRecentForEntities(entityIds, limit = 50) {
    if (entityIds.length === 0) {
      return [];
    }
    const marks = entityIds.map(() => "?").join(", ");
    const sql = `SELECT * FROM ${this.prefix}events WHERE entity_id IN (${marks}) ORDER BY created_at DESC LIMIT ?`;
    return this.db.getAllAsync(sql, [...entityIds, limit]);
  }
  /**
   * Delete the events of an entity whose created_at is at or before `cutoff`.
   * Resolves with the value runAsync resolves with; it is passed through as-is,
   * not reduced to a number.
   * NOTE(review): callers wanting a row count presumably read `.changes` from it — confirm.
   */
  async prune(entityId, cutoff) {
    const sql = `DELETE FROM ${this.prefix}events WHERE entity_id = ? AND created_at <= ?`;
    return this.db.runAsync(sql, [entityId, cutoff]);
  }
  /**
   * Count the events stored for an entity.
   * `tx` is optional; when given, the query runs on it directly, otherwise on this.db.
   * Yields 0 when no row or no count comes back.
   */
  async count(entityId, tx) {
    const executor = tx ?? this.db;
    const sql = `SELECT COUNT(*) as count FROM ${this.prefix}events WHERE entity_id = ?`;
    const tally = await executor.getFirstAsync(sql, [entityId]);
    return tally?.count ?? 0;
  }
  /**
   * Fetch the events of an entity in chronological (created_at ASC) order.
   * Without `limit` every event is returned. With `limit`, the newest `limit`
   * events are selected (DESC + LIMIT) and then reversed, so the result is
   * still oldest-first.
   */
  async getByEntityId(entityId, limit) {
    if (limit == null) {
      return this.db.getAllAsync(
        `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`,
        [entityId]
      );
    }
    const newestFirst = await this.db.getAllAsync(
      `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?`,
      [entityId, limit]
    );
    // Reverse a copy so the array handed back by the driver is not mutated.
    return [...newestFirst].reverse();
  }
};
|
|
1297
|
+
|
|
1298
|
+
// src/repositories/MetadataRepository.ts
|
|
1299
|
+
var MetadataRepository = class extends BaseRepository {
  // CHECKPOINTS TABLE METHODS
  /**
   * Read the checkpoints stored for an entity.
   * Resolves with {} when the entity has no row; otherwise with `{ memory, heal }`,
   * where a NULL column surfaces as undefined.
   */
  async getCheckpoint(entityId, tx) {
    const executor = this.getExecutor(tx);
    const stored = await executor.getFirstAsync(
      `SELECT memory_checkpoint, heal_checkpoint FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
      [entityId]
    );
    if (!stored) {
      return {};
    }
    const { memory_checkpoint, heal_checkpoint } = stored;
    return {
      memory: memory_checkpoint ?? undefined,
      heal: heal_checkpoint ?? undefined
    };
  }
  /**
   * Create or update the checkpoint row of an entity.
   * Only the fields present in `updates` (memory and/or heal, not undefined) are
   * overwritten on an existing row. When the row is first inserted, a field that
   * was not supplied is stored as 0. Does nothing when `updates` carries neither field.
   */
  async updateCheckpoint(entityId, updates, tx) {
    const columnByKey = [
      ["memory", "memory_checkpoint"],
      ["heal", "heal_checkpoint"]
    ];
    const assignments = [];
    const assignedValues = [];
    for (const [key, column] of columnByKey) {
      if (updates[key] !== undefined) {
        assignments.push(`${column} = ?`);
        assignedValues.push(updates[key]);
      }
    }
    if (assignments.length === 0) {
      return;
    }
    const executor = this.getExecutor(tx);
    const sql = [
      `INSERT INTO ${this.prefix}checkpoints (entity_id, memory_checkpoint, heal_checkpoint)`,
      "VALUES (?, ?, ?)",
      `ON CONFLICT(entity_id) DO UPDATE SET ${assignments.join(", ")}`
    ].join("\n");
    // The first three bindings feed the INSERT; the rest feed the conflict-update placeholders.
    await executor.runAsync(sql, [
      entityId,
      updates.memory ?? 0,
      updates.heal ?? 0,
      ...assignedValues
    ]);
  }
  /** Remove the checkpoint row of an entity. */
  async deleteCheckpoint(entityId, tx) {
    const sql = `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`;
    await this.getExecutor(tx).runAsync(sql, [entityId]);
  }
  // META TABLE METHODS
  /** Read the value stored under `key` in the meta table; null when the key has no row. */
  async getMeta(key, tx) {
    const executor = this.getExecutor(tx);
    const hit = await executor.getFirstAsync(
      `SELECT value FROM ${this.prefix}meta WHERE key = ?`,
      [key]
    );
    if (!hit) {
      return null;
    }
    return hit.value;
  }
  /** Store `value` under `key` in the meta table, replacing the value of an existing key. */
  async setMeta(key, value, tx) {
    const executor = this.getExecutor(tx);
    const sql = [
      `INSERT INTO ${this.prefix}meta (key, value) VALUES (?, ?)`,
      "ON CONFLICT(key) DO UPDATE SET value = excluded.value"
    ].join("\n");
    await executor.runAsync(sql, [key, value]);
  }
  /** Delete the 'embedding_dimension_mismatch' key from the meta table. */
  async clearDimensionMismatch(tx) {
    const sql = `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`;
    await this.getExecutor(tx).runAsync(sql);
  }
  /** Report whether sqlite_master lists a table with exactly this name. */
  async tableExists(tableName, tx) {
    const executor = this.getExecutor(tx);
    const match = await executor.getFirstAsync(
      `SELECT name FROM sqlite_master WHERE type='table' AND name=?`,
      [tableName]
    );
    return match != null;
  }
  /** Fetch the `sql` column sqlite_master holds for a table; null when there is none. */
  async getTableDdl(tableName, tx) {
    const executor = this.getExecutor(tx);
    const match = await executor.getFirstAsync(
      `SELECT sql FROM sqlite_master WHERE type='table' AND name=?`,
      [tableName]
    );
    return match?.sql ?? null;
  }
  /** Run PRAGMA wal_checkpoint(TRUNCATE) and then VACUUM, both directly on this.db. */
  async vacuum() {
    const statements = [`PRAGMA wal_checkpoint(TRUNCATE)`, `VACUUM`];
    for (const statement of statements) {
      await this.db.execAsync(statement);
    }
  }
  /**
   * List the distinct entity ids found in non-deleted entries, non-deleted tasks,
   * or any event, sorted by entity_id.
   */
  async getDistinctEntityIds(tx) {
    const executor = this.getExecutor(tx);
    const sql = [
      "SELECT DISTINCT entity_id FROM (",
      `SELECT entity_id FROM ${this.prefix}entries WHERE deleted_at IS NULL`,
      "UNION",
      `SELECT entity_id FROM ${this.prefix}tasks WHERE deleted_at IS NULL`,
      "UNION",
      `SELECT entity_id FROM ${this.prefix}events`,
      ") ORDER BY entity_id"
    ].join("\n");
    const rows = await executor.getAllAsync(sql);
    return rows.map(({ entity_id }) => entity_id);
  }
};
|
|
1397
|
+
|
|
1398
|
+
// src/WikiMemory.ts
|
|
1399
|
+
var _testAccessNonTestEnvWarned;
|
|
1400
|
+
var WikiMemory = class {
|
|
1401
|
+
constructor(db, options) {
|
|
1402
|
+
/** Emits `__testAccess` console warning at most once per instance when NODE_ENV ≠ "test". */
|
|
1403
|
+
__privateAdd(this, _testAccessNonTestEnvWarned, false);
|
|
1404
|
+
this.db = db;
|
|
1405
|
+
this.options = options;
|
|
1406
|
+
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
1407
|
+
this.outboxRepo = new OutboxRepository(db, this.prefix);
|
|
1408
|
+
this.entryRepo = new EntryRepository(db, this.prefix, this.outboxRepo);
|
|
1409
|
+
this.taskRepo = new TaskRepository(db, this.prefix, this.outboxRepo);
|
|
1410
|
+
this.eventRepo = new EventRepository(db, this.prefix);
|
|
1411
|
+
this.metadataRepo = new MetadataRepository(db, this.prefix);
|
|
1412
|
+
this.embeddingService = new EmbeddingService(this.db, this.options, this.entryRepo, this.metadataRepo);
|
|
1413
|
+
this.searchService = new SearchService(this.entryRepo);
|
|
1414
|
+
this.jobManager = new JobManager(this.prefix);
|
|
1415
|
+
this.promptService = new PromptService(options.config?.prompts);
|
|
1416
|
+
this.ingestionService = new IngestionService(
|
|
1417
|
+
this.db,
|
|
1418
|
+
this.prefix,
|
|
1419
|
+
this.options,
|
|
1420
|
+
this.entryRepo,
|
|
1421
|
+
this.searchService,
|
|
1422
|
+
this.jobManager,
|
|
1423
|
+
this.embeddingService,
|
|
1424
|
+
this.promptService
|
|
1425
|
+
);
|
|
1426
|
+
this.maintenanceService = new MaintenanceService(
|
|
1427
|
+
this.db,
|
|
1428
|
+
this.prefix,
|
|
1429
|
+
this.options,
|
|
1430
|
+
this.entryRepo,
|
|
1431
|
+
this.taskRepo,
|
|
1432
|
+
this.eventRepo,
|
|
1433
|
+
this.metadataRepo,
|
|
1434
|
+
this.searchService,
|
|
1435
|
+
this.jobManager,
|
|
1436
|
+
this.embeddingService,
|
|
1437
|
+
this.promptService
|
|
1438
|
+
);
|
|
1439
|
+
this.importExportService = new ImportExportService(
|
|
1440
|
+
this.db,
|
|
1441
|
+
this.entryRepo,
|
|
1442
|
+
this.taskRepo,
|
|
1443
|
+
this.eventRepo,
|
|
1444
|
+
this.metadataRepo,
|
|
1445
|
+
this.searchService,
|
|
1446
|
+
this.jobManager,
|
|
1447
|
+
this.embeddingService
|
|
1448
|
+
);
|
|
1449
|
+
this.retrievalService = new RetrievalService(
|
|
1450
|
+
this.options,
|
|
1451
|
+
this.entryRepo,
|
|
1452
|
+
this.taskRepo,
|
|
1453
|
+
this.eventRepo,
|
|
1454
|
+
this.metadataRepo,
|
|
1455
|
+
this.searchService
|
|
1456
|
+
);
|
|
1457
|
+
this.writeService = new WriteService(
|
|
1458
|
+
this.db,
|
|
1459
|
+
this.options,
|
|
1460
|
+
this.eventRepo,
|
|
1461
|
+
this.metadataRepo,
|
|
1462
|
+
this.jobManager,
|
|
1463
|
+
this.maintenanceService
|
|
1464
|
+
);
|
|
1465
|
+
}
|
|
1466
|
+
/**
|
|
1467
|
+
* Explicit escape hatch for test suites: typed access to composed services for mocks/spies.
|
|
1468
|
+
* If `NODE_ENV` is not `"test"`, emits a single `console.warn` per instance (skipped when `process` is undefined).
|
|
1469
|
+
*/
|
|
1470
|
+
get __testAccess() {
|
|
1471
|
+
const processEnv = typeof globalThis !== "undefined" ? globalThis.process?.env : void 0;
|
|
1472
|
+
if (processEnv !== void 0 && processEnv.NODE_ENV !== "test" && !__privateGet(this, _testAccessNonTestEnvWarned)) {
|
|
1473
|
+
__privateSet(this, _testAccessNonTestEnvWarned, true);
|
|
1474
|
+
console.warn('Warning: WikiMemory.__testAccess is intended for tests (NODE_ENV !== "test").');
|
|
2354
1475
|
}
|
|
2355
|
-
return {
|
|
1476
|
+
return {
|
|
1477
|
+
embeddingService: this.embeddingService,
|
|
1478
|
+
importExportService: this.importExportService,
|
|
1479
|
+
ingestionService: this.ingestionService,
|
|
1480
|
+
maintenanceService: this.maintenanceService,
|
|
1481
|
+
retrievalService: this.retrievalService,
|
|
1482
|
+
searchService: this.searchService,
|
|
1483
|
+
writeService: this.writeService,
|
|
1484
|
+
promptService: this.promptService,
|
|
1485
|
+
entryRepo: this.entryRepo,
|
|
1486
|
+
metadataRepo: this.metadataRepo,
|
|
1487
|
+
jobManager: this.jobManager
|
|
1488
|
+
};
|
|
2356
1489
|
}
|
|
2357
|
-
async
|
|
2358
|
-
const
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
}
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
if (this._isReembedActive(entityId)) {
|
|
2374
|
-
throw new WikiBusyError("reembed", entityId);
|
|
2375
|
-
}
|
|
2376
|
-
if (this._isIngestActiveFor(entityId)) {
|
|
2377
|
-
throw new WikiBusyError("ingest", entityId);
|
|
2378
|
-
}
|
|
2379
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
2380
|
-
throw new WikiBusyError("forget", entityId);
|
|
1490
|
+
async setup() {
|
|
1491
|
+
const entriesExistedBeforeSetup = await this.metadataRepo.tableExists(`${this.prefix}entries`);
|
|
1492
|
+
await setupDatabase(this.db, this.prefix);
|
|
1493
|
+
let currentVersion;
|
|
1494
|
+
if (!entriesExistedBeforeSetup) {
|
|
1495
|
+
await this.metadataRepo.setMeta("schema_version", String(CURRENT_SCHEMA_VERSION), this.db);
|
|
1496
|
+
currentVersion = CURRENT_SCHEMA_VERSION;
|
|
1497
|
+
} else {
|
|
1498
|
+
const schemaVersionValue = await this.metadataRepo.getMeta("schema_version");
|
|
1499
|
+
if (schemaVersionValue) {
|
|
1500
|
+
currentVersion = parseInt(schemaVersionValue, 10);
|
|
1501
|
+
if (!Number.isFinite(currentVersion)) currentVersion = 0;
|
|
1502
|
+
} else {
|
|
1503
|
+
const ftsDdl = await this.metadataRepo.getTableDdl(`${this.prefix}entries_fts`);
|
|
1504
|
+
const hasPorter = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsDdl ?? "");
|
|
1505
|
+
currentVersion = hasPorter ? 1 : 0;
|
|
2381
1506
|
}
|
|
2382
1507
|
}
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
2389
|
-
}
|
|
2390
|
-
try {
|
|
2391
|
-
await this.assertNoLegacySourceTypes();
|
|
2392
|
-
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
2393
|
-
await this._doImportEntity(entityId, bundle, merge);
|
|
2394
|
-
}
|
|
2395
|
-
} finally {
|
|
2396
|
-
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
2397
|
-
for (const entityId of entityIds) {
|
|
2398
|
-
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1508
|
+
for (const migration of MIGRATIONS) {
|
|
1509
|
+
if (migration.version > currentVersion) {
|
|
1510
|
+
await migration.run(this.db, this.prefix);
|
|
1511
|
+
await this.metadataRepo.setMeta("schema_version", String(migration.version), this.db);
|
|
1512
|
+
currentVersion = migration.version;
|
|
2399
1513
|
}
|
|
2400
1514
|
}
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
2406
|
-
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
2407
|
-
const softDeletedFactIds = [];
|
|
2408
|
-
await this.db.withTransactionAsync(async () => {
|
|
2409
|
-
if (!merge) {
|
|
2410
|
-
const toDelete = await this.db.getAllAsync(
|
|
2411
|
-
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2412
|
-
[entityId]
|
|
2413
|
-
);
|
|
2414
|
-
softDeletedFactIds.push(...toDelete.map((r) => r.id));
|
|
2415
|
-
const now = Date.now();
|
|
2416
|
-
await this.db.runAsync(
|
|
2417
|
-
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2418
|
-
[now, now, entityId]
|
|
2419
|
-
);
|
|
2420
|
-
await this.db.runAsync(
|
|
2421
|
-
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2422
|
-
[now, now, entityId]
|
|
2423
|
-
);
|
|
2424
|
-
await this.db.runAsync(
|
|
2425
|
-
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
2426
|
-
[entityId]
|
|
2427
|
-
);
|
|
2428
|
-
}
|
|
2429
|
-
const factIds = bundle.facts.map((fact) => fact.id);
|
|
2430
|
-
const existingFactsById = /* @__PURE__ */ new Map();
|
|
2431
|
-
const factLookupChunkSize = 500;
|
|
2432
|
-
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
2433
|
-
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
2434
|
-
if (factIdChunk.length === 0) continue;
|
|
2435
|
-
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
2436
|
-
const existingFacts = await this.db.getAllAsync(
|
|
2437
|
-
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
2438
|
-
factIdChunk
|
|
2439
|
-
);
|
|
2440
|
-
for (const existingFact of existingFacts) {
|
|
2441
|
-
existingFactsById.set(existingFact.id, existingFact);
|
|
2442
|
-
}
|
|
2443
|
-
}
|
|
2444
|
-
for (const fact of bundle.facts) {
|
|
2445
|
-
const sourceType = this._normalizeImportedSourceType(String(fact.source_type), {
|
|
2446
|
-
entityId,
|
|
2447
|
-
factId: fact.id
|
|
2448
|
-
});
|
|
2449
|
-
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
2450
|
-
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
2451
|
-
const existing = existingFactsById.get(fact.id);
|
|
2452
|
-
const rawBlobRaw = fact.embedding_blob;
|
|
2453
|
-
let rawBlob = null;
|
|
2454
|
-
if (rawBlobRaw instanceof Uint8Array) {
|
|
2455
|
-
rawBlob = rawBlobRaw;
|
|
2456
|
-
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
2457
|
-
const obj = rawBlobRaw;
|
|
2458
|
-
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
2459
|
-
rawBlob = new Uint8Array(obj["data"]);
|
|
2460
|
-
} else if (!Array.isArray(rawBlobRaw)) {
|
|
2461
|
-
const entries = Object.keys(obj);
|
|
2462
|
-
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
2463
|
-
const len = entries.length;
|
|
2464
|
-
rawBlob = new Uint8Array(len);
|
|
2465
|
-
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
2466
|
-
}
|
|
2467
|
-
}
|
|
2468
|
-
}
|
|
2469
|
-
let blobData = null;
|
|
2470
|
-
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
2471
|
-
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
2472
|
-
const alignedBlob = new Uint8Array(copy);
|
|
2473
|
-
alignedBlob.set(rawBlob);
|
|
2474
|
-
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
2475
|
-
let allFinite = true;
|
|
2476
|
-
for (let i = 0; i < floats.length; i++) {
|
|
2477
|
-
if (!isFinite(floats[i])) {
|
|
2478
|
-
allFinite = false;
|
|
2479
|
-
break;
|
|
2480
|
-
}
|
|
2481
|
-
}
|
|
2482
|
-
if (allFinite) {
|
|
2483
|
-
blobData = alignedBlob;
|
|
2484
|
-
}
|
|
2485
|
-
}
|
|
2486
|
-
if (existing) {
|
|
2487
|
-
if (existing.entity_id !== entityId) {
|
|
2488
|
-
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
2489
|
-
continue;
|
|
2490
|
-
}
|
|
2491
|
-
if (merge) {
|
|
2492
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
2493
|
-
}
|
|
2494
|
-
if (blobData != null) {
|
|
2495
|
-
await this.db.runAsync(
|
|
2496
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
2497
|
-
[entityId, fact.title, fact.body, tagsJson, fact.confidence, sourceType, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
2498
|
-
);
|
|
2499
|
-
factsWithPreservedBlob.set(fact.id, blobData);
|
|
2500
|
-
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
2501
|
-
} else {
|
|
2502
|
-
await this.db.runAsync(
|
|
2503
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
|
|
2504
|
-
[entityId, fact.title, fact.body, tagsJson, fact.confidence, sourceType, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
2505
|
-
);
|
|
2506
|
-
}
|
|
2507
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2508
|
-
upsertedFactIds.add(fact.id);
|
|
2509
|
-
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
2510
|
-
} else {
|
|
2511
|
-
if (blobData != null) {
|
|
2512
|
-
await this.db.runAsync(
|
|
2513
|
-
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2514
|
-
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, sourceType, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
2515
|
-
);
|
|
2516
|
-
factsWithPreservedBlob.set(fact.id, blobData);
|
|
2517
|
-
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
2518
|
-
} else {
|
|
2519
|
-
await this.db.runAsync(
|
|
2520
|
-
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2521
|
-
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, sourceType, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
2522
|
-
);
|
|
2523
|
-
}
|
|
2524
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2525
|
-
upsertedFactIds.add(fact.id);
|
|
2526
|
-
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
2527
|
-
}
|
|
2528
|
-
}
|
|
2529
|
-
const taskIds = bundle.tasks.map((task) => task.id);
|
|
2530
|
-
const existingTasksById = /* @__PURE__ */ new Map();
|
|
2531
|
-
const taskLookupChunkSize = 500;
|
|
2532
|
-
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
2533
|
-
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
2534
|
-
if (taskIdChunk.length === 0) continue;
|
|
2535
|
-
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
2536
|
-
const existingTasks = await this.db.getAllAsync(
|
|
2537
|
-
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
2538
|
-
taskIdChunk
|
|
2539
|
-
);
|
|
2540
|
-
for (const existingTask of existingTasks) {
|
|
2541
|
-
existingTasksById.set(existingTask.id, existingTask);
|
|
2542
|
-
}
|
|
2543
|
-
}
|
|
2544
|
-
for (const task of bundle.tasks) {
|
|
2545
|
-
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
2546
|
-
const existing = existingTasksById.get(task.id);
|
|
2547
|
-
if (existing) {
|
|
2548
|
-
if (existing.entity_id !== entityId) {
|
|
2549
|
-
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
2550
|
-
continue;
|
|
2551
|
-
}
|
|
2552
|
-
if (merge) {
|
|
2553
|
-
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
2554
|
-
}
|
|
2555
|
-
await this.db.runAsync(
|
|
2556
|
-
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
2557
|
-
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
2558
|
-
);
|
|
2559
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2560
|
-
} else {
|
|
2561
|
-
await this.db.runAsync(
|
|
2562
|
-
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2563
|
-
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
2564
|
-
);
|
|
2565
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2566
|
-
}
|
|
2567
|
-
}
|
|
2568
|
-
for (const event of bundle.events) {
|
|
2569
|
-
await this.db.runAsync(
|
|
2570
|
-
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
2571
|
-
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
2572
|
-
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
2573
|
-
);
|
|
2574
|
-
}
|
|
2575
|
-
});
|
|
2576
|
-
this.vectorCache.delete(entityId);
|
|
2577
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2578
|
-
for (const fact of bundle.facts) {
|
|
2579
|
-
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
2580
|
-
await this.embedFact({
|
|
2581
|
-
id: fact.id,
|
|
2582
|
-
entity_id: entityId,
|
|
2583
|
-
// Use authoritative entityId from dump key, not fact.entity_id
|
|
2584
|
-
title: fact.title,
|
|
2585
|
-
body: fact.body,
|
|
2586
|
-
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
2587
|
-
});
|
|
1515
|
+
if (entriesExistedBeforeSetup) {
|
|
1516
|
+
const schemaVersionCheck = await this.metadataRepo.getMeta("schema_version");
|
|
1517
|
+
if (!schemaVersionCheck) {
|
|
1518
|
+
await this.metadataRepo.setMeta("schema_version", String(currentVersion), this.db);
|
|
2588
1519
|
}
|
|
2589
1520
|
}
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
2593
|
-
try {
|
|
2594
|
-
const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
|
|
2595
|
-
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
2596
|
-
} catch (hookErr) {
|
|
2597
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
2598
|
-
}
|
|
2599
|
-
}
|
|
1521
|
+
if (entriesExistedBeforeSetup) {
|
|
1522
|
+
await this.importExportService.assertNoLegacySourceTypes();
|
|
2600
1523
|
}
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
1524
|
+
const rows = await this.entryRepo.findRowsForSourceRefMigration();
|
|
1525
|
+
await this.db.withTransactionAsync(async (tx) => {
|
|
1526
|
+
for (const row of rows) {
|
|
1527
|
+
const normalized = normalizeSourceRef(row.source_ref);
|
|
1528
|
+
if (normalized !== row.source_ref) {
|
|
1529
|
+
await this.entryRepo.updateSourceRefByRowid(row.rowid, normalized, tx);
|
|
2607
1530
|
}
|
|
2608
1531
|
}
|
|
1532
|
+
});
|
|
1533
|
+
await this.searchService.sync();
|
|
1534
|
+
}
|
|
1535
|
+
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
1536
|
+
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
1537
|
+
if (!normalizedRef) {
|
|
1538
|
+
throw new Error(`Invalid sourceRef: "${sourceRef}"`);
|
|
2609
1539
|
}
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
);
|
|
2614
|
-
const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
|
|
2615
|
-
if (preservedBlobDims.size === 1) {
|
|
2616
|
-
const preservedDim = [...preservedBlobDims][0];
|
|
2617
|
-
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
2618
|
-
await this.storeEmbeddingDimension(preservedDim);
|
|
2619
|
-
const staleMismatch = await this.db.getFirstAsync(
|
|
2620
|
-
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
2621
|
-
);
|
|
2622
|
-
if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
|
|
2623
|
-
await this.db.runAsync(
|
|
2624
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2625
|
-
[String(preservedDim)]
|
|
2626
|
-
);
|
|
2627
|
-
}
|
|
2628
|
-
await this._reconcileEmbeddingDimension();
|
|
2629
|
-
} else {
|
|
2630
|
-
await this.db.runAsync(
|
|
2631
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2632
|
-
[String(canonicalDim)]
|
|
2633
|
-
);
|
|
2634
|
-
}
|
|
2635
|
-
} else if (preservedBlobDims.size > 1) {
|
|
2636
|
-
if (canonicalDim === null) {
|
|
2637
|
-
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
2638
|
-
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
2639
|
-
await this.db.runAsync(
|
|
2640
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2641
|
-
[String(sortedPreservedBlobDims[0])]
|
|
2642
|
-
);
|
|
2643
|
-
} else {
|
|
2644
|
-
await this.db.runAsync(
|
|
2645
|
-
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2646
|
-
[String(canonicalDim)]
|
|
2647
|
-
);
|
|
2648
|
-
}
|
|
2649
|
-
}
|
|
2650
|
-
} finally {
|
|
2651
|
-
this.vectorCache.delete(entityId);
|
|
1540
|
+
const normalizedHash = normalizeSourceHash(sourceHash);
|
|
1541
|
+
if (!normalizedHash) {
|
|
1542
|
+
throw new Error(`Invalid sourceHash: must be a 64-character hex string (normalized to lowercase)`);
|
|
2652
1543
|
}
|
|
1544
|
+
const storedHash = await this.entryRepo.findLatestSourceHash(entityId, normalizedRef);
|
|
1545
|
+
if (storedHash === null) return true;
|
|
1546
|
+
const normalizedStoredHash = normalizeSourceHash(storedHash);
|
|
1547
|
+
return normalizedStoredHash !== normalizedHash;
|
|
1548
|
+
}
|
|
1549
|
+
async runPrune(entityId, options) {
|
|
1550
|
+
return this.maintenanceService.runPrune(entityId, options);
|
|
1551
|
+
}
|
|
1552
|
+
async read(entityId, query, options) {
|
|
1553
|
+
return this.retrievalService.read(entityId, query, options);
|
|
1554
|
+
}
|
|
1555
|
+
async getMemoryBundle(entityId) {
|
|
1556
|
+
return this.importExportService.getFullBundle(entityId, { maxEvents: 10 });
|
|
1557
|
+
}
|
|
1558
|
+
async write(entityId, event) {
|
|
1559
|
+
return this.writeService.write(entityId, event);
|
|
1560
|
+
}
|
|
1561
|
+
/**
|
|
1562
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
1563
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
1564
|
+
* set `options.config.prompts.librarianSystemPrompt` at WikiMemory construction time.
|
|
1565
|
+
*/
|
|
1566
|
+
async runLibrarian(entityId, options) {
|
|
1567
|
+
return this.maintenanceService.runLibrarian(entityId, options);
|
|
1568
|
+
}
|
|
1569
|
+
/**
|
|
1570
|
+
* @param options.promptOverride - Applies only to this manual call. Does NOT affect
|
|
1571
|
+
* WriteService-triggered auto-runs. For persistent prompt customization across auto-runs,
|
|
1572
|
+
* set `options.config.prompts.healSystemPrompt` at WikiMemory construction time.
|
|
1573
|
+
*/
|
|
1574
|
+
async runHeal(entityId, options) {
|
|
1575
|
+
return this.maintenanceService.runHeal(entityId, options);
|
|
1576
|
+
}
|
|
1577
|
+
async runReembed(entityId, opts) {
|
|
1578
|
+
return this.maintenanceService.runReembed(entityId, opts);
|
|
1579
|
+
}
|
|
1580
|
+
getEntityStatus(entityId) {
|
|
1581
|
+
return this.jobManager.getEntityStatus(entityId);
|
|
1582
|
+
}
|
|
1583
|
+
subscribeEntityStatus(entityId, callback) {
|
|
1584
|
+
return this.jobManager.subscribeEntityStatus(entityId, callback);
|
|
1585
|
+
}
|
|
1586
|
+
clearVectorCache() {
|
|
1587
|
+
this.searchService.evictCache();
|
|
1588
|
+
}
|
|
1589
|
+
async exportDump(entityIds) {
|
|
1590
|
+
return this.importExportService.exportDump(entityIds);
|
|
1591
|
+
}
|
|
1592
|
+
async importDump(dump, opts) {
|
|
1593
|
+
return this.importExportService.importDump(dump, opts);
|
|
2653
1594
|
}
|
|
2654
1595
|
async forget(entityId, params) {
|
|
2655
|
-
|
|
2656
|
-
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
2657
|
-
blockingOperation = "librarian";
|
|
2658
|
-
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
2659
|
-
blockingOperation = "heal";
|
|
2660
|
-
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
2661
|
-
blockingOperation = "prune";
|
|
2662
|
-
} else if (this._isReembedActive(entityId)) {
|
|
2663
|
-
blockingOperation = "reembed";
|
|
2664
|
-
} else if (this._isIngestActiveFor(entityId)) {
|
|
2665
|
-
blockingOperation = "ingest";
|
|
2666
|
-
} else if (this._isImportActiveFor(entityId)) {
|
|
2667
|
-
blockingOperation = "import";
|
|
2668
|
-
} else if (this._isForgetActiveFor(entityId)) {
|
|
2669
|
-
blockingOperation = "forget";
|
|
2670
|
-
}
|
|
2671
|
-
if (blockingOperation !== null) {
|
|
2672
|
-
throw new WikiBusyError(blockingOperation, entityId);
|
|
2673
|
-
}
|
|
2674
|
-
const forgetKey = this._forgetKey(entityId);
|
|
2675
|
-
this.activeMaintenanceJobs.add(forgetKey);
|
|
2676
|
-
try {
|
|
2677
|
-
const now = Date.now();
|
|
2678
|
-
let deletedEntries = 0;
|
|
2679
|
-
let deletedTasks = 0;
|
|
2680
|
-
const deletedEntryIds = [];
|
|
2681
|
-
if (params.clearAll) {
|
|
2682
|
-
const newDeletions = await this.db.getAllAsync(
|
|
2683
|
-
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2684
|
-
[entityId]
|
|
2685
|
-
);
|
|
2686
|
-
const alreadySoftDeleted = await this.db.getAllAsync(
|
|
2687
|
-
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NOT NULL`,
|
|
2688
|
-
[entityId]
|
|
2689
|
-
);
|
|
2690
|
-
deletedEntryIds.push(...newDeletions.map((e) => e.id), ...alreadySoftDeleted.map((e) => e.id));
|
|
2691
|
-
const [entriesRes, tasksRes] = await Promise.all([
|
|
2692
|
-
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
2693
|
-
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
2694
|
-
]);
|
|
2695
|
-
await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
|
|
2696
|
-
deletedEntries = entriesRes.changes;
|
|
2697
|
-
deletedTasks = tasksRes.changes;
|
|
2698
|
-
} else {
|
|
2699
|
-
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
2700
|
-
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
2701
|
-
if (hasIdSelectors && hasSourceSelectors) {
|
|
2702
|
-
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
2703
|
-
}
|
|
2704
|
-
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
2705
|
-
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
2706
|
-
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
2707
|
-
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2708
|
-
if (params.entryId) {
|
|
2709
|
-
const entry = await this.db.getFirstAsync(
|
|
2710
|
-
`SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ?`,
|
|
2711
|
-
[params.entryId, entityId]
|
|
2712
|
-
);
|
|
2713
|
-
if (entry) deletedEntryIds.push(entry.id);
|
|
2714
|
-
}
|
|
2715
|
-
if (sourceRef || sourceHash) {
|
|
2716
|
-
let q = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ?`;
|
|
2717
|
-
const args = [entityId];
|
|
2718
|
-
if (sourceRef) {
|
|
2719
|
-
q += ` AND source_ref = ?`;
|
|
2720
|
-
args.push(sourceRef);
|
|
2721
|
-
}
|
|
2722
|
-
if (sourceHash) {
|
|
2723
|
-
q += ` AND source_hash = ?`;
|
|
2724
|
-
args.push(sourceHash);
|
|
2725
|
-
}
|
|
2726
|
-
const entriesToDelete = await this.db.getAllAsync(q, args);
|
|
2727
|
-
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2728
|
-
}
|
|
2729
|
-
const entryPromise = params.entryId ? this.entryRepo.softDelete(params.entryId, entityId) : null;
|
|
2730
|
-
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
2731
|
-
let refPromise = null;
|
|
2732
|
-
if (sourceRef || sourceHash) {
|
|
2733
|
-
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
2734
|
-
const args = [now, now, entityId];
|
|
2735
|
-
if (sourceRef) {
|
|
2736
|
-
q += ` AND source_ref = ?`;
|
|
2737
|
-
args.push(sourceRef);
|
|
2738
|
-
}
|
|
2739
|
-
if (sourceHash) {
|
|
2740
|
-
q += ` AND source_hash = ?`;
|
|
2741
|
-
args.push(sourceHash);
|
|
2742
|
-
}
|
|
2743
|
-
refPromise = this.db.runAsync(q, args);
|
|
2744
|
-
}
|
|
2745
|
-
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
2746
|
-
entryPromise ?? Promise.resolve(null),
|
|
2747
|
-
taskPromise ?? Promise.resolve(null),
|
|
2748
|
-
refPromise ?? Promise.resolve(null)
|
|
2749
|
-
]);
|
|
2750
|
-
if (entryResult) deletedEntries += entryResult.changes;
|
|
2751
|
-
if (taskResult) deletedTasks += taskResult.changes;
|
|
2752
|
-
if (refResult) deletedEntries += refResult.changes;
|
|
2753
|
-
}
|
|
2754
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2755
|
-
this.vectorCache.delete(entityId);
|
|
2756
|
-
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2757
|
-
for (const factId of uniqueDeletedIds) {
|
|
2758
|
-
try {
|
|
2759
|
-
await this._notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
2760
|
-
} catch (hookErr) {
|
|
2761
|
-
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2762
|
-
if (isTimeout) {
|
|
2763
|
-
throw new Error(
|
|
2764
|
-
`forget(${entityId}/${factId}) failed: ${hookErr.message}`
|
|
2765
|
-
);
|
|
2766
|
-
}
|
|
2767
|
-
const errMsg = hookErr?.message ?? "";
|
|
2768
|
-
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2769
|
-
if (isValidationError) {
|
|
2770
|
-
throw new Error(
|
|
2771
|
-
`forget(${entityId}/${factId}) failed: ${errMsg}`,
|
|
2772
|
-
{ cause: hookErr }
|
|
2773
|
-
);
|
|
2774
|
-
}
|
|
2775
|
-
throw new Error(
|
|
2776
|
-
`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`,
|
|
2777
|
-
{ cause: this._sanitizeRankerError(hookErr) }
|
|
2778
|
-
);
|
|
2779
|
-
}
|
|
2780
|
-
}
|
|
2781
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
2782
|
-
} finally {
|
|
2783
|
-
this.activeMaintenanceJobs.delete(forgetKey);
|
|
2784
|
-
}
|
|
1596
|
+
return this.maintenanceService.forget(entityId, params);
|
|
2785
1597
|
}
|
|
1598
|
+
/**
|
|
1599
|
+
* @param params.promptOverride - Overrides the system prompt for this ingest call only.
|
|
1600
|
+
* For persistent customization, set `options.config.prompts.ingestSystemPrompt` at
|
|
1601
|
+
* WikiMemory construction time.
|
|
1602
|
+
*/
|
|
2786
1603
|
async ingestDocument(entityId, params) {
|
|
2787
|
-
|
|
2788
|
-
if (!sourceRef) throw new Error("Invalid sourceRef");
|
|
2789
|
-
const sourceHash = normalizeSourceHash(params.sourceHash);
|
|
2790
|
-
if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2791
|
-
const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
|
|
2792
|
-
const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
|
|
2793
|
-
const chunkOverlap = Math.min(
|
|
2794
|
-
Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
|
|
2795
|
-
maxChunkLength - 1
|
|
2796
|
-
);
|
|
2797
|
-
const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
|
|
2798
|
-
const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
|
|
2799
|
-
if (typeof params.documentChunk !== "string") {
|
|
2800
|
-
throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
|
|
2801
|
-
}
|
|
2802
|
-
const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
|
|
2803
|
-
if (this.activeIngestJobs.has(jobKey)) {
|
|
2804
|
-
throw new WikiBusyError("ingest", entityId);
|
|
2805
|
-
}
|
|
2806
|
-
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
2807
|
-
throw new WikiBusyError("prune", entityId);
|
|
2808
|
-
}
|
|
2809
|
-
if (this._isReembedActive(entityId)) {
|
|
2810
|
-
throw new WikiBusyError("reembed", entityId);
|
|
2811
|
-
}
|
|
2812
|
-
if (this._isImportActiveFor(entityId)) {
|
|
2813
|
-
throw new WikiBusyError("import", entityId);
|
|
2814
|
-
}
|
|
2815
|
-
if (this._isForgetActiveFor(entityId)) {
|
|
2816
|
-
throw new WikiBusyError("forget", entityId);
|
|
2817
|
-
}
|
|
2818
|
-
this.activeIngestJobs.add(jobKey);
|
|
2819
|
-
this._notifyStatusSubscribers(entityId);
|
|
2820
|
-
try {
|
|
2821
|
-
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
2822
|
-
if (chunks.length === 0) {
|
|
2823
|
-
return { truncated: false, chunks: 0 };
|
|
2824
|
-
}
|
|
2825
|
-
const chunkResults = await withConcurrency(
|
|
2826
|
-
chunks.map((chunk) => async () => {
|
|
2827
|
-
const userPrompt = `Document Chunk:
|
|
2828
|
-
${chunk}`;
|
|
2829
|
-
const responseText = await this.options.llmProvider.generateText({
|
|
2830
|
-
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
2831
|
-
userPrompt
|
|
2832
|
-
});
|
|
2833
|
-
const result = parseJsonResponse(responseText);
|
|
2834
|
-
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
2835
|
-
}),
|
|
2836
|
-
chunkConcurrency
|
|
2837
|
-
);
|
|
2838
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2839
|
-
const allValidFacts = [];
|
|
2840
|
-
for (const facts of chunkResults) {
|
|
2841
|
-
for (const fact of facts) {
|
|
2842
|
-
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
2843
|
-
if (!seen.has(normalized)) {
|
|
2844
|
-
seen.add(normalized);
|
|
2845
|
-
allValidFacts.push(fact);
|
|
2846
|
-
}
|
|
2847
|
-
}
|
|
2848
|
-
}
|
|
2849
|
-
const now = Date.now();
|
|
2850
|
-
const insertedFacts = [];
|
|
2851
|
-
const deletedSourceFactIds = [];
|
|
2852
|
-
await this.db.withTransactionAsync(async () => {
|
|
2853
|
-
const existingSourceFacts = await this.db.getAllAsync(
|
|
2854
|
-
`SELECT id FROM ${this.prefix}entries WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2855
|
-
[sourceRef, entityId]
|
|
2856
|
-
);
|
|
2857
|
-
for (const row of existingSourceFacts) {
|
|
2858
|
-
deletedSourceFactIds.push(row.id);
|
|
2859
|
-
}
|
|
2860
|
-
await this.db.runAsync(
|
|
2861
|
-
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2862
|
-
[now, now, sourceRef, entityId]
|
|
2863
|
-
);
|
|
2864
|
-
for (const fact of allValidFacts) {
|
|
2865
|
-
const id = generateId("fact_");
|
|
2866
|
-
const wikiFact = {
|
|
2867
|
-
id,
|
|
2868
|
-
entity_id: entityId,
|
|
2869
|
-
title: fact.title,
|
|
2870
|
-
body: fact.body,
|
|
2871
|
-
tags: fact.tags,
|
|
2872
|
-
confidence: fact.confidence,
|
|
2873
|
-
source_type: "immutable_document",
|
|
2874
|
-
source_hash: sourceHash,
|
|
2875
|
-
source_ref: sourceRef,
|
|
2876
|
-
created_at: now,
|
|
2877
|
-
updated_at: now,
|
|
2878
|
-
last_accessed_at: null,
|
|
2879
|
-
access_count: 0,
|
|
2880
|
-
deleted_at: null
|
|
2881
|
-
};
|
|
2882
|
-
await this.entryRepo.upsert(wikiFact);
|
|
2883
|
-
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2884
|
-
}
|
|
2885
|
-
});
|
|
2886
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
2887
|
-
this.vectorCache.delete(entityId);
|
|
2888
|
-
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
2889
|
-
for (const factId of uniqueDeletedSourceFactIds) {
|
|
2890
|
-
try {
|
|
2891
|
-
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2892
|
-
} catch (hookErr) {
|
|
2893
|
-
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
2894
|
-
}
|
|
2895
|
-
}
|
|
2896
|
-
for (const fact of insertedFacts) {
|
|
2897
|
-
await this.embedFact(fact);
|
|
2898
|
-
}
|
|
2899
|
-
this.vectorCache.delete(entityId);
|
|
2900
|
-
return { truncated, chunks: chunks.length };
|
|
2901
|
-
} finally {
|
|
2902
|
-
this.activeIngestJobs.delete(jobKey);
|
|
2903
|
-
this._notifyStatusSubscribers(entityId);
|
|
2904
|
-
}
|
|
1604
|
+
return this.ingestionService.ingestDocument(entityId, params);
|
|
2905
1605
|
}
|
|
2906
1606
|
};
|
|
2907
|
-
|
|
2908
|
-
* Maximum number of entities whose parsed embedding vectors are held in
|
|
2909
|
-
* memory. This cap is intentionally conservative so the cache remains safe
|
|
2910
|
-
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
2911
|
-
*/
|
|
2912
|
-
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
2913
|
-
/**
|
|
2914
|
-
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
2915
|
-
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
2916
|
-
* entities while still maintaining a bounded memory footprint.
|
|
2917
|
-
*/
|
|
2918
|
-
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
2919
|
-
var WikiMemory = _WikiMemory;
|
|
1607
|
+
_testAccessNonTestEnvWarned = new WeakMap();
|
|
2920
1608
|
|
|
2921
1609
|
// src/utils/formatContext.ts
|
|
2922
1610
|
function validateMaxOption(value, name) {
|
|
@@ -3183,6 +1871,6 @@ function createWiki(db, options) {
|
|
|
3183
1871
|
return new WikiMemory(db, options);
|
|
3184
1872
|
}
|
|
3185
1873
|
|
|
3186
|
-
export { DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT,
|
|
1874
|
+
export { DEFAULT_LIBRARIAN_SYNTHESIS_PROMPT, WikiMemory, createWiki, formatContext, formatMemoryDump, hydrateLibrarianPrompt, mapLibrarianOptionsToReadOptions, validateLibrarianPromptTemplate };
|
|
3187
1875
|
//# sourceMappingURL=index.mjs.map
|
|
3188
1876
|
//# sourceMappingURL=index.mjs.map
|