@equationalapplications/expo-llm-wiki 0.0.0-development → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -1
- package/dist/{WikiMemory-BI2Aizwv.d.mts → WikiMemory-BDVn-TJf.d.mts} +46 -1
- package/dist/{WikiMemory-BI2Aizwv.d.ts → WikiMemory-BDVn-TJf.d.ts} +46 -1
- package/dist/index.d.mts +5 -3
- package/dist/index.d.ts +5 -3
- package/dist/index.js +521 -69
- package/dist/index.mjs +518 -68
- package/dist/react/index.d.mts +9 -2
- package/dist/react/index.d.ts +9 -2
- package/dist/react/index.js +30 -0
- package/dist/react/index.mjs +29 -0
- package/package.json +15 -5
- package/dist/WikiMemory-B-yFw9Dc.d.mts +0 -118
- package/dist/WikiMemory-B-yFw9Dc.d.ts +0 -118
- package/dist/WikiMemory-BWTt1Ynm.d.mts +0 -103
- package/dist/WikiMemory-BWTt1Ynm.d.ts +0 -103
package/dist/index.mjs
CHANGED
|
@@ -29,7 +29,8 @@ async function setupDatabase(db, prefix) {
|
|
|
29
29
|
body,
|
|
30
30
|
tags,
|
|
31
31
|
content='${prefix}entries',
|
|
32
|
-
content_rowid='rowid'
|
|
32
|
+
content_rowid='rowid',
|
|
33
|
+
tokenize='porter unicode61'
|
|
33
34
|
);
|
|
34
35
|
|
|
35
36
|
-- Triggers to keep FTS5 in sync with entries
|
|
@@ -83,11 +84,23 @@ async function setupDatabase(db, prefix) {
|
|
|
83
84
|
`);
|
|
84
85
|
}
|
|
85
86
|
|
|
87
|
+
// src/types.ts
|
|
88
|
+
/**
 * Error thrown when an operation is requested for an entity while the same
 * kind of operation is already in flight for it.
 *
 * Carries the operation label and the entity id so callers can inspect them
 * without parsing the message.
 */
var WikiBusyError = class extends Error {
  /** Label of the operation that was already running. */
  operation;
  /** Id of the entity the operation was running for. */
  entityId;
  /**
   * @param {string} operation - Operation label, interpolated into the message.
   * @param {string} entityId - Entity the operation is busy on.
   */
  constructor(operation, entityId) {
    const message = `${operation} already running for entity ${entityId}`;
    super(message);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
|
|
98
|
+
|
|
86
99
|
// src/prompts.ts
|
|
87
100
|
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
88
101
|
Return ONLY a valid JSON object matching this schema:
|
|
89
102
|
{
|
|
90
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max
|
|
103
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
91
104
|
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
92
105
|
}
|
|
93
106
|
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
@@ -96,13 +109,13 @@ Return ONLY a valid JSON object matching this schema:
|
|
|
96
109
|
{
|
|
97
110
|
"downgraded": ["string (fact IDs)"],
|
|
98
111
|
"deleted": ["string (fact IDs)"],
|
|
99
|
-
"newFacts": [{ "title": "string", "body": "string", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
112
|
+
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
100
113
|
}
|
|
101
114
|
Do not return markdown, just raw JSON.`;
|
|
102
115
|
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
103
116
|
Return ONLY a valid JSON object matching this schema:
|
|
104
117
|
{
|
|
105
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max
|
|
118
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
106
119
|
}
|
|
107
120
|
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
108
121
|
|
|
@@ -176,6 +189,84 @@ function safeSlice(value, start, end) {
|
|
|
176
189
|
}
|
|
177
190
|
return value.slice(safeStart, safeEnd);
|
|
178
191
|
}
|
|
192
|
+
/**
 * Split a document into chunks of at most `maxChunkLength` UTF-16 units,
 * preferring natural boundaries in the second half of each window.
 *
 * Boundary preference inside a window, in order:
 *   1. the last blank-line paragraph break ("\n\n"),
 *   2. the last sentence terminator (. ! ?) followed by whitespace,
 *   3. the last whitespace character,
 *   4. a hard cut at the window end (sets `truncated`).
 *
 * Each following chunk starts `overlap` units before the previous split
 * point, but always at least one unit past the previous chunk start so the
 * loop makes progress.
 *
 * @param {string} input - Raw document text; it is trimmed before chunking.
 * @param {number} maxChunkLength - Integer >= 2, maximum chunk length.
 * @param {number} overlap - Integer in [0, maxChunkLength).
 * @returns {{ chunks: string[], truncated: boolean }} `truncated` is true when
 *   at least one chunk had to be cut without a natural boundary.
 * @throws {Error} When `maxChunkLength` or `overlap` is out of range. Empty
 *   input returns before the arguments are validated.
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }
  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);
  while (cursor < text.length) {
    const remaining = text.length - cursor;
    if (remaining <= maxChunkLength) {
      // The tail fits in one chunk: emit it and stop.
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }
    const windowEnd = cursor + maxChunkLength;
    // Never split in the first half of the window, so chunks stay reasonably large.
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // 1. Paragraph break. The search is bounded at windowEnd - 2 so the whole
    //    "\n\n" lies inside the window. Searching from windowEnd could return a
    //    break straddling the window edge, which then had to be rejected and
    //    hid any earlier, valid paragraph break.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    // 2. Last sentence terminator followed by whitespace; the split lands
    //    just after that whitespace character.
    if (splitPoint === -1) {
      let lastTerm = -1;
      for (let i = minSplit; i < windowEnd - 1; i++) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
          lastTerm = i + 2;
        }
      }
      if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
    }

    // 3. Last whitespace character in the allowed range.
    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (/\s/.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    // 4. No natural boundary: hard cut and report it to the caller.
    if (splitPoint === -1) {
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    // Step back by `overlap` for context, but always advance at least one unit.
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
|
|
246
|
+
/**
 * Run async task factories with at most `limit` of them in flight at once.
 *
 * Results are returned in task order. On the first failure no further tasks
 * are started, tasks already running are allowed to settle, and the first
 * error is rethrown.
 *
 * @param {Array<() => Promise<any>>} tasks - Zero-argument async factories.
 * @param {number} limit - Desired concurrency. Values below 1 are raised to
 *   1; a missing or non-numeric limit (NaN after coercion) falls back to 1.
 * @returns {Promise<any[]>} Resolved values, index-aligned with `tasks`.
 * @throws The first error raised by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let index = 0;
  let failed = false;
  let firstError;

  async function worker() {
    while (index < tasks.length && !failed) {
      // Claim the next slot synchronously so two workers never take the same task.
      const i = index++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }

  // NaN would propagate through Math.max/Math.min and produce zero workers,
  // silently returning an array of holes. Fall back to one worker instead.
  const requested = Number(limit);
  const effectiveLimit = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.min(Math.max(effectiveLimit, 1), tasks.length);

  await Promise.allSettled(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
|
|
179
270
|
function clip(value, max) {
|
|
180
271
|
if (typeof value !== "string") return "";
|
|
181
272
|
const s = value.trim();
|
|
@@ -188,7 +279,7 @@ function validateTags(tags) {
|
|
|
188
279
|
function validateFact(fact) {
|
|
189
280
|
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
190
281
|
const title = clip(fact.title, 80);
|
|
191
|
-
const body = clip(fact.body,
|
|
282
|
+
const body = clip(fact.body, 800);
|
|
192
283
|
if (!title || !body) return null;
|
|
193
284
|
let confidence = fact.confidence;
|
|
194
285
|
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
@@ -237,6 +328,16 @@ var WikiMemory = class {
|
|
|
237
328
|
prefix;
|
|
238
329
|
options;
|
|
239
330
|
activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
331
|
+
activeIngestJobs = /* @__PURE__ */ new Set();
|
|
332
|
+
_librarianKey(entityId) {
|
|
333
|
+
return `${this.prefix}:${entityId}:librarian`;
|
|
334
|
+
}
|
|
335
|
+
_healKey(entityId) {
|
|
336
|
+
return `${this.prefix}:${entityId}:heal`;
|
|
337
|
+
}
|
|
338
|
+
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
339
|
+
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
340
|
+
}
|
|
240
341
|
constructor(db, options) {
|
|
241
342
|
this.db = db;
|
|
242
343
|
this.options = options;
|
|
@@ -244,6 +345,45 @@ var WikiMemory = class {
|
|
|
244
345
|
}
|
|
245
346
|
async setup() {
|
|
246
347
|
await setupDatabase(this.db, this.prefix);
|
|
348
|
+
const ftsMeta = await this.db.getFirstAsync(
|
|
349
|
+
`SELECT sql FROM sqlite_master WHERE type='table' AND name=?`,
|
|
350
|
+
[`${this.prefix}entries_fts`]
|
|
351
|
+
);
|
|
352
|
+
const hasPorterTokenizer = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsMeta?.sql ?? "");
|
|
353
|
+
if (ftsMeta?.sql && !hasPorterTokenizer) {
|
|
354
|
+
await this.db.withTransactionAsync(async () => {
|
|
355
|
+
await this.db.execAsync(`
|
|
356
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_ai;
|
|
357
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_ad;
|
|
358
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_au;
|
|
359
|
+
DROP TABLE IF EXISTS ${this.prefix}entries_fts;
|
|
360
|
+
CREATE VIRTUAL TABLE ${this.prefix}entries_fts USING fts5(
|
|
361
|
+
title,
|
|
362
|
+
body,
|
|
363
|
+
tags,
|
|
364
|
+
content='${this.prefix}entries',
|
|
365
|
+
content_rowid='rowid',
|
|
366
|
+
tokenize='porter unicode61'
|
|
367
|
+
);
|
|
368
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
369
|
+
SELECT rowid, title, body, tags FROM ${this.prefix}entries;
|
|
370
|
+
CREATE TRIGGER ${this.prefix}entries_ai AFTER INSERT ON ${this.prefix}entries BEGIN
|
|
371
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
372
|
+
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
373
|
+
END;
|
|
374
|
+
CREATE TRIGGER ${this.prefix}entries_ad AFTER DELETE ON ${this.prefix}entries BEGIN
|
|
375
|
+
INSERT INTO ${this.prefix}entries_fts(${this.prefix}entries_fts, rowid, title, body, tags)
|
|
376
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
377
|
+
END;
|
|
378
|
+
CREATE TRIGGER ${this.prefix}entries_au AFTER UPDATE ON ${this.prefix}entries BEGIN
|
|
379
|
+
INSERT INTO ${this.prefix}entries_fts(${this.prefix}entries_fts, rowid, title, body, tags)
|
|
380
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
381
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
382
|
+
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
383
|
+
END;
|
|
384
|
+
`);
|
|
385
|
+
});
|
|
386
|
+
}
|
|
247
387
|
const rows = await this.db.getAllAsync(`
|
|
248
388
|
SELECT rowid, source_ref FROM ${this.prefix}entries
|
|
249
389
|
WHERE source_ref IS NOT NULL
|
|
@@ -268,9 +408,35 @@ var WikiMemory = class {
|
|
|
268
408
|
});
|
|
269
409
|
}
|
|
270
410
|
formatSearchQuery(query) {
|
|
271
|
-
const
|
|
272
|
-
|
|
273
|
-
|
|
411
|
+
const normalizeTokens = (value) => value.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3);
|
|
412
|
+
const baseTokens = normalizeTokens(query);
|
|
413
|
+
if (baseTokens.length === 0) return "";
|
|
414
|
+
const synonymMap = this.options.config?.synonymMap;
|
|
415
|
+
const expanded = [];
|
|
416
|
+
const seen = /* @__PURE__ */ new Set();
|
|
417
|
+
const pushNormalized = (value) => {
|
|
418
|
+
for (const token of normalizeTokens(value)) {
|
|
419
|
+
if (expanded.length >= 12) return false;
|
|
420
|
+
if (seen.has(token)) continue;
|
|
421
|
+
seen.add(token);
|
|
422
|
+
expanded.push(token);
|
|
423
|
+
}
|
|
424
|
+
return true;
|
|
425
|
+
};
|
|
426
|
+
for (const t of baseTokens) {
|
|
427
|
+
if (!pushNormalized(t)) break;
|
|
428
|
+
if (synonymMap) {
|
|
429
|
+
const synonyms = synonymMap[t];
|
|
430
|
+
if (Array.isArray(synonyms)) {
|
|
431
|
+
for (const s of synonyms) {
|
|
432
|
+
if (typeof s === "string") {
|
|
433
|
+
if (!pushNormalized(s)) break;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
return expanded.map((t) => `"${t}"*`).join(" OR ");
|
|
274
440
|
}
|
|
275
441
|
async read(entityId, query) {
|
|
276
442
|
const ftsQuery = this.formatSearchQuery(query);
|
|
@@ -322,6 +488,9 @@ var WikiMemory = class {
|
|
|
322
488
|
}));
|
|
323
489
|
return { facts, tasks, events: events.reverse() };
|
|
324
490
|
}
|
|
491
|
+
async getMemoryBundle(entityId) {
|
|
492
|
+
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
493
|
+
}
|
|
325
494
|
async write(entityId, event) {
|
|
326
495
|
const id = generateId("evt_");
|
|
327
496
|
const now = Date.now();
|
|
@@ -342,7 +511,7 @@ var WikiMemory = class {
|
|
|
342
511
|
let memoryCheckpoint = cp?.memory_checkpoint || 0;
|
|
343
512
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
344
513
|
if (count - memoryCheckpoint >= threshold) {
|
|
345
|
-
const jobKey =
|
|
514
|
+
const jobKey = this._librarianKey(entityId);
|
|
346
515
|
if (!this.activeMaintenanceJobs.has(jobKey)) {
|
|
347
516
|
this.activeMaintenanceJobs.add(jobKey);
|
|
348
517
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
@@ -361,12 +530,20 @@ var WikiMemory = class {
|
|
|
361
530
|
let healCheckpoint = cp?.heal_checkpoint || 0;
|
|
362
531
|
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
363
532
|
if (currentEventCount - healCheckpoint >= autoHealThreshold) {
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
533
|
+
const healKey = this._healKey(entityId);
|
|
534
|
+
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
535
|
+
this.activeMaintenanceJobs.add(healKey);
|
|
536
|
+
try {
|
|
537
|
+
await this._doRunHeal(entityId);
|
|
538
|
+
await this.db.runAsync(`
|
|
539
|
+
INSERT INTO ${this.prefix}checkpoints (entity_id, heal_checkpoint)
|
|
540
|
+
VALUES (?, ?)
|
|
541
|
+
ON CONFLICT(entity_id) DO UPDATE SET heal_checkpoint = ?
|
|
542
|
+
`, [entityId, currentEventCount, currentEventCount]);
|
|
543
|
+
} finally {
|
|
544
|
+
this.activeMaintenanceJobs.delete(healKey);
|
|
545
|
+
}
|
|
546
|
+
}
|
|
370
547
|
}
|
|
371
548
|
}
|
|
372
549
|
async _doRunLibrarian(entityId) {
|
|
@@ -509,8 +686,10 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
509
686
|
});
|
|
510
687
|
}
|
|
511
688
|
async runLibrarian(entityId) {
|
|
512
|
-
const jobKey =
|
|
513
|
-
if (this.activeMaintenanceJobs.has(jobKey))
|
|
689
|
+
const jobKey = this._librarianKey(entityId);
|
|
690
|
+
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
691
|
+
throw new WikiBusyError("librarian", entityId);
|
|
692
|
+
}
|
|
514
693
|
this.activeMaintenanceJobs.add(jobKey);
|
|
515
694
|
try {
|
|
516
695
|
await this._doRunLibrarian(entityId);
|
|
@@ -519,8 +698,10 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
519
698
|
}
|
|
520
699
|
}
|
|
521
700
|
async runHeal(entityId) {
|
|
522
|
-
const jobKey =
|
|
523
|
-
if (this.activeMaintenanceJobs.has(jobKey))
|
|
701
|
+
const jobKey = this._healKey(entityId);
|
|
702
|
+
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
703
|
+
throw new WikiBusyError("heal", entityId);
|
|
704
|
+
}
|
|
524
705
|
this.activeMaintenanceJobs.add(jobKey);
|
|
525
706
|
try {
|
|
526
707
|
await this._doRunHeal(entityId);
|
|
@@ -528,6 +709,180 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
528
709
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
529
710
|
}
|
|
530
711
|
}
|
|
712
|
+
getEntityStatus(entityId) {
|
|
713
|
+
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
714
|
+
let ingesting = false;
|
|
715
|
+
for (const k of this.activeIngestJobs) {
|
|
716
|
+
if (k.startsWith(ingestPrefix)) {
|
|
717
|
+
ingesting = true;
|
|
718
|
+
break;
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
return {
|
|
722
|
+
ingesting,
|
|
723
|
+
librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
|
|
724
|
+
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
725
|
+
};
|
|
726
|
+
}
|
|
727
|
+
async _getFullBundle(entityId, opts) {
|
|
728
|
+
const maxEvents = opts?.maxEvents;
|
|
729
|
+
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
730
|
+
const eventsParams = maxEvents != null ? [entityId, maxEvents] : [entityId];
|
|
731
|
+
const [factsRaw, tasks, eventsRaw] = await Promise.all([
|
|
732
|
+
this.db.getAllAsync(
|
|
733
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
734
|
+
[entityId]
|
|
735
|
+
),
|
|
736
|
+
this.db.getAllAsync(
|
|
737
|
+
`SELECT * FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NULL ORDER BY priority DESC, created_at ASC`,
|
|
738
|
+
[entityId]
|
|
739
|
+
),
|
|
740
|
+
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
741
|
+
]);
|
|
742
|
+
const facts = factsRaw.map((f) => ({
|
|
743
|
+
...f,
|
|
744
|
+
tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
|
|
745
|
+
}));
|
|
746
|
+
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
747
|
+
return { facts, tasks, events };
|
|
748
|
+
}
|
|
749
|
+
async exportDump(entityIds) {
|
|
750
|
+
let ids;
|
|
751
|
+
if (entityIds && entityIds.length > 0) {
|
|
752
|
+
ids = Array.from(new Set(entityIds));
|
|
753
|
+
} else {
|
|
754
|
+
const rows = await this.db.getAllAsync(`
|
|
755
|
+
SELECT DISTINCT entity_id FROM (
|
|
756
|
+
SELECT entity_id FROM ${this.prefix}entries WHERE deleted_at IS NULL
|
|
757
|
+
UNION
|
|
758
|
+
SELECT entity_id FROM ${this.prefix}tasks WHERE deleted_at IS NULL
|
|
759
|
+
UNION
|
|
760
|
+
SELECT entity_id FROM ${this.prefix}events
|
|
761
|
+
) ORDER BY entity_id
|
|
762
|
+
`);
|
|
763
|
+
ids = rows.map((r) => r.entity_id);
|
|
764
|
+
}
|
|
765
|
+
const entities = {};
|
|
766
|
+
const BATCH = 3;
|
|
767
|
+
for (let i = 0; i < ids.length; i += BATCH) {
|
|
768
|
+
const batch = ids.slice(i, i + BATCH);
|
|
769
|
+
const batchResults = await Promise.all(
|
|
770
|
+
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
771
|
+
);
|
|
772
|
+
for (const [id, bundle] of batchResults) {
|
|
773
|
+
entities[id] = bundle;
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
return { generatedAt: Date.now(), entities };
|
|
777
|
+
}
|
|
778
|
+
/**
 * Import a dump (an object with an `entities` map of bundles) into the database.
 *
 * Each entity is imported inside its own transaction. With `opts.merge`
 * false (the default), the entity's live entries and tasks are soft-deleted
 * and its checkpoint row is removed before the bundle is written. With
 * `opts.merge` true, an existing row is only overwritten when the incoming
 * `updated_at` is strictly newer.
 *
 * Rows whose id already belongs to a different entity are skipped with a
 * console warning. Events are inserted with INSERT OR IGNORE.
 *
 * @param {{entities: Object<string, {facts: any[], tasks: any[], events: any[]}>}} dump
 * @param {{merge?: boolean}} [opts]
 * @returns {Promise<void>}
 */
async importDump(dump, opts) {
  const merge = opts?.merge ?? false;
  for (const [entityId, bundle] of Object.entries(dump.entities)) {
    // One transaction per entity.
    await this.db.withTransactionAsync(async () => {
      if (!merge) {
        // Replace mode: soft-delete current live rows and drop the checkpoint row.
        const now = Date.now();
        await this.db.runAsync(
          `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
          [now, now, entityId]
        );
        await this.db.runAsync(
          `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
          [now, now, entityId]
        );
        await this.db.runAsync(
          `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
          [entityId]
        );
      }
      // Look up which incoming fact ids already exist, 500 ids per query.
      const factIds = bundle.facts.map((fact) => fact.id);
      const existingFactsById = /* @__PURE__ */ new Map();
      const factLookupChunkSize = 500;
      for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
        const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
        if (factIdChunk.length === 0) continue;
        const placeholders = factIdChunk.map(() => "?").join(", ");
        const existingFacts = await this.db.getAllAsync(
          `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
          factIdChunk
        );
        for (const existingFact of existingFacts) {
          existingFactsById.set(existingFact.id, existingFact);
        }
      }
      for (const fact of bundle.facts) {
        // Non-array tags are stored as an empty list.
        const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
        // A missing or non-finite updated_at is treated as 0.
        const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
        const existing = existingFactsById.get(fact.id);
        if (existing) {
          if (existing.entity_id !== entityId) {
            // The id is owned by another entity: warn and leave that row untouched.
            this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
            continue;
          }
          if (merge) {
            // Merge mode keeps the stored row unless the incoming one is strictly newer.
            if (safeUpdatedAt <= existing.updated_at) continue;
          }
          await this.db.runAsync(
            `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
            [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
          );
          // Record the write so a later duplicate id in the same bundle is compared against it.
          existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
        } else {
          await this.db.runAsync(
            `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
          );
          existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
        }
      }
      // Tasks follow the same lookup / collision / merge rules as facts.
      const taskIds = bundle.tasks.map((task) => task.id);
      const existingTasksById = /* @__PURE__ */ new Map();
      const taskLookupChunkSize = 500;
      for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
        const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
        if (taskIdChunk.length === 0) continue;
        const placeholders = taskIdChunk.map(() => "?").join(", ");
        const existingTasks = await this.db.getAllAsync(
          `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
          taskIdChunk
        );
        for (const existingTask of existingTasks) {
          existingTasksById.set(existingTask.id, existingTask);
        }
      }
      for (const task of bundle.tasks) {
        const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
        const existing = existingTasksById.get(task.id);
        if (existing) {
          if (existing.entity_id !== entityId) {
            this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
            continue;
          }
          if (merge) {
            if (safeUpdatedAt <= existing.updated_at) continue;
          }
          await this.db.runAsync(
            `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
            [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
          );
          existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
        } else {
          await this.db.runAsync(
            `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
          );
          existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
        }
      }
      // Events use INSERT OR IGNORE and are always written under the target entity id.
      for (const event of bundle.events) {
        await this.db.runAsync(
          `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
VALUES (?, ?, ?, ?, ?, ?)`,
          [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
        );
      }
    });
  }
}
|
|
531
886
|
async forget(entityId, params) {
|
|
532
887
|
const now = Date.now();
|
|
533
888
|
let deletedEntries = 0;
|
|
@@ -582,70 +937,165 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
582
937
|
if (!sourceRef) throw new Error("Invalid sourceRef");
|
|
583
938
|
const sourceHash = normalizeSourceHash(params.sourceHash);
|
|
584
939
|
if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
585
|
-
const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ??
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
940
|
+
const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
|
|
941
|
+
const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
|
|
942
|
+
const chunkOverlap = Math.min(
|
|
943
|
+
Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
|
|
944
|
+
maxChunkLength - 1
|
|
945
|
+
);
|
|
946
|
+
const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
|
|
947
|
+
const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
|
|
589
948
|
if (typeof params.documentChunk !== "string") {
|
|
590
949
|
throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
|
|
591
950
|
}
|
|
592
|
-
const
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
if (text.length === 0) {
|
|
596
|
-
return { truncated: false, chunks: 0 };
|
|
951
|
+
const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
|
|
952
|
+
if (this.activeIngestJobs.has(jobKey)) {
|
|
953
|
+
throw new WikiBusyError("ingest", entityId);
|
|
597
954
|
}
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
const searchArea = text.slice(0, maxChunkLength + 1);
|
|
604
|
-
const match = searchArea.match(/[.!?]\s+(?![\s\S]*[.!?]\s+)/);
|
|
605
|
-
if (match && match.index !== void 0) {
|
|
606
|
-
const splitPoint = Math.min(match.index + match[0].length, maxChunkLength);
|
|
607
|
-
const chunk = safeSlice(text, 0, splitPoint);
|
|
608
|
-
chunks.push(chunk);
|
|
609
|
-
text = text.slice(chunk.length);
|
|
610
|
-
} else {
|
|
611
|
-
truncated = true;
|
|
612
|
-
const chunk = safeSlice(text, 0, maxChunkLength);
|
|
613
|
-
chunks.push(chunk);
|
|
614
|
-
text = text.slice(chunk.length);
|
|
955
|
+
this.activeIngestJobs.add(jobKey);
|
|
956
|
+
try {
|
|
957
|
+
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
958
|
+
if (chunks.length === 0) {
|
|
959
|
+
return { truncated: false, chunks: 0 };
|
|
615
960
|
}
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
const userPrompt = `Document Chunk:
|
|
961
|
+
const chunkResults = await withConcurrency(
|
|
962
|
+
chunks.map((chunk) => async () => {
|
|
963
|
+
const userPrompt = `Document Chunk:
|
|
620
964
|
${chunk}`;
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
965
|
+
const responseText = await this.options.llmProvider.generateText({
|
|
966
|
+
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
967
|
+
userPrompt
|
|
968
|
+
});
|
|
969
|
+
const result = parseJsonResponse(responseText);
|
|
970
|
+
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
971
|
+
}),
|
|
972
|
+
chunkConcurrency
|
|
973
|
+
);
|
|
974
|
+
const seen = /* @__PURE__ */ new Set();
|
|
975
|
+
const allValidFacts = [];
|
|
976
|
+
for (const facts of chunkResults) {
|
|
977
|
+
for (const fact of facts) {
|
|
978
|
+
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
979
|
+
if (!seen.has(normalized)) {
|
|
980
|
+
seen.add(normalized);
|
|
981
|
+
allValidFacts.push(fact);
|
|
982
|
+
}
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
const now = Date.now();
|
|
986
|
+
await this.db.withTransactionAsync(async () => {
|
|
987
|
+
await this.db.runAsync(
|
|
988
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
989
|
+
[now, now, sourceRef, entityId]
|
|
990
|
+
);
|
|
991
|
+
for (const fact of allValidFacts) {
|
|
992
|
+
const id = generateId("fact_");
|
|
993
|
+
await this.db.runAsync(
|
|
994
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
|
|
995
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
996
|
+
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
997
|
+
);
|
|
998
|
+
}
|
|
624
999
|
});
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
1000
|
+
return { truncated, chunks: chunks.length };
|
|
1001
|
+
} finally {
|
|
1002
|
+
this.activeIngestJobs.delete(jobKey);
|
|
628
1003
|
}
|
|
629
|
-
const now = Date.now();
|
|
630
|
-
await this.db.withTransactionAsync(async () => {
|
|
631
|
-
await this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, sourceRef, entityId]);
|
|
632
|
-
for (const fact of allValidFacts) {
|
|
633
|
-
const id = generateId("fact_");
|
|
634
|
-
await this.db.runAsync(`
|
|
635
|
-
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
|
|
636
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
637
|
-
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]);
|
|
638
|
-
}
|
|
639
|
-
});
|
|
640
|
-
return { truncated, chunks: chunks.length };
|
|
641
1004
|
}
|
|
642
1005
|
};
|
|
643
1006
|
|
|
1007
|
+
// src/utils/formatMemoryDump.ts
|
|
1008
|
+
/**
 * Render one fact as a Markdown section ending in a horizontal rule.
 * @param {{title: string, tags?: string[], confidence: string, source_ref?: string|null, source_type?: string, body: string}} f
 * @returns {string} Markdown block ending with a newline.
 */
function renderFact(f) {
  const tagList = (f.tags || []).join(", ");
  // Prefer the specific source reference; fall back to the source type.
  const origin = f.source_ref ?? f.source_type;
  const rows = [
    `### ${f.title}`,
    `**Tags:** ${tagList}`,
    `**Confidence:** ${f.confidence}`,
    `**Source:** ${origin}`,
    "",
    `${f.body}`,
    "",
    "---",
    ""
  ];
  return rows.join("\n");
}
|
|
1021
|
+
/**
 * Render one task as a Markdown checklist item.
 * The box is checked only for status "done"; known statuses add a suffix.
 * @param {{status: string, description: string}} t
 * @returns {string} A single list line ending with a newline.
 */
function renderTask(t) {
  const suffixByStatus = new Map([
    ["done", " (done)"],
    ["abandoned", " (abandoned)"],
    ["in_progress", " (in progress)"]
  ]);
  const box = t.status === "done" ? "x" : " ";
  const suffix = suffixByStatus.get(t.status) ?? "";
  return "- [" + box + "] " + `${t.description}` + suffix + "\n";
}
|
|
1027
|
+
/**
 * Render one event as a Markdown list item with an ISO-8601 timestamp.
 * @param {{created_at: number, event_type: string, summary: string}} e
 * @returns {string} A single list line ending with a newline.
 * @throws {RangeError} When `created_at` does not produce a valid Date.
 */
function renderEvent(e) {
  const stamp = new Date(e.created_at).toISOString();
  const parts = ["- [", stamp, "] (", `${e.event_type}`, ") ", `${e.summary}`, "\n"];
  return parts.join("");
}
|
|
1032
|
+
/**
 * Render a complete Markdown memory dump for one entity: a header, then
 * Facts, Tasks and Recent Events sections. An empty section shows "_(none)_".
 * @param {string} entityId
 * @param {{facts: any[], tasks: any[], events: any[]}} bundle
 * @param {number} generatedAt - Timestamp passed to `new Date`.
 * @returns {string} Markdown document.
 */
function renderEntity(entityId, bundle, generatedAt) {
  const NONE = "_(none)_\n";
  // Render a section body; the renderer is only invoked for non-empty lists.
  const body = (items, render) => (items.length === 0 ? [NONE] : items.map((item) => render(item)));

  const out = [
    `# Memory Dump: ${entityId}`,
    `Generated: ${new Date(generatedAt).toISOString()}`,
    "",
    "## Facts",
    ""
  ];
  out.push(...body(bundle.facts, (fact) => renderFact(fact)));
  out.push("## Tasks", "");
  out.push(...body(bundle.tasks, (task) => renderTask(task)));
  out.push("", "## Recent Events", "");
  out.push(...body(bundle.events, (event) => renderEvent(event)));
  return out.join("\n");
}
|
|
1061
|
+
/**
 * Compute a 16-hex-character, non-cryptographic hash of a string by running
 * two multiply-and-xor accumulators over its UTF-16 code units.
 * @param {string} value
 * @returns {string} Two zero-padded 8-digit hex words, concatenated.
 */
function shortHash(value) {
  const toHex8 = (word) => (word >>> 0).toString(16).padStart(8, "0");
  let left = 5381;
  let right = 52711;
  let pos = 0;
  while (pos < value.length) {
    const unit = value.charCodeAt(pos);
    left = Math.imul(left, 33) ^ unit;
    right = Math.imul(right, 31) ^ unit;
    pos += 1;
  }
  return toHex8(left) + toHex8(right);
}
|
|
1071
|
+
/**
 * Derive a Markdown file name from an entity id.
 *
 * The id is NFKC-normalized, characters outside [A-Za-z0-9._-] become "_",
 * leading dots are replaced, underscore runs are collapsed, and leading or
 * trailing "_" / "-" are trimmed. The base is capped at 200 characters.
 * Whenever the result differs from the original id, a hash of the original
 * id is appended so distinct ids do not collapse to the same name.
 *
 * @param {string} entityId
 * @returns {string} File name ending in ".md".
 */
function formatEntityFileName(entityId) {
  const MAX_BASE = 200;

  let sanitized = entityId.normalize("NFKC");
  sanitized = sanitized.replace(/[^A-Za-z0-9._-]+/g, "_");
  sanitized = sanitized.replace(/^\.+/, "_");
  sanitized = sanitized.replace(/_+/g, "_");
  sanitized = sanitized.replace(/^[_-]+|[_-]+$/g, "");

  const isTooLong = sanitized.length > MAX_BASE;
  const trimmed = isTooLong ? sanitized.slice(0, MAX_BASE) : sanitized;
  const isUsable = Boolean(trimmed) && trimmed !== "." && trimmed !== "..";
  const baseName = isUsable ? trimmed : "entity";

  if (baseName === entityId && !isTooLong) {
    return `${baseName}.md`;
  }
  return `${baseName}-${shortHash(entityId)}.md`;
}
|
|
1081
|
+
/**
 * Convert a dump into one Markdown file per entity plus a JSON manifest.
 * @param {{generatedAt: number, entities: Object<string, any>}} dump
 * @returns {{manifest: string, files: Array<{name: string, content: string}>}}
 *   `manifest` is the whole dump pretty-printed with 2-space indentation.
 */
function formatMemoryDump(dump) {
  const files = [];
  for (const [entityId, bundle] of Object.entries(dump.entities)) {
    const name = formatEntityFileName(entityId);
    const content = renderEntity(entityId, bundle, dump.generatedAt);
    files.push({ name, content });
  }
  const manifest = JSON.stringify(dump, null, 2);
  return { manifest, files };
}
|
|
1091
|
+
|
|
644
1092
|
// src/index.ts
|
|
645
1093
|
/**
 * Factory for a WikiMemory instance.
 * @param {object} db - Database handle forwarded to the WikiMemory constructor.
 * @param {object} options - Options forwarded to the WikiMemory constructor.
 * @returns {WikiMemory}
 */
function createWiki(db, options) {
  const wiki = new WikiMemory(db, options);
  return wiki;
}
|
|
648
1096
|
export {
|
|
1097
|
+
WikiBusyError,
|
|
649
1098
|
WikiMemory,
|
|
650
|
-
createWiki
|
|
1099
|
+
createWiki,
|
|
1100
|
+
formatMemoryDump
|
|
651
1101
|
};
|