@equationalapplications/expo-llm-wiki 0.0.0-development → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -1
- package/dist/{WikiMemory-BI2Aizwv.d.mts → WikiMemory-BDVn-TJf.d.mts} +46 -1
- package/dist/{WikiMemory-BI2Aizwv.d.ts → WikiMemory-BDVn-TJf.d.ts} +46 -1
- package/dist/index.d.mts +5 -3
- package/dist/index.d.ts +5 -3
- package/dist/index.js +521 -69
- package/dist/index.mjs +518 -68
- package/dist/react/index.d.mts +9 -2
- package/dist/react/index.d.ts +9 -2
- package/dist/react/index.js +30 -0
- package/dist/react/index.mjs +29 -0
- package/package.json +15 -5
- package/dist/WikiMemory-B-yFw9Dc.d.mts +0 -118
- package/dist/WikiMemory-B-yFw9Dc.d.ts +0 -118
- package/dist/WikiMemory-BWTt1Ynm.d.mts +0 -103
- package/dist/WikiMemory-BWTt1Ynm.d.ts +0 -103
package/dist/index.js
CHANGED
|
@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
WikiBusyError: () => WikiBusyError,
|
|
23
24
|
WikiMemory: () => WikiMemory,
|
|
24
|
-
createWiki: () => createWiki
|
|
25
|
+
createWiki: () => createWiki,
|
|
26
|
+
formatMemoryDump: () => formatMemoryDump
|
|
25
27
|
});
|
|
26
28
|
module.exports = __toCommonJS(index_exports);
|
|
27
29
|
|
|
@@ -56,7 +58,8 @@ async function setupDatabase(db, prefix) {
|
|
|
56
58
|
body,
|
|
57
59
|
tags,
|
|
58
60
|
content='${prefix}entries',
|
|
59
|
-
content_rowid='rowid'
|
|
61
|
+
content_rowid='rowid',
|
|
62
|
+
tokenize='porter unicode61'
|
|
60
63
|
);
|
|
61
64
|
|
|
62
65
|
-- Triggers to keep FTS5 in sync with entries
|
|
@@ -110,11 +113,23 @@ async function setupDatabase(db, prefix) {
|
|
|
110
113
|
`);
|
|
111
114
|
}
|
|
112
115
|
|
|
116
|
+
// src/types.ts
|
|
117
|
+
/**
 * Error thrown when an operation is requested for an entity while the same
 * operation is already in flight.
 *
 * Carries the operation name and the entity id as own properties so callers
 * can branch on them without parsing the message.
 */
var WikiBusyError = class extends Error {
  /** Operation name passed to the constructor (e.g. "librarian", "heal", "ingest"). */
  operation;
  /** Entity id passed to the constructor. */
  entityId;
  /**
   * @param {string} operation - Name of the operation that is already running.
   * @param {string} entityId - Entity the operation is running for.
   */
  constructor(operation, entityId) {
    const message = `${operation} already running for entity ${entityId}`;
    super(message);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
|
|
127
|
+
|
|
113
128
|
// src/prompts.ts
|
|
114
129
|
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
115
130
|
Return ONLY a valid JSON object matching this schema:
|
|
116
131
|
{
|
|
117
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max
|
|
132
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
|
|
118
133
|
"tasks": [{ "description": "string", "priority": "number (0-10)" }]
|
|
119
134
|
}
|
|
120
135
|
Keep facts concise. Do not return markdown, just raw JSON.`;
|
|
@@ -123,13 +138,13 @@ Return ONLY a valid JSON object matching this schema:
|
|
|
123
138
|
{
|
|
124
139
|
"downgraded": ["string (fact IDs)"],
|
|
125
140
|
"deleted": ["string (fact IDs)"],
|
|
126
|
-
"newFacts": [{ "title": "string", "body": "string", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
141
|
+
"newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
127
142
|
}
|
|
128
143
|
Do not return markdown, just raw JSON.`;
|
|
129
144
|
var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
|
|
130
145
|
Return ONLY a valid JSON object matching this schema:
|
|
131
146
|
{
|
|
132
|
-
"facts": [{ "title": "string (max 80 chars)", "body": "string (max
|
|
147
|
+
"facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
|
|
133
148
|
}
|
|
134
149
|
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
135
150
|
|
|
@@ -203,6 +218,84 @@ function safeSlice(value, start, end) {
|
|
|
203
218
|
}
|
|
204
219
|
return value.slice(safeStart, safeEnd);
|
|
205
220
|
}
|
|
221
|
+
/**
 * Split `input` into chunks of at most `maxChunkLength` characters, with
 * consecutive chunks sharing up to `overlap` characters.
 *
 * Inside each window the split point is chosen from the second half of the
 * window, in this order of preference:
 *   1. just after the last paragraph break ("\n\n"),
 *   2. just after the last sentence terminator (. ! ?) followed by whitespace,
 *   3. just after the last whitespace character,
 *   4. a hard cut at the window end (this sets `truncated`).
 *
 * @param {string} input - Text to split; it is trimmed first.
 * @param {number} maxChunkLength - Integer >= 2, maximum characters per chunk.
 * @param {number} overlap - Integer in [0, maxChunkLength), characters repeated
 *   at the start of the next chunk.
 * @returns {{ chunks: string[], truncated: boolean }} `truncated` is true when
 *   at least one chunk had to be cut at the window end.
 * @throws {Error} When `maxChunkLength` or `overlap` is out of range. The
 *   check happens after the empty-input early return.
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }
  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);
  while (cursor < text.length) {
    const remaining = text.length - cursor;
    if (remaining <= maxChunkLength) {
      // The tail fits in one chunk: emit it and stop.
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }
    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // 1. Paragraph break. Search from windowEnd - 2 so the whole "\n\n" lies
    //    inside the window. Searching from windowEnd could return a break that
    //    straddles the window edge; that break would then be rejected and hide
    //    an earlier, perfectly valid paragraph boundary.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    // 2. Last sentence terminator followed by whitespace.
    if (splitPoint === -1) {
      let lastTerm = -1;
      for (let i = minSplit; i < windowEnd - 1; i++) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
          lastTerm = i + 2;
        }
      }
      if (lastTerm !== -1) splitPoint = lastTerm;
    }

    // 3. Last whitespace character, scanning backwards from the window end.
    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (/\s/.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    // 4. No natural boundary: cut at the window end and report it.
    if (splitPoint === -1) {
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    // Step back by `overlap`, but always advance by at least one character so
    // the loop terminates even with a large overlap.
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
|
|
275
|
+
/**
 * Run async task factories with at most `limit` of them in flight at once.
 *
 * Results are returned in task order. After the first failure no further
 * tasks are started; tasks already running are allowed to settle, and then
 * the first error is rethrown.
 *
 * @param {Array<() => Promise<any>>} tasks - Zero-argument functions that
 *   start the work when called.
 * @param {number} limit - Maximum number of concurrent tasks. Values that are
 *   missing, NaN or below 1 fall back to 1, so tasks are never silently skipped.
 * @returns {Promise<any[]>} Resolved values, index-aligned with `tasks`.
 * @throws The first error raised by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let index = 0;
  let failed = false;
  let firstError;
  async function worker() {
    while (index < tasks.length && !failed) {
      // Claim the next index synchronously so no two workers share a task.
      const i = index++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }
  // Math.max(limit, 1) is NaN for an undefined/NaN limit, which previously
  // produced zero workers and an array of holes. Normalize explicitly.
  const numericLimit = Number(limit);
  const requested = numericLimit >= 1 ? Math.floor(numericLimit) : 1;
  const workerCount = Math.min(requested, tasks.length);
  await Promise.allSettled(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
|
|
206
299
|
function clip(value, max) {
|
|
207
300
|
if (typeof value !== "string") return "";
|
|
208
301
|
const s = value.trim();
|
|
@@ -215,7 +308,7 @@ function validateTags(tags) {
|
|
|
215
308
|
function validateFact(fact) {
|
|
216
309
|
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
217
310
|
const title = clip(fact.title, 80);
|
|
218
|
-
const body = clip(fact.body,
|
|
311
|
+
const body = clip(fact.body, 800);
|
|
219
312
|
if (!title || !body) return null;
|
|
220
313
|
let confidence = fact.confidence;
|
|
221
314
|
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
@@ -264,6 +357,16 @@ var WikiMemory = class {
|
|
|
264
357
|
prefix;
|
|
265
358
|
options;
|
|
266
359
|
activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
360
|
+
activeIngestJobs = /* @__PURE__ */ new Set();
|
|
361
|
+
_librarianKey(entityId) {
|
|
362
|
+
return `${this.prefix}:${entityId}:librarian`;
|
|
363
|
+
}
|
|
364
|
+
_healKey(entityId) {
|
|
365
|
+
return `${this.prefix}:${entityId}:heal`;
|
|
366
|
+
}
|
|
367
|
+
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
368
|
+
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
369
|
+
}
|
|
267
370
|
constructor(db, options) {
|
|
268
371
|
this.db = db;
|
|
269
372
|
this.options = options;
|
|
@@ -271,6 +374,45 @@ var WikiMemory = class {
|
|
|
271
374
|
}
|
|
272
375
|
async setup() {
|
|
273
376
|
await setupDatabase(this.db, this.prefix);
|
|
377
|
+
const ftsMeta = await this.db.getFirstAsync(
|
|
378
|
+
`SELECT sql FROM sqlite_master WHERE type='table' AND name=?`,
|
|
379
|
+
[`${this.prefix}entries_fts`]
|
|
380
|
+
);
|
|
381
|
+
const hasPorterTokenizer = /tokenize\s*=\s*['"]porter\s+unicode61['"]/i.test(ftsMeta?.sql ?? "");
|
|
382
|
+
if (ftsMeta?.sql && !hasPorterTokenizer) {
|
|
383
|
+
await this.db.withTransactionAsync(async () => {
|
|
384
|
+
await this.db.execAsync(`
|
|
385
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_ai;
|
|
386
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_ad;
|
|
387
|
+
DROP TRIGGER IF EXISTS ${this.prefix}entries_au;
|
|
388
|
+
DROP TABLE IF EXISTS ${this.prefix}entries_fts;
|
|
389
|
+
CREATE VIRTUAL TABLE ${this.prefix}entries_fts USING fts5(
|
|
390
|
+
title,
|
|
391
|
+
body,
|
|
392
|
+
tags,
|
|
393
|
+
content='${this.prefix}entries',
|
|
394
|
+
content_rowid='rowid',
|
|
395
|
+
tokenize='porter unicode61'
|
|
396
|
+
);
|
|
397
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
398
|
+
SELECT rowid, title, body, tags FROM ${this.prefix}entries;
|
|
399
|
+
CREATE TRIGGER ${this.prefix}entries_ai AFTER INSERT ON ${this.prefix}entries BEGIN
|
|
400
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
401
|
+
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
402
|
+
END;
|
|
403
|
+
CREATE TRIGGER ${this.prefix}entries_ad AFTER DELETE ON ${this.prefix}entries BEGIN
|
|
404
|
+
INSERT INTO ${this.prefix}entries_fts(${this.prefix}entries_fts, rowid, title, body, tags)
|
|
405
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
406
|
+
END;
|
|
407
|
+
CREATE TRIGGER ${this.prefix}entries_au AFTER UPDATE ON ${this.prefix}entries BEGIN
|
|
408
|
+
INSERT INTO ${this.prefix}entries_fts(${this.prefix}entries_fts, rowid, title, body, tags)
|
|
409
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
410
|
+
INSERT INTO ${this.prefix}entries_fts(rowid, title, body, tags)
|
|
411
|
+
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
412
|
+
END;
|
|
413
|
+
`);
|
|
414
|
+
});
|
|
415
|
+
}
|
|
274
416
|
const rows = await this.db.getAllAsync(`
|
|
275
417
|
SELECT rowid, source_ref FROM ${this.prefix}entries
|
|
276
418
|
WHERE source_ref IS NOT NULL
|
|
@@ -295,9 +437,35 @@ var WikiMemory = class {
|
|
|
295
437
|
});
|
|
296
438
|
}
|
|
297
439
|
formatSearchQuery(query) {
|
|
298
|
-
const
|
|
299
|
-
|
|
300
|
-
|
|
440
|
+
const normalizeTokens = (value) => value.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3);
|
|
441
|
+
const baseTokens = normalizeTokens(query);
|
|
442
|
+
if (baseTokens.length === 0) return "";
|
|
443
|
+
const synonymMap = this.options.config?.synonymMap;
|
|
444
|
+
const expanded = [];
|
|
445
|
+
const seen = /* @__PURE__ */ new Set();
|
|
446
|
+
const pushNormalized = (value) => {
|
|
447
|
+
for (const token of normalizeTokens(value)) {
|
|
448
|
+
if (expanded.length >= 12) return false;
|
|
449
|
+
if (seen.has(token)) continue;
|
|
450
|
+
seen.add(token);
|
|
451
|
+
expanded.push(token);
|
|
452
|
+
}
|
|
453
|
+
return true;
|
|
454
|
+
};
|
|
455
|
+
for (const t of baseTokens) {
|
|
456
|
+
if (!pushNormalized(t)) break;
|
|
457
|
+
if (synonymMap) {
|
|
458
|
+
const synonyms = synonymMap[t];
|
|
459
|
+
if (Array.isArray(synonyms)) {
|
|
460
|
+
for (const s of synonyms) {
|
|
461
|
+
if (typeof s === "string") {
|
|
462
|
+
if (!pushNormalized(s)) break;
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
return expanded.map((t) => `"${t}"*`).join(" OR ");
|
|
301
469
|
}
|
|
302
470
|
async read(entityId, query) {
|
|
303
471
|
const ftsQuery = this.formatSearchQuery(query);
|
|
@@ -349,6 +517,9 @@ var WikiMemory = class {
|
|
|
349
517
|
}));
|
|
350
518
|
return { facts, tasks, events: events.reverse() };
|
|
351
519
|
}
|
|
520
|
+
async getMemoryBundle(entityId) {
|
|
521
|
+
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
522
|
+
}
|
|
352
523
|
async write(entityId, event) {
|
|
353
524
|
const id = generateId("evt_");
|
|
354
525
|
const now = Date.now();
|
|
@@ -369,7 +540,7 @@ var WikiMemory = class {
|
|
|
369
540
|
let memoryCheckpoint = cp?.memory_checkpoint || 0;
|
|
370
541
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
371
542
|
if (count - memoryCheckpoint >= threshold) {
|
|
372
|
-
const jobKey =
|
|
543
|
+
const jobKey = this._librarianKey(entityId);
|
|
373
544
|
if (!this.activeMaintenanceJobs.has(jobKey)) {
|
|
374
545
|
this.activeMaintenanceJobs.add(jobKey);
|
|
375
546
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
@@ -388,12 +559,20 @@ var WikiMemory = class {
|
|
|
388
559
|
let healCheckpoint = cp?.heal_checkpoint || 0;
|
|
389
560
|
if (healCheckpoint > currentEventCount) healCheckpoint = 0;
|
|
390
561
|
if (currentEventCount - healCheckpoint >= autoHealThreshold) {
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
562
|
+
const healKey = this._healKey(entityId);
|
|
563
|
+
if (!this.activeMaintenanceJobs.has(healKey)) {
|
|
564
|
+
this.activeMaintenanceJobs.add(healKey);
|
|
565
|
+
try {
|
|
566
|
+
await this._doRunHeal(entityId);
|
|
567
|
+
await this.db.runAsync(`
|
|
568
|
+
INSERT INTO ${this.prefix}checkpoints (entity_id, heal_checkpoint)
|
|
569
|
+
VALUES (?, ?)
|
|
570
|
+
ON CONFLICT(entity_id) DO UPDATE SET heal_checkpoint = ?
|
|
571
|
+
`, [entityId, currentEventCount, currentEventCount]);
|
|
572
|
+
} finally {
|
|
573
|
+
this.activeMaintenanceJobs.delete(healKey);
|
|
574
|
+
}
|
|
575
|
+
}
|
|
397
576
|
}
|
|
398
577
|
}
|
|
399
578
|
async _doRunLibrarian(entityId) {
|
|
@@ -536,8 +715,10 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
536
715
|
});
|
|
537
716
|
}
|
|
538
717
|
async runLibrarian(entityId) {
|
|
539
|
-
const jobKey =
|
|
540
|
-
if (this.activeMaintenanceJobs.has(jobKey))
|
|
718
|
+
const jobKey = this._librarianKey(entityId);
|
|
719
|
+
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
720
|
+
throw new WikiBusyError("librarian", entityId);
|
|
721
|
+
}
|
|
541
722
|
this.activeMaintenanceJobs.add(jobKey);
|
|
542
723
|
try {
|
|
543
724
|
await this._doRunLibrarian(entityId);
|
|
@@ -546,8 +727,10 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
546
727
|
}
|
|
547
728
|
}
|
|
548
729
|
async runHeal(entityId) {
|
|
549
|
-
const jobKey =
|
|
550
|
-
if (this.activeMaintenanceJobs.has(jobKey))
|
|
730
|
+
const jobKey = this._healKey(entityId);
|
|
731
|
+
if (this.activeMaintenanceJobs.has(jobKey)) {
|
|
732
|
+
throw new WikiBusyError("heal", entityId);
|
|
733
|
+
}
|
|
551
734
|
this.activeMaintenanceJobs.add(jobKey);
|
|
552
735
|
try {
|
|
553
736
|
await this._doRunHeal(entityId);
|
|
@@ -555,6 +738,180 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
555
738
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
556
739
|
}
|
|
557
740
|
}
|
|
741
|
+
getEntityStatus(entityId) {
|
|
742
|
+
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
743
|
+
let ingesting = false;
|
|
744
|
+
for (const k of this.activeIngestJobs) {
|
|
745
|
+
if (k.startsWith(ingestPrefix)) {
|
|
746
|
+
ingesting = true;
|
|
747
|
+
break;
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
return {
|
|
751
|
+
ingesting,
|
|
752
|
+
librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
|
|
753
|
+
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
754
|
+
};
|
|
755
|
+
}
|
|
756
|
+
async _getFullBundle(entityId, opts) {
|
|
757
|
+
const maxEvents = opts?.maxEvents;
|
|
758
|
+
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
759
|
+
const eventsParams = maxEvents != null ? [entityId, maxEvents] : [entityId];
|
|
760
|
+
const [factsRaw, tasks, eventsRaw] = await Promise.all([
|
|
761
|
+
this.db.getAllAsync(
|
|
762
|
+
`SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
|
|
763
|
+
[entityId]
|
|
764
|
+
),
|
|
765
|
+
this.db.getAllAsync(
|
|
766
|
+
`SELECT * FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NULL ORDER BY priority DESC, created_at ASC`,
|
|
767
|
+
[entityId]
|
|
768
|
+
),
|
|
769
|
+
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
770
|
+
]);
|
|
771
|
+
const facts = factsRaw.map((f) => ({
|
|
772
|
+
...f,
|
|
773
|
+
tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
|
|
774
|
+
}));
|
|
775
|
+
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
776
|
+
return { facts, tasks, events };
|
|
777
|
+
}
|
|
778
|
+
async exportDump(entityIds) {
|
|
779
|
+
let ids;
|
|
780
|
+
if (entityIds && entityIds.length > 0) {
|
|
781
|
+
ids = Array.from(new Set(entityIds));
|
|
782
|
+
} else {
|
|
783
|
+
const rows = await this.db.getAllAsync(`
|
|
784
|
+
SELECT DISTINCT entity_id FROM (
|
|
785
|
+
SELECT entity_id FROM ${this.prefix}entries WHERE deleted_at IS NULL
|
|
786
|
+
UNION
|
|
787
|
+
SELECT entity_id FROM ${this.prefix}tasks WHERE deleted_at IS NULL
|
|
788
|
+
UNION
|
|
789
|
+
SELECT entity_id FROM ${this.prefix}events
|
|
790
|
+
) ORDER BY entity_id
|
|
791
|
+
`);
|
|
792
|
+
ids = rows.map((r) => r.entity_id);
|
|
793
|
+
}
|
|
794
|
+
const entities = {};
|
|
795
|
+
const BATCH = 3;
|
|
796
|
+
for (let i = 0; i < ids.length; i += BATCH) {
|
|
797
|
+
const batch = ids.slice(i, i + BATCH);
|
|
798
|
+
const batchResults = await Promise.all(
|
|
799
|
+
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
800
|
+
);
|
|
801
|
+
for (const [id, bundle] of batchResults) {
|
|
802
|
+
entities[id] = bundle;
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
return { generatedAt: Date.now(), entities };
|
|
806
|
+
}
|
|
807
|
+
async importDump(dump, opts) {
|
|
808
|
+
const merge = opts?.merge ?? false;
|
|
809
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
810
|
+
await this.db.withTransactionAsync(async () => {
|
|
811
|
+
if (!merge) {
|
|
812
|
+
const now = Date.now();
|
|
813
|
+
await this.db.runAsync(
|
|
814
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
815
|
+
[now, now, entityId]
|
|
816
|
+
);
|
|
817
|
+
await this.db.runAsync(
|
|
818
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
819
|
+
[now, now, entityId]
|
|
820
|
+
);
|
|
821
|
+
await this.db.runAsync(
|
|
822
|
+
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
823
|
+
[entityId]
|
|
824
|
+
);
|
|
825
|
+
}
|
|
826
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
827
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
828
|
+
const factLookupChunkSize = 500;
|
|
829
|
+
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
830
|
+
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
831
|
+
if (factIdChunk.length === 0) continue;
|
|
832
|
+
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
833
|
+
const existingFacts = await this.db.getAllAsync(
|
|
834
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
835
|
+
factIdChunk
|
|
836
|
+
);
|
|
837
|
+
for (const existingFact of existingFacts) {
|
|
838
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
for (const fact of bundle.facts) {
|
|
842
|
+
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
843
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
844
|
+
const existing = existingFactsById.get(fact.id);
|
|
845
|
+
if (existing) {
|
|
846
|
+
if (existing.entity_id !== entityId) {
|
|
847
|
+
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
848
|
+
continue;
|
|
849
|
+
}
|
|
850
|
+
if (merge) {
|
|
851
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
852
|
+
}
|
|
853
|
+
await this.db.runAsync(
|
|
854
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
|
|
855
|
+
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
856
|
+
);
|
|
857
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
858
|
+
} else {
|
|
859
|
+
await this.db.runAsync(
|
|
860
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
861
|
+
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
862
|
+
);
|
|
863
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
867
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
868
|
+
const taskLookupChunkSize = 500;
|
|
869
|
+
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
870
|
+
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
871
|
+
if (taskIdChunk.length === 0) continue;
|
|
872
|
+
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
873
|
+
const existingTasks = await this.db.getAllAsync(
|
|
874
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
875
|
+
taskIdChunk
|
|
876
|
+
);
|
|
877
|
+
for (const existingTask of existingTasks) {
|
|
878
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
for (const task of bundle.tasks) {
|
|
882
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
883
|
+
const existing = existingTasksById.get(task.id);
|
|
884
|
+
if (existing) {
|
|
885
|
+
if (existing.entity_id !== entityId) {
|
|
886
|
+
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
887
|
+
continue;
|
|
888
|
+
}
|
|
889
|
+
if (merge) {
|
|
890
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
891
|
+
}
|
|
892
|
+
await this.db.runAsync(
|
|
893
|
+
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
894
|
+
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
895
|
+
);
|
|
896
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
897
|
+
} else {
|
|
898
|
+
await this.db.runAsync(
|
|
899
|
+
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
900
|
+
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
901
|
+
);
|
|
902
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
for (const event of bundle.events) {
|
|
906
|
+
await this.db.runAsync(
|
|
907
|
+
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
908
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
909
|
+
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
910
|
+
);
|
|
911
|
+
}
|
|
912
|
+
});
|
|
913
|
+
}
|
|
914
|
+
}
|
|
558
915
|
async forget(entityId, params) {
|
|
559
916
|
const now = Date.now();
|
|
560
917
|
let deletedEntries = 0;
|
|
@@ -609,71 +966,166 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
609
966
|
if (!sourceRef) throw new Error("Invalid sourceRef");
|
|
610
967
|
const sourceHash = normalizeSourceHash(params.sourceHash);
|
|
611
968
|
if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
612
|
-
const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ??
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
969
|
+
const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
|
|
970
|
+
const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
|
|
971
|
+
const chunkOverlap = Math.min(
|
|
972
|
+
Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
|
|
973
|
+
maxChunkLength - 1
|
|
974
|
+
);
|
|
975
|
+
const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
|
|
976
|
+
const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
|
|
616
977
|
if (typeof params.documentChunk !== "string") {
|
|
617
978
|
throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
|
|
618
979
|
}
|
|
619
|
-
const
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
if (text.length === 0) {
|
|
623
|
-
return { truncated: false, chunks: 0 };
|
|
980
|
+
const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
|
|
981
|
+
if (this.activeIngestJobs.has(jobKey)) {
|
|
982
|
+
throw new WikiBusyError("ingest", entityId);
|
|
624
983
|
}
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
const searchArea = text.slice(0, maxChunkLength + 1);
|
|
631
|
-
const match = searchArea.match(/[.!?]\s+(?![\s\S]*[.!?]\s+)/);
|
|
632
|
-
if (match && match.index !== void 0) {
|
|
633
|
-
const splitPoint = Math.min(match.index + match[0].length, maxChunkLength);
|
|
634
|
-
const chunk = safeSlice(text, 0, splitPoint);
|
|
635
|
-
chunks.push(chunk);
|
|
636
|
-
text = text.slice(chunk.length);
|
|
637
|
-
} else {
|
|
638
|
-
truncated = true;
|
|
639
|
-
const chunk = safeSlice(text, 0, maxChunkLength);
|
|
640
|
-
chunks.push(chunk);
|
|
641
|
-
text = text.slice(chunk.length);
|
|
984
|
+
this.activeIngestJobs.add(jobKey);
|
|
985
|
+
try {
|
|
986
|
+
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
987
|
+
if (chunks.length === 0) {
|
|
988
|
+
return { truncated: false, chunks: 0 };
|
|
642
989
|
}
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
const userPrompt = `Document Chunk:
|
|
990
|
+
const chunkResults = await withConcurrency(
|
|
991
|
+
chunks.map((chunk) => async () => {
|
|
992
|
+
const userPrompt = `Document Chunk:
|
|
647
993
|
${chunk}`;
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
994
|
+
const responseText = await this.options.llmProvider.generateText({
|
|
995
|
+
systemPrompt: INGEST_SYSTEM_PROMPT,
|
|
996
|
+
userPrompt
|
|
997
|
+
});
|
|
998
|
+
const result = parseJsonResponse(responseText);
|
|
999
|
+
return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
|
|
1000
|
+
}),
|
|
1001
|
+
chunkConcurrency
|
|
1002
|
+
);
|
|
1003
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1004
|
+
const allValidFacts = [];
|
|
1005
|
+
for (const facts of chunkResults) {
|
|
1006
|
+
for (const fact of facts) {
|
|
1007
|
+
const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
|
|
1008
|
+
if (!seen.has(normalized)) {
|
|
1009
|
+
seen.add(normalized);
|
|
1010
|
+
allValidFacts.push(fact);
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
}
|
|
1014
|
+
const now = Date.now();
|
|
1015
|
+
await this.db.withTransactionAsync(async () => {
|
|
1016
|
+
await this.db.runAsync(
|
|
1017
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1018
|
+
[now, now, sourceRef, entityId]
|
|
1019
|
+
);
|
|
1020
|
+
for (const fact of allValidFacts) {
|
|
1021
|
+
const id = generateId("fact_");
|
|
1022
|
+
await this.db.runAsync(
|
|
1023
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
|
|
1024
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1025
|
+
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1026
|
+
);
|
|
1027
|
+
}
|
|
651
1028
|
});
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
1029
|
+
return { truncated, chunks: chunks.length };
|
|
1030
|
+
} finally {
|
|
1031
|
+
this.activeIngestJobs.delete(jobKey);
|
|
655
1032
|
}
|
|
656
|
-
const now = Date.now();
|
|
657
|
-
await this.db.withTransactionAsync(async () => {
|
|
658
|
-
await this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, sourceRef, entityId]);
|
|
659
|
-
for (const fact of allValidFacts) {
|
|
660
|
-
const id = generateId("fact_");
|
|
661
|
-
await this.db.runAsync(`
|
|
662
|
-
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
|
|
663
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
664
|
-
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]);
|
|
665
|
-
}
|
|
666
|
-
});
|
|
667
|
-
return { truncated, chunks: chunks.length };
|
|
668
1033
|
}
|
|
669
1034
|
};
|
|
670
1035
|
|
|
1036
|
+
// src/utils/formatMemoryDump.ts
|
|
1037
|
+
/**
 * Render one fact as a Markdown section: a level-3 heading with the title,
 * tag / confidence / source lines, the body, and a closing horizontal rule.
 *
 * @param {object} f - Fact row; `tags` may be missing, `source_ref` falls back
 *   to `source_type` when null or undefined.
 * @returns {string} Markdown ending with a newline.
 */
function renderFact(f) {
  const tagList = (f.tags || []).join(", ");
  const origin = f.source_ref ?? f.source_type;
  const sectionLines = [
    `### ${f.title}`,
    `**Tags:** ${tagList}`,
    `**Confidence:** ${f.confidence}`,
    `**Source:** ${origin}`,
    "",
    `${f.body}`,
    "",
    "---",
    ""
  ];
  return sectionLines.join("\n");
}
|
|
1050
|
+
/**
 * Render one task as a Markdown checklist item.
 *
 * The box is ticked only for status "done". The statuses "done", "abandoned"
 * and "in_progress" also get a trailing note; any other status gets none.
 *
 * @param {{ status: string, description: string }} t
 * @returns {string} A single list line ending with a newline.
 */
function renderTask(t) {
  let note;
  switch (t.status) {
    case "done":
      note = " (done)";
      break;
    case "abandoned":
      note = " (abandoned)";
      break;
    case "in_progress":
      note = " (in progress)";
      break;
    default:
      note = "";
  }
  const box = t.status === "done" ? "x" : " ";
  return `- [${box}] ${t.description}${note}\n`;
}
|
|
1056
|
+
/**
 * Render one event as a Markdown list item: ISO timestamp, event type, summary.
 *
 * An event whose `created_at` does not form a valid date is rendered with the
 * placeholder "unknown-time". `Date.prototype.toISOString` throws a
 * RangeError on an invalid date, so without this guard a single bad event
 * would abort formatting of the whole dump.
 *
 * @param {{ created_at: number, event_type: string, summary: string }} e
 * @returns {string} A single list line ending with a newline.
 */
function renderEvent(e) {
  const when = new Date(e.created_at);
  const ts = Number.isNaN(when.getTime()) ? "unknown-time" : when.toISOString();
  return `- [${ts}] (${e.event_type}) ${e.summary}\n`;
}
|
|
1061
|
+
/**
 * Render the complete Markdown document for one entity: a title, the
 * generation timestamp, then "Facts", "Tasks" and "Recent Events" sections.
 * An empty section is rendered as "_(none)_".
 *
 * @param {string} entityId - Shown in the document title.
 * @param {{ facts: object[], tasks: object[], events: object[] }} bundle
 * @param {number} generatedAt - Timestamp passed to `new Date(...)`.
 * @returns {string} Markdown document.
 */
function renderEntity(entityId, bundle, generatedAt) {
  const NONE = "_(none)_\n";
  // One rendered string per item, or the placeholder when there are none.
  const sectionBody = (items, render) =>
    items.length === 0 ? [NONE] : Array.from(items, (item) => render(item));

  const parts = [
    `# Memory Dump: ${entityId}`,
    `Generated: ${new Date(generatedAt).toISOString()}`,
    "",
    "## Facts",
    "",
    ...sectionBody(bundle.facts, renderFact),
    "## Tasks",
    "",
    ...sectionBody(bundle.tasks, renderTask),
    "",
    "## Recent Events",
    "",
    ...sectionBody(bundle.events, renderEvent)
  ];
  return parts.join("\n");
}
|
|
1090
|
+
/**
 * Compute a 16-character lowercase hex digest of a string.
 *
 * Two 32-bit multiply-and-xor accumulators (multipliers 33 and 31) are run
 * over the string's UTF-16 code units and concatenated, each zero-padded to
 * eight hex digits. Not a cryptographic hash.
 *
 * @param {string} value
 * @returns {string} 16 hex characters.
 */
function shortHash(value) {
  const toHex8 = (n) => (n >>> 0).toString(16).padStart(8, "0");
  let first = 5381;
  let second = 52711;
  let pos = 0;
  while (pos < value.length) {
    const unit = value.charCodeAt(pos);
    first = Math.imul(first, 33) ^ unit;
    second = Math.imul(second, 31) ^ unit;
    pos += 1;
  }
  return toHex8(first) + toHex8(second);
}
|
|
1100
|
+
/**
 * Derive a Markdown file name from an entity id.
 *
 * The id is NFKC-normalized; each run of characters outside
 * [A-Za-z0-9._-] becomes "_"; leading dots are replaced; repeated
 * underscores are collapsed; leading and trailing "_" / "-" are removed; and
 * the result is capped at 200 characters. An empty result becomes "entity".
 * Whenever the final base differs from the original id, or the sanitized id
 * exceeded the cap, a hash of the original id is appended so that different
 * ids do not end up with the same base name.
 *
 * @param {string} entityId
 * @returns {string} File name ending in ".md".
 */
function formatEntityFileName(entityId) {
  const MAX_BASE = 200;

  let cleaned = entityId.normalize("NFKC");
  cleaned = cleaned.replace(/[^A-Za-z0-9._-]+/g, "_");
  cleaned = cleaned.replace(/^\.+/, "_");
  cleaned = cleaned.replace(/_+/g, "_");
  cleaned = cleaned.replace(/^[_-]+|[_-]+$/g, "");

  const overlong = cleaned.length > MAX_BASE;
  const candidate = overlong ? cleaned.slice(0, MAX_BASE) : cleaned;
  const unusable = !candidate || candidate === "." || candidate === "..";
  const stem = unusable ? "entity" : candidate;

  if (stem !== entityId || overlong) {
    return `${stem}-${shortHash(entityId)}.md`;
  }
  return `${stem}.md`;
}
|
|
1110
|
+
/**
 * Convert a memory dump into a JSON manifest plus one Markdown file per entity.
 *
 * @param {{ generatedAt: number, entities: Object<string, object> }} dump
 * @returns {{ manifest: string, files: Array<{ name: string, content: string }> }}
 *   `manifest` is the whole dump pretty-printed with two-space indentation;
 *   `files` follows the iteration order of `dump.entities`.
 */
function formatMemoryDump(dump) {
  const files = [];
  for (const [entityId, bundle] of Object.entries(dump.entities)) {
    files.push({
      name: formatEntityFileName(entityId),
      content: renderEntity(entityId, bundle, dump.generatedAt)
    });
  }
  const manifest = JSON.stringify(dump, null, 2);
  return { manifest, files };
}
|
|
1120
|
+
|
|
671
1121
|
// src/index.ts
|
|
672
1122
|
/**
 * Factory for `WikiMemory`: constructs an instance from a database handle and
 * options.
 *
 * @param {object} db - Database handle forwarded to the `WikiMemory` constructor.
 * @param {object} options - Options forwarded to the `WikiMemory` constructor.
 * @returns {WikiMemory} The new instance.
 */
function createWiki(db, options) {
  const wiki = new WikiMemory(db, options);
  return wiki;
}
|
|
675
1125
|
// Annotate the CommonJS export names for ESM import in node:
|
|
676
1126
|
0 && (module.exports = {
|
|
1127
|
+
WikiBusyError,
|
|
677
1128
|
WikiMemory,
|
|
678
|
-
createWiki
|
|
1129
|
+
createWiki,
|
|
1130
|
+
formatMemoryDump
|
|
679
1131
|
});
|