@equationalapplications/core-llm-wiki 4.14.0 → 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/{chunk-6FWG2DG4.mjs → chunk-J4GBC6CP.mjs} +103 -31
- package/dist/chunk-J4GBC6CP.mjs.map +1 -0
- package/dist/index.d.mts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +441 -45
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +343 -20
- package/dist/index.mjs.map +1 -1
- package/dist/{testing-CDIDE4Jd.d.mts → testing-NH1_Aigh.d.mts} +47 -3
- package/dist/{testing-CDIDE4Jd.d.ts → testing-NH1_Aigh.d.ts} +47 -3
- package/dist/testing.d.mts +1 -1
- package/dist/testing.d.ts +1 -1
- package/dist/testing.js +316 -244
- package/dist/testing.js.map +1 -1
- package/dist/testing.mjs +1 -1
- package/package.json +2 -2
- package/dist/chunk-6FWG2DG4.mjs.map +0 -1
package/dist/testing.js
CHANGED
|
@@ -29,6 +29,221 @@ var PrunePartialFailureError = class extends Error {
|
|
|
29
29
|
};
|
|
30
30
|
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
31
31
|
|
|
32
|
+
// src/utils/pure.ts
|
|
33
|
+
/**
 * Extracts and parses the first JSON object or array embedded in `text`.
 *
 * The scan starts at whichever of `{` or `[` appears first and walks forward
 * until the matching closing character of the same kind is found. Characters
 * inside double-quoted strings (including backslash-escaped ones) are ignored
 * while counting nesting depth.
 *
 * @param {string} text - Raw text that may wrap a JSON value in other content.
 * @returns {*} The value produced by `JSON.parse` for the extracted span.
 * @throws {SyntaxError} When no opening character exists, when the opening
 *   character is never balanced, or when `JSON.parse` rejects the span.
 */
function parseJsonResponse(text) {
  const NOT_FOUND = "No JSON object/array found in LLM response";
  const bracePos = text.indexOf("{");
  const bracketPos = text.indexOf("[");
  if (bracePos === -1 && bracketPos === -1) {
    throw new SyntaxError(NOT_FOUND);
  }

  // Whichever opener comes first decides which pair of delimiters is tracked.
  const objectFirst = bracePos !== -1 && (bracketPos === -1 || bracePos < bracketPos);
  const begin = objectFirst ? bracePos : bracketPos;
  const opener = objectFirst ? "{" : "[";
  const closer = objectFirst ? "}" : "]";

  let nesting = 0;
  let quoted = false;
  let skipNext = false;
  let pos = begin;
  while (pos < text.length) {
    const c = text[pos];
    if (skipNext) {
      // Character following a backslash inside a string: never significant.
      skipNext = false;
    } else if (quoted) {
      if (c === "\\") {
        skipNext = true;
      } else if (c === '"') {
        quoted = false;
      }
    } else if (c === '"') {
      quoted = true;
    } else if (c === opener) {
      nesting += 1;
    } else if (c === closer) {
      nesting -= 1;
      if (nesting === 0) {
        return JSON.parse(text.slice(begin, pos + 1));
      }
    }
    pos += 1;
  }
  throw new SyntaxError(NOT_FOUND);
}
|
|
85
|
+
/**
 * Converts a thrown value into an Error, optionally removing its message.
 *
 * When `sanitizeRankerErrors` is exactly `false` the value is passed through
 * (wrapped in an Error if it is not one already). Otherwise a new Error is
 * built that carries only the constructor name (or the `typeof` result for
 * non-Error values); a one-level `cause` is reduced to its type name as well.
 *
 * @param {*} err - The value that was thrown.
 * @param {boolean} [sanitizeRankerErrors] - Pass `false` to disable scrubbing.
 * @returns {Error} The original or a scrubbed Error.
 */
function sanitizeRankerError(err, sanitizeRankerErrors) {
  const isError = err instanceof Error;
  if (sanitizeRankerErrors === false) {
    return isError ? err : new Error(String(err));
  }

  const typeName = isError ? err.constructor?.name ?? "Error" : typeof err;

  // Only the type of the cause is retained, never its message.
  let options;
  if (isError && err.cause !== void 0) {
    const causeLabel = err.cause?.constructor?.name ?? typeof err.cause;
    options = { cause: new Error(`Caused by: ${causeLabel}`) };
  }

  const scrubbed = new Error(`VectorRanker ${typeName} (message scrubbed for security)`, options);
  scrubbed.name = typeName;
  return scrubbed;
}
|
|
98
|
+
/**
 * Slices a string without cutting a UTF-16 surrogate pair in half.
 *
 * Negative indices count from the end and out-of-range indices are clamped.
 * If the resolved start is greater than the resolved end the two are swapped.
 * A start that falls between the halves of a pair moves back one unit (the
 * pair is included); an end that falls between the halves moves back one unit
 * (the pair is excluded).
 *
 * @param {string} value - String to slice.
 * @param {number} start - Start index (may be negative).
 * @param {number} [end] - End index, exclusive (may be negative); defaults to the length.
 * @returns {string} The selected substring.
 */
function safeSlice(value, start, end) {
  const size = value.length;
  const clampIndex = (idx) => (idx < 0 ? Math.max(size + idx, 0) : Math.min(idx, size));
  const isHighSurrogate = (i) => {
    const unit = value.charCodeAt(i);
    return unit >= 0xd800 && unit <= 0xdbff;
  };
  const isLowSurrogate = (i) => {
    const unit = value.charCodeAt(i);
    return unit >= 0xdc00 && unit <= 0xdfff;
  };
  // True when index `i` sits between the two halves of a surrogate pair.
  const splitsPair = (i) => i > 0 && i < size && isLowSurrogate(i) && isHighSurrogate(i - 1);

  let from = clampIndex(start);
  let to = end === void 0 ? size : clampIndex(end);
  if (from > to) {
    const swap = from;
    from = to;
    to = swap;
  }

  if (splitsPair(from)) {
    from -= 1;
  }
  if (splitsPair(to)) {
    to -= 1;
  }
  return value.slice(from, to);
}
|
|
113
|
+
/**
 * Splits text into chunks of at most `maxChunkLength` UTF-16 units.
 *
 * The input is trimmed first; an empty result yields no chunks (this check
 * happens before argument validation). For every window the split point is
 * chosen from the second half of the window, in order of preference:
 *   1. just after the last blank line ("\n\n") that fits in the window,
 *   2. just after the last ".", "!" or "?" that is followed by whitespace,
 *   3. just after the last whitespace character,
 *   4. a hard cut at the window end, which sets `truncated` to true.
 * The next window starts `overlap` units before the split point, but always
 * at least one unit past the previous start so the loop makes progress.
 *
 * @param {string} input - Text to split.
 * @param {number} maxChunkLength - Integer >= 2; upper bound for a chunk.
 * @param {number} overlap - Integer in [0, maxChunkLength).
 * @returns {{chunks: string[], truncated: boolean}} Chunks in order, plus a
 *   flag telling whether any chunk had to be cut at an arbitrary position.
 * @throws {Error} When `maxChunkLength` or `overlap` is out of range.
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }

  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);
  const whitespace = /\s/;

  while (cursor < text.length) {
    const remaining = text.length - cursor;
    if (remaining <= maxChunkLength) {
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }

    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // Search from windowEnd - 2 so that only breaks lying completely inside
    // the window are considered. Searching from windowEnd could return a
    // break that straddles the window edge, which then had to be rejected
    // and hid any earlier, usable break.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    if (splitPoint === -1) {
      // Keep the last sentence terminator that is followed by whitespace;
      // the split lands after that whitespace character.
      let lastTerm = -1;
      for (let i = minSplit; i < windowEnd - 1; i++) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && whitespace.test(text[i + 1])) {
          lastTerm = i + 2;
        }
      }
      if (lastTerm !== -1) splitPoint = lastTerm;
    }

    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (whitespace.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    if (splitPoint === -1) {
      // No natural boundary in the second half of the window: hard cut.
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
|
|
167
|
+
/**
 * Runs async task factories with a bounded number of concurrent workers.
 *
 * Results are stored at the index of the task that produced them. After the
 * first failure no new task is started; tasks already in flight are allowed
 * to settle, and the first error observed is then rethrown.
 *
 * The limit is normalised before use: values that do not convert to a number
 * (for example `undefined` or `NaN`) fall back to 1 so the tasks still run
 * instead of being skipped silently, fractional limits are rounded down, and
 * the result is clamped to the range [1, tasks.length].
 *
 * @param {Array<function(): Promise<*>>} tasks - Functions that start the work.
 * @param {number} limit - Maximum number of tasks running at once.
 * @returns {Promise<Array<*>>} Resolved values, in task order.
 * @throws {*} The first error thrown or rejected by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let index = 0;
  let failed = false;
  let firstError;

  async function worker() {
    while (index < tasks.length && !failed) {
      const i = index++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }

  // Math.max(NaN, 1) is NaN, and Array.from({ length: NaN }) is empty, so an
  // unusable limit must be replaced before the worker count is computed.
  const parsedLimit = Number(limit);
  const requested = Number.isNaN(parsedLimit) ? 1 : Math.floor(parsedLimit);
  const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(requested, 1), tasks.length);

  await Promise.allSettled(Array.from({ length: workerCount }, worker));
  if (failed) throw firstError;
  return results;
}
|
|
191
|
+
/**
 * Trims a string and shortens it to at most `max` UTF-16 units.
 *
 * Non-string input yields an empty string. When shortening is needed the cut
 * is made with `safeSlice` and trailing whitespace left by the cut is removed.
 *
 * @param {*} value - Candidate text.
 * @param {number} max - Maximum length of the result.
 * @returns {string} The trimmed, possibly shortened text.
 */
function clip(value, max) {
  if (typeof value !== "string") {
    return "";
  }
  const trimmed = value.trim();
  if (trimmed.length <= max) {
    return trimmed;
  }
  return safeSlice(trimmed, 0, max).trimEnd();
}
|
|
196
|
+
/**
 * Normalises a tag list.
 *
 * Keeps string entries only, trims and lower-cases them, drops entries that
 * are empty or longer than 40 characters, and returns at most the first six
 * survivors. Anything that is not an array yields an empty list.
 *
 * @param {*} tags - Candidate tag list.
 * @returns {string[]} Up to six normalised tags.
 */
function validateTags(tags) {
  if (!Array.isArray(tags)) {
    return [];
  }
  const accepted = [];
  for (const tag of tags) {
    if (accepted.length === 6) {
      break;
    }
    if (typeof tag !== "string") {
      continue;
    }
    const normalized = tag.trim().toLowerCase();
    if (normalized.length > 0 && normalized.length <= 40) {
      accepted.push(normalized);
    }
  }
  return accepted;
}
|
|
200
|
+
/**
 * Validates and normalises a fact record.
 *
 * Requires string `title` and `body`; both are clipped (80 and 800 units) and
 * must be non-empty afterwards. `confidence` is kept only when it is
 * "certain" or "tentative" and otherwise becomes "inferred". `tags` is passed
 * through `validateTags`. All other properties are copied unchanged.
 *
 * @param {*} fact - Candidate fact.
 * @returns {object|null} A normalised copy, or null when the fact is unusable.
 */
function validateFact(fact) {
  const rawTitle = fact?.title;
  const rawBody = fact?.body;
  if (typeof rawTitle !== "string" || typeof rawBody !== "string") {
    return null;
  }

  const title = clip(rawTitle, 80);
  const body = clip(rawBody, 800);
  if (!title || !body) {
    return null;
  }

  const given = fact.confidence;
  const confidence = given === "certain" || given === "tentative" ? given : "inferred";

  return { ...fact, title, body, confidence, tags: validateTags(fact.tags) };
}
|
|
215
|
+
/**
 * Validates and normalises a task record.
 *
 * Requires a string `description`, which is clipped to 200 units and must be
 * non-empty afterwards. `priority` falls back to 0 when it is not a finite
 * number, is rounded to the nearest integer and clamped to the range 0..10.
 * All other properties are copied unchanged.
 *
 * @param {*} task - Candidate task.
 * @returns {object|null} A normalised copy, or null when the task is unusable.
 */
function validateTask(task) {
  if (typeof task?.description !== "string") {
    return null;
  }
  const description = clip(task.description, 200);
  if (!description) {
    return null;
  }

  const rawPriority = task.priority;
  const usable = typeof rawPriority === "number" && Number.isFinite(rawPriority);
  const rounded = Math.round(usable ? rawPriority : 0);
  const priority = Math.min(10, Math.max(0, rounded));

  return { ...task, description, priority };
}
|
|
228
|
+
/**
 * Reduces a source reference to a restricted character set.
 *
 * Every character other than ASCII letters, digits, ".", "_", "-" and space
 * is removed; the result is trimmed and cut to 255 characters.
 *
 * @param {*} value - Candidate reference.
 * @returns {string|null} The cleaned reference, or null when the input is not
 *   a string or nothing remains after cleaning.
 */
function normalizeSourceRef(value) {
  if (typeof value !== "string") {
    return null;
  }
  const stripped = value.replace(/[^A-Za-z0-9._\- ]/g, "");
  const bounded = stripped.trim().slice(0, 255);
  if (bounded.length === 0) {
    return null;
  }
  return bounded;
}
|
|
233
|
+
/**
 * Accepts a 64-character hexadecimal string and returns it in lower case.
 *
 * @param {*} value - Candidate hash.
 * @returns {string|null} The lower-cased hash, or null when the input is not
 *   a string of exactly 64 hex digits.
 */
function normalizeSourceHash(value) {
  if (typeof value !== "string") {
    return null;
  }
  if (!/^[0-9a-f]{64}$/i.test(value)) {
    return null;
  }
  return value.toLowerCase();
}
|
|
237
|
+
/**
 * Builds the set of comparison tokens for a title.
 *
 * The title is lower-cased, characters other than a-z, 0-9 and whitespace are
 * removed, and the remainder is split on whitespace. Tokens shorter than
 * three characters are discarded.
 *
 * @param {string} title - Title text.
 * @returns {Set<string>} Distinct tokens of length >= 3.
 */
function titleTokens(title) {
  const normalized = title.toLowerCase().replace(/[^a-z0-9\s]/g, "");
  const tokens = new Set();
  for (const word of normalized.split(/\s+/)) {
    if (word.length >= 3) {
      tokens.add(word);
    }
  }
  return tokens;
}
|
|
240
|
+
/**
 * Computes the Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardScore(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 0;
  }

  const shared = new Set();
  const combined = new Set();
  for (const item of a) {
    combined.add(item);
    if (b.has(item)) {
      shared.add(item);
    }
  }
  for (const item of b) {
    combined.add(item);
  }
  return shared.size / combined.size;
}
|
|
246
|
+
|
|
32
247
|
// src/services/EmbeddingService.ts
|
|
33
248
|
var EmbeddingService = class {
|
|
34
249
|
constructor(db, options, entryRepo, metadataRepo) {
|
|
@@ -76,7 +291,7 @@ var EmbeddingService = class {
|
|
|
76
291
|
tagsStr = fact.tags;
|
|
77
292
|
}
|
|
78
293
|
}
|
|
79
|
-
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
294
|
+
const text = clip(`${fact.title} ${fact.body} ${tagsStr}`.trim(), 16e3);
|
|
80
295
|
try {
|
|
81
296
|
const vector = await embedFn(text);
|
|
82
297
|
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -155,12 +370,16 @@ var EmbeddingService = class {
|
|
|
155
370
|
};
|
|
156
371
|
|
|
157
372
|
// src/services/ImportExportService.ts
|
|
373
|
+
var MAX_EMBEDDING_BLOB_BYTES = 32 * 1024;
|
|
374
|
+
var IMPORT_TITLE_MAX = 500;
|
|
375
|
+
var IMPORT_BODY_MAX = 8e3;
|
|
158
376
|
var ImportExportService = class {
|
|
159
|
-
constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
|
|
377
|
+
constructor(db, entryRepo, taskRepo, eventRepo, edgeRepo, metadataRepo, searchService, jobManager, embeddingService) {
|
|
160
378
|
this.db = db;
|
|
161
379
|
this.entryRepo = entryRepo;
|
|
162
380
|
this.taskRepo = taskRepo;
|
|
163
381
|
this.eventRepo = eventRepo;
|
|
382
|
+
this.edgeRepo = edgeRepo;
|
|
164
383
|
this.metadataRepo = metadataRepo;
|
|
165
384
|
this.searchService = searchService;
|
|
166
385
|
this.jobManager = jobManager;
|
|
@@ -205,10 +424,11 @@ var ImportExportService = class {
|
|
|
205
424
|
}
|
|
206
425
|
}
|
|
207
426
|
async getFullBundle(entityId, opts) {
|
|
208
|
-
const [factsRaw, tasks, events] = await Promise.all([
|
|
427
|
+
const [factsRaw, tasks, events, edges] = await Promise.all([
|
|
209
428
|
opts?.includeBlobs ? this.entryRepo.findAllByEntityIdWithBlobs(entityId) : this.entryRepo.findAllByEntityId(entityId),
|
|
210
429
|
this.taskRepo.findAllByEntityId(entityId),
|
|
211
|
-
this.eventRepo.getByEntityId(entityId, opts?.maxEvents)
|
|
430
|
+
this.eventRepo.getByEntityId(entityId, opts?.maxEvents),
|
|
431
|
+
this.edgeRepo.getByEntityId(entityId)
|
|
212
432
|
]);
|
|
213
433
|
const facts = factsRaw.map((f) => {
|
|
214
434
|
const {
|
|
@@ -227,7 +447,7 @@ var ImportExportService = class {
|
|
|
227
447
|
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
228
448
|
};
|
|
229
449
|
});
|
|
230
|
-
return { facts, tasks, events };
|
|
450
|
+
return { facts, tasks, events, edges };
|
|
231
451
|
}
|
|
232
452
|
/** Single-entity import transaction + post-processing; package-internal hook for tests. */
|
|
233
453
|
async doImportEntity(entityId, bundle, merge) {
|
|
@@ -236,6 +456,7 @@ var ImportExportService = class {
|
|
|
236
456
|
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
237
457
|
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
238
458
|
const softDeletedFactIds = [];
|
|
459
|
+
const clippedTextByFactId = /* @__PURE__ */ new Map();
|
|
239
460
|
await this.db.withTransactionAsync(async (tx) => {
|
|
240
461
|
if (!merge) {
|
|
241
462
|
const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
|
|
@@ -248,6 +469,7 @@ var ImportExportService = class {
|
|
|
248
469
|
softDeletedFactIds.push(...deletedLiveFactIds);
|
|
249
470
|
await this.entryRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
250
471
|
await this.taskRepo.bulkSoftDeleteByEntityId(entityId, tx);
|
|
472
|
+
await this.edgeRepo.bulkDeleteByEntityId(entityId, tx);
|
|
251
473
|
await this.metadataRepo.deleteCheckpoint(entityId, tx);
|
|
252
474
|
}
|
|
253
475
|
const factIds = bundle.facts.map((fact) => fact.id);
|
|
@@ -272,21 +494,32 @@ var ImportExportService = class {
|
|
|
272
494
|
const rawBlobRaw = fact.embedding_blob;
|
|
273
495
|
let rawBlob = null;
|
|
274
496
|
if (rawBlobRaw instanceof Uint8Array) {
|
|
275
|
-
|
|
497
|
+
if (rawBlobRaw.byteLength <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
498
|
+
rawBlob = rawBlobRaw;
|
|
499
|
+
}
|
|
276
500
|
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
277
501
|
const obj = rawBlobRaw;
|
|
278
502
|
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
279
|
-
|
|
503
|
+
const data = obj["data"];
|
|
504
|
+
if (data.length <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
505
|
+
rawBlob = new Uint8Array(data);
|
|
506
|
+
}
|
|
280
507
|
} else if (!Array.isArray(rawBlobRaw)) {
|
|
281
508
|
const entries = Object.keys(obj);
|
|
282
509
|
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
283
510
|
const len = entries.length;
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
511
|
+
if (len <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
512
|
+
rawBlob = new Uint8Array(len);
|
|
513
|
+
for (let i = 0; i < len; i++) {
|
|
514
|
+
rawBlob[i] = obj[String(i)] ?? 0;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
287
517
|
}
|
|
288
518
|
}
|
|
289
519
|
}
|
|
520
|
+
if (rawBlob !== null && rawBlob.byteLength > MAX_EMBEDDING_BLOB_BYTES) {
|
|
521
|
+
rawBlob = null;
|
|
522
|
+
}
|
|
290
523
|
let blobData = null;
|
|
291
524
|
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
292
525
|
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
@@ -316,11 +549,14 @@ var ImportExportService = class {
|
|
|
316
549
|
}
|
|
317
550
|
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
318
551
|
}
|
|
552
|
+
const safeTitle = clip(String(fact.title ?? ""), IMPORT_TITLE_MAX);
|
|
553
|
+
const safeBody = clip(String(fact.body ?? ""), IMPORT_BODY_MAX);
|
|
554
|
+
clippedTextByFactId.set(fact.id, { title: safeTitle, body: safeBody });
|
|
319
555
|
const factObj = {
|
|
320
556
|
id: fact.id,
|
|
321
557
|
entity_id: entityId,
|
|
322
|
-
title:
|
|
323
|
-
body:
|
|
558
|
+
title: safeTitle,
|
|
559
|
+
body: safeBody,
|
|
324
560
|
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
325
561
|
confidence: fact.confidence,
|
|
326
562
|
source_type: sourceType,
|
|
@@ -331,7 +567,8 @@ var ImportExportService = class {
|
|
|
331
567
|
last_accessed_at: fact.last_accessed_at,
|
|
332
568
|
access_count: fact.access_count,
|
|
333
569
|
deleted_at: fact.deleted_at,
|
|
334
|
-
embedding_blob: blobData ?? void 0
|
|
570
|
+
embedding_blob: blobData ?? void 0,
|
|
571
|
+
okf_type: fact.okf_type ?? null
|
|
335
572
|
};
|
|
336
573
|
await this.entryRepo.upsertForImport(factObj, tx);
|
|
337
574
|
if (blobData != null) {
|
|
@@ -380,7 +617,8 @@ var ImportExportService = class {
|
|
|
380
617
|
created_at: task.created_at,
|
|
381
618
|
updated_at: safeUpdatedAt,
|
|
382
619
|
resolved_at: task.resolved_at,
|
|
383
|
-
deleted_at: task.deleted_at
|
|
620
|
+
deleted_at: task.deleted_at,
|
|
621
|
+
okf_type: task.okf_type ?? null
|
|
384
622
|
},
|
|
385
623
|
tx,
|
|
386
624
|
safeUpdatedAt
|
|
@@ -404,15 +642,29 @@ var ImportExportService = class {
|
|
|
404
642
|
tx
|
|
405
643
|
);
|
|
406
644
|
}
|
|
645
|
+
for (const edge of bundle.edges ?? []) {
|
|
646
|
+
await this.edgeRepo.addIgnoreDuplicate(
|
|
647
|
+
{
|
|
648
|
+
id: edge.id,
|
|
649
|
+
entity_id: entityId,
|
|
650
|
+
source_id: edge.source_id,
|
|
651
|
+
target_id: edge.target_id,
|
|
652
|
+
edge_type: edge.edge_type,
|
|
653
|
+
created_at: edge.created_at
|
|
654
|
+
},
|
|
655
|
+
tx
|
|
656
|
+
);
|
|
657
|
+
}
|
|
407
658
|
});
|
|
408
659
|
await this.searchService.sync(entityId);
|
|
409
660
|
for (const fact of bundle.facts) {
|
|
410
661
|
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
662
|
+
const clipped = clippedTextByFactId.get(fact.id);
|
|
411
663
|
const embedded = await this.embeddingService.embedFact({
|
|
412
664
|
id: fact.id,
|
|
413
665
|
entity_id: entityId,
|
|
414
|
-
title: fact.title,
|
|
415
|
-
body: fact.body,
|
|
666
|
+
title: clipped?.title ?? fact.title,
|
|
667
|
+
body: clipped?.body ?? fact.body,
|
|
416
668
|
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
417
669
|
});
|
|
418
670
|
if (!embedded) {
|
|
@@ -512,7 +764,7 @@ var ImportExportService = class {
|
|
|
512
764
|
}
|
|
513
765
|
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
514
766
|
console.warn(
|
|
515
|
-
`[WikiMemory] importDump: ${type} id
|
|
767
|
+
`[WikiMemory] importDump: ${type} id ${JSON.stringify(id)} already belongs to entity ${JSON.stringify(existingEntityId)}; skipping for entity ${JSON.stringify(targetEntityId)}`
|
|
516
768
|
);
|
|
517
769
|
}
|
|
518
770
|
_normalizeImportedSourceType(raw, ctx) {
|
|
@@ -544,221 +796,6 @@ After running the migration SQL, restart your application.`
|
|
|
544
796
|
}
|
|
545
797
|
};
|
|
546
798
|
|
|
547
|
-
// src/utils/pure.ts
|
|
548
|
-
function parseJsonResponse(text) {
|
|
549
|
-
const firstBrace = text.indexOf("{");
|
|
550
|
-
const firstBracket = text.indexOf("[");
|
|
551
|
-
let start;
|
|
552
|
-
let openChar;
|
|
553
|
-
let closeChar;
|
|
554
|
-
const useBrace = firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket);
|
|
555
|
-
if (useBrace) {
|
|
556
|
-
start = firstBrace;
|
|
557
|
-
openChar = "{";
|
|
558
|
-
closeChar = "}";
|
|
559
|
-
} else if (firstBracket !== -1) {
|
|
560
|
-
start = firstBracket;
|
|
561
|
-
openChar = "[";
|
|
562
|
-
closeChar = "]";
|
|
563
|
-
} else {
|
|
564
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
565
|
-
}
|
|
566
|
-
let depth = 0;
|
|
567
|
-
let inString = false;
|
|
568
|
-
let escape = false;
|
|
569
|
-
let end = -1;
|
|
570
|
-
for (let i = start; i < text.length; i++) {
|
|
571
|
-
const ch = text[i];
|
|
572
|
-
if (escape) {
|
|
573
|
-
escape = false;
|
|
574
|
-
continue;
|
|
575
|
-
}
|
|
576
|
-
if (ch === "\\" && inString) {
|
|
577
|
-
escape = true;
|
|
578
|
-
continue;
|
|
579
|
-
}
|
|
580
|
-
if (ch === '"') {
|
|
581
|
-
inString = !inString;
|
|
582
|
-
continue;
|
|
583
|
-
}
|
|
584
|
-
if (inString) continue;
|
|
585
|
-
if (ch === openChar) {
|
|
586
|
-
depth++;
|
|
587
|
-
continue;
|
|
588
|
-
}
|
|
589
|
-
if (ch === closeChar) {
|
|
590
|
-
depth--;
|
|
591
|
-
if (depth === 0) {
|
|
592
|
-
end = i;
|
|
593
|
-
break;
|
|
594
|
-
}
|
|
595
|
-
}
|
|
596
|
-
}
|
|
597
|
-
if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
|
|
598
|
-
return JSON.parse(text.slice(start, end + 1));
|
|
599
|
-
}
|
|
600
|
-
function sanitizeRankerError(err, sanitizeRankerErrors) {
|
|
601
|
-
if (sanitizeRankerErrors === false) {
|
|
602
|
-
return err instanceof Error ? err : new Error(String(err));
|
|
603
|
-
}
|
|
604
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
605
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
606
|
-
const sanitized = new Error(
|
|
607
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
608
|
-
innerCause ? { cause: innerCause } : void 0
|
|
609
|
-
);
|
|
610
|
-
sanitized.name = typeName;
|
|
611
|
-
return sanitized;
|
|
612
|
-
}
|
|
613
|
-
function safeSlice(value, start, end) {
|
|
614
|
-
const length = value.length;
|
|
615
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
616
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
617
|
-
if (safeStart > safeEnd) {
|
|
618
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
619
|
-
}
|
|
620
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
621
|
-
safeStart--;
|
|
622
|
-
}
|
|
623
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
624
|
-
safeEnd--;
|
|
625
|
-
}
|
|
626
|
-
return value.slice(safeStart, safeEnd);
|
|
627
|
-
}
|
|
628
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
629
|
-
const text = input.trim();
|
|
630
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
631
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
632
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
633
|
-
}
|
|
634
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
635
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
636
|
-
}
|
|
637
|
-
const chunks = [];
|
|
638
|
-
let truncated = false;
|
|
639
|
-
let cursor = 0;
|
|
640
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
641
|
-
while (cursor < text.length) {
|
|
642
|
-
const remaining = text.length - cursor;
|
|
643
|
-
if (remaining <= maxChunkLength) {
|
|
644
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
645
|
-
break;
|
|
646
|
-
}
|
|
647
|
-
const windowEnd = cursor + maxChunkLength;
|
|
648
|
-
const minSplit = cursor + halfMax;
|
|
649
|
-
let splitPoint = -1;
|
|
650
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
651
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
652
|
-
splitPoint = paraIdx + 2;
|
|
653
|
-
}
|
|
654
|
-
if (splitPoint === -1) {
|
|
655
|
-
let lastTerm = -1;
|
|
656
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
657
|
-
const ch = text[i];
|
|
658
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
659
|
-
lastTerm = i + 2;
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
663
|
-
}
|
|
664
|
-
if (splitPoint === -1) {
|
|
665
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
666
|
-
if (/\s/.test(text[i])) {
|
|
667
|
-
splitPoint = i + 1;
|
|
668
|
-
break;
|
|
669
|
-
}
|
|
670
|
-
}
|
|
671
|
-
}
|
|
672
|
-
if (splitPoint === -1) {
|
|
673
|
-
truncated = true;
|
|
674
|
-
splitPoint = windowEnd;
|
|
675
|
-
}
|
|
676
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
677
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
678
|
-
cursor = next;
|
|
679
|
-
}
|
|
680
|
-
return { chunks, truncated };
|
|
681
|
-
}
|
|
682
|
-
async function withConcurrency(tasks, limit) {
|
|
683
|
-
const results = new Array(tasks.length);
|
|
684
|
-
let index = 0;
|
|
685
|
-
let failed = false;
|
|
686
|
-
let firstError;
|
|
687
|
-
async function worker() {
|
|
688
|
-
while (index < tasks.length && !failed) {
|
|
689
|
-
const i = index++;
|
|
690
|
-
try {
|
|
691
|
-
results[i] = await tasks[i]();
|
|
692
|
-
} catch (e) {
|
|
693
|
-
if (!failed) {
|
|
694
|
-
failed = true;
|
|
695
|
-
firstError = e;
|
|
696
|
-
}
|
|
697
|
-
return;
|
|
698
|
-
}
|
|
699
|
-
}
|
|
700
|
-
}
|
|
701
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
702
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
703
|
-
if (failed) throw firstError;
|
|
704
|
-
return results;
|
|
705
|
-
}
|
|
706
|
-
function clip(value, max) {
|
|
707
|
-
if (typeof value !== "string") return "";
|
|
708
|
-
const s = value.trim();
|
|
709
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
710
|
-
}
|
|
711
|
-
function validateTags(tags) {
|
|
712
|
-
if (!Array.isArray(tags)) return [];
|
|
713
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
714
|
-
}
|
|
715
|
-
function validateFact(fact) {
|
|
716
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
717
|
-
const title = clip(fact.title, 80);
|
|
718
|
-
const body = clip(fact.body, 800);
|
|
719
|
-
if (!title || !body) return null;
|
|
720
|
-
let confidence = fact.confidence;
|
|
721
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
722
|
-
return {
|
|
723
|
-
...fact,
|
|
724
|
-
title,
|
|
725
|
-
body,
|
|
726
|
-
confidence,
|
|
727
|
-
tags: validateTags(fact.tags)
|
|
728
|
-
};
|
|
729
|
-
}
|
|
730
|
-
function validateTask(task) {
|
|
731
|
-
if (typeof task?.description !== "string") return null;
|
|
732
|
-
const description = clip(task.description, 200);
|
|
733
|
-
if (!description) return null;
|
|
734
|
-
let priority = task.priority;
|
|
735
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
736
|
-
priority = Math.max(0, Math.min(10, Math.round(priority)));
|
|
737
|
-
return {
|
|
738
|
-
...task,
|
|
739
|
-
description,
|
|
740
|
-
priority
|
|
741
|
-
};
|
|
742
|
-
}
|
|
743
|
-
function normalizeSourceRef(value) {
|
|
744
|
-
if (typeof value !== "string") return null;
|
|
745
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
746
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
747
|
-
}
|
|
748
|
-
function normalizeSourceHash(value) {
|
|
749
|
-
if (typeof value !== "string") return null;
|
|
750
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
751
|
-
}
|
|
752
|
-
function titleTokens(title) {
|
|
753
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
754
|
-
}
|
|
755
|
-
function jaccardScore(a, b) {
|
|
756
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
757
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
758
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
759
|
-
return intersection.size / union.size;
|
|
760
|
-
}
|
|
761
|
-
|
|
762
799
|
// src/utils/ids.ts
|
|
763
800
|
function generateId(prefix = "") {
|
|
764
801
|
if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
|
|
@@ -769,7 +806,9 @@ function generateId(prefix = "") {
|
|
|
769
806
|
crypto.getRandomValues(bytes);
|
|
770
807
|
return prefix + Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("").substring(0, 24);
|
|
771
808
|
}
|
|
772
|
-
|
|
809
|
+
throw new Error(
|
|
810
|
+
"generateId: no cryptographically secure random source available (crypto.randomUUID and crypto.getRandomValues are both missing)."
|
|
811
|
+
);
|
|
773
812
|
}
|
|
774
813
|
|
|
775
814
|
// src/prompts.ts
|
|
@@ -1472,7 +1511,7 @@ var RetrievalService = class {
|
|
|
1472
1511
|
const sanitizedTierWeights = shouldExposeReadMetadata(entityId) ? sanitizeTierWeights(entityIds, options?.tierWeights) : void 0;
|
|
1473
1512
|
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
1474
1513
|
if (entityIds.length === 0) {
|
|
1475
|
-
const empty = { facts: [], tasks: [], events: [] };
|
|
1514
|
+
const empty = { facts: [], tasks: [], events: [], edges: [] };
|
|
1476
1515
|
if (exposeMetadata) {
|
|
1477
1516
|
empty.metadata = { query, entityIds: [] };
|
|
1478
1517
|
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
@@ -1862,7 +1901,7 @@ var RetrievalService = class {
|
|
|
1862
1901
|
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
1863
1902
|
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
1864
1903
|
}
|
|
1865
|
-
const bundle = { facts, tasks, events: events.reverse() };
|
|
1904
|
+
const bundle = { facts, tasks, events: events.reverse(), edges: [] };
|
|
1866
1905
|
if (exposeMetadata) {
|
|
1867
1906
|
bundle.metadata = { query, entityIds };
|
|
1868
1907
|
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
@@ -2181,6 +2220,20 @@ var JobManager = class {
|
|
|
2181
2220
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
2182
2221
|
this.activeIngestJobs = /* @__PURE__ */ new Map();
|
|
2183
2222
|
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
2223
|
+
/**
|
|
2224
|
+
* Lookup table for acquireLock/releaseLock's dynamic-dispatch branch.
|
|
2225
|
+
* Excludes 'ingest' | 'global_reembed' | 'global_import', which those
|
|
2226
|
+
* methods already handle via explicit if/else branches before reaching
|
|
2227
|
+
* this table.
|
|
2228
|
+
*/
|
|
2229
|
+
this.lockKeyFns = {
|
|
2230
|
+
prune: (id) => this._pruneKey(id),
|
|
2231
|
+
librarian: (id) => this._librarianKey(id),
|
|
2232
|
+
heal: (id) => this._healKey(id),
|
|
2233
|
+
reembed: (id) => this._reembedKey(id),
|
|
2234
|
+
import: (id) => this._importKey(id),
|
|
2235
|
+
forget: (id) => this._forgetKey(id)
|
|
2236
|
+
};
|
|
2184
2237
|
}
|
|
2185
2238
|
_pruneKey(entityId) {
|
|
2186
2239
|
return `${this.prefix}:${entityId}:prune`;
|
|
@@ -2330,9 +2383,7 @@ var JobManager = class {
|
|
|
2330
2383
|
} else if (operation === "global_import") {
|
|
2331
2384
|
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
2332
2385
|
} else {
|
|
2333
|
-
|
|
2334
|
-
const keyFn = this[keyFnName];
|
|
2335
|
-
this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
|
|
2386
|
+
this.activeMaintenanceJobs.add(this.lockKeyFns[operation](entityId));
|
|
2336
2387
|
}
|
|
2337
2388
|
this._notifyStatusSubscribers(entityId);
|
|
2338
2389
|
}
|
|
@@ -2344,9 +2395,7 @@ var JobManager = class {
|
|
|
2344
2395
|
} else if (operation === "global_import") {
|
|
2345
2396
|
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
2346
2397
|
} else {
|
|
2347
|
-
|
|
2348
|
-
const keyFn = this[keyFnName];
|
|
2349
|
-
this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
|
|
2398
|
+
this.activeMaintenanceJobs.delete(this.lockKeyFns[operation](entityId));
|
|
2350
2399
|
}
|
|
2351
2400
|
this._notifyStatusSubscribers(entityId);
|
|
2352
2401
|
}
|
|
@@ -2460,15 +2509,38 @@ var JobManager = class {
|
|
|
2460
2509
|
|
|
2461
2510
|
// src/services/WriteService.ts
|
|
2462
2511
|
var WriteService = class {
|
|
2463
|
-
constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
|
|
2512
|
+
constructor(db, options, entryRepo, eventRepo, metadataRepo, jobManager, maintenanceService) {
|
|
2464
2513
|
this.db = db;
|
|
2465
2514
|
this.options = options;
|
|
2515
|
+
this.entryRepo = entryRepo;
|
|
2466
2516
|
this.eventRepo = eventRepo;
|
|
2467
2517
|
this.metadataRepo = metadataRepo;
|
|
2468
2518
|
this.jobManager = jobManager;
|
|
2469
2519
|
this.maintenanceService = maintenanceService;
|
|
2470
2520
|
}
|
|
2471
2521
|
async write(entityId, event) {
|
|
2522
|
+
if (typeof entityId !== "string" || entityId.length === 0 || entityId.length > 200 || entityId.includes("\0")) {
|
|
2523
|
+
throw new TypeError(
|
|
2524
|
+
`Invalid entityId: must be a non-empty string at most 200 chars with no null bytes; got ${JSON.stringify(entityId)}.`
|
|
2525
|
+
);
|
|
2526
|
+
}
|
|
2527
|
+
if (event === null || typeof event !== "object" || Array.isArray(event)) {
|
|
2528
|
+
throw new TypeError("Invalid event: must be a non-null object.");
|
|
2529
|
+
}
|
|
2530
|
+
if (typeof event.summary !== "string") {
|
|
2531
|
+
throw new TypeError("Invalid event.summary: must be a string.");
|
|
2532
|
+
}
|
|
2533
|
+
const summary = clip(event.summary, 4e3);
|
|
2534
|
+
let relatedEntryId = null;
|
|
2535
|
+
const rawRelatedEntryId = event.related_entry_id;
|
|
2536
|
+
if (rawRelatedEntryId != null && rawRelatedEntryId !== "") {
|
|
2537
|
+
if (typeof rawRelatedEntryId !== "string" || rawRelatedEntryId.length > 200 || rawRelatedEntryId.includes("\0")) {
|
|
2538
|
+
relatedEntryId = null;
|
|
2539
|
+
} else {
|
|
2540
|
+
const existing = await this.entryRepo.findByIds([rawRelatedEntryId], [entityId]);
|
|
2541
|
+
relatedEntryId = existing.length > 0 ? rawRelatedEntryId : null;
|
|
2542
|
+
}
|
|
2543
|
+
}
|
|
2472
2544
|
const id = generateId("evt_");
|
|
2473
2545
|
const now = Date.now();
|
|
2474
2546
|
let eventType = event.event_type;
|
|
@@ -2479,8 +2551,8 @@ var WriteService = class {
|
|
|
2479
2551
|
id,
|
|
2480
2552
|
entity_id: entityId,
|
|
2481
2553
|
event_type: eventType,
|
|
2482
|
-
summary
|
|
2483
|
-
related_entry_id:
|
|
2554
|
+
summary,
|
|
2555
|
+
related_entry_id: relatedEntryId,
|
|
2484
2556
|
created_at: now
|
|
2485
2557
|
};
|
|
2486
2558
|
let shouldRunLibrarian = false;
|