@equationalapplications/core-llm-wiki 4.14.0 → 4.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/{chunk-6FWG2DG4.mjs → chunk-24ANTHZB.mjs} +77 -23
- package/dist/chunk-24ANTHZB.mjs.map +1 -0
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +84 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +11 -4
- package/dist/index.mjs.map +1 -1
- package/dist/{testing-CDIDE4Jd.d.mts → testing-DW1qufP0.d.mts} +15 -1
- package/dist/{testing-CDIDE4Jd.d.ts → testing-DW1qufP0.d.ts} +15 -1
- package/dist/testing.d.mts +1 -1
- package/dist/testing.d.ts +1 -1
- package/dist/testing.js +290 -236
- package/dist/testing.js.map +1 -1
- package/dist/testing.mjs +1 -1
- package/package.json +2 -2
- package/dist/chunk-6FWG2DG4.mjs.map +0 -1
package/dist/testing.js
CHANGED
|
@@ -29,6 +29,221 @@ var PrunePartialFailureError = class extends Error {
|
|
|
29
29
|
};
|
|
30
30
|
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
31
31
|
|
|
32
|
+
// src/utils/pure.ts
|
|
33
|
+
/**
 * Extracts and parses the first JSON object or array embedded in a text
 * response (for example an LLM reply that wraps JSON in prose or a code fence).
 *
 * The scan starts at whichever of `{` or `[` appears first and walks forward,
 * tracking string literals and backslash escapes, until the matching closing
 * character brings the nesting depth back to zero. Only the chosen bracket
 * type is counted; the other type is left for JSON.parse to validate.
 *
 * @param {string} text - Raw response text.
 * @returns {*} The value produced by JSON.parse for the extracted span.
 * @throws {SyntaxError} When no opening bracket exists, when the structure is
 *   never closed, or when JSON.parse rejects the extracted span.
 */
function parseJsonResponse(text) {
  const notFound = "No JSON object/array found in LLM response";
  const bracePos = text.indexOf("{");
  const bracketPos = text.indexOf("[");
  if (bracePos === -1 && bracketPos === -1) {
    throw new SyntaxError(notFound);
  }

  // Prefer the object form only when a brace exists and precedes any bracket.
  const objectFirst = bracePos !== -1 && (bracketPos === -1 || bracePos < bracketPos);
  const begin = objectFirst ? bracePos : bracketPos;
  const [opener, closer] = objectFirst ? ["{", "}"] : ["[", "]"];

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  for (let pos = begin; pos < text.length; pos += 1) {
    const ch = text[pos];
    if (skipNext) {
      // Character following a backslash inside a string: never structural.
      skipNext = false;
    } else if (insideString) {
      if (ch === "\\") {
        skipNext = true;
      } else if (ch === '"') {
        insideString = false;
      }
    } else if (ch === '"') {
      insideString = true;
    } else if (ch === opener) {
      nesting += 1;
    } else if (ch === closer) {
      nesting -= 1;
      if (nesting === 0) {
        return JSON.parse(text.slice(begin, pos + 1));
      }
    }
  }
  throw new SyntaxError(notFound);
}
|
|
85
|
+
/**
 * Converts a thrown value into an Error that is safe to surface.
 *
 * When `sanitizeRankerErrors` is exactly `false` the original error is passed
 * through (non-Error values are wrapped via String()). Otherwise a new Error
 * is built that carries only the type name of the original; the original
 * message is dropped. If the original Error had a `cause`, the new Error gets
 * a cause that names only the cause's type.
 *
 * @param {*} err - The thrown value.
 * @param {boolean} [sanitizeRankerErrors] - Pass `false` to disable scrubbing.
 * @returns {Error} The original or a scrubbed Error.
 */
function sanitizeRankerError(err, sanitizeRankerErrors) {
  const isError = err instanceof Error;

  if (sanitizeRankerErrors === false) {
    if (isError) {
      return err;
    }
    return new Error(String(err));
  }

  let label;
  let options;
  if (isError) {
    label = err.constructor?.name ?? "Error";
    if (err.cause !== undefined) {
      const causeLabel = err.cause?.constructor?.name ?? typeof err.cause;
      options = { cause: new Error(`Caused by: ${causeLabel}`) };
    }
  } else {
    // Non-Error throwables are described by their primitive type only.
    label = typeof err;
  }

  const scrubbed = new Error(`VectorRanker ${label} (message scrubbed for security)`, options);
  scrubbed.name = label;
  return scrubbed;
}
|
|
98
|
+
/**
 * Slices a string without cutting a UTF-16 surrogate pair in half.
 *
 * Negative indices count from the end and out-of-range indices are clamped.
 * If start ends up greater than end the two are swapped. A start index that
 * falls between a high and low surrogate is moved back one unit (the pair is
 * included); an end index that falls between them is also moved back one
 * unit (the pair is excluded).
 *
 * @param {string} value - Source string.
 * @param {number} start - Start index (may be negative).
 * @param {number} [end] - End index, exclusive (may be negative); defaults to the string length.
 * @returns {string} The extracted substring.
 */
function safeSlice(value, start, end) {
  const size = value.length;
  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
  // True when `idx` sits between the two halves of a surrogate pair.
  const splitsPair = (idx) =>
    idx > 0 &&
    idx < size &&
    isHighSurrogate(value.charCodeAt(idx - 1)) &&
    isLowSurrogate(value.charCodeAt(idx));
  const resolveIndex = (idx) => (idx < 0 ? Math.max(size + idx, 0) : Math.min(idx, size));

  let from = resolveIndex(start);
  let to = end === undefined ? size : resolveIndex(end);
  if (from > to) {
    const swap = from;
    from = to;
    to = swap;
  }
  if (splitsPair(from)) {
    from -= 1;
  }
  if (splitsPair(to)) {
    to -= 1;
  }
  return value.slice(from, to);
}
|
|
113
|
+
/**
 * Splits text into overlapping chunks, preferring natural boundaries.
 *
 * The input is trimmed first. For each window of `maxChunkLength` UTF-16 units
 * starting at the cursor, a split point is looked for in the second half of
 * the window, in this order of preference:
 *   1. just after the last blank line ("\n\n"),
 *   2. just after the last sentence terminator (. ! ?) followed by whitespace,
 *   3. just after the last whitespace character,
 *   4. the end of the window (hard cut; sets `truncated`).
 * The next window starts `overlap` units before the split point, but always at
 * least one unit past the previous cursor so the loop makes progress.
 * Slicing goes through `safeSlice`, so surrogate pairs are not cut in half.
 *
 * @param {string} input - Text to split.
 * @param {number} maxChunkLength - Window size; integer >= 2.
 * @param {number} overlap - Units shared between consecutive chunks; integer in [0, maxChunkLength).
 * @returns {{chunks: string[], truncated: boolean}} The chunks, and whether any
 *   window had to be cut at a non-boundary position.
 * @throws {Error} When `maxChunkLength` or `overlap` is out of range (only
 *   checked for non-empty input).
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }

  const whitespace = /\s/;
  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);

  while (cursor < text.length) {
    if (text.length - cursor <= maxChunkLength) {
      // Everything left fits in one window.
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }

    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // Paragraph break. Searching from windowEnd - 2 guarantees the two-character
    // break ends inside the window; searching from windowEnd could land on a
    // break straddling the window edge and hide an earlier, usable one.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    // Sentence end: last terminator in [minSplit, windowEnd - 2] followed by whitespace.
    if (splitPoint === -1) {
      for (let i = windowEnd - 2; i >= minSplit; i--) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && whitespace.test(text[i + 1])) {
          splitPoint = i + 2;
          break;
        }
      }
    }

    // Any whitespace in the second half of the window.
    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (whitespace.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    // No boundary found: hard cut at the window edge.
    if (splitPoint === -1) {
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }

  return { chunks, truncated };
}
|
|
167
|
+
/**
 * Runs async task factories with at most `limit` in flight at a time and
 * returns their results in input order.
 *
 * Workers pull the next unclaimed index until the list is exhausted or a task
 * fails. After the first failure no new tasks are started; tasks already in
 * flight are awaited, then the first error is rethrown.
 *
 * @param {Array<function(): (Promise<*>|*)>} tasks - Functions that start the work when called.
 * @param {number} [limit] - Maximum concurrent tasks. Values below 1 are raised
 *   to 1, fractions are rounded down, and a missing or non-numeric value falls
 *   back to 1 (sequential) instead of starting no workers at all.
 * @returns {Promise<Array<*>>} Results positioned by task index.
 * @throws {*} The first error thrown or rejected by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let index = 0;
  let failed = false;
  let firstError;

  async function worker() {
    while (index < tasks.length && !failed) {
      const i = index++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }

  // Math.max(NaN, 1) is NaN, and Array.from({ length: NaN }) creates zero
  // workers, which would resolve with an empty sparse array without running
  // anything. Normalise the limit before using it as a length.
  const requested = Number(limit);
  const width = Number.isNaN(requested) ? 1 : Math.max(Math.floor(requested), 1);
  const workerCount = Math.min(width, tasks.length);

  await Promise.allSettled(Array.from({ length: workerCount }, worker));
  if (failed) throw firstError;
  return results;
}
|
|
191
|
+
/**
 * Trims a string and limits it to `max` UTF-16 units.
 *
 * Non-string values yield an empty string. When the trimmed text is longer
 * than `max` it is cut with `safeSlice` and any trailing whitespace left by
 * the cut is removed.
 *
 * @param {*} value - Candidate text.
 * @param {number} max - Maximum length of the result.
 * @returns {string} The trimmed, length-limited text.
 */
function clip(value, max) {
  if (typeof value !== "string") {
    return "";
  }
  const trimmed = value.trim();
  if (trimmed.length <= max) {
    return trimmed;
  }
  return safeSlice(trimmed, 0, max).trimEnd();
}
|
|
196
|
+
/**
 * Normalises a list of tags.
 *
 * Keeps only string entries, trims and lower-cases them, drops entries that
 * are empty or longer than 40 characters, and returns at most the first six
 * survivors. Anything that is not an array yields an empty list.
 *
 * @param {*} tags - Candidate tag list.
 * @returns {string[]} Up to six cleaned tags, in input order.
 */
function validateTags(tags) {
  if (!Array.isArray(tags)) {
    return [];
  }
  const accepted = [];
  for (const candidate of tags) {
    if (accepted.length === 6) {
      break;
    }
    if (typeof candidate !== "string") {
      continue;
    }
    const tag = candidate.trim().toLowerCase();
    if (tag.length === 0 || tag.length > 40) {
      continue;
    }
    accepted.push(tag);
  }
  return accepted;
}
|
|
200
|
+
/**
 * Validates and normalises a fact record.
 *
 * Requires string `title` and `body`; both are clipped (80 and 800 units) and
 * must be non-empty afterwards. `confidence` is kept only when it is
 * "certain" or "tentative", otherwise it becomes "inferred". `tags` are passed
 * through `validateTags`. All other properties are copied unchanged.
 *
 * @param {*} fact - Candidate fact.
 * @returns {?Object} A normalised copy, or null when the fact is unusable.
 */
function validateFact(fact) {
  const hasTextFields = typeof fact?.title === "string" && typeof fact?.body === "string";
  if (!hasTextFields) {
    return null;
  }

  const title = clip(fact.title, 80);
  const body = clip(fact.body, 800);
  if (!title || !body) {
    return null;
  }

  const declared = fact.confidence;
  const confidence = declared === "certain" || declared === "tentative" ? declared : "inferred";
  const tags = validateTags(fact.tags);

  return { ...fact, title, body, confidence, tags };
}
|
|
215
|
+
/**
 * Validates and normalises a task record.
 *
 * Requires a string `description` that is non-empty after clipping to 200
 * units. `priority` is rounded and clamped to the range 0..10; anything that
 * is not a finite number is treated as 0. All other properties are copied
 * unchanged.
 *
 * @param {*} task - Candidate task.
 * @returns {?Object} A normalised copy, or null when the task is unusable.
 */
function validateTask(task) {
  if (typeof task?.description !== "string") {
    return null;
  }
  const description = clip(task.description, 200);
  if (!description) {
    return null;
  }

  // Number.isFinite is false for every non-number, so no separate typeof check is needed.
  const rawPriority = Number.isFinite(task.priority) ? task.priority : 0;
  const rounded = Math.round(rawPriority);
  const priority = Math.max(0, Math.min(10, rounded));

  return { ...task, description, priority };
}
|
|
228
|
+
/**
 * Cleans a source reference string.
 *
 * Removes every character other than ASCII letters, digits, dot, underscore,
 * hyphen and space, trims the result, and keeps at most the first 255
 * characters.
 *
 * @param {*} value - Candidate reference.
 * @returns {?string} The cleaned reference, or null when the input is not a
 *   string or nothing remains after cleaning.
 */
function normalizeSourceRef(value) {
  if (typeof value !== "string") {
    return null;
  }
  const allowedOnly = value.replace(/[^A-Za-z0-9._\- ]/g, "");
  const ref = allowedOnly.trim().slice(0, 255);
  if (ref.length === 0) {
    return null;
  }
  return ref;
}
|
|
233
|
+
/**
 * Accepts a hash only when it is exactly 64 hexadecimal characters and
 * returns it in lower case.
 *
 * @param {*} value - Candidate hash.
 * @returns {?string} The lower-cased hash, or null when the input is not a
 *   string or does not match the expected shape.
 */
function normalizeSourceHash(value) {
  if (typeof value !== "string") {
    return null;
  }
  const isHex64 = /^[0-9a-f]{64}$/i.test(value);
  if (!isHex64) {
    return null;
  }
  return value.toLowerCase();
}
|
|
237
|
+
/**
 * Breaks a title into a set of lower-cased word tokens for similarity
 * comparison.
 *
 * Punctuation and symbols are removed (not replaced by a space), the text is
 * split on whitespace, and tokens shorter than three UTF-16 units are dropped.
 *
 * Letters, digits and combining marks from any script are kept. The previous
 * ASCII-only filter deleted every non-ASCII letter, so "café" became "caf"
 * and titles in non-Latin scripts produced an empty set. Output for
 * ASCII-only titles is unchanged.
 *
 * @param {string} title - Title text.
 * @returns {Set<string>} Distinct tokens of length >= 3.
 */
function titleTokens(title) {
  // NFC so composed and decomposed spellings of the same letter yield the same token.
  const normalized = title.normalize("NFC").toLowerCase();
  const lettersOnly = normalized.replace(/[^\p{L}\p{N}\p{M}\s]/gu, "");
  return new Set(lettersOnly.split(/\s+/).filter((token) => token.length >= 3));
}
|
|
240
|
+
/**
 * Computes the Jaccard similarity of two sets: the number of shared members
 * divided by the number of distinct members overall.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardScore(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 0;
  }
  let shared = 0;
  for (const member of a) {
    if (b.has(member)) {
      shared += 1;
    }
  }
  // Inclusion-exclusion gives the union size without building the union.
  const unionSize = a.size + b.size - shared;
  return shared / unionSize;
}
|
|
246
|
+
|
|
32
247
|
// src/services/EmbeddingService.ts
|
|
33
248
|
var EmbeddingService = class {
|
|
34
249
|
constructor(db, options, entryRepo, metadataRepo) {
|
|
@@ -76,7 +291,7 @@ var EmbeddingService = class {
|
|
|
76
291
|
tagsStr = fact.tags;
|
|
77
292
|
}
|
|
78
293
|
}
|
|
79
|
-
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
294
|
+
const text = clip(`${fact.title} ${fact.body} ${tagsStr}`.trim(), 16e3);
|
|
80
295
|
try {
|
|
81
296
|
const vector = await embedFn(text);
|
|
82
297
|
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -155,6 +370,9 @@ var EmbeddingService = class {
|
|
|
155
370
|
};
|
|
156
371
|
|
|
157
372
|
// src/services/ImportExportService.ts
|
|
373
|
+
var MAX_EMBEDDING_BLOB_BYTES = 32 * 1024;
|
|
374
|
+
var IMPORT_TITLE_MAX = 500;
|
|
375
|
+
var IMPORT_BODY_MAX = 8e3;
|
|
158
376
|
var ImportExportService = class {
|
|
159
377
|
constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
|
|
160
378
|
this.db = db;
|
|
@@ -236,6 +454,7 @@ var ImportExportService = class {
|
|
|
236
454
|
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
237
455
|
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
238
456
|
const softDeletedFactIds = [];
|
|
457
|
+
const clippedTextByFactId = /* @__PURE__ */ new Map();
|
|
239
458
|
await this.db.withTransactionAsync(async (tx) => {
|
|
240
459
|
if (!merge) {
|
|
241
460
|
const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
|
|
@@ -272,21 +491,32 @@ var ImportExportService = class {
|
|
|
272
491
|
const rawBlobRaw = fact.embedding_blob;
|
|
273
492
|
let rawBlob = null;
|
|
274
493
|
if (rawBlobRaw instanceof Uint8Array) {
|
|
275
|
-
|
|
494
|
+
if (rawBlobRaw.byteLength <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
495
|
+
rawBlob = rawBlobRaw;
|
|
496
|
+
}
|
|
276
497
|
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
277
498
|
const obj = rawBlobRaw;
|
|
278
499
|
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
279
|
-
|
|
500
|
+
const data = obj["data"];
|
|
501
|
+
if (data.length <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
502
|
+
rawBlob = new Uint8Array(data);
|
|
503
|
+
}
|
|
280
504
|
} else if (!Array.isArray(rawBlobRaw)) {
|
|
281
505
|
const entries = Object.keys(obj);
|
|
282
506
|
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
283
507
|
const len = entries.length;
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
508
|
+
if (len <= MAX_EMBEDDING_BLOB_BYTES) {
|
|
509
|
+
rawBlob = new Uint8Array(len);
|
|
510
|
+
for (let i = 0; i < len; i++) {
|
|
511
|
+
rawBlob[i] = obj[String(i)] ?? 0;
|
|
512
|
+
}
|
|
513
|
+
}
|
|
287
514
|
}
|
|
288
515
|
}
|
|
289
516
|
}
|
|
517
|
+
if (rawBlob !== null && rawBlob.byteLength > MAX_EMBEDDING_BLOB_BYTES) {
|
|
518
|
+
rawBlob = null;
|
|
519
|
+
}
|
|
290
520
|
let blobData = null;
|
|
291
521
|
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
292
522
|
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
@@ -316,11 +546,14 @@ var ImportExportService = class {
|
|
|
316
546
|
}
|
|
317
547
|
if (merge && safeUpdatedAt <= existing.updated_at) continue;
|
|
318
548
|
}
|
|
549
|
+
const safeTitle = clip(String(fact.title ?? ""), IMPORT_TITLE_MAX);
|
|
550
|
+
const safeBody = clip(String(fact.body ?? ""), IMPORT_BODY_MAX);
|
|
551
|
+
clippedTextByFactId.set(fact.id, { title: safeTitle, body: safeBody });
|
|
319
552
|
const factObj = {
|
|
320
553
|
id: fact.id,
|
|
321
554
|
entity_id: entityId,
|
|
322
|
-
title:
|
|
323
|
-
body:
|
|
555
|
+
title: safeTitle,
|
|
556
|
+
body: safeBody,
|
|
324
557
|
tags: Array.isArray(fact.tags) ? fact.tags : [],
|
|
325
558
|
confidence: fact.confidence,
|
|
326
559
|
source_type: sourceType,
|
|
@@ -408,11 +641,12 @@ var ImportExportService = class {
|
|
|
408
641
|
await this.searchService.sync(entityId);
|
|
409
642
|
for (const fact of bundle.facts) {
|
|
410
643
|
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
644
|
+
const clipped = clippedTextByFactId.get(fact.id);
|
|
411
645
|
const embedded = await this.embeddingService.embedFact({
|
|
412
646
|
id: fact.id,
|
|
413
647
|
entity_id: entityId,
|
|
414
|
-
title: fact.title,
|
|
415
|
-
body: fact.body,
|
|
648
|
+
title: clipped?.title ?? fact.title,
|
|
649
|
+
body: clipped?.body ?? fact.body,
|
|
416
650
|
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
417
651
|
});
|
|
418
652
|
if (!embedded) {
|
|
@@ -512,7 +746,7 @@ var ImportExportService = class {
|
|
|
512
746
|
}
|
|
513
747
|
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
514
748
|
console.warn(
|
|
515
|
-
`[WikiMemory] importDump: ${type} id
|
|
749
|
+
`[WikiMemory] importDump: ${type} id ${JSON.stringify(id)} already belongs to entity ${JSON.stringify(existingEntityId)}; skipping for entity ${JSON.stringify(targetEntityId)}`
|
|
516
750
|
);
|
|
517
751
|
}
|
|
518
752
|
_normalizeImportedSourceType(raw, ctx) {
|
|
@@ -544,221 +778,6 @@ After running the migration SQL, restart your application.`
|
|
|
544
778
|
}
|
|
545
779
|
};
|
|
546
780
|
|
|
547
|
-
// src/utils/pure.ts
|
|
548
|
-
function parseJsonResponse(text) {
|
|
549
|
-
const firstBrace = text.indexOf("{");
|
|
550
|
-
const firstBracket = text.indexOf("[");
|
|
551
|
-
let start;
|
|
552
|
-
let openChar;
|
|
553
|
-
let closeChar;
|
|
554
|
-
const useBrace = firstBrace !== -1 && (firstBracket === -1 || firstBrace < firstBracket);
|
|
555
|
-
if (useBrace) {
|
|
556
|
-
start = firstBrace;
|
|
557
|
-
openChar = "{";
|
|
558
|
-
closeChar = "}";
|
|
559
|
-
} else if (firstBracket !== -1) {
|
|
560
|
-
start = firstBracket;
|
|
561
|
-
openChar = "[";
|
|
562
|
-
closeChar = "]";
|
|
563
|
-
} else {
|
|
564
|
-
throw new SyntaxError("No JSON object/array found in LLM response");
|
|
565
|
-
}
|
|
566
|
-
let depth = 0;
|
|
567
|
-
let inString = false;
|
|
568
|
-
let escape = false;
|
|
569
|
-
let end = -1;
|
|
570
|
-
for (let i = start; i < text.length; i++) {
|
|
571
|
-
const ch = text[i];
|
|
572
|
-
if (escape) {
|
|
573
|
-
escape = false;
|
|
574
|
-
continue;
|
|
575
|
-
}
|
|
576
|
-
if (ch === "\\" && inString) {
|
|
577
|
-
escape = true;
|
|
578
|
-
continue;
|
|
579
|
-
}
|
|
580
|
-
if (ch === '"') {
|
|
581
|
-
inString = !inString;
|
|
582
|
-
continue;
|
|
583
|
-
}
|
|
584
|
-
if (inString) continue;
|
|
585
|
-
if (ch === openChar) {
|
|
586
|
-
depth++;
|
|
587
|
-
continue;
|
|
588
|
-
}
|
|
589
|
-
if (ch === closeChar) {
|
|
590
|
-
depth--;
|
|
591
|
-
if (depth === 0) {
|
|
592
|
-
end = i;
|
|
593
|
-
break;
|
|
594
|
-
}
|
|
595
|
-
}
|
|
596
|
-
}
|
|
597
|
-
if (end === -1) throw new SyntaxError("No JSON object/array found in LLM response");
|
|
598
|
-
return JSON.parse(text.slice(start, end + 1));
|
|
599
|
-
}
|
|
600
|
-
function sanitizeRankerError(err, sanitizeRankerErrors) {
|
|
601
|
-
if (sanitizeRankerErrors === false) {
|
|
602
|
-
return err instanceof Error ? err : new Error(String(err));
|
|
603
|
-
}
|
|
604
|
-
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
605
|
-
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
606
|
-
const sanitized = new Error(
|
|
607
|
-
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
608
|
-
innerCause ? { cause: innerCause } : void 0
|
|
609
|
-
);
|
|
610
|
-
sanitized.name = typeName;
|
|
611
|
-
return sanitized;
|
|
612
|
-
}
|
|
613
|
-
function safeSlice(value, start, end) {
|
|
614
|
-
const length = value.length;
|
|
615
|
-
let safeStart = start < 0 ? Math.max(length + start, 0) : Math.min(start, length);
|
|
616
|
-
let safeEnd = end === void 0 ? length : end < 0 ? Math.max(length + end, 0) : Math.min(end, length);
|
|
617
|
-
if (safeStart > safeEnd) {
|
|
618
|
-
[safeStart, safeEnd] = [safeEnd, safeStart];
|
|
619
|
-
}
|
|
620
|
-
if (safeStart > 0 && safeStart < length && value.charCodeAt(safeStart) >= 56320 && value.charCodeAt(safeStart) <= 57343 && value.charCodeAt(safeStart - 1) >= 55296 && value.charCodeAt(safeStart - 1) <= 56319) {
|
|
621
|
-
safeStart--;
|
|
622
|
-
}
|
|
623
|
-
if (safeEnd > 0 && safeEnd < length && value.charCodeAt(safeEnd - 1) >= 55296 && value.charCodeAt(safeEnd - 1) <= 56319 && value.charCodeAt(safeEnd) >= 56320 && value.charCodeAt(safeEnd) <= 57343) {
|
|
624
|
-
safeEnd--;
|
|
625
|
-
}
|
|
626
|
-
return value.slice(safeStart, safeEnd);
|
|
627
|
-
}
|
|
628
|
-
function chunkText(input, maxChunkLength, overlap) {
|
|
629
|
-
const text = input.trim();
|
|
630
|
-
if (text.length === 0) return { chunks: [], truncated: false };
|
|
631
|
-
if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
|
|
632
|
-
throw new Error("maxChunkLength must be an integer >= 2");
|
|
633
|
-
}
|
|
634
|
-
if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
|
|
635
|
-
throw new Error("overlap must be a non-negative integer < maxChunkLength");
|
|
636
|
-
}
|
|
637
|
-
const chunks = [];
|
|
638
|
-
let truncated = false;
|
|
639
|
-
let cursor = 0;
|
|
640
|
-
const halfMax = Math.floor(maxChunkLength / 2);
|
|
641
|
-
while (cursor < text.length) {
|
|
642
|
-
const remaining = text.length - cursor;
|
|
643
|
-
if (remaining <= maxChunkLength) {
|
|
644
|
-
chunks.push(safeSlice(text, cursor, text.length));
|
|
645
|
-
break;
|
|
646
|
-
}
|
|
647
|
-
const windowEnd = cursor + maxChunkLength;
|
|
648
|
-
const minSplit = cursor + halfMax;
|
|
649
|
-
let splitPoint = -1;
|
|
650
|
-
const paraIdx = text.lastIndexOf("\n\n", windowEnd);
|
|
651
|
-
if (paraIdx >= minSplit && paraIdx + 2 <= windowEnd) {
|
|
652
|
-
splitPoint = paraIdx + 2;
|
|
653
|
-
}
|
|
654
|
-
if (splitPoint === -1) {
|
|
655
|
-
let lastTerm = -1;
|
|
656
|
-
for (let i = minSplit; i < windowEnd - 1; i++) {
|
|
657
|
-
const ch = text[i];
|
|
658
|
-
if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
|
|
659
|
-
lastTerm = i + 2;
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
if (lastTerm !== -1 && lastTerm <= windowEnd) splitPoint = lastTerm;
|
|
663
|
-
}
|
|
664
|
-
if (splitPoint === -1) {
|
|
665
|
-
for (let i = windowEnd - 1; i >= minSplit; i--) {
|
|
666
|
-
if (/\s/.test(text[i])) {
|
|
667
|
-
splitPoint = i + 1;
|
|
668
|
-
break;
|
|
669
|
-
}
|
|
670
|
-
}
|
|
671
|
-
}
|
|
672
|
-
if (splitPoint === -1) {
|
|
673
|
-
truncated = true;
|
|
674
|
-
splitPoint = windowEnd;
|
|
675
|
-
}
|
|
676
|
-
chunks.push(safeSlice(text, cursor, splitPoint));
|
|
677
|
-
const next = Math.max(splitPoint - overlap, cursor + 1);
|
|
678
|
-
cursor = next;
|
|
679
|
-
}
|
|
680
|
-
return { chunks, truncated };
|
|
681
|
-
}
|
|
682
|
-
async function withConcurrency(tasks, limit) {
|
|
683
|
-
const results = new Array(tasks.length);
|
|
684
|
-
let index = 0;
|
|
685
|
-
let failed = false;
|
|
686
|
-
let firstError;
|
|
687
|
-
async function worker() {
|
|
688
|
-
while (index < tasks.length && !failed) {
|
|
689
|
-
const i = index++;
|
|
690
|
-
try {
|
|
691
|
-
results[i] = await tasks[i]();
|
|
692
|
-
} catch (e) {
|
|
693
|
-
if (!failed) {
|
|
694
|
-
failed = true;
|
|
695
|
-
firstError = e;
|
|
696
|
-
}
|
|
697
|
-
return;
|
|
698
|
-
}
|
|
699
|
-
}
|
|
700
|
-
}
|
|
701
|
-
const workerCount = tasks.length === 0 ? 0 : Math.min(Math.max(limit, 1), tasks.length);
|
|
702
|
-
await Promise.allSettled(Array.from({ length: workerCount }, worker));
|
|
703
|
-
if (failed) throw firstError;
|
|
704
|
-
return results;
|
|
705
|
-
}
|
|
706
|
-
function clip(value, max) {
|
|
707
|
-
if (typeof value !== "string") return "";
|
|
708
|
-
const s = value.trim();
|
|
709
|
-
return s.length <= max ? s : safeSlice(s, 0, max).trimEnd();
|
|
710
|
-
}
|
|
711
|
-
function validateTags(tags) {
|
|
712
|
-
if (!Array.isArray(tags)) return [];
|
|
713
|
-
return tags.filter((t) => typeof t === "string").map((t) => t.trim().toLowerCase()).filter((t) => t.length > 0 && t.length <= 40).slice(0, 6);
|
|
714
|
-
}
|
|
715
|
-
function validateFact(fact) {
|
|
716
|
-
if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
|
|
717
|
-
const title = clip(fact.title, 80);
|
|
718
|
-
const body = clip(fact.body, 800);
|
|
719
|
-
if (!title || !body) return null;
|
|
720
|
-
let confidence = fact.confidence;
|
|
721
|
-
if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
|
|
722
|
-
return {
|
|
723
|
-
...fact,
|
|
724
|
-
title,
|
|
725
|
-
body,
|
|
726
|
-
confidence,
|
|
727
|
-
tags: validateTags(fact.tags)
|
|
728
|
-
};
|
|
729
|
-
}
|
|
730
|
-
function validateTask(task) {
|
|
731
|
-
if (typeof task?.description !== "string") return null;
|
|
732
|
-
const description = clip(task.description, 200);
|
|
733
|
-
if (!description) return null;
|
|
734
|
-
let priority = task.priority;
|
|
735
|
-
if (typeof priority !== "number" || !isFinite(priority)) priority = 0;
|
|
736
|
-
priority = Math.max(0, Math.min(10, Math.round(priority)));
|
|
737
|
-
return {
|
|
738
|
-
...task,
|
|
739
|
-
description,
|
|
740
|
-
priority
|
|
741
|
-
};
|
|
742
|
-
}
|
|
743
|
-
function normalizeSourceRef(value) {
|
|
744
|
-
if (typeof value !== "string") return null;
|
|
745
|
-
const cleaned = value.replace(/[^A-Za-z0-9._\- ]/g, "").trim().slice(0, 255);
|
|
746
|
-
return cleaned.length > 0 ? cleaned : null;
|
|
747
|
-
}
|
|
748
|
-
function normalizeSourceHash(value) {
|
|
749
|
-
if (typeof value !== "string") return null;
|
|
750
|
-
return /^[0-9a-f]{64}$/i.test(value) ? value.toLowerCase() : null;
|
|
751
|
-
}
|
|
752
|
-
function titleTokens(title) {
|
|
753
|
-
return new Set(title.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3));
|
|
754
|
-
}
|
|
755
|
-
function jaccardScore(a, b) {
|
|
756
|
-
if (a.size === 0 && b.size === 0) return 0;
|
|
757
|
-
const intersection = new Set([...a].filter((x) => b.has(x)));
|
|
758
|
-
const union = /* @__PURE__ */ new Set([...a, ...b]);
|
|
759
|
-
return intersection.size / union.size;
|
|
760
|
-
}
|
|
761
|
-
|
|
762
781
|
// src/utils/ids.ts
|
|
763
782
|
function generateId(prefix = "") {
|
|
764
783
|
if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
|
|
@@ -769,7 +788,9 @@ function generateId(prefix = "") {
|
|
|
769
788
|
crypto.getRandomValues(bytes);
|
|
770
789
|
return prefix + Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("").substring(0, 24);
|
|
771
790
|
}
|
|
772
|
-
|
|
791
|
+
throw new Error(
|
|
792
|
+
"generateId: no cryptographically secure random source available (crypto.randomUUID and crypto.getRandomValues are both missing)."
|
|
793
|
+
);
|
|
773
794
|
}
|
|
774
795
|
|
|
775
796
|
// src/prompts.ts
|
|
@@ -2181,6 +2202,20 @@ var JobManager = class {
|
|
|
2181
2202
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
2182
2203
|
this.activeIngestJobs = /* @__PURE__ */ new Map();
|
|
2183
2204
|
this.statusSubscribers = /* @__PURE__ */ new Map();
|
|
2205
|
+
/**
|
|
2206
|
+
* Lookup table for acquireLock/releaseLock's dynamic-dispatch branch.
|
|
2207
|
+
* Excludes 'ingest' | 'global_reembed' | 'global_import', which those
|
|
2208
|
+
* methods already handle via explicit if/else branches before reaching
|
|
2209
|
+
* this table.
|
|
2210
|
+
*/
|
|
2211
|
+
this.lockKeyFns = {
|
|
2212
|
+
prune: (id) => this._pruneKey(id),
|
|
2213
|
+
librarian: (id) => this._librarianKey(id),
|
|
2214
|
+
heal: (id) => this._healKey(id),
|
|
2215
|
+
reembed: (id) => this._reembedKey(id),
|
|
2216
|
+
import: (id) => this._importKey(id),
|
|
2217
|
+
forget: (id) => this._forgetKey(id)
|
|
2218
|
+
};
|
|
2184
2219
|
}
|
|
2185
2220
|
_pruneKey(entityId) {
|
|
2186
2221
|
return `${this.prefix}:${entityId}:prune`;
|
|
@@ -2330,9 +2365,7 @@ var JobManager = class {
|
|
|
2330
2365
|
} else if (operation === "global_import") {
|
|
2331
2366
|
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
2332
2367
|
} else {
|
|
2333
|
-
|
|
2334
|
-
const keyFn = this[keyFnName];
|
|
2335
|
-
this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
|
|
2368
|
+
this.activeMaintenanceJobs.add(this.lockKeyFns[operation](entityId));
|
|
2336
2369
|
}
|
|
2337
2370
|
this._notifyStatusSubscribers(entityId);
|
|
2338
2371
|
}
|
|
@@ -2344,9 +2377,7 @@ var JobManager = class {
|
|
|
2344
2377
|
} else if (operation === "global_import") {
|
|
2345
2378
|
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
2346
2379
|
} else {
|
|
2347
|
-
|
|
2348
|
-
const keyFn = this[keyFnName];
|
|
2349
|
-
this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
|
|
2380
|
+
this.activeMaintenanceJobs.delete(this.lockKeyFns[operation](entityId));
|
|
2350
2381
|
}
|
|
2351
2382
|
this._notifyStatusSubscribers(entityId);
|
|
2352
2383
|
}
|
|
@@ -2460,15 +2491,38 @@ var JobManager = class {
|
|
|
2460
2491
|
|
|
2461
2492
|
// src/services/WriteService.ts
|
|
2462
2493
|
var WriteService = class {
|
|
2463
|
-
constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
|
|
2494
|
+
constructor(db, options, entryRepo, eventRepo, metadataRepo, jobManager, maintenanceService) {
|
|
2464
2495
|
this.db = db;
|
|
2465
2496
|
this.options = options;
|
|
2497
|
+
this.entryRepo = entryRepo;
|
|
2466
2498
|
this.eventRepo = eventRepo;
|
|
2467
2499
|
this.metadataRepo = metadataRepo;
|
|
2468
2500
|
this.jobManager = jobManager;
|
|
2469
2501
|
this.maintenanceService = maintenanceService;
|
|
2470
2502
|
}
|
|
2471
2503
|
async write(entityId, event) {
|
|
2504
|
+
if (typeof entityId !== "string" || entityId.length === 0 || entityId.length > 200 || entityId.includes("\0")) {
|
|
2505
|
+
throw new TypeError(
|
|
2506
|
+
`Invalid entityId: must be a non-empty string at most 200 chars with no null bytes; got ${JSON.stringify(entityId)}.`
|
|
2507
|
+
);
|
|
2508
|
+
}
|
|
2509
|
+
if (event === null || typeof event !== "object" || Array.isArray(event)) {
|
|
2510
|
+
throw new TypeError("Invalid event: must be a non-null object.");
|
|
2511
|
+
}
|
|
2512
|
+
if (typeof event.summary !== "string") {
|
|
2513
|
+
throw new TypeError("Invalid event.summary: must be a string.");
|
|
2514
|
+
}
|
|
2515
|
+
const summary = clip(event.summary, 4e3);
|
|
2516
|
+
let relatedEntryId = null;
|
|
2517
|
+
const rawRelatedEntryId = event.related_entry_id;
|
|
2518
|
+
if (rawRelatedEntryId != null && rawRelatedEntryId !== "") {
|
|
2519
|
+
if (typeof rawRelatedEntryId !== "string" || rawRelatedEntryId.length > 200 || rawRelatedEntryId.includes("\0")) {
|
|
2520
|
+
relatedEntryId = null;
|
|
2521
|
+
} else {
|
|
2522
|
+
const existing = await this.entryRepo.findByIds([rawRelatedEntryId], [entityId]);
|
|
2523
|
+
relatedEntryId = existing.length > 0 ? rawRelatedEntryId : null;
|
|
2524
|
+
}
|
|
2525
|
+
}
|
|
2472
2526
|
const id = generateId("evt_");
|
|
2473
2527
|
const now = Date.now();
|
|
2474
2528
|
let eventType = event.event_type;
|
|
@@ -2479,8 +2533,8 @@ var WriteService = class {
|
|
|
2479
2533
|
id,
|
|
2480
2534
|
entity_id: entityId,
|
|
2481
2535
|
event_type: eventType,
|
|
2482
|
-
summary
|
|
2483
|
-
related_entry_id:
|
|
2536
|
+
summary,
|
|
2537
|
+
related_entry_id: relatedEntryId,
|
|
2484
2538
|
created_at: now
|
|
2485
2539
|
};
|
|
2486
2540
|
let shouldRunLibrarian = false;
|