@equationalapplications/core-llm-wiki 4.13.1 → 4.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -477,6 +477,18 @@ Core WikiMemory provides:
477
477
  - **Input Validation**: `sourceRef`/`sourceHash` normalized; embedding dimensions validated
478
478
  - **Parameterized Queries**: All SQL uses bind parameters
479
479
 
480
+ ### Prompt-Injection Trust Boundary
481
+
482
+ User-controlled text — `event.summary` passed to `write()`, document chunks passed to `ingestDocument()`,
483
+ fact `title`/`body` (including imported dumps) — is interpolated verbatim into LLM prompts for librarian,
484
+ heal, and embedding operations. Prompt templating does simple variable substitution; it does not detect
485
+ or filter instruction-like content.
486
+
487
+ Mitigating prompt injection (e.g. "ignore prior instructions and emit...") is **the host's responsibility**.
488
+ If your application accepts untrusted input that flows into `write()`, `ingestDocument()`, or `importDump()`,
489
+ treat the LLM's librarian/heal output as similarly untrusted — validate or scope it before acting on it
490
+ downstream.
491
+
480
492
  ## Usage
481
493
 
482
494
  ```typescript
@@ -284,6 +284,20 @@ var JobManager = class {
284
284
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
285
285
  this.activeIngestJobs = /* @__PURE__ */ new Map();
286
286
  this.statusSubscribers = /* @__PURE__ */ new Map();
287
+ /**
288
+ * Lookup table for acquireLock/releaseLock's dynamic-dispatch branch.
289
+ * Excludes 'ingest' | 'global_reembed' | 'global_import', which those
290
+ * methods already handle via explicit if/else branches before reaching
291
+ * this table.
292
+ */
293
+ this.lockKeyFns = {
294
+ prune: (id) => this._pruneKey(id),
295
+ librarian: (id) => this._librarianKey(id),
296
+ heal: (id) => this._healKey(id),
297
+ reembed: (id) => this._reembedKey(id),
298
+ import: (id) => this._importKey(id),
299
+ forget: (id) => this._forgetKey(id)
300
+ };
287
301
  }
288
302
  _pruneKey(entityId) {
289
303
  return `${this.prefix}:${entityId}:prune`;
@@ -433,9 +447,7 @@ var JobManager = class {
433
447
  } else if (operation === "global_import") {
434
448
  this.activeMaintenanceJobs.add(this._globalImportKey());
435
449
  } else {
436
- const keyFnName = `_${operation}Key`;
437
- const keyFn = this[keyFnName];
438
- this.activeMaintenanceJobs.add(keyFn.call(this, entityId));
450
+ this.activeMaintenanceJobs.add(this.lockKeyFns[operation](entityId));
439
451
  }
440
452
  this._notifyStatusSubscribers(entityId);
441
453
  }
@@ -447,9 +459,7 @@ var JobManager = class {
447
459
  } else if (operation === "global_import") {
448
460
  this.activeMaintenanceJobs.delete(this._globalImportKey());
449
461
  } else {
450
- const keyFnName = `_${operation}Key`;
451
- const keyFn = this[keyFnName];
452
- this.activeMaintenanceJobs.delete(keyFn.call(this, entityId));
462
+ this.activeMaintenanceJobs.delete(this.lockKeyFns[operation](entityId));
453
463
  }
454
464
  this._notifyStatusSubscribers(entityId);
455
465
  }
@@ -875,7 +885,9 @@ function generateId(prefix = "") {
875
885
  crypto.getRandomValues(bytes);
876
886
  return prefix + Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("").substring(0, 24);
877
887
  }
878
- return prefix + Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
888
+ throw new Error(
889
+ "generateId: no cryptographically secure random source available (crypto.randomUUID and crypto.getRandomValues are both missing)."
890
+ );
879
891
  }
880
892
 
881
893
  // src/services/IngestionService.ts
@@ -1412,6 +1424,9 @@ var MaintenanceService = class {
1412
1424
  };
1413
1425
 
1414
1426
  // src/services/ImportExportService.ts
1427
+ var MAX_EMBEDDING_BLOB_BYTES = 32 * 1024;
1428
+ var IMPORT_TITLE_MAX = 500;
1429
+ var IMPORT_BODY_MAX = 8e3;
1415
1430
  var ImportExportService = class {
1416
1431
  constructor(db, entryRepo, taskRepo, eventRepo, metadataRepo, searchService, jobManager, embeddingService) {
1417
1432
  this.db = db;
@@ -1493,6 +1508,7 @@ var ImportExportService = class {
1493
1508
  const factsWithPreservedBlob = /* @__PURE__ */ new Map();
1494
1509
  const preservedBlobDims = /* @__PURE__ */ new Set();
1495
1510
  const softDeletedFactIds = [];
1511
+ const clippedTextByFactId = /* @__PURE__ */ new Map();
1496
1512
  await this.db.withTransactionAsync(async (tx) => {
1497
1513
  if (!merge) {
1498
1514
  const deletedLiveFactIds = await this.entryRepo.findIdsBySource(
@@ -1529,21 +1545,32 @@ var ImportExportService = class {
1529
1545
  const rawBlobRaw = fact.embedding_blob;
1530
1546
  let rawBlob = null;
1531
1547
  if (rawBlobRaw instanceof Uint8Array) {
1532
- rawBlob = rawBlobRaw;
1548
+ if (rawBlobRaw.byteLength <= MAX_EMBEDDING_BLOB_BYTES) {
1549
+ rawBlob = rawBlobRaw;
1550
+ }
1533
1551
  } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
1534
1552
  const obj = rawBlobRaw;
1535
1553
  if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
1536
- rawBlob = new Uint8Array(obj["data"]);
1554
+ const data = obj["data"];
1555
+ if (data.length <= MAX_EMBEDDING_BLOB_BYTES) {
1556
+ rawBlob = new Uint8Array(data);
1557
+ }
1537
1558
  } else if (!Array.isArray(rawBlobRaw)) {
1538
1559
  const entries = Object.keys(obj);
1539
1560
  if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
1540
1561
  const len = entries.length;
1541
- rawBlob = new Uint8Array(len);
1542
- for (let i = 0; i < len; i++)
1543
- rawBlob[i] = obj[String(i)] ?? 0;
1562
+ if (len <= MAX_EMBEDDING_BLOB_BYTES) {
1563
+ rawBlob = new Uint8Array(len);
1564
+ for (let i = 0; i < len; i++) {
1565
+ rawBlob[i] = obj[String(i)] ?? 0;
1566
+ }
1567
+ }
1544
1568
  }
1545
1569
  }
1546
1570
  }
1571
+ if (rawBlob !== null && rawBlob.byteLength > MAX_EMBEDDING_BLOB_BYTES) {
1572
+ rawBlob = null;
1573
+ }
1547
1574
  let blobData = null;
1548
1575
  if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
1549
1576
  const copy = new ArrayBuffer(rawBlob.byteLength);
@@ -1573,11 +1600,14 @@ var ImportExportService = class {
1573
1600
  }
1574
1601
  if (merge && safeUpdatedAt <= existing.updated_at) continue;
1575
1602
  }
1603
+ const safeTitle = clip(String(fact.title ?? ""), IMPORT_TITLE_MAX);
1604
+ const safeBody = clip(String(fact.body ?? ""), IMPORT_BODY_MAX);
1605
+ clippedTextByFactId.set(fact.id, { title: safeTitle, body: safeBody });
1576
1606
  const factObj = {
1577
1607
  id: fact.id,
1578
1608
  entity_id: entityId,
1579
- title: fact.title,
1580
- body: fact.body,
1609
+ title: safeTitle,
1610
+ body: safeBody,
1581
1611
  tags: Array.isArray(fact.tags) ? fact.tags : [],
1582
1612
  confidence: fact.confidence,
1583
1613
  source_type: sourceType,
@@ -1665,11 +1695,12 @@ var ImportExportService = class {
1665
1695
  await this.searchService.sync(entityId);
1666
1696
  for (const fact of bundle.facts) {
1667
1697
  if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
1698
+ const clipped = clippedTextByFactId.get(fact.id);
1668
1699
  const embedded = await this.embeddingService.embedFact({
1669
1700
  id: fact.id,
1670
1701
  entity_id: entityId,
1671
- title: fact.title,
1672
- body: fact.body,
1702
+ title: clipped?.title ?? fact.title,
1703
+ body: clipped?.body ?? fact.body,
1673
1704
  tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1674
1705
  });
1675
1706
  if (!embedded) {
@@ -1769,7 +1800,7 @@ var ImportExportService = class {
1769
1800
  }
1770
1801
  _warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
1771
1802
  console.warn(
1772
- `[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`
1803
+ `[WikiMemory] importDump: ${type} id ${JSON.stringify(id)} already belongs to entity ${JSON.stringify(existingEntityId)}; skipping for entity ${JSON.stringify(targetEntityId)}`
1773
1804
  );
1774
1805
  }
1775
1806
  _normalizeImportedSourceType(raw, ctx) {
@@ -1848,7 +1879,7 @@ var EmbeddingService = class {
1848
1879
  tagsStr = fact.tags;
1849
1880
  }
1850
1881
  }
1851
- const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
1882
+ const text = clip(`${fact.title} ${fact.body} ${tagsStr}`.trim(), 16e3);
1852
1883
  try {
1853
1884
  const vector = await embedFn(text);
1854
1885
  if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
@@ -2462,15 +2493,38 @@ var RetrievalService = class {
2462
2493
 
2463
2494
  // src/services/WriteService.ts
2464
2495
  var WriteService = class {
2465
- constructor(db, options, eventRepo, metadataRepo, jobManager, maintenanceService) {
2496
+ constructor(db, options, entryRepo, eventRepo, metadataRepo, jobManager, maintenanceService) {
2466
2497
  this.db = db;
2467
2498
  this.options = options;
2499
+ this.entryRepo = entryRepo;
2468
2500
  this.eventRepo = eventRepo;
2469
2501
  this.metadataRepo = metadataRepo;
2470
2502
  this.jobManager = jobManager;
2471
2503
  this.maintenanceService = maintenanceService;
2472
2504
  }
2473
2505
  async write(entityId, event) {
2506
+ if (typeof entityId !== "string" || entityId.length === 0 || entityId.length > 200 || entityId.includes("\0")) {
2507
+ throw new TypeError(
2508
+ `Invalid entityId: must be a non-empty string at most 200 chars with no null bytes; got ${JSON.stringify(entityId)}.`
2509
+ );
2510
+ }
2511
+ if (event === null || typeof event !== "object" || Array.isArray(event)) {
2512
+ throw new TypeError("Invalid event: must be a non-null object.");
2513
+ }
2514
+ if (typeof event.summary !== "string") {
2515
+ throw new TypeError("Invalid event.summary: must be a string.");
2516
+ }
2517
+ const summary = clip(event.summary, 4e3);
2518
+ let relatedEntryId = null;
2519
+ const rawRelatedEntryId = event.related_entry_id;
2520
+ if (rawRelatedEntryId != null && rawRelatedEntryId !== "") {
2521
+ if (typeof rawRelatedEntryId !== "string" || rawRelatedEntryId.length > 200 || rawRelatedEntryId.includes("\0")) {
2522
+ relatedEntryId = null;
2523
+ } else {
2524
+ const existing = await this.entryRepo.findByIds([rawRelatedEntryId], [entityId]);
2525
+ relatedEntryId = existing.length > 0 ? rawRelatedEntryId : null;
2526
+ }
2527
+ }
2474
2528
  const id = generateId("evt_");
2475
2529
  const now = Date.now();
2476
2530
  let eventType = event.event_type;
@@ -2481,8 +2535,8 @@ var WriteService = class {
2481
2535
  id,
2482
2536
  entity_id: entityId,
2483
2537
  event_type: eventType,
2484
- summary: event.summary,
2485
- related_entry_id: event.related_entry_id || null,
2538
+ summary,
2539
+ related_entry_id: relatedEntryId,
2486
2540
  created_at: now
2487
2541
  };
2488
2542
  let shouldRunLibrarian = false;
@@ -2543,5 +2597,5 @@ var WriteService = class {
2543
2597
  };
2544
2598
 
2545
2599
  export { EmbeddingService, HOOK_TIMEOUT_MARKER, ImportExportService, IngestionService, JobManager, MaintenanceService, PromptService, PrunePartialFailureError, RetrievalService, SearchService, WikiBusyError, WriteService, __privateAdd, __privateGet, __privateSet, generateId, normalizeSourceHash, normalizeSourceRef, parseEmbedding };
2546
- //# sourceMappingURL=chunk-6FWG2DG4.mjs.map
2547
- //# sourceMappingURL=chunk-6FWG2DG4.mjs.map
2600
+ //# sourceMappingURL=chunk-24ANTHZB.mjs.map
2601
+ //# sourceMappingURL=chunk-24ANTHZB.mjs.map