@equationalapplications/expo-llm-wiki 0.0.0-development → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ WikiBusyError: () => WikiBusyError,
23
24
  WikiMemory: () => WikiMemory,
24
- createWiki: () => createWiki
25
+ createWiki: () => createWiki,
26
+ formatMemoryDump: () => formatMemoryDump
25
27
  });
26
28
  module.exports = __toCommonJS(index_exports);
27
29
 
@@ -110,11 +112,23 @@ async function setupDatabase(db, prefix) {
110
112
  `);
111
113
  }
112
114
 
115
+ // src/types.ts
116
/**
 * Error thrown when an operation is requested for an entity while the same
 * operation is already in flight for it.
 *
 * The message has the form "<operation> already running for entity <entityId>".
 */
var WikiBusyError = class extends Error {
  /** Operation label passed to the constructor (e.g. "librarian", "heal", "ingest"). */
  operation;
  /** Identifier of the entity the operation was already running for. */
  entityId;
  /**
   * @param {string} operation - Label of the busy operation.
   * @param {string} entityId - Entity the operation is running for.
   */
  constructor(operation, entityId) {
    const message = `${operation} already running for entity ${entityId}`;
    super(message);
    Object.assign(this, { name: "WikiBusyError", operation, entityId });
  }
};
126
+
113
127
  // src/prompts.ts
114
128
  var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
115
129
  Return ONLY a valid JSON object matching this schema:
116
130
  {
117
- "facts": [{ "title": "string (max 80 chars)", "body": "string (max 200 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
131
+ "facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }],
118
132
  "tasks": [{ "description": "string", "priority": "number (0-10)" }]
119
133
  }
120
134
  Keep facts concise. Do not return markdown, just raw JSON.`;
@@ -123,13 +137,13 @@ Return ONLY a valid JSON object matching this schema:
123
137
  {
124
138
  "downgraded": ["string (fact IDs)"],
125
139
  "deleted": ["string (fact IDs)"],
126
- "newFacts": [{ "title": "string", "body": "string", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
140
+ "newFacts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
127
141
  }
128
142
  Do not return markdown, just raw JSON.`;
129
143
  var INGEST_SYSTEM_PROMPT = `You are a document ingestion agent. Your job is to extract factual knowledge from the provided document chunk.
130
144
  Return ONLY a valid JSON object matching this schema:
131
145
  {
132
- "facts": [{ "title": "string (max 80 chars)", "body": "string (max 200 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
146
+ "facts": [{ "title": "string (max 80 chars)", "body": "string (max 800 chars)", "tags": ["string"], "confidence": "certain|inferred|tentative" }]
133
147
  }
134
148
  Extract verbatim factual content. Do not return markdown, just raw JSON.`;
135
149
 
@@ -203,6 +217,84 @@ function safeSlice(value, start, end) {
203
217
  }
204
218
  return value.slice(safeStart, safeEnd);
205
219
  }
220
/**
 * Splits trimmed text into chunks of at most `maxChunkLength` UTF-16 units,
 * preferring natural boundaries located in the second half of each window.
 *
 * Boundary preference per window, first match wins:
 *   1. the last paragraph break ("\n\n") that fits entirely inside the window,
 *   2. the last sentence terminator (. ! ?) followed by whitespace,
 *   3. the last whitespace character,
 *   4. a hard cut at the window end (sets `truncated`).
 *
 * Each following chunk starts `overlap` units before the previous split point,
 * but always at least one unit past the previous chunk start so the loop
 * makes progress.
 *
 * @param {string} input - Raw text; leading/trailing whitespace is trimmed.
 * @param {number} maxChunkLength - Integer >= 2.
 * @param {number} overlap - Integer in [0, maxChunkLength).
 * @returns {{ chunks: string[], truncated: boolean }} `truncated` is true when
 *   at least one window had no natural boundary and was cut at its end.
 * @throws {Error} When `maxChunkLength` or `overlap` is invalid (only checked
 *   for non-empty input, matching the historical behaviour).
 */
function chunkText(input, maxChunkLength, overlap) {
  const text = input.trim();
  // Empty input short-circuits before validation; kept for backward compatibility.
  if (text.length === 0) return { chunks: [], truncated: false };
  if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
    throw new Error("maxChunkLength must be an integer >= 2");
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= maxChunkLength) {
    throw new Error("overlap must be a non-negative integer < maxChunkLength");
  }
  const chunks = [];
  let truncated = false;
  let cursor = 0;
  const halfMax = Math.floor(maxChunkLength / 2);
  while (cursor < text.length) {
    if (text.length - cursor <= maxChunkLength) {
      // The remainder fits in one chunk.
      // safeSlice is defined elsewhere in this file; it is used instead of a raw slice.
      chunks.push(safeSlice(text, cursor, text.length));
      break;
    }
    const windowEnd = cursor + maxChunkLength;
    const minSplit = cursor + halfMax;
    let splitPoint = -1;

    // 1. Paragraph break. Searching from windowEnd - 2 guarantees the whole
    //    "\n\n" lies inside the window; searching from windowEnd could land on a
    //    break straddling the window end and hide an earlier, valid one.
    const paraIdx = text.lastIndexOf("\n\n", windowEnd - 2);
    if (paraIdx >= minSplit) {
      splitPoint = paraIdx + 2;
    }

    // 2. Sentence terminator followed by whitespace; scan backwards so the
    //    first hit is the last terminator in the window.
    if (splitPoint === -1) {
      for (let i = windowEnd - 2; i >= minSplit; i--) {
        const ch = text[i];
        if ((ch === "." || ch === "!" || ch === "?") && /\s/.test(text[i + 1])) {
          splitPoint = i + 2;
          break;
        }
      }
    }

    // 3. Any whitespace character.
    if (splitPoint === -1) {
      for (let i = windowEnd - 1; i >= minSplit; i--) {
        if (/\s/.test(text[i])) {
          splitPoint = i + 1;
          break;
        }
      }
    }

    // 4. No natural boundary: hard cut and report it to the caller.
    if (splitPoint === -1) {
      truncated = true;
      splitPoint = windowEnd;
    }

    chunks.push(safeSlice(text, cursor, splitPoint));
    // Step back by the overlap, but never stall on the same cursor.
    cursor = Math.max(splitPoint - overlap, cursor + 1);
  }
  return { chunks, truncated };
}
274
/**
 * Runs async task factories with at most `limit` of them in flight at once
 * and resolves with their results in input order.
 *
 * After the first failure no further tasks are started; tasks already in
 * flight are awaited, then the first error is rethrown.
 *
 * @param {Array<() => Promise<any>>} tasks - Zero-argument functions, each starting one unit of work.
 * @param {number} limit - Maximum concurrent tasks. Values below 1 are treated as 1,
 *   fractional values are floored, and a missing or NaN limit falls back to 1
 *   (previously that case started no workers and silently skipped every task).
 * @returns {Promise<any[]>} Results positioned by task index.
 * @throws The first error raised by any task.
 */
async function withConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  let nextIndex = 0;
  let failed = false;
  let firstError;

  // Each worker pulls the next unclaimed index until tasks run out or one fails.
  async function worker() {
    while (nextIndex < tasks.length && !failed) {
      const i = nextIndex++;
      try {
        results[i] = await tasks[i]();
      } catch (e) {
        if (!failed) {
          failed = true;
          firstError = e;
        }
        return;
      }
    }
  }

  // Math.max(NaN, 1) is NaN, which would yield zero workers; normalise first.
  const numericLimit = Number(limit);
  const requested = Number.isNaN(numericLimit) ? 1 : Math.max(Math.floor(numericLimit), 1);
  const workerCount = Math.min(requested, tasks.length);

  // Workers never reject (they capture errors), so allSettled only waits for them.
  await Promise.allSettled(Array.from({ length: workerCount }, () => worker()));
  if (failed) throw firstError;
  return results;
}
206
298
  function clip(value, max) {
207
299
  if (typeof value !== "string") return "";
208
300
  const s = value.trim();
@@ -215,7 +307,7 @@ function validateTags(tags) {
215
307
  function validateFact(fact) {
216
308
  if (typeof fact?.title !== "string" || typeof fact?.body !== "string") return null;
217
309
  const title = clip(fact.title, 80);
218
- const body = clip(fact.body, 200);
310
+ const body = clip(fact.body, 800);
219
311
  if (!title || !body) return null;
220
312
  let confidence = fact.confidence;
221
313
  if (confidence !== "certain" && confidence !== "tentative") confidence = "inferred";
@@ -264,6 +356,16 @@ var WikiMemory = class {
264
356
  prefix;
265
357
  options;
266
358
  activeMaintenanceJobs = /* @__PURE__ */ new Set();
359
+ activeIngestJobs = /* @__PURE__ */ new Set();
360
+ _librarianKey(entityId) {
361
+ return `${this.prefix}:${entityId}:librarian`;
362
+ }
363
+ _healKey(entityId) {
364
+ return `${this.prefix}:${entityId}:heal`;
365
+ }
366
+ _warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
367
+ console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
368
+ }
267
369
  constructor(db, options) {
268
370
  this.db = db;
269
371
  this.options = options;
@@ -349,6 +451,9 @@ var WikiMemory = class {
349
451
  }));
350
452
  return { facts, tasks, events: events.reverse() };
351
453
  }
454
+ async getMemoryBundle(entityId) {
455
+ return this._getFullBundle(entityId, { maxEvents: 10 });
456
+ }
352
457
  async write(entityId, event) {
353
458
  const id = generateId("evt_");
354
459
  const now = Date.now();
@@ -369,7 +474,7 @@ var WikiMemory = class {
369
474
  let memoryCheckpoint = cp?.memory_checkpoint || 0;
370
475
  if (memoryCheckpoint > count) memoryCheckpoint = 0;
371
476
  if (count - memoryCheckpoint >= threshold) {
372
- const jobKey = `${this.prefix}:${entityId}`;
477
+ const jobKey = this._librarianKey(entityId);
373
478
  if (!this.activeMaintenanceJobs.has(jobKey)) {
374
479
  this.activeMaintenanceJobs.add(jobKey);
375
480
  this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
@@ -388,12 +493,20 @@ var WikiMemory = class {
388
493
  let healCheckpoint = cp?.heal_checkpoint || 0;
389
494
  if (healCheckpoint > currentEventCount) healCheckpoint = 0;
390
495
  if (currentEventCount - healCheckpoint >= autoHealThreshold) {
391
- await this._doRunHeal(entityId);
392
- await this.db.runAsync(`
393
- INSERT INTO ${this.prefix}checkpoints (entity_id, heal_checkpoint)
394
- VALUES (?, ?)
395
- ON CONFLICT(entity_id) DO UPDATE SET heal_checkpoint = ?
396
- `, [entityId, currentEventCount, currentEventCount]);
496
+ const healKey = this._healKey(entityId);
497
+ if (!this.activeMaintenanceJobs.has(healKey)) {
498
+ this.activeMaintenanceJobs.add(healKey);
499
+ try {
500
+ await this._doRunHeal(entityId);
501
+ await this.db.runAsync(`
502
+ INSERT INTO ${this.prefix}checkpoints (entity_id, heal_checkpoint)
503
+ VALUES (?, ?)
504
+ ON CONFLICT(entity_id) DO UPDATE SET heal_checkpoint = ?
505
+ `, [entityId, currentEventCount, currentEventCount]);
506
+ } finally {
507
+ this.activeMaintenanceJobs.delete(healKey);
508
+ }
509
+ }
397
510
  }
398
511
  }
399
512
  async _doRunLibrarian(entityId) {
@@ -536,8 +649,10 @@ The following document anchors are provided for contradiction detection only. Do
536
649
  });
537
650
  }
538
651
  async runLibrarian(entityId) {
539
- const jobKey = `${this.prefix}:${entityId}`;
540
- if (this.activeMaintenanceJobs.has(jobKey)) return;
652
+ const jobKey = this._librarianKey(entityId);
653
+ if (this.activeMaintenanceJobs.has(jobKey)) {
654
+ throw new WikiBusyError("librarian", entityId);
655
+ }
541
656
  this.activeMaintenanceJobs.add(jobKey);
542
657
  try {
543
658
  await this._doRunLibrarian(entityId);
@@ -546,8 +661,10 @@ The following document anchors are provided for contradiction detection only. Do
546
661
  }
547
662
  }
548
663
  async runHeal(entityId) {
549
- const jobKey = `${this.prefix}:${entityId}`;
550
- if (this.activeMaintenanceJobs.has(jobKey)) return;
664
+ const jobKey = this._healKey(entityId);
665
+ if (this.activeMaintenanceJobs.has(jobKey)) {
666
+ throw new WikiBusyError("heal", entityId);
667
+ }
551
668
  this.activeMaintenanceJobs.add(jobKey);
552
669
  try {
553
670
  await this._doRunHeal(entityId);
@@ -555,6 +672,170 @@ The following document anchors are provided for contradiction detection only. Do
555
672
  this.activeMaintenanceJobs.delete(jobKey);
556
673
  }
557
674
  }
675
+ getEntityStatus(entityId) {
676
+ const ingestPrefix = `${this.prefix}:${entityId}:`;
677
+ let ingesting = false;
678
+ for (const k of this.activeIngestJobs) {
679
+ if (k.startsWith(ingestPrefix)) {
680
+ ingesting = true;
681
+ break;
682
+ }
683
+ }
684
+ return {
685
+ ingesting,
686
+ librarian: this.activeMaintenanceJobs.has(this._librarianKey(entityId)),
687
+ heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
688
+ };
689
+ }
690
+ async _getFullBundle(entityId, opts) {
691
+ const maxEvents = opts?.maxEvents;
692
+ const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
693
+ const eventsParams = maxEvents != null ? [entityId, maxEvents] : [entityId];
694
+ const [factsRaw, tasks, eventsRaw] = await Promise.all([
695
+ this.db.getAllAsync(
696
+ `SELECT * FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL ORDER BY updated_at DESC`,
697
+ [entityId]
698
+ ),
699
+ this.db.getAllAsync(
700
+ `SELECT * FROM ${this.prefix}tasks WHERE entity_id = ? AND deleted_at IS NULL ORDER BY priority DESC, created_at ASC`,
701
+ [entityId]
702
+ ),
703
+ this.db.getAllAsync(eventsQuery, eventsParams)
704
+ ]);
705
+ const facts = factsRaw.map((f) => ({
706
+ ...f,
707
+ tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
708
+ }));
709
+ const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
710
+ return { facts, tasks, events };
711
+ }
712
+ async exportDump(entityIds) {
713
+ let ids;
714
+ if (entityIds && entityIds.length > 0) {
715
+ ids = Array.from(new Set(entityIds));
716
+ } else {
717
+ const rows = await this.db.getAllAsync(`
718
+ SELECT DISTINCT entity_id FROM (
719
+ SELECT entity_id FROM ${this.prefix}entries WHERE deleted_at IS NULL
720
+ UNION
721
+ SELECT entity_id FROM ${this.prefix}tasks WHERE deleted_at IS NULL
722
+ UNION
723
+ SELECT entity_id FROM ${this.prefix}events
724
+ ) ORDER BY entity_id
725
+ `);
726
+ ids = rows.map((r) => r.entity_id);
727
+ }
728
+ const entities = {};
729
+ const BATCH = 3;
730
+ for (let i = 0; i < ids.length; i += BATCH) {
731
+ const batch = ids.slice(i, i + BATCH);
732
+ const batchResults = await Promise.all(
733
+ batch.map(async (id) => [id, await this._getFullBundle(id)])
734
+ );
735
+ for (const [id, bundle] of batchResults) {
736
+ entities[id] = bundle;
737
+ }
738
+ }
739
+ return { generatedAt: Date.now(), entities };
740
+ }
741
+ async importDump(dump, opts) {
742
+ const merge = opts?.merge ?? false;
743
+ for (const [entityId, bundle] of Object.entries(dump.entities)) {
744
+ await this.db.withTransactionAsync(async () => {
745
+ if (!merge) {
746
+ const now = Date.now();
747
+ await this.db.runAsync(
748
+ `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
749
+ [now, now, entityId]
750
+ );
751
+ await this.db.runAsync(
752
+ `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
753
+ [now, now, entityId]
754
+ );
755
+ await this.db.runAsync(
756
+ `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
757
+ [entityId]
758
+ );
759
+ }
760
+ const factIds = bundle.facts.map((fact) => fact.id);
761
+ const existingFactsById = /* @__PURE__ */ new Map();
762
+ const factLookupChunkSize = 500;
763
+ for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
764
+ const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
765
+ if (factIdChunk.length === 0) continue;
766
+ const placeholders = factIdChunk.map(() => "?").join(", ");
767
+ const existingFacts = await this.db.getAllAsync(
768
+ `SELECT id, entity_id FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
769
+ factIdChunk
770
+ );
771
+ for (const existingFact of existingFacts) {
772
+ existingFactsById.set(existingFact.id, existingFact);
773
+ }
774
+ }
775
+ for (const fact of bundle.facts) {
776
+ const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
777
+ const existing = existingFactsById.get(fact.id);
778
+ if (existing) {
779
+ if (existing.entity_id !== entityId) {
780
+ this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
781
+ continue;
782
+ }
783
+ if (merge) continue;
784
+ await this.db.runAsync(
785
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
786
+ [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, fact.updated_at, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
787
+ );
788
+ } else {
789
+ await this.db.runAsync(
790
+ `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
791
+ [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, fact.updated_at, fact.last_accessed_at, fact.access_count, fact.deleted_at]
792
+ );
793
+ }
794
+ }
795
+ const taskIds = bundle.tasks.map((task) => task.id);
796
+ const existingTasksById = /* @__PURE__ */ new Map();
797
+ const taskLookupChunkSize = 500;
798
+ for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
799
+ const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
800
+ if (taskIdChunk.length === 0) continue;
801
+ const placeholders = taskIdChunk.map(() => "?").join(", ");
802
+ const existingTasks = await this.db.getAllAsync(
803
+ `SELECT id, entity_id FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
804
+ taskIdChunk
805
+ );
806
+ for (const existingTask of existingTasks) {
807
+ existingTasksById.set(existingTask.id, existingTask);
808
+ }
809
+ }
810
+ for (const task of bundle.tasks) {
811
+ const existing = existingTasksById.get(task.id);
812
+ if (existing) {
813
+ if (existing.entity_id !== entityId) {
814
+ this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
815
+ continue;
816
+ }
817
+ if (merge) continue;
818
+ await this.db.runAsync(
819
+ `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
820
+ [entityId, task.description, task.status, task.priority, task.created_at, task.updated_at, task.resolved_at, task.deleted_at, task.id]
821
+ );
822
+ } else {
823
+ await this.db.runAsync(
824
+ `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
825
+ [task.id, entityId, task.description, task.status, task.priority, task.created_at, task.updated_at, task.resolved_at, task.deleted_at]
826
+ );
827
+ }
828
+ }
829
+ for (const event of bundle.events) {
830
+ await this.db.runAsync(
831
+ `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
832
+ VALUES (?, ?, ?, ?, ?, ?)`,
833
+ [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
834
+ );
835
+ }
836
+ });
837
+ }
838
+ }
558
839
  async forget(entityId, params) {
559
840
  const now = Date.now();
560
841
  let deletedEntries = 0;
@@ -609,71 +890,166 @@ The following document anchors are provided for contradiction detection only. Do
609
890
  if (!sourceRef) throw new Error("Invalid sourceRef");
610
891
  const sourceHash = normalizeSourceHash(params.sourceHash);
611
892
  if (!sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
612
- const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 6e3;
613
- if (!Number.isInteger(maxChunkLength) || maxChunkLength < 2) {
614
- throw new Error("maxChunkLength must be an integer greater than or equal to 2");
615
- }
893
+ const maxChunkLength = params.maxChunkLength ?? this.options.config?.maxChunkLength ?? 12e3;
894
+ const rawOverlap = params.chunkOverlap ?? this.options.config?.chunkOverlap ?? 400;
895
+ const chunkOverlap = Math.min(
896
+ Number.isFinite(rawOverlap) && rawOverlap >= 0 ? Math.floor(rawOverlap) : 400,
897
+ maxChunkLength - 1
898
+ );
899
+ const rawConcurrency = params.chunkConcurrency ?? this.options.config?.chunkConcurrency ?? 1;
900
+ const chunkConcurrency = Number.isFinite(rawConcurrency) && rawConcurrency >= 1 ? Math.floor(rawConcurrency) : 1;
616
901
  if (typeof params.documentChunk !== "string") {
617
902
  throw new Error(`documentChunk must be a string, received ${typeof params.documentChunk}`);
618
903
  }
619
- const chunks = [];
620
- let truncated = false;
621
- let text = params.documentChunk.trim();
622
- if (text.length === 0) {
623
- return { truncated: false, chunks: 0 };
904
+ const jobKey = `${this.prefix}:${entityId}:${sourceRef}`;
905
+ if (this.activeIngestJobs.has(jobKey)) {
906
+ throw new WikiBusyError("ingest", entityId);
624
907
  }
625
- while (text.length > 0) {
626
- if (text.length <= maxChunkLength) {
627
- chunks.push(text);
628
- break;
629
- }
630
- const searchArea = text.slice(0, maxChunkLength + 1);
631
- const match = searchArea.match(/[.!?]\s+(?![\s\S]*[.!?]\s+)/);
632
- if (match && match.index !== void 0) {
633
- const splitPoint = Math.min(match.index + match[0].length, maxChunkLength);
634
- const chunk = safeSlice(text, 0, splitPoint);
635
- chunks.push(chunk);
636
- text = text.slice(chunk.length);
637
- } else {
638
- truncated = true;
639
- const chunk = safeSlice(text, 0, maxChunkLength);
640
- chunks.push(chunk);
641
- text = text.slice(chunk.length);
908
+ this.activeIngestJobs.add(jobKey);
909
+ try {
910
+ const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
911
+ if (chunks.length === 0) {
912
+ return { truncated: false, chunks: 0 };
642
913
  }
643
- }
644
- const allValidFacts = [];
645
- for (const chunk of chunks) {
646
- const userPrompt = `Document Chunk:
914
+ const chunkResults = await withConcurrency(
915
+ chunks.map((chunk) => async () => {
916
+ const userPrompt = `Document Chunk:
647
917
  ${chunk}`;
648
- const responseText = await this.options.llmProvider.generateText({
649
- systemPrompt: INGEST_SYSTEM_PROMPT,
650
- userPrompt
918
+ const responseText = await this.options.llmProvider.generateText({
919
+ systemPrompt: INGEST_SYSTEM_PROMPT,
920
+ userPrompt
921
+ });
922
+ const result = parseJsonResponse(responseText);
923
+ return (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
924
+ }),
925
+ chunkConcurrency
926
+ );
927
+ const seen = /* @__PURE__ */ new Set();
928
+ const allValidFacts = [];
929
+ for (const facts of chunkResults) {
930
+ for (const fact of facts) {
931
+ const normalized = fact.title.trim().toLowerCase().replace(/\s+/g, " ");
932
+ if (!seen.has(normalized)) {
933
+ seen.add(normalized);
934
+ allValidFacts.push(fact);
935
+ }
936
+ }
937
+ }
938
+ const now = Date.now();
939
+ await this.db.withTransactionAsync(async () => {
940
+ await this.db.runAsync(
941
+ `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
942
+ [now, now, sourceRef, entityId]
943
+ );
944
+ for (const fact of allValidFacts) {
945
+ const id = generateId("fact_");
946
+ await this.db.runAsync(
947
+ `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
948
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
949
+ [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
950
+ );
951
+ }
651
952
  });
652
- const result = parseJsonResponse(responseText);
653
- const validFacts = (Array.isArray(result.facts) ? result.facts : []).map(validateFact).filter((f) => f !== null);
654
- allValidFacts.push(...validFacts);
953
+ return { truncated, chunks: chunks.length };
954
+ } finally {
955
+ this.activeIngestJobs.delete(jobKey);
655
956
  }
656
- const now = Date.now();
657
- await this.db.withTransactionAsync(async () => {
658
- await this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, sourceRef, entityId]);
659
- for (const fact of allValidFacts) {
660
- const id = generateId("fact_");
661
- await this.db.runAsync(`
662
- INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at)
663
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
664
- `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]);
665
- }
666
- });
667
- return { truncated, chunks: chunks.length };
668
957
  }
669
958
  };
670
959
 
960
+ // src/utils/formatMemoryDump.ts
961
/**
 * Renders one fact as a Markdown section: a level-3 heading with the title,
 * tag / confidence / source lines, the body, and a trailing horizontal rule.
 * The source shown is `source_ref`, falling back to `source_type` when
 * `source_ref` is null or undefined.
 * @param {object} f - Fact row.
 * @returns {string} Markdown ending in a newline.
 */
function renderFact(f) {
  const tagList = (f.tags || []).join(", ");
  const origin = f.source_ref ?? f.source_type;
  const parts = [
    `### ${f.title}`,
    `**Tags:** ${tagList}`,
    `**Confidence:** ${f.confidence}`,
    `**Source:** ${origin}`,
    "",
    `${f.body}`,
    "",
    "---",
    ""
  ];
  return parts.join("\n");
}
974
/**
 * Renders one task as a Markdown checkbox line. The box is ticked only for
 * status "done"; statuses "done", "abandoned" and "in_progress" also append
 * a parenthesised note. Any other status renders without a note.
 * @param {object} t - Task row.
 * @returns {string} A single line ending in a newline.
 */
function renderTask(t) {
  let box = " ";
  let suffix = "";
  switch (t.status) {
    case "done":
      box = "x";
      suffix = " (done)";
      break;
    case "abandoned":
      suffix = " (abandoned)";
      break;
    case "in_progress":
      suffix = " (in progress)";
      break;
    default:
      break;
  }
  return `- [${box}] ${t.description}${suffix}\n`;
}
980
/**
 * Renders one event as a Markdown list item:
 * "- [<ISO timestamp>] (<event_type>) <summary>".
 * @param {object} e - Event row; `created_at` must be accepted by `new Date(...)`.
 * @returns {string} A single line ending in a newline.
 * @throws {RangeError} When `created_at` does not produce a valid date.
 */
function renderEvent(e) {
  const isoTimestamp = new Date(e.created_at).toISOString();
  const line = `- [${isoTimestamp}] (${e.event_type}) ${e.summary}`;
  return `${line}\n`;
}
985
/**
 * Renders a whole entity bundle as a Markdown document with a title, a
 * generation timestamp and three sections: Facts, Tasks and Recent Events.
 * An empty section shows "_(none)_".
 * @param {string} entityId - Used in the document title.
 * @param {{ facts: object[], tasks: object[], events: object[] }} bundle
 * @param {number} generatedAt - Value passed to `new Date(...)` for the header.
 * @returns {string} The Markdown document.
 */
function renderEntity(entityId, bundle, generatedAt) {
  // Either the placeholder or one rendered string per item.
  const sectionBody = (items, render) =>
    items.length === 0 ? ["_(none)_\n"] : Array.from(items, (item) => render(item));

  const lines = [
    `# Memory Dump: ${entityId}`,
    `Generated: ${new Date(generatedAt).toISOString()}`,
    "",
    "## Facts",
    "",
    ...sectionBody(bundle.facts, renderFact),
    "## Tasks",
    "",
    ...sectionBody(bundle.tasks, renderTask),
    "",
    "## Recent Events",
    "",
    ...sectionBody(bundle.events, renderEvent)
  ];
  return lines.join("\n");
}
1014
/**
 * Computes a 16-hex-character, non-cryptographic hash of a string by running
 * two 32-bit multiply-and-xor accumulators (multipliers 33 and 31) over its
 * UTF-16 code units and concatenating both as zero-padded hex.
 * @param {string} value
 * @returns {string} 16 lowercase hex characters.
 */
function shortHash(value) {
  const toHex8 = (n) => (n >>> 0).toString(16).padStart(8, "0");
  let accA = 5381;
  let accB = 52711;
  let pos = 0;
  while (pos < value.length) {
    const unit = value.charCodeAt(pos);
    accA = Math.imul(accA, 33) ^ unit;
    accB = Math.imul(accB, 31) ^ unit;
    pos += 1;
  }
  return toHex8(accA) + toHex8(accB);
}
1024
/**
 * Derives a Markdown file name from an entity id.
 *
 * The id is NFKC-normalised, runs of characters outside [A-Za-z0-9._-] become
 * "_", leading dots become "_", repeated underscores collapse, and leading /
 * trailing "_" or "-" are stripped. The result is capped at 200 characters and
 * falls back to "entity" when empty. Whenever the base name differs from the
 * original id, a "-<shortHash(entityId)>" suffix is appended so distinct ids
 * keep distinct names.
 *
 * @param {string} entityId
 * @returns {string} File name ending in ".md".
 */
function formatEntityFileName(entityId) {
  const MAX_BASE = 200;

  let sanitized = entityId.normalize("NFKC");
  sanitized = sanitized.replace(/[^A-Za-z0-9._-]+/g, "_");
  sanitized = sanitized.replace(/^\.+/, "_");
  sanitized = sanitized.replace(/_+/g, "_");
  sanitized = sanitized.replace(/^[_-]+|[_-]+$/g, "");

  const wasTooLong = sanitized.length > MAX_BASE;
  const trimmed = wasTooLong ? sanitized.slice(0, MAX_BASE) : sanitized;

  let baseName = "entity";
  if (trimmed && trimmed !== "." && trimmed !== "..") {
    baseName = trimmed;
  }

  if (baseName !== entityId || wasTooLong) {
    return `${baseName}-${shortHash(entityId)}.md`;
  }
  return `${baseName}.md`;
}
1034
/**
 * Converts a memory dump into a JSON manifest plus one Markdown file per entity.
 * @param {{ generatedAt: number, entities: Object<string, object> }} dump
 * @returns {{ manifest: string, files: Array<{ name: string, content: string }> }}
 *   `manifest` is the whole dump pretty-printed with two-space indentation;
 *   `files` follows the enumeration order of `dump.entities`.
 */
function formatMemoryDump(dump) {
  const files = [];
  for (const [entityId, bundle] of Object.entries(dump.entities)) {
    const name = formatEntityFileName(entityId);
    const content = renderEntity(entityId, bundle, dump.generatedAt);
    files.push({ name, content });
  }
  const manifest = JSON.stringify(dump, null, 2);
  return { manifest, files };
}
1044
+
671
1045
  // src/index.ts
672
1046
  function createWiki(db, options) {
673
1047
  return new WikiMemory(db, options);
674
1048
  }
675
1049
  // Annotate the CommonJS export names for ESM import in node:
676
1050
  0 && (module.exports = {
1051
+ WikiBusyError,
677
1052
  WikiMemory,
678
- createWiki
1053
+ createWiki,
1054
+ formatMemoryDump
679
1055
  });