ei-tui 0.9.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +22 -3
  2. package/package.json +5 -1
  3. package/src/README.md +9 -25
  4. package/src/core/handlers/document-segmentation.ts +113 -0
  5. package/src/core/handlers/index.ts +2 -0
  6. package/src/core/handlers/rewrite.ts +13 -9
  7. package/src/core/heartbeat-manager.ts +2 -2
  8. package/src/core/llm-client.ts +11 -1
  9. package/src/core/message-manager.ts +20 -18
  10. package/src/core/orchestrators/ceremony.ts +83 -40
  11. package/src/core/orchestrators/human-extraction.ts +5 -1
  12. package/src/core/persona-manager.ts +4 -0
  13. package/src/core/processor.ts +90 -1
  14. package/src/core/queue-manager.ts +35 -0
  15. package/src/core/state/queue.ts +9 -1
  16. package/src/core/state-manager.ts +4 -0
  17. package/src/core/types/entities.ts +15 -0
  18. package/src/core/types/enums.ts +1 -0
  19. package/src/core/types/integrations.ts +2 -0
  20. package/src/core/types/llm.ts +9 -0
  21. package/src/integrations/document/chunker.ts +88 -0
  22. package/src/integrations/document/importer.ts +82 -0
  23. package/src/integrations/document/index.ts +2 -0
  24. package/src/integrations/document/invoice.ts +63 -0
  25. package/src/integrations/document/types.ts +16 -0
  26. package/src/integrations/document/unsource.ts +164 -0
  27. package/src/integrations/persona-history/importer.ts +197 -0
  28. package/src/integrations/persona-history/index.ts +3 -0
  29. package/src/integrations/persona-history/types.ts +7 -0
  30. package/src/prompts/ceremony/dedup.ts +7 -3
  31. package/src/prompts/ceremony/index.ts +2 -1
  32. package/src/prompts/ceremony/people-rewrite.ts +190 -0
  33. package/src/prompts/ceremony/{rewrite.ts → topic-rewrite.ts} +103 -78
  34. package/src/prompts/human/person-scan.ts +13 -4
  35. package/src/prompts/human/topic-scan.ts +16 -2
  36. package/src/prompts/human/topic-update.ts +36 -4
  37. package/src/prompts/human/types.ts +1 -0
  38. package/src/storage/indexed.ts +4 -0
  39. package/src/storage/interface.ts +1 -0
  40. package/src/storage/local.ts +4 -0
  41. package/src/templates/emmett.ts +49 -0
  42. package/tui/README.md +22 -0
  43. package/tui/src/app.tsx +9 -6
  44. package/tui/src/commands/delete.tsx +7 -1
  45. package/tui/src/commands/import.tsx +30 -0
  46. package/tui/src/commands/unsource.tsx +115 -0
  47. package/tui/src/components/PromptInput.tsx +4 -0
  48. package/tui/src/components/WelcomeOverlay.tsx +58 -32
  49. package/tui/src/context/ei.tsx +80 -60
  50. package/tui/src/index.tsx +14 -0
  51. package/tui/src/storage/file.ts +11 -5
  52. package/tui/src/util/e2e-flags.ts +4 -3
  53. package/tui/src/util/help-content.ts +20 -0
  54. package/tui/src/util/provider-detection.ts +251 -0
  55. package/tui/src/util/yaml-human.ts +7 -1
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  RESERVED_PERSONA_NAMES,
3
3
  isReservedPersonaName,
4
+ isReservedPersonaId,
4
5
  type PersonaSummary,
5
6
  type PersonaEntity,
6
7
  type PersonaCreationInput,
@@ -110,6 +111,9 @@ export async function deletePersona(
110
111
  personaId: string,
111
112
  _deleteHumanData: boolean
112
113
  ): Promise<boolean> {
114
+ if (isReservedPersonaId(personaId)) {
115
+ throw new Error(`Cannot delete reserved persona "${personaId}". Use archive instead.`);
116
+ }
113
117
  const persona = sm.persona_getById(personaId);
114
118
  if (!persona) return false;
115
119
  sm.persona_delete(personaId);
@@ -39,7 +39,9 @@ import { ContextStatus as ContextStatusEnum, RoomMode } from "./types.js";
39
39
  import { registerReadMemoryExecutor, registerFileReadExecutor } from "./tools/index.js";
40
40
  import { createReadMemoryExecutor } from "./tools/builtin/read-memory.js";
41
41
  import { EI_WELCOME_MESSAGE, EI_PERSONA_DEFINITION } from "../templates/welcome.js";
42
+ import { EMMETT_PERSONA_DEFINITION } from "../templates/emmett.js";
42
43
  import { shouldStartCeremony, startCeremony, handleCeremonyProgress, queueReflectionDrain, queueUserDedupRequest, queueRoomCapture, queuePersonaCapture, checkAndQueueRoomExtraction, queueTargetedPersonUpdate, queueTargetedTopicUpdate } from "./orchestrators/index.js";
44
+ import { finishDocumentBatch } from "./handlers/document-segmentation.js";
43
45
  import { BUILT_IN_FACTS } from "./constants/built-in-facts.js";
44
46
  import { DEFAULT_SEED_TRAITS } from "./constants/seed-traits.js";
45
47
 
@@ -132,6 +134,8 @@ import {
132
134
  markAllRoomMessagesRead,
133
135
  } from "./room-manager.js";
134
136
  import type { RoomCreationInput, RoomEntity, RoomMessage, RoomSummary } from "./types.js";
137
+ import { previewUnsource as _previewUnsource } from "../integrations/document/unsource.js";
138
+ import type { UnsourcePreview, UnsourceResult } from "../integrations/document/unsource.js";
135
139
 
136
140
  const DEFAULT_LOOP_INTERVAL_MS = 100;
137
141
  const DEFAULT_OPENCODE_POLLING_MS = 60000;
@@ -282,6 +286,46 @@ export class Processor {
282
286
  this.interface.onMessageAdded?.(eiEntity.id);
283
287
  }
284
288
 
289
/**
 * Makes sure the built-in "emmet" persona exists and is active.
 *
 * An archived persona is unarchived and otherwise left untouched. When no
 * persona with that id exists, one is created from EMMETT_PERSONA_DEFINITION,
 * granted the "read_memory" tool if such a tool is registered, and the
 * interface is notified through `onPersonaAdded`.
 */
private bootstrapEmmett(): void {
  const emmettId = "emmet";
  const current = this.stateManager.persona_getById(emmettId);

  if (current?.is_archived) {
    this.stateManager.persona_unarchive(emmettId);
  }
  if (current) return;

  // Only grant the tool when it is actually registered.
  const readMemoryTool = this.stateManager.tools_getByName("read_memory");
  const grantedTools = readMemoryTool ? [readMemoryTool.id] : [];

  const emmettEntity: PersonaEntity = {
    ...EMMETT_PERSONA_DEFINITION,
    id: emmettId,
    display_name: "Emmett",
    last_updated: new Date().toISOString(),
    tools: grantedTools,
  };

  this.stateManager.persona_add(emmettEntity);
  this.interface.onPersonaAdded?.();
}
308
+
309
/**
 * Imports a document's text.
 *
 * Ensures the "emmet" persona is available first, then lazily loads the
 * document importer module and hands the work over to it.
 *
 * @param content - Raw document text.
 * @param filename - Name of the document being imported.
 * @returns Whatever the importer module's `importDocument` resolves to.
 */
async importDocument(
  content: string,
  filename: string
): Promise<import("../integrations/document/types.js").DocumentImportResult> {
  this.bootstrapEmmett();

  const importerModule = await import("../integrations/document/importer.js");
  return importerModule.importDocument({
    stateManager: this.stateManager,
    interface: this.interface,
    content,
    filename,
  });
}
319
+
320
/**
 * Builds the unsource preview for `sourceTag` by delegating to
 * `previewUnsource` with this processor's state manager.
 */
getUnsourcePreview(sourceTag: string): UnsourcePreview {
  const preview = _previewUnsource(sourceTag, this.stateManager);
  return preview;
}
323
+
324
/**
 * Applies a previously computed unsource preview.
 *
 * The unsource module is loaded lazily; the preview and this processor's
 * state manager are passed straight through to its `executeUnsource`.
 */
async executeUnsource(preview: UnsourcePreview): Promise<UnsourceResult> {
  const unsourceModule = await import("../integrations/document/unsource.js");
  return unsourceModule.executeUnsource(preview, this.stateManager);
}
328
+
285
329
  /**
286
330
  * Seed built-in tool providers and tools if they don't exist yet.
287
331
  * Called on every startup (after state load/restore) — safe to call repeatedly.
@@ -1168,6 +1212,15 @@ const toolNextSteps = new Set([
1168
1212
  await this.checkAndSyncCursor(human, now);
1169
1213
  }
1170
1214
 
1215
+ if (
1216
+ this.isTUI &&
1217
+ human.settings?.personaHistory?.integration &&
1218
+ !human.settings.personaHistory.complete &&
1219
+ this.stateManager.queue_length() === 0
1220
+ ) {
1221
+ await this.checkAndSyncPersonaHistory(human);
1222
+ }
1223
+
1171
1224
  if (human.settings?.ceremony && shouldStartCeremony(human.settings.ceremony, this.stateManager)) {
1172
1225
  if (human.settings?.sync && remoteSync.isConfigured()) {
1173
1226
  const state = this.stateManager.getStorageState();
@@ -1180,7 +1233,7 @@ const toolNextSteps = new Set([
1180
1233
  }
1181
1234
 
1182
1235
  for (const persona of this.stateManager.persona_getAll()) {
1183
- if (persona.is_paused || persona.is_archived) continue;
1236
+ if (persona.is_paused || persona.is_archived || persona.is_static) continue;
1184
1237
 
1185
1238
  const defaultHeartbeatMs = this.stateManager.getHuman().settings?.default_heartbeat_ms ?? 1800000;
1186
1239
  const heartbeatDelay = persona.heartbeat_delay_ms ?? defaultHeartbeatMs;
@@ -1408,6 +1461,32 @@ const toolNextSteps = new Set([
1408
1461
  });
1409
1462
  }
1410
1463
 
1464
/** True while a persona-history import started by this processor is still running. */
private personaHistoryImportInProgress = false;

/**
 * Starts a persona-history import in the background unless one is already running.
 *
 * The returned promise resolves as soon as the import has been started; it
 * does NOT wait for the import to finish. Failures are logged as warnings
 * and the in-progress flag is always cleared when the import settles.
 */
private async checkAndSyncPersonaHistory(_human: HumanEntity): Promise<void> {
  if (this.personaHistoryImportInProgress) return;
  this.personaHistoryImportInProgress = true;

  const runImport = async (): Promise<void> => {
    try {
      const { importPersonaHistory } = await import("../integrations/persona-history/importer.js");
      const result = await importPersonaHistory({ stateManager: this.stateManager });
      if (result.scansQueued > 0) {
        const suffix = result.complete ? " — import complete" : "";
        console.log(`[Processor] PersonaHistory: ${result.scansQueued} scans queued` + suffix);
      }
    } catch (err) {
      console.warn(`[Processor] PersonaHistory sync failed:`, err);
    } finally {
      this.personaHistoryImportInProgress = false;
    }
  };

  // Deliberately not awaited: the caller must not block on the import.
  void runImport();
}
1489
+
1411
1490
  private augmentRoomRequest(request: LLMRequest): LLMRequest {
1412
1491
  if (request.next_step !== LLMNextStep.HandleRoomResponse) return request;
1413
1492
 
@@ -1675,6 +1754,16 @@ const toolNextSteps = new Set([
1675
1754
  if (typeof response.request.data.ceremony_progress === "number") {
1676
1755
  handleCeremonyProgress(this.stateManager, response.request.data.ceremony_progress);
1677
1756
  }
1757
+
1758
+ if (response.request.next_step === LLMNextStep.HandleDocumentSegmentation) {
1759
+ const batchId = response.request.data.batchId as string;
1760
+ const filename = response.request.data.filename as string;
1761
+ if (batchId && !this.stateManager.queue_hasPendingDocumentSegments(batchId)) {
1762
+ finishDocumentBatch(batchId, filename, this.stateManager);
1763
+ this.interface.onMessageAdded?.("emmet");
1764
+ this.interface.onHumanUpdated?.();
1765
+ }
1766
+ }
1678
1767
  } catch (err) {
1679
1768
  const errorMsg = err instanceof Error ? err.message : String(err);
1680
1769
  const result = this.stateManager.queue_fail(response.request.id, errorMsg);
@@ -18,6 +18,39 @@ export async function resumeQueue(sm: StateManager): Promise<void> {
18
18
  }
19
19
 
20
20
  export async function getQueueStatus(sm: StateManager): Promise<QueueStatus> {
21
+ const activeItems = sm.queue_getAllActiveItems();
22
+ const segmentationItems = activeItems.filter(
23
+ r => r.next_step === LLMNextStep.HandleDocumentSegmentation
24
+ );
25
+
26
+ const batchMap = new Map<string, { filename: string; count: number }>();
27
+ for (const item of segmentationItems) {
28
+ const { batchId, filename } = item.data as { batchId: string; filename: string };
29
+ if (!batchId || !filename) continue;
30
+ const existing = batchMap.get(batchId);
31
+ if (existing) {
32
+ existing.count++;
33
+ } else {
34
+ batchMap.set(batchId, { filename, count: 1 });
35
+ }
36
+ }
37
+
38
+ const pending_documents = batchMap.size > 0
39
+ ? Array.from(batchMap.entries()).map(([batchId, { filename, count }]) => ({ batchId, filename, count }))
40
+ : undefined;
41
+
42
+ const extractingSet = new Set<string>();
43
+ for (const item of activeItems) {
44
+ const sources = item.data.sources as string[] | undefined;
45
+ if (!Array.isArray(sources)) continue;
46
+ for (const s of sources) {
47
+ if (typeof s === "string" && s.startsWith("import:document:")) {
48
+ extractingSet.add(s.slice("import:document:".length));
49
+ }
50
+ }
51
+ }
52
+ const extracting_documents = extractingSet.size > 0 ? Array.from(extractingSet) : undefined;
53
+
21
54
  return {
22
55
  state: sm.queue_isPaused()
23
56
  ? "paused"
@@ -27,6 +60,8 @@ export async function getQueueStatus(sm: StateManager): Promise<QueueStatus> {
27
60
  pending_count: sm.queue_length(),
28
61
  dlq_count: sm.queue_dlqLength(),
29
62
  embedding_warning: sm.embedding_getWarning() || undefined,
63
+ pending_documents,
64
+ extracting_documents,
30
65
  };
31
66
  }
32
67
 
@@ -1,5 +1,5 @@
1
1
  import type { LLMRequest, QueueFailResult } from "../types.js";
2
- import { DLQ_MAX_COUNT, DLQ_MAX_AGE_DAYS } from "../types.js";
2
+ import { DLQ_MAX_COUNT, DLQ_MAX_AGE_DAYS, LLMNextStep } from "../types.js";
3
3
 
4
4
  const BASE_BACKOFF_MS = 2_000;
5
5
  const MAX_BACKOFF_MS = 30_000;
@@ -200,6 +200,14 @@ export class QueueState {
200
200
  return this.queue.some(r => r.state !== "dlq" && typeof r.data.ceremony_progress === "number" && r.data.ceremony_progress > 0);
201
201
  }
202
202
 
203
/**
 * Reports whether any queued request that is not in the DLQ is a
 * document-segmentation request belonging to `batchId`.
 */
hasPendingDocumentSegments(batchId: string): boolean {
  for (const request of this.queue) {
    if (request.state === "dlq") continue;
    if (request.next_step !== LLMNextStep.HandleDocumentSegmentation) continue;
    if (request.data.batchId === batchId) return true;
  }
  return false;
}
210
+
203
211
  clear(): number {
204
212
  const count = this.queue.filter(r => r.state !== "dlq").length;
205
213
  this.queue = this.queue.filter(r => r.state === "dlq");
@@ -917,6 +917,10 @@ export class StateManager {
917
917
  return this.queueState.hasPendingCeremonies();
918
918
  }
919
919
 
920
/** Forwards to the queue state's `hasPendingDocumentSegments` for `batchId`. */
queue_hasPendingDocumentSegments(batchId: string): boolean {
  const stillPending = this.queueState.hasPendingDocumentSegments(batchId);
  return stillPending;
}
923
+
920
924
  queue_clear(): number {
921
925
  const result = this.queueState.clear();
922
926
  this.scheduleSave();
@@ -20,6 +20,11 @@ export interface OpenCodeSettings {
20
20
  processed_sessions?: Record<string, string>; // sessionId → ISO timestamp of last import
21
21
  }
22
22
 
23
/** Per-user settings for the document integration. */
export interface DocumentSettings {
  /** Model requested for document processing when set. */
  extraction_model?: string;

  /**
   * String-to-string record of processed documents.
   * NOTE(review): presumably document name → ISO timestamp, mirroring
   * `processed_sessions` on the OpenCode settings — confirm against the writer.
   */
  processed_documents?: Record<string, string>;
}
27
+
23
28
  export interface CeremonyConfig {
24
29
  time: string; // "HH:MM" format (e.g., "09:00")
25
30
  last_ceremony?: string; // ISO timestamp
@@ -117,8 +122,10 @@ export interface HumanSettings {
117
122
  backup?: BackupConfig;
118
123
  claudeCode?: import("../../integrations/claude-code/types.js").ClaudeCodeSettings;
119
124
  cursor?: import("../../integrations/cursor/types.js").CursorSettings;
125
+ document?: DocumentSettings;
120
126
  active_theme?: string;
121
127
  custom_themes?: ThemeDefinition[];
128
+ personaHistory?: import("../../integrations/persona-history/types.js").PersonaHistorySettings;
122
129
  }
123
130
 
124
131
  export interface HumanEntity {
@@ -202,3 +209,11 @@ export type ReservedPersonaName = typeof RESERVED_PERSONA_NAMES[number];
202
209
/**
 * Checks whether `name`, lower-cased, appears in RESERVED_PERSONA_NAMES.
 * (Assumes the entries of that list are stored lower-case — confirm.)
 */
export function isReservedPersonaName(name: string): boolean {
  const normalized = name.toLowerCase() as ReservedPersonaName;
  return RESERVED_PERSONA_NAMES.includes(normalized);
}
212
+
213
// Reserved persona IDs (built-in system personas that cannot be deleted)
export const RESERVED_PERSONA_IDS = ["ei", "emmet"] as const;
export type ReservedPersonaId = typeof RESERVED_PERSONA_IDS[number];

/**
 * Type guard: tells whether `id` is one of RESERVED_PERSONA_IDS.
 *
 * The comparison is exact and case-sensitive. Because the return type is a
 * type predicate, callers get `id` narrowed to `ReservedPersonaId` in the
 * true branch; it remains usable anywhere a plain boolean was expected.
 *
 * @param id - Persona id to test.
 * @returns `true` when `id` is a reserved persona id.
 */
export function isReservedPersonaId(id: string): id is ReservedPersonaId {
  // Widen the readonly tuple so `includes` accepts an arbitrary string.
  return (RESERVED_PERSONA_IDS as readonly string[]).includes(id);
}
@@ -52,6 +52,7 @@ export enum LLMNextStep {
52
52
  HandlePersonaPreview = "handlePersonaPreview",
53
53
  HandleTopicValidate = "handleTopicValidate",
54
54
  HandleReflectionCritic = "handleReflectionCritic",
55
+ HandleDocumentSegmentation = "handleDocumentSegmentation",
55
56
  }
56
57
 
57
58
  export enum ProviderType {
@@ -75,6 +75,8 @@ export interface QueueStatus {
75
75
  current_operation?: string;
76
76
  /** True when the embedding service failed and topic/person matching fell back to recent items. */
77
77
  embedding_warning?: boolean;
78
+ pending_documents?: Array<{ batchId: string; filename: string; count: number }>;
79
+ extracting_documents?: string[];
78
80
  }
79
81
 
80
82
  export interface EiError {
@@ -27,6 +27,15 @@ export interface Message {
27
27
 
28
28
  external?: boolean; // Set by integration importers (OpenCode, Cursor, Claude Code); invisible to LLM context
29
29
 
30
+ /**
31
+ * Integration source tag. Set ONLY on external: true messages by importers (document, Slack, etc.)
32
+ * to identify which external source this synthetic message came from.
33
+ * Format: "import:document:filename" | "slack:channelId" | etc.
34
+ * Enables quote provenance tracing: quote.message_id → message.source_tag → original source.
35
+ * Never set on conversational messages.
36
+ */
37
+ source_tag?: string;
38
+
30
39
  }
31
40
 
32
41
  export interface ChatMessage {
@@ -0,0 +1,88 @@
1
+ import { expandToWordBoundaries } from "../../core/handlers/human-matching.js";
2
+
3
/** Chunk size, in string-length units, used when the caller gives none. */
const DEFAULT_CHUNK_CHARS = 6_000;
/** Overlap, in string-length units, used when the caller gives none. */
const DEFAULT_OVERLAP_CHARS = 300;

/** Separators tried in order, coarsest first, for non-markdown text. */
const DEFAULT_SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
/** Markdown ladder: heading boundaries first, then the default separators. */
const MARKDOWN_SEPARATORS = ["\n## ", "\n### ", "\n#### ", ...DEFAULT_SEPARATORS];
8
+
9
/**
 * Splits `text` on `separator`.
 *
 * The empty separator means "split into individual characters". That case
 * iterates by Unicode code point rather than by UTF-16 code unit, so a
 * character outside the BMP (e.g. an emoji) stays one piece and can never be
 * cut in half at a chunk boundary.
 *
 * @param text - Text to split.
 * @param separator - Separator string; `""` splits into characters.
 * @returns The pieces, without the separator.
 */
function splitOnSeparator(text: string, separator: string): string[] {
  if (separator === "") {
    // String iteration yields whole code points, unlike split("").
    return Array.from(text);
  }
  return text.split(separator);
}
15
+
16
/**
 * Greedily packs consecutive `pieces` into chunks of at most `chunkSize`
 * characters, re-inserting `separator` between pieces that share a chunk.
 *
 * A single piece longer than `chunkSize` is not split here: it is emitted as
 * its own oversized chunk. Empty accumulated chunks are never emitted.
 *
 * @param pieces - Pieces in document order.
 * @param separator - Text placed between pieces joined into one chunk.
 * @param chunkSize - Maximum length of a merged chunk.
 * @returns The merged chunks, in order.
 */
function mergeChunks(pieces: string[], separator: string, chunkSize: number): string[] {
  const merged: string[] = [];
  let current = "";

  for (const piece of pieces) {
    const candidate = current ? current + separator + piece : piece;
    if (candidate.length <= chunkSize) {
      current = candidate;
      continue;
    }
    // Candidate would overflow: flush what we have and start over with this
    // piece, even if the piece alone exceeds chunkSize.
    if (current) merged.push(current);
    current = piece;
  }

  if (current) merged.push(current);
  return merged;
}
32
+
33
/**
 * Splits `text` into chunks using the first separator, descending to the
 * remaining separators for any piece that is still longer than `chunkSize`,
 * then merges neighbouring pieces back together up to `chunkSize`.
 *
 * Text that already fits, or text with no separators left to try, is
 * returned as a single chunk.
 */
function recursiveSplit(
  text: string,
  separators: string[],
  chunkSize: number
): string[] {
  if (text.length <= chunkSize) return [text];

  const separator = separators[0];
  if (separator === undefined) return [text];
  const finerSeparators = separators.slice(1);

  const fragments = splitOnSeparator(text, separator).flatMap(fragment =>
    fragment.length > chunkSize && finerSeparators.length > 0
      ? recursiveSplit(fragment, finerSeparators, chunkSize)
      : [fragment]
  );

  return mergeChunks(fragments, separator, chunkSize);
}
63
+
64
/**
 * Prefixes every chunk after the first with the tail of the chunk before it.
 *
 * The tail nominally starts `overlapChars` before the end of the previous
 * chunk; the actual start is whatever `expandToWordBoundaries` returns for
 * that position. The input is returned unchanged when there is at most one
 * chunk or `overlapChars` is not positive.
 */
function applyOverlap(chunks: string[], overlapChars: number): string[] {
  if (chunks.length <= 1 || overlapChars <= 0) return chunks;

  const overlapped: string[] = [chunks[0]];
  for (let i = 1; i < chunks.length; i++) {
    // The tail always comes from the ORIGINAL previous chunk, not the overlapped one.
    const previous = chunks[i - 1];
    const tailStart = Math.max(0, previous.length - overlapChars);
    const boundary = expandToWordBoundaries(previous, tailStart, tailStart);
    overlapped.push(previous.slice(boundary.start) + chunks[i]);
  }
  return overlapped;
}
76
+
77
/**
 * Splits `text` into overlapping chunks.
 *
 * @param text - Text to split.
 * @param options - `chunkSize` (default DEFAULT_CHUNK_CHARS), `overlap`
 *   (default DEFAULT_OVERLAP_CHARS) and `isMarkdown`, which selects the
 *   markdown separator list instead of the default one.
 * @returns The chunks in document order; whitespace-only chunks are dropped
 *   before the overlap is applied.
 */
export function recursiveCharacterSplit(
  text: string,
  options?: { chunkSize?: number; overlap?: number; isMarkdown?: boolean }
): string[] {
  const maxChars = options?.chunkSize ?? DEFAULT_CHUNK_CHARS;
  const overlapChars = options?.overlap ?? DEFAULT_OVERLAP_CHARS;
  const separatorLadder = options?.isMarkdown ? MARKDOWN_SEPARATORS : DEFAULT_SEPARATORS;

  const chunks = recursiveSplit(text, separatorLadder, maxChars)
    .filter(chunk => chunk.trim() !== "");

  return applyOverlap(chunks, overlapChars);
}
@@ -0,0 +1,82 @@
1
+ import type { PersonaEntity } from "../../core/types.js";
2
+ import { LLMRequestType, LLMPriority, LLMNextStep } from "../../core/types.js";
3
+ import { EMMETT_PERSONA_DEFINITION } from "../../templates/emmett.js";
4
+ import { recursiveCharacterSplit } from "./chunker.js";
5
+ import type { DocumentImportOptions, DocumentImportResult } from "./types.js";
6
+
7
/** System prompt sent with every document segmentation request. */
const SEGMENTATION_SYSTEM_PROMPT =
  "You are a document segmentation assistant. Your job is to identify natural conceptual boundaries in document content and split it into coherent segments suitable for knowledge extraction. Each segment should be a self-contained unit of information.";

/** User prompt template; the `{content}` placeholder is replaced with one chunk. */
const SEGMENTATION_USER_TEMPLATE = [
  "Split the following document content into conceptual segments. Return a JSON array of strings, where each string is one segment. Preserve all original text — do not summarize or paraphrase. Identify boundaries at topic shifts, section changes, or logical breaks.",
  "",
  "---",
  "",
  "{content}",
].join("\n");
14
+
15
/**
 * Queues a document for LLM segmentation under the built-in "emmet" persona.
 *
 * Steps, in order:
 *  1. Ensure the "emmet" persona exists and is not archived (unarchive it, or
 *     create it from EMMETT_PERSONA_DEFINITION and notify `onPersonaAdded`).
 *  2. Remove external messages already stored on "emmet" that carry this
 *     document's source tag (`import:document:<filename>`).
 *  3. Pre-chunk the content and enqueue one low-priority JSON segmentation
 *     request per chunk, all sharing a freshly generated batch id.
 *
 * The abort signal is checked once, after steps 1 and 2.
 *
 * @param options - State manager, interface callbacks, document text,
 *   filename and optional abort signal.
 * @returns The number of chunks queued and the document name; `batchId` is
 *   set only when at least one chunk was queued.
 */
export async function importDocument(options: DocumentImportOptions): Promise<DocumentImportResult> {
  const { stateManager, interface: eiInterface, content: rawContent, filename, signal } = options;

  const lowerName = filename.toLowerCase();
  const isMarkdown = lowerName.endsWith(".md") || lowerName.endsWith(".markdown");

  const result: DocumentImportResult = {
    chunksQueued: 0,
    documentName: filename,
  };

  const emmett = stateManager.persona_getById("emmet");
  if (emmett?.is_archived) {
    stateManager.persona_unarchive("emmet");
  } else if (!emmett) {
    const emmettEntity: PersonaEntity = {
      ...EMMETT_PERSONA_DEFINITION,
      id: "emmet",
      display_name: "Emmett",
      last_updated: new Date().toISOString(),
    };
    stateManager.persona_add(emmettEntity);
    eiInterface.onPersonaAdded?.();
  }

  // Drop messages left over from an earlier import of the same filename.
  const sourceTag = `import:document:${filename}`;
  const staleIds = stateManager
    .messages_get("emmet")
    .filter(m => m.external === true && m.source_tag === sourceTag)
    .map(m => m.id);
  if (staleIds.length > 0) {
    stateManager.messages_remove("emmet", staleIds);
  }

  if (signal?.aborted) return result;

  const preChunks = recursiveCharacterSplit(rawContent, { isMarkdown });
  if (preChunks.length === 0) return result;

  const batchId = crypto.randomUUID();
  const settings = stateManager.getHuman().settings;
  const model = settings?.document?.extraction_model ?? settings?.default_model;

  preChunks.forEach((chunk, chunkIndex) => {
    stateManager.queue_enqueue({
      type: LLMRequestType.JSON,
      priority: LLMPriority.Low,
      system: SEGMENTATION_SYSTEM_PROMPT,
      // Replacer function, not a replacement string: a string replacement
      // would interpret "$$", "$&", "$`" and "$'" inside the document text
      // as special patterns and silently corrupt the prompt.
      user: SEGMENTATION_USER_TEMPLATE.replace("{content}", () => chunk),
      next_step: LLMNextStep.HandleDocumentSegmentation,
      model,
      data: {
        batchId,
        filename,
        chunkIndex,
        originalContent: chunk,
      },
    });
  });

  result.chunksQueued = preChunks.length;
  result.batchId = batchId;
  return result;
}
@@ -0,0 +1,2 @@
1
+ export { importDocument } from "./importer.js";
2
+ export type { DocumentImportOptions, DocumentImportResult } from "./types.js";
@@ -0,0 +1,63 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import type { UnsourcePreview, UnsourceResult } from "./unsource.js";
4
+
5
/**
 * Renders the unsource invoice as a markdown string.
 *
 * No filesystem access — safe to call in any environment. The output is
 * fully determined by the arguments when `runAt` is supplied; when it is
 * omitted the current time is read from the system clock.
 *
 * @param preview - Supplies the source tag and the named items and quotes
 *   listed under "Deleted" and "Retained".
 * @param result - Supplies the counts shown in the section headings.
 * @param runAt - Text printed on the "Run at:" line. Defaults to the current
 *   time as an ISO-8601 string.
 * @returns The invoice markdown, terminated by a single newline.
 */
export function generateInvoiceMarkdown(
  preview: UnsourcePreview,
  result: UnsourceResult,
  runAt: string = new Date().toISOString()
): string {
  const deletedCount = result.deleted.facts + result.deleted.topics + result.deleted.people;
  const strippedCount = result.stripped.facts + result.stripped.topics + result.stripped.people;

  const lines: string[] = [
    `# Unsource: ${preview.sourceTag}`,
    `Run at: ${runAt}`,
    "",
    `## Deleted (${deletedCount} items, ${result.deleted.quotes} quotes)`,
  ];

  for (const f of preview.toDelete.facts) lines.push(`- [Fact] ${f.name}`);
  for (const t of preview.toDelete.topics) lines.push(`- [Topic] ${t.name}`);
  for (const p of preview.toDelete.people) lines.push(`- [Person] ${p.name}`);
  for (const q of preview.toDelete.quotes) {
    // Long quotes are cut to their first 80 characters plus an ellipsis.
    const excerpt = q.text.length > 80 ? `${q.text.slice(0, 80)}...` : q.text;
    lines.push(`- [Quote] "${excerpt}"`);
  }

  const { toStrip } = preview;
  const hasRetained =
    toStrip.facts.length > 0 || toStrip.topics.length > 0 || toStrip.people.length > 0;

  // The "Retained" section is omitted entirely when nothing was stripped.
  if (hasRetained) {
    lines.push("");
    lines.push(`## Retained — shared with other sources (${strippedCount} items)`);
    lines.push(`Source removed from these items. They had additional sources or non-Emmett personas.`);
    lines.push("");
    for (const f of toStrip.facts) lines.push(`- [Fact] ${f.name}`);
    for (const t of toStrip.topics) lines.push(`- [Topic] ${t.name}`);
    for (const p of toStrip.people) lines.push(`- [Person] ${p.name}`);
    lines.push("");
    lines.push(`Run \`/me topics\` or \`/me people\` to review or delete retained items manually.`);
  }

  return lines.join("\n") + "\n";
}
48
+
49
/**
 * Writes the unsource invoice to `<dataPath>/unsourced/`, creating the
 * directory if needed.
 *
 * The file name is the current ISO timestamp (with ":" and "." replaced by
 * "-"), a dash, and the source tag with every character outside
 * `[a-zA-Z0-9._-]` replaced by "_", plus the `.md` extension.
 *
 * @returns The full path of the file that was written.
 */
export async function writeUnsourceInvoice(
  preview: UnsourcePreview,
  result: UnsourceResult,
  dataPath: string
): Promise<string> {
  const fileSafeTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
  const fileSafeTag = preview.sourceTag.replace(/[^a-zA-Z0-9._-]/g, "_");

  const outputDir = join(dataPath, "unsourced");
  const invoicePath = join(outputDir, `${fileSafeTimestamp}-${fileSafeTag}.md`);

  await mkdir(outputDir, { recursive: true });
  const markdown = generateInvoiceMarkdown(preview, result);
  await writeFile(invoicePath, markdown, "utf8");

  return invoicePath;
}
@@ -0,0 +1,16 @@
1
+ import type { StateManager } from "../../core/state-manager.js";
2
+ import type { Ei_Interface } from "../../core/types.js";
3
+
4
/** Input for a document import. */
export interface DocumentImportOptions {
  /** State manager the import operates on. */
  stateManager: StateManager;
  /** Interface object supplied by the caller. */
  interface: Ei_Interface;
  /** Document text. */
  content: string;
  /** Name of the document. */
  filename: string;
  /** Optional abort signal for the import. */
  signal?: AbortSignal;
}
11
+
12
/** Outcome of a document import. */
export interface DocumentImportResult {
  /** Number of chunks that were queued. */
  chunksQueued: number;
  /** Name of the imported document. */
  documentName: string;
  /** Identifier of the batch, when one was created. */
  batchId?: string;
}