ei-tui 0.9.4 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -3
- package/package.json +5 -1
- package/src/README.md +9 -25
- package/src/core/handlers/document-segmentation.ts +113 -0
- package/src/core/handlers/human-extraction.ts +16 -16
- package/src/core/handlers/index.ts +2 -0
- package/src/core/handlers/rewrite.ts +13 -9
- package/src/core/heartbeat-manager.ts +2 -2
- package/src/core/llm-client.ts +66 -6
- package/src/core/message-manager.ts +20 -18
- package/src/core/orchestrators/ceremony.ts +83 -40
- package/src/core/orchestrators/human-extraction.ts +5 -1
- package/src/core/persona-manager.ts +4 -0
- package/src/core/processor.ts +90 -1
- package/src/core/queue-manager.ts +35 -0
- package/src/core/queue-processor.ts +13 -13
- package/src/core/state/queue.ts +9 -1
- package/src/core/state-manager.ts +10 -6
- package/src/core/types/entities.ts +15 -0
- package/src/core/types/enums.ts +1 -0
- package/src/core/types/integrations.ts +2 -0
- package/src/core/types/llm.ts +9 -0
- package/src/integrations/document/chunker.ts +88 -0
- package/src/integrations/document/importer.ts +82 -0
- package/src/integrations/document/index.ts +2 -0
- package/src/integrations/document/invoice.ts +63 -0
- package/src/integrations/document/types.ts +16 -0
- package/src/integrations/document/unsource.ts +164 -0
- package/src/integrations/persona-history/importer.ts +197 -0
- package/src/integrations/persona-history/index.ts +3 -0
- package/src/integrations/persona-history/types.ts +7 -0
- package/src/prompts/ceremony/dedup.ts +7 -3
- package/src/prompts/ceremony/index.ts +2 -1
- package/src/prompts/ceremony/people-rewrite.ts +190 -0
- package/src/prompts/ceremony/{rewrite.ts → topic-rewrite.ts} +103 -78
- package/src/prompts/human/person-scan.ts +13 -4
- package/src/prompts/human/topic-scan.ts +16 -2
- package/src/prompts/human/topic-update.ts +36 -4
- package/src/prompts/human/types.ts +1 -0
- package/src/storage/indexed.ts +4 -0
- package/src/storage/interface.ts +1 -0
- package/src/storage/local.ts +4 -0
- package/src/templates/emmett.ts +49 -0
- package/tui/README.md +25 -2
- package/tui/src/app.tsx +9 -6
- package/tui/src/commands/delete.tsx +7 -1
- package/tui/src/commands/import.tsx +30 -0
- package/tui/src/commands/unsource.tsx +115 -0
- package/tui/src/components/PromptInput.tsx +4 -0
- package/tui/src/components/WelcomeOverlay.tsx +58 -32
- package/tui/src/context/ei.tsx +80 -60
- package/tui/src/index.tsx +14 -0
- package/tui/src/storage/file.ts +11 -5
- package/tui/src/util/e2e-flags.ts +4 -3
- package/tui/src/util/help-content.ts +20 -0
- package/tui/src/util/logger.ts +1 -1
- package/tui/src/util/provider-detection.ts +251 -0
- package/tui/src/util/yaml-human.ts +7 -1
package/src/core/types/llm.ts
CHANGED
|
@@ -27,6 +27,15 @@ export interface Message {
|
|
|
27
27
|
|
|
28
28
|
external?: boolean; // Set by integration importers (OpenCode, Cursor, Claude Code); invisible to LLM context
|
|
29
29
|
|
|
30
|
+
/**
|
|
31
|
+
* Integration source tag. Set ONLY on external: true messages by importers (document, Slack, etc.)
|
|
32
|
+
* to identify which external source this synthetic message came from.
|
|
33
|
+
* Format: "import:document:filename" | "slack:channelId" | etc.
|
|
34
|
+
* Enables quote provenance tracing: quote.message_id → message.source_tag → original source.
|
|
35
|
+
* Never set on conversational messages.
|
|
36
|
+
*/
|
|
37
|
+
source_tag?: string;
|
|
38
|
+
|
|
30
39
|
}
|
|
31
40
|
|
|
32
41
|
export interface ChatMessage {
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { expandToWordBoundaries } from "../../core/handlers/human-matching.js";
|
|
2
|
+
|
|
3
|
+
/** Default upper bound on a chunk's `string.length` when the caller gives no `chunkSize`. */
const DEFAULT_CHUNK_CHARS = 6000;

/** Default number of trailing characters of the previous chunk repeated at the start of the next. */
const DEFAULT_OVERLAP_CHARS = 300;

/**
 * Separator cascade for markdown input, tried from coarsest (headings) to
 * finest (the empty string, i.e. individual characters).
 */
const MARKDOWN_SEPARATORS = [
  "\n## ",
  "\n### ",
  "\n#### ",
  "\n\n",
  "\n",
  ". ",
  " ",
  "",
];

/** Separator cascade for plain text: paragraphs, lines, sentences, words, characters. */
const DEFAULT_SEPARATORS = [
  "\n\n",
  "\n",
  ". ",
  " ",
  "",
];
|
|
8
|
+
|
|
9
|
+
/**
 * Splits `text` on `separator`.
 *
 * The empty separator means "split into individual characters". That case is
 * done per Unicode code point (string iteration) rather than per UTF-16 code
 * unit, so a surrogate pair such as an emoji stays in one piece and can never
 * end up cut in half across two chunks.
 *
 * @param text - The text to split.
 * @param separator - The delimiter; `""` splits into single characters.
 * @returns The pieces, without the separator.
 */
function splitOnSeparator(text: string, separator: string): string[] {
  if (separator === "") {
    // `text.split("")` would yield lone surrogates for astral characters.
    return Array.from(text);
  }
  return text.split(separator);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Greedily re-joins adjacent pieces with `separator` while the joined text
 * still fits in `chunkSize`.
 *
 * A piece that is longer than `chunkSize` on its own is not split here; it is
 * emitted as its own (oversized) chunk. Empty pieces seen while nothing has
 * been accumulated yet are dropped, because an empty accumulator is treated
 * as "no current chunk".
 *
 * @param pieces - Pieces in document order.
 * @param separator - Text inserted between two pieces that are merged.
 * @param chunkSize - Maximum `length` of a merged chunk.
 * @returns The merged chunks, in order.
 */
function mergeChunks(pieces: string[], separator: string, chunkSize: number): string[] {
  const merged: string[] = [];
  let current = "";

  for (const piece of pieces) {
    const candidate = current ? current + separator + piece : piece;
    if (candidate.length <= chunkSize) {
      current = candidate;
      continue;
    }
    // Candidate does not fit: flush what we have and start over from this piece.
    if (current) merged.push(current);
    current = piece;
  }

  if (current) merged.push(current);
  return merged;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Recursively breaks `text` down using the separator cascade until pieces fit
 * in `chunkSize`, then merges neighbouring pieces back together.
 *
 * The first separator is applied to the text. Any resulting piece that is
 * still too long is split again with the remaining separators. A piece that
 * is too long when no separators remain is kept as-is. The collected pieces
 * are finally merged using the first separator.
 *
 * @param text - Text to split.
 * @param separators - Separators to try, coarsest first.
 * @param chunkSize - Maximum chunk `length`.
 * @returns Chunks in document order.
 */
function recursiveSplit(
  text: string,
  separators: string[],
  chunkSize: number
): string[] {
  if (text.length <= chunkSize) return [text];

  const head = separators[0];
  if (head === undefined) return [text];
  const rest = separators.slice(1);

  const fragments = splitOnSeparator(text, head).flatMap(part => {
    const needsFinerSplit = part.length > chunkSize && rest.length > 0;
    return needsFinerSplit ? recursiveSplit(part, rest, chunkSize) : [part];
  });

  return mergeChunks(fragments, head, chunkSize);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Prefixes every chunk except the first with the tail of the chunk before it.
 *
 * The tail starts roughly `overlapChars` characters before the end of the
 * previous chunk; the exact start index is whatever `expandToWordBoundaries`
 * returns as `start` for that position (presumably snapped to a word
 * boundary — confirm against that helper). The prefix is taken from the
 * original previous chunk, not from its already-overlapped form.
 *
 * @param chunks - Chunks in document order.
 * @param overlapChars - Approximate overlap size; `<= 0` disables overlap.
 * @returns The input array itself when nothing is to be done, otherwise a new array.
 */
function applyOverlap(chunks: string[], overlapChars: number): string[] {
  if (chunks.length <= 1 || overlapChars <= 0) return chunks;

  const overlapped: string[] = [chunks[0]];
  for (let i = 1; i < chunks.length; i++) {
    const previous = chunks[i - 1];
    const tailStart = Math.max(0, previous.length - overlapChars);
    const boundary = expandToWordBoundaries(previous, tailStart, tailStart);
    overlapped.push(previous.slice(boundary.start) + chunks[i]);
  }
  return overlapped;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Splits a document into overlapping chunks using a recursive separator cascade.
 *
 * @param text - Full document text.
 * @param options - Optional tuning:
 *   `chunkSize` (default `DEFAULT_CHUNK_CHARS`), `overlap` (default
 *   `DEFAULT_OVERLAP_CHARS`), and `isMarkdown` to use the markdown-aware
 *   separators instead of the plain-text ones.
 * @returns Non-blank chunks in document order. Chunks after the first carry an
 *   overlap prefix and can therefore be longer than `chunkSize`.
 */
export function recursiveCharacterSplit(
  text: string,
  options?: { chunkSize?: number; overlap?: number; isMarkdown?: boolean }
): string[] {
  const maxChars = options?.chunkSize ?? DEFAULT_CHUNK_CHARS;
  const overlapChars = options?.overlap ?? DEFAULT_OVERLAP_CHARS;

  let cascade = DEFAULT_SEPARATORS;
  if (options?.isMarkdown) {
    cascade = MARKDOWN_SEPARATORS;
  }

  // Whitespace-only chunks carry no content and are dropped before overlapping.
  const contentChunks = recursiveSplit(text, cascade, maxChars).filter(
    piece => piece.trim() !== ""
  );
  return applyOverlap(contentChunks, overlapChars);
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import type { PersonaEntity } from "../../core/types.js";
|
|
2
|
+
import { LLMRequestType, LLMPriority, LLMNextStep } from "../../core/types.js";
|
|
3
|
+
import { EMMETT_PERSONA_DEFINITION } from "../../templates/emmett.js";
|
|
4
|
+
import { recursiveCharacterSplit } from "./chunker.js";
|
|
5
|
+
import type { DocumentImportOptions, DocumentImportResult } from "./types.js";
|
|
6
|
+
|
|
7
|
+
/** System prompt for the document segmentation request. */
const SEGMENTATION_SYSTEM_PROMPT =
  "You are a document segmentation assistant. Your job is to identify natural conceptual boundaries in document content and split it into coherent segments suitable for knowledge extraction. Each segment should be a self-contained unit of information.";

/**
 * User prompt template for the segmentation request. The literal `{content}`
 * placeholder is substituted with one pre-chunk of the document.
 */
const SEGMENTATION_USER_TEMPLATE = [
  "Split the following document content into conceptual segments. Return a JSON array of strings, where each string is one segment. Preserve all original text — do not summarize or paraphrase. Identify boundaries at topic shifts, section changes, or logical breaks.",
  "",
  "---",
  "",
  "{content}",
].join("\n");
|
|
14
|
+
|
|
15
|
+
/**
 * Queues a document for LLM segmentation under the "emmet" persona.
 *
 * Steps:
 *  1. Ensure the "emmet" persona exists and is not archived (unarchive it, or
 *     create it from `EMMETT_PERSONA_DEFINITION`).
 *  2. Remove that persona's existing external messages tagged
 *     `import:document:<filename>`, so a re-import replaces earlier content.
 *  3. Pre-chunk the content and enqueue one low-priority JSON request per
 *     chunk, all sharing one freshly generated batch id.
 *
 * @param options - State manager, UI interface, raw content, filename, and an
 *   optional abort signal. The signal is checked once, after stale messages
 *   are removed and before anything is queued.
 * @returns The number of chunks queued and, when at least one was queued, the
 *   batch id.
 */
export async function importDocument(options: DocumentImportOptions): Promise<DocumentImportResult> {
  const { stateManager, interface: eiInterface, content: rawContent, filename, signal } = options;

  const lowerName = filename.toLowerCase();
  const isMarkdown = lowerName.endsWith(".md") || lowerName.endsWith(".markdown");

  const result: DocumentImportResult = {
    chunksQueued: 0,
    documentName: filename,
  };

  let emmett = stateManager.persona_getById("emmet");
  if (emmett?.is_archived) {
    stateManager.persona_unarchive("emmet");
    emmett = stateManager.persona_getById("emmet");
  }
  if (!emmett) {
    const emmettEntity: PersonaEntity = {
      ...EMMETT_PERSONA_DEFINITION,
      id: "emmet",
      display_name: "Emmett",
      last_updated: new Date().toISOString(),
    };
    stateManager.persona_add(emmettEntity);
    eiInterface.onPersonaAdded?.();
  }

  const sourceTag = `import:document:${filename}`;
  const staleIds = stateManager
    .messages_get("emmet")
    .filter(m => m.external === true && m.source_tag === sourceTag)
    .map(m => m.id);
  if (staleIds.length > 0) {
    stateManager.messages_remove("emmet", staleIds);
  }

  if (signal?.aborted) return result;

  const preChunks = recursiveCharacterSplit(rawContent, { isMarkdown });
  if (preChunks.length === 0) return result;

  const batchId = crypto.randomUUID();
  const humanSettings = stateManager.getHuman().settings;
  const model = humanSettings?.document?.extraction_model ?? humanSettings?.default_model;

  for (let i = 0; i < preChunks.length; i++) {
    const chunk = preChunks[i];
    stateManager.queue_enqueue({
      type: LLMRequestType.JSON,
      priority: LLMPriority.Low,
      system: SEGMENTATION_SYSTEM_PROMPT,
      // A replacer function inserts the chunk verbatim. With a plain string
      // replacement, sequences such as "$&", "$'" or "$$" inside the document
      // would be interpreted as replacement patterns and corrupt the prompt.
      user: SEGMENTATION_USER_TEMPLATE.replace("{content}", () => chunk),
      next_step: LLMNextStep.HandleDocumentSegmentation,
      model,
      data: {
        batchId,
        filename,
        chunkIndex: i,
        originalContent: chunk,
      },
    });
  }

  result.chunksQueued = preChunks.length;
  result.batchId = batchId;
  return result;
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import type { UnsourcePreview, UnsourceResult } from "./unsource.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Renders the unsource invoice as a markdown string.
 *
 * Performs no filesystem access, so it is safe to call in any environment.
 * It does read the current time for the "Run at" line, so the output is not
 * identical between calls.
 *
 * Item names and quote texts come from `preview`; the counts in the headings
 * come from `result`. Quote texts longer than 80 characters are truncated and
 * suffixed with "...".
 *
 * @param preview - What was selected for deletion / source stripping.
 * @param result - Counts of what was actually deleted / stripped.
 * @returns Markdown text ending in a newline.
 */
export function generateInvoiceMarkdown(preview: UnsourcePreview, result: UnsourceResult): string {
  const runAt = new Date().toISOString();
  const { toDelete, toStrip } = preview;
  const { deleted, stripped } = result;

  const totalDeleted = deleted.facts + deleted.topics + deleted.people;
  const totalStripped = stripped.facts + stripped.topics + stripped.people;

  const bullets = (label: string, items: Array<{ name: string }>): string[] =>
    items.map(item => `- [${label}] ${item.name}`);

  const out: string[] = [
    `# Unsource: ${preview.sourceTag}`,
    `Run at: ${runAt}`,
    "",
    `## Deleted (${totalDeleted} items, ${deleted.quotes} quotes)`,
    ...bullets("Fact", toDelete.facts),
    ...bullets("Topic", toDelete.topics),
    ...bullets("Person", toDelete.people),
  ];

  for (const quote of toDelete.quotes) {
    let excerpt = quote.text;
    if (excerpt.length > 80) {
      excerpt = `${excerpt.slice(0, 80)}...`;
    }
    out.push(`- [Quote] "${excerpt}"`);
  }

  const retainedCount = toStrip.facts.length + toStrip.topics.length + toStrip.people.length;
  if (retainedCount > 0) {
    out.push(
      "",
      `## Retained — shared with other sources (${totalStripped} items)`,
      `Source removed from these items. They had additional sources or non-Emmett personas.`,
      "",
      ...bullets("Fact", toStrip.facts),
      ...bullets("Topic", toStrip.topics),
      ...bullets("Person", toStrip.people),
      "",
      `Run \`/me topics\` or \`/me people\` to review or delete retained items manually.`
    );
  }

  return out.join("\n") + "\n";
}
|
|
48
|
+
|
|
49
|
+
/**
 * Writes the unsource invoice to `<dataPath>/unsourced/<timestamp>-<tag>.md`.
 *
 * The file name is built from the current ISO timestamp (with ":" and "."
 * replaced by "-") and the source tag with every character outside
 * `[a-zA-Z0-9._-]` replaced by "_". The directory is created if missing.
 *
 * @param preview - Preview the invoice is rendered from.
 * @param result - Execution result the invoice is rendered from.
 * @param dataPath - Base data directory.
 * @returns The path of the file that was written.
 */
export async function writeUnsourceInvoice(
  preview: UnsourcePreview,
  result: UnsourceResult,
  dataPath: string
): Promise<string> {
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const safeTag = preview.sourceTag.replace(/[^a-zA-Z0-9._-]/g, "_");

  const outDir = join(dataPath, "unsourced");
  const target = join(outDir, `${stamp}-${safeTag}.md`);

  await mkdir(outDir, { recursive: true });
  const markdown = generateInvoiceMarkdown(preview, result);
  await writeFile(target, markdown, "utf8");

  return target;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { StateManager } from "../../core/state-manager.js";
|
|
2
|
+
import type { Ei_Interface } from "../../core/types.js";
|
|
3
|
+
|
|
4
|
+
/** Inputs for `importDocument`. */
export interface DocumentImportOptions {
  /** State store used for personas, messages, settings and the request queue. */
  stateManager: StateManager;

  /** UI callback surface; notified when the import has to create a persona. */
  interface: Ei_Interface;

  /** Full text of the document to import. */
  content: string;

  /** Document file name; used for the source tag and for markdown detection by extension. */
  filename: string;

  /** Optional cancellation signal. */
  signal?: AbortSignal;
}
|
|
11
|
+
|
|
12
|
+
/** Outcome of `importDocument`. */
export interface DocumentImportResult {
  /** Number of segmentation requests placed on the queue. */
  chunksQueued: number;

  /** The file name the import was called with. */
  documentName: string;

  /** Id shared by all requests queued for this import; absent when nothing was queued. */
  batchId?: string;
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import type { StateManager } from "../../core/state-manager.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Dry-run description of what removing a source would do.
 * Produced by `previewUnsource` and consumed by `executeUnsource`.
 */
export interface UnsourcePreview {
  /** The source tag being removed. */
  sourceTag: string;

  /** Items that would be deleted outright. */
  toDelete: {
    facts: { id: string; name: string }[];
    topics: { id: string; name: string }[];
    people: { id: string; name: string }[];
    quotes: { id: string; text: string }[];
  };

  /** Items that are kept but lose this source from their `sources` list. */
  toStrip: {
    facts: { id: string; name: string }[];
    topics: { id: string; name: string }[];
    people: { id: string; name: string }[];
  };
}
|
|
17
|
+
|
|
18
|
+
/** Counts reported by `executeUnsource`. */
export interface UnsourceResult {
  /** The source tag that was removed. */
  sourceTag: string;

  /** Number of items deleted, per kind. */
  deleted: {
    facts: number;
    topics: number;
    people: number;
    quotes: number;
  };

  /** Number of retained items that had the source removed from their `sources`, per kind. */
  stripped: {
    facts: number;
    topics: number;
    people: number;
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Computes, without changing any state, what removing `sourceTag` would do.
 *
 * For every fact, topic and person whose `sources` contains the tag:
 *  - it is listed under `toDelete` when no other source remains AND no persona
 *    other than "emmet" is listed in `interested_personas`;
 *  - otherwise it is listed under `toStrip` (kept, source removed).
 *
 * A quote is listed for deletion when its `message_id` refers to a message of
 * the "emmet" persona whose `source_tag` equals the tag.
 *
 * @param sourceTag - The source tag to remove, e.g. `import:document:<filename>`.
 * @param stateManager - State store to read from.
 * @returns The preview.
 */
export function previewUnsource(sourceTag: string, stateManager: StateManager): UnsourcePreview {
  const human = stateManager.getHuman();

  const preview: UnsourcePreview = {
    sourceTag,
    toDelete: { facts: [], topics: [], people: [], quotes: [] },
    toStrip: { facts: [], topics: [], people: [] },
  };

  // Shared classification for facts, topics and people.
  const classify = (
    items: ReadonlyArray<{
      id: string;
      name: string;
      sources?: string[] | undefined;
      interested_personas?: string[] | undefined;
    }>,
    toDelete: Array<{ id: string; name: string }>,
    toStrip: Array<{ id: string; name: string }>
  ): void => {
    for (const item of items) {
      if (!item.sources?.includes(sourceTag)) continue;
      const hasOtherSource = item.sources.some(s => s !== sourceTag);
      const hasOtherPersona = (item.interested_personas ?? []).some(id => id !== "emmet");
      const bucket = hasOtherSource || hasOtherPersona ? toStrip : toDelete;
      bucket.push({ id: item.id, name: item.name });
    }
  };

  classify(human.facts, preview.toDelete.facts, preview.toStrip.facts);
  classify(human.topics, preview.toDelete.topics, preview.toStrip.topics);
  classify(human.people, preview.toDelete.people, preview.toStrip.people);

  const sourceMessageIds = new Set(
    stateManager
      .messages_get("emmet")
      .filter(m => m.source_tag === sourceTag)
      .map(m => m.id)
  );

  for (const quote of human.quotes) {
    if (quote.message_id && sourceMessageIds.has(quote.message_id)) {
      preview.toDelete.quotes.push({ id: quote.id, text: quote.text });
    }
  }

  return preview;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Applies a previously computed `UnsourcePreview`.
 *
 * In order:
 *  1. Removes the quotes, facts, topics and people listed under `toDelete`.
 *  2. For items listed under `toStrip`, removes the source tag from their
 *     `sources` and saves the human entity.
 *  3. Removes the "emmet" persona's messages whose `source_tag` equals the tag.
 *  4. Drops the document's entry from `settings.document.processed_documents`
 *     (keyed by the tag with its `import:document:` prefix removed).
 *
 * Strip ids are matched per kind, so an id that happens to be shared between,
 * say, a fact and a topic only affects the kind it was listed under.
 *
 * @param preview - What to delete and what to strip.
 * @param stateManager - State store to modify.
 * @returns Counts of deleted and stripped items.
 */
export async function executeUnsource(
  preview: UnsourcePreview,
  stateManager: StateManager
): Promise<UnsourceResult> {
  const { sourceTag, toDelete, toStrip } = preview;

  const result: UnsourceResult = {
    sourceTag,
    deleted: { facts: 0, topics: 0, people: 0, quotes: 0 },
    stripped: { facts: 0, topics: 0, people: 0 },
  };

  for (const q of toDelete.quotes) {
    stateManager.human_quote_remove(q.id);
    result.deleted.quotes++;
  }
  for (const f of toDelete.facts) {
    stateManager.human_fact_remove(f.id);
    result.deleted.facts++;
  }
  for (const t of toDelete.topics) {
    stateManager.human_topic_remove(t.id);
    result.deleted.topics++;
  }
  for (const p of toDelete.people) {
    stateManager.human_person_remove(p.id);
    result.deleted.people++;
  }

  if (toStrip.facts.length > 0 || toStrip.topics.length > 0 || toStrip.people.length > 0) {
    const human = stateManager.getHuman();

    // Removes the tag from the `sources` of the targeted items; returns how many were touched.
    const stripSource = (
      items: Array<{ id: string; sources?: string[] | undefined }>,
      targets: ReadonlyArray<{ id: string }>
    ): number => {
      const ids = new Set(targets.map(target => target.id));
      let touched = 0;
      for (const item of items) {
        if (ids.has(item.id) && item.sources) {
          item.sources = item.sources.filter(s => s !== sourceTag);
          touched++;
        }
      }
      return touched;
    };

    result.stripped.facts = stripSource(human.facts, toStrip.facts);
    result.stripped.topics = stripSource(human.topics, toStrip.topics);
    result.stripped.people = stripSource(human.people, toStrip.people);

    stateManager.setHuman(human);
  }

  const sourceMessageIds = stateManager
    .messages_get("emmet")
    .filter(m => m.source_tag === sourceTag)
    .map(m => m.id);
  if (sourceMessageIds.length > 0) {
    stateManager.messages_remove("emmet", sourceMessageIds);
  }

  const documentPrefix = "import:document:";
  const filename = sourceTag.startsWith(documentPrefix)
    ? sourceTag.slice(documentPrefix.length)
    : sourceTag;

  // Re-read: the human entity may have been replaced by the setHuman call above.
  const human = stateManager.getHuman();
  if (human.settings?.document?.processed_documents) {
    delete human.settings.document.processed_documents[filename];
    stateManager.setHuman(human);
  }

  return result;
}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import type { StateManager } from "../../core/state-manager.js";
|
|
2
|
+
import type { Message } from "../../core/types.js";
|
|
3
|
+
import {
|
|
4
|
+
queueTopicScan,
|
|
5
|
+
queuePersonScan,
|
|
6
|
+
type ExtractionContext,
|
|
7
|
+
} from "../../core/orchestrators/human-extraction.js";
|
|
8
|
+
|
|
9
|
+
/** Summary of one `importPersonaHistory` run. */
export interface PersonaHistoryImportResult {
  /** Number of calendar days queued by this run (0 or 1). */
  daysQueued: number;

  /** Number of personas that had scans queued for the day. */
  personasProcessed: number;

  /** Total scan requests queued (two per persona or room with unprocessed messages). */
  scansQueued: number;

  /** True once every day up to today has been queued. */
  complete: boolean;
}
|
|
15
|
+
|
|
16
|
+
/** Inputs for `importPersonaHistory`. */
export interface PersonaHistoryImporterOptions {
  /** State store providing personas, messages, settings and the request queue. */
  stateManager: StateManager;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Returns the first and last millisecond (epoch ms) of a UTC calendar day.
 *
 * @param dateStr - Date in `YYYY-MM-DD` form.
 * @returns `start` at 00:00:00.000Z and `end` at 23:59:59.999Z of that day.
 *   Both are `NaN` when `dateStr` does not form a parseable date.
 */
function dayBounds(dateStr: string): { start: number; end: number } {
  const epochAt = (timeOfDay: string): number =>
    new Date(`${dateStr}T${timeOfDay}Z`).getTime();

  return {
    start: epochAt("00:00:00.000"),
    end: epochAt("23:59:59.999"),
  };
}
|
|
25
|
+
|
|
26
|
+
/**
 * Returns the UTC calendar day after `dateStr`.
 *
 * @param dateStr - Date in `YYYY-MM-DD` form.
 * @returns The following day in `YYYY-MM-DD` form.
 * @throws RangeError when `dateStr` does not form a parseable date.
 */
function nextDay(dateStr: string): string {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  // Anchor at noon UTC, then step exactly one day forward.
  const noonUtc = new Date(`${dateStr}T12:00:00.000Z`).getTime();
  return new Date(noonUtc + MS_PER_DAY).toISOString().substring(0, 10);
}
|
|
31
|
+
|
|
32
|
+
/** Returns the current date in UTC as `YYYY-MM-DD`. */
function todayUTC(): string {
  const nowIso = new Date().toISOString();
  return nowIso.substring(0, 10);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Selects the messages whose timestamp lies within `[start, end]` (inclusive).
 *
 * @param messages - Messages to filter; order is preserved.
 * @param start - Lower bound, epoch milliseconds.
 * @param end - Upper bound, epoch milliseconds.
 * @returns A new array with the matching messages. Messages whose timestamp
 *   cannot be parsed never match.
 */
function messagesForDay(messages: Message[], start: number, end: number): Message[] {
  const selected: Message[] = [];
  for (const message of messages) {
    const at = new Date(message.timestamp).getTime();
    // Kept as a positive range test so that NaN (unparseable timestamp) is excluded.
    if (at >= start && at <= end) {
      selected.push(message);
    }
  }
  return selected;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Reports whether at least one message is missing its `t` or its `p` flag.
 * (Presumably the "topic scan done" / "person scan done" markers — confirm
 * against the `Message` type.)
 *
 * @param messages - Messages to inspect.
 * @returns `true` if any message has a falsy `t` or a falsy `p`; `false` for an empty list.
 */
function hasUnprocessed(messages: Message[]): boolean {
  const allProcessed = messages.every(message => message.t && message.p);
  return !allProcessed;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Queues topic and person scans for ONE calendar day (UTC) of existing
 * conversation history, then records progress so the next call continues
 * with the following day.
 *
 * The day is the day after `settings.last_queued_date` when set; otherwise
 * `settings.start_date`, else the earliest message date found, else today.
 * For that day, every non-archived persona and every room in the storage
 * state that has at least one message missing a processed flag gets one
 * topic scan and one person scan. Up to 20 messages immediately preceding the
 * day's first message are passed along as context (this relies on messages
 * being stored in chronological order — not verified here).
 *
 * @param options - Carries the state manager.
 * @returns Counters for this run; `complete` is true when history import is
 *   already finished, or when the day just queued is today or later.
 */
export async function importPersonaHistory(
  options: PersonaHistoryImporterOptions
): Promise<PersonaHistoryImportResult> {
  const { stateManager } = options;
  const settings = stateManager.getHuman().settings?.personaHistory;

  const result: PersonaHistoryImportResult = {
    daysQueued: 0,
    personasProcessed: 0,
    scansQueued: 0,
    complete: false,
  };

  if (settings?.complete) {
    result.complete = true;
    return result;
  }

  const activePersonas = stateManager.persona_getAll().filter(p => !p.is_archived);
  const today = todayUTC();

  const resumeAfter = settings?.last_queued_date;
  const currentDate = resumeAfter
    ? nextDay(resumeAfter)
    : settings?.start_date ?? findEarliestMessageDate(stateManager) ?? today;

  // ISO dates compare correctly as plain strings.
  if (currentDate > today) {
    markComplete(stateManager);
    result.complete = true;
    return result;
  }

  console.log(`[PersonaHistory] Queuing day: ${currentDate}`);

  const { start, end } = dayBounds(currentDate);
  const extractionModel = settings?.extraction_model;

  // Up to 20 messages that directly precede the first message at or after `start`.
  const precedingContext = (all: Message[]): Message[] => {
    const firstIdx = all.findIndex(m => new Date(m.timestamp).getTime() >= start);
    return firstIdx > 0 ? all.slice(Math.max(0, firstIdx - 20), firstIdx) : [];
  };

  // Queues the topic scan, then the person scan, and counts both.
  const queueScans = (context: ExtractionContext): void => {
    queueTopicScan(context, stateManager, { extraction_model: extractionModel });
    queuePersonScan(context, stateManager, { extraction_model: extractionModel });
    result.scansQueued += 2;
  };

  for (const persona of activePersonas) {
    const history = stateManager.messages_get(persona.id);
    const dayMessages = messagesForDay(history, start, end);
    if (dayMessages.length === 0 || !hasUnprocessed(dayMessages)) continue;

    queueScans({
      personaId: persona.id,
      channelDisplayName: persona.display_name,
      messages_context: precedingContext(history),
      messages_analyze: dayMessages,
    });
    result.personasProcessed++;
  }

  const rooms = Object.values((stateManager.getStorageState() as any).rooms ?? {});
  for (const entry of rooms) {
    const room = entry as { id: string; display_name: string; messages?: Message[] };
    if (!room.messages || room.messages.length === 0) continue;

    const dayMessages = messagesForDay(room.messages, start, end);
    if (dayMessages.length === 0 || !hasUnprocessed(dayMessages)) continue;

    queueScans({
      personaId: room.id,
      channelDisplayName: room.display_name,
      messages_context: precedingContext(room.messages),
      messages_analyze: dayMessages,
      roomId: room.id,
    });
  }

  result.daysQueued = 1;

  const isLastDay = currentDate >= today;
  advanceProgress(stateManager, currentDate, isLastDay);

  if (isLastDay) {
    result.complete = true;
    console.log(`[PersonaHistory] All days queued — marking complete`);
  } else {
    console.log(`[PersonaHistory] Day ${currentDate} queued (${result.scansQueued} scans), next: ${nextDay(currentDate)}`);
  }

  return result;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Finds the UTC date of the oldest message across all personas (archived ones
 * included; room messages are not consulted).
 *
 * Messages whose timestamp cannot be parsed are ignored. Without that guard a
 * single invalid timestamp encountered first would make the running minimum
 * `NaN`, and formatting it would throw a RangeError.
 *
 * @param stateManager - State store to read personas and messages from.
 * @returns The earliest date as `YYYY-MM-DD`, or `null` when no message has a
 *   valid timestamp.
 */
function findEarliestMessageDate(stateManager: StateManager): string | null {
  let earliest: number | null = null;

  for (const persona of stateManager.persona_getAll()) {
    for (const message of stateManager.messages_get(persona.id)) {
      const ts = new Date(message.timestamp).getTime();
      if (Number.isNaN(ts)) continue;
      if (earliest === null || ts < earliest) earliest = ts;
    }
  }

  return earliest !== null ? new Date(earliest).toISOString().slice(0, 10) : null;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Persists history-import progress on the human entity.
 *
 * Writes `settings.personaHistory.last_queued_date = date`, keeping all other
 * settings, and additionally sets `complete: true` when `complete` is truthy.
 * An existing `complete` value is left untouched otherwise.
 *
 * @param stateManager - State store to read from and write to.
 * @param date - The day (`YYYY-MM-DD`) that has just been queued.
 * @param complete - Whether this was the last day to queue.
 */
function advanceProgress(stateManager: StateManager, date: string, complete: boolean): void {
  const human = stateManager.getHuman();

  const completion = complete ? { complete: true } : {};
  const personaHistory = {
    ...human.settings?.personaHistory,
    last_queued_date: date,
    ...completion,
  };

  stateManager.setHuman({
    ...human,
    settings: { ...human.settings, personaHistory },
  });
}
|
|
184
|
+
|
|
185
|
+
/**
 * Flags the history import as finished by setting
 * `settings.personaHistory.complete = true` on the human entity, leaving all
 * other settings as they are.
 *
 * @param stateManager - State store to read from and write to.
 */
function markComplete(stateManager: StateManager): void {
  const human = stateManager.getHuman();
  const personaHistory = { ...human.settings?.personaHistory, complete: true };
  const settings = { ...human.settings, personaHistory };
  stateManager.setHuman({ ...human, settings });
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/** Persisted settings and progress for the persona-history import. */
export interface PersonaHistorySettings {
  integration?: boolean;

  extraction_model?: string;

  /** ISO date string "YYYY-MM-DD"; defaults to the earliest message found. */
  start_date?: string;

  /** ISO date of the last day fully queued — the resume point if interrupted. */
  last_queued_date?: string;

  /** Set true when all days have been queued; prevents re-runs. */
  complete?: boolean;
}
|