ei-tui 0.9.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +22 -3
  2. package/package.json +5 -1
  3. package/src/README.md +9 -25
  4. package/src/core/handlers/document-segmentation.ts +113 -0
  5. package/src/core/handlers/index.ts +2 -0
  6. package/src/core/handlers/rewrite.ts +13 -9
  7. package/src/core/heartbeat-manager.ts +2 -2
  8. package/src/core/llm-client.ts +11 -1
  9. package/src/core/message-manager.ts +20 -18
  10. package/src/core/orchestrators/ceremony.ts +83 -40
  11. package/src/core/orchestrators/human-extraction.ts +5 -1
  12. package/src/core/persona-manager.ts +4 -0
  13. package/src/core/processor.ts +90 -1
  14. package/src/core/queue-manager.ts +35 -0
  15. package/src/core/state/queue.ts +9 -1
  16. package/src/core/state-manager.ts +4 -0
  17. package/src/core/types/entities.ts +15 -0
  18. package/src/core/types/enums.ts +1 -0
  19. package/src/core/types/integrations.ts +2 -0
  20. package/src/core/types/llm.ts +9 -0
  21. package/src/integrations/document/chunker.ts +88 -0
  22. package/src/integrations/document/importer.ts +82 -0
  23. package/src/integrations/document/index.ts +2 -0
  24. package/src/integrations/document/invoice.ts +63 -0
  25. package/src/integrations/document/types.ts +16 -0
  26. package/src/integrations/document/unsource.ts +164 -0
  27. package/src/integrations/persona-history/importer.ts +197 -0
  28. package/src/integrations/persona-history/index.ts +3 -0
  29. package/src/integrations/persona-history/types.ts +7 -0
  30. package/src/prompts/ceremony/dedup.ts +7 -3
  31. package/src/prompts/ceremony/index.ts +2 -1
  32. package/src/prompts/ceremony/people-rewrite.ts +190 -0
  33. package/src/prompts/ceremony/{rewrite.ts → topic-rewrite.ts} +103 -78
  34. package/src/prompts/human/person-scan.ts +13 -4
  35. package/src/prompts/human/topic-scan.ts +16 -2
  36. package/src/prompts/human/topic-update.ts +36 -4
  37. package/src/prompts/human/types.ts +1 -0
  38. package/src/storage/indexed.ts +4 -0
  39. package/src/storage/interface.ts +1 -0
  40. package/src/storage/local.ts +4 -0
  41. package/src/templates/emmett.ts +49 -0
  42. package/tui/README.md +22 -0
  43. package/tui/src/app.tsx +9 -6
  44. package/tui/src/commands/delete.tsx +7 -1
  45. package/tui/src/commands/import.tsx +30 -0
  46. package/tui/src/commands/unsource.tsx +115 -0
  47. package/tui/src/components/PromptInput.tsx +4 -0
  48. package/tui/src/components/WelcomeOverlay.tsx +58 -32
  49. package/tui/src/context/ei.tsx +80 -60
  50. package/tui/src/index.tsx +14 -0
  51. package/tui/src/storage/file.ts +11 -5
  52. package/tui/src/util/e2e-flags.ts +4 -3
  53. package/tui/src/util/help-content.ts +20 -0
  54. package/tui/src/util/provider-detection.ts +251 -0
  55. package/tui/src/util/yaml-human.ts +7 -1
@@ -0,0 +1,164 @@
1
+ import type { StateManager } from "../../core/state-manager.js";
2
+
3
/**
 * Dry-run description of what removing a source tag would do.
 *
 * Produced by `previewUnsource` and consumed by `executeUnsource`.
 */
export interface UnsourcePreview {
  /** The source tag being removed. */
  sourceTag: string;
  /**
   * Items that would be removed outright: facts/topics/people left with no
   * other source and no interested persona besides "emmet", plus quotes whose
   * message belongs to the tagged "emmet" messages.
   */
  toDelete: {
    facts: { id: string; name: string }[];
    topics: { id: string; name: string }[];
    people: { id: string; name: string }[];
    quotes: { id: string; text: string }[];
  };
  /** Items that are kept but would lose `sourceTag` from their `sources` list. */
  toStrip: {
    facts: { id: string; name: string }[];
    topics: { id: string; name: string }[];
    people: { id: string; name: string }[];
  };
}
17
+
18
/** Counts reported by `executeUnsource` after applying an unsource preview. */
export interface UnsourceResult {
  /** The source tag that was removed. */
  sourceTag: string;
  /** Number of items removed, per kind. */
  deleted: {
    facts: number;
    topics: number;
    people: number;
    quotes: number;
  };
  /** Number of items that were kept but had the tag removed from `sources`, per kind. */
  stripped: {
    facts: number;
    topics: number;
    people: number;
  };
}
23
+
24
/**
 * Compute, without modifying any state, what removing `sourceTag` would do.
 *
 * Facts, topics and people that list `sourceTag` in `sources` are split into:
 * - `toDelete`: no other source remains AND no interested persona other than
 *   "emmet" is recorded;
 * - `toStrip`: everything else (the item survives, only the tag is removed).
 *
 * Quotes are marked for deletion when their `message_id` belongs to an "emmet"
 * message whose `source_tag` equals `sourceTag`.
 *
 * @param sourceTag    Tag to look for in `sources` / message `source_tag`.
 * @param stateManager State access; only read methods are called here.
 * @returns A preview suitable for passing to `executeUnsource`.
 */
export function previewUnsource(sourceTag: string, stateManager: StateManager): UnsourcePreview {
  const human = stateManager.getHuman();

  const preview: UnsourcePreview = {
    sourceTag,
    toDelete: { facts: [], topics: [], people: [], quotes: [] },
    toStrip: { facts: [], topics: [], people: [] },
  };

  // The same delete-vs-strip rule applies to all three item kinds.
  partitionBySourceTag(human.facts, sourceTag, preview.toDelete.facts, preview.toStrip.facts);
  partitionBySourceTag(human.topics, sourceTag, preview.toDelete.topics, preview.toStrip.topics);
  partitionBySourceTag(human.people, sourceTag, preview.toDelete.people, preview.toStrip.people);

  const sourceMessageIds = new Set(
    stateManager
      .messages_get("emmet")
      .filter(m => m.source_tag === sourceTag)
      .map(m => m.id)
  );

  for (const quote of human.quotes) {
    if (quote.message_id && sourceMessageIds.has(quote.message_id)) {
      preview.toDelete.quotes.push({ id: quote.id, text: quote.text });
    }
  }

  return preview;
}

/**
 * Append each item tagged with `sourceTag` to either `toDelete` or `toStrip`.
 * Items not carrying the tag are ignored.
 */
function partitionBySourceTag(
  items: ReadonlyArray<{ id: string; name: string; sources?: string[]; interested_personas?: string[] }>,
  sourceTag: string,
  toDelete: Array<{ id: string; name: string }>,
  toStrip: Array<{ id: string; name: string }>
): void {
  for (const item of items) {
    if (!item.sources?.includes(sourceTag)) continue;

    const hasOtherSource = item.sources.some(s => s !== sourceTag);
    const hasOtherPersona = (item.interested_personas ?? []).some(id => id !== "emmet");

    const bucket = !hasOtherSource && !hasOtherPersona ? toDelete : toStrip;
    bucket.push({ id: item.id, name: item.name });
  }
}
81
+
82
/**
 * Apply a preview produced by `previewUnsource`.
 *
 * Steps, in order:
 * 1. Remove the quotes, facts, topics and people listed in `preview.toDelete`.
 * 2. For items listed in `preview.toStrip`, drop `preview.sourceTag` from
 *    their `sources` and save the human record.
 * 3. Remove all "emmet" messages whose `source_tag` equals the tag.
 * 4. Delete the tag's entry from `settings.document.processed_documents`
 *    (the key is the tag with a leading "import:document:" removed, if present).
 *
 * Strip ids are matched per kind, so a topic or person that happens to share
 * an id with a fact marked for stripping is not touched.
 *
 * @param preview      What to delete and strip.
 * @param stateManager State access used for all reads and writes.
 * @returns Per-kind counts of deleted and stripped items.
 */
export async function executeUnsource(
  preview: UnsourcePreview,
  stateManager: StateManager
): Promise<UnsourceResult> {
  const { sourceTag, toDelete, toStrip } = preview;

  const result: UnsourceResult = {
    sourceTag,
    deleted: { facts: 0, topics: 0, people: 0, quotes: 0 },
    stripped: { facts: 0, topics: 0, people: 0 },
  };

  for (const q of toDelete.quotes) {
    stateManager.human_quote_remove(q.id);
    result.deleted.quotes++;
  }

  for (const f of toDelete.facts) {
    stateManager.human_fact_remove(f.id);
    result.deleted.facts++;
  }

  for (const t of toDelete.topics) {
    stateManager.human_topic_remove(t.id);
    result.deleted.topics++;
  }

  for (const p of toDelete.people) {
    stateManager.human_person_remove(p.id);
    result.deleted.people++;
  }

  if (toStrip.facts.length > 0 || toStrip.topics.length > 0 || toStrip.people.length > 0) {
    const human = stateManager.getHuman();

    result.stripped.facts = stripSourceTag(human.facts, toStrip.facts, sourceTag);
    result.stripped.topics = stripSourceTag(human.topics, toStrip.topics, sourceTag);
    result.stripped.people = stripSourceTag(human.people, toStrip.people, sourceTag);

    stateManager.setHuman(human);
  }

  const sourceMessageIds = stateManager
    .messages_get("emmet")
    .filter(m => m.source_tag === sourceTag)
    .map(m => m.id);
  if (sourceMessageIds.length > 0) {
    stateManager.messages_remove("emmet", sourceMessageIds);
  }

  const documentPrefix = "import:document:";
  const filename = sourceTag.startsWith(documentPrefix)
    ? sourceTag.slice(documentPrefix.length)
    : sourceTag;

  // Re-read: the removals above may have changed the stored human record.
  const human = stateManager.getHuman();
  if (human.settings?.document?.processed_documents) {
    delete human.settings.document.processed_documents[filename];
    stateManager.setHuman(human);
  }

  return result;
}

/**
 * Remove `sourceTag` from the `sources` of every item whose id is in `targets`.
 * Items without a `sources` list are skipped. Mutates `items` in place.
 *
 * @returns Number of items whose `sources` were rewritten.
 */
function stripSourceTag(
  items: Array<{ id: string; sources?: string[] }>,
  targets: ReadonlyArray<{ id: string }>,
  sourceTag: string
): number {
  if (targets.length === 0) return 0;

  const targetIds = new Set(targets.map(t => t.id));
  let strippedCount = 0;
  for (const item of items) {
    if (targetIds.has(item.id) && item.sources) {
      item.sources = item.sources.filter(s => s !== sourceTag);
      strippedCount++;
    }
  }
  return strippedCount;
}
@@ -0,0 +1,197 @@
1
+ import type { StateManager } from "../../core/state-manager.js";
2
+ import type { Message } from "../../core/types.js";
3
+ import {
4
+ queueTopicScan,
5
+ queuePersonScan,
6
+ type ExtractionContext,
7
+ } from "../../core/orchestrators/human-extraction.js";
8
+
9
/** Summary of one `importPersonaHistory` invocation. */
export interface PersonaHistoryImportResult {
  /** Number of calendar days queued by this call (the importer sets this to 1 when it processes a day). */
  daysQueued: number;
  /** Number of personas for which scans were queued. */
  personasProcessed: number;
  /** Total scans queued; two (topic + person) per persona or room with work. */
  scansQueued: number;
  /** True when the import is finished and no further days remain. */
  complete: boolean;
}
15
+
16
/** Dependencies required by `importPersonaHistory`. */
export interface PersonaHistoryImporterOptions {
  /** State access used to read personas/messages and to persist import progress. */
  stateManager: StateManager;
}
19
+
20
/**
 * Inclusive UTC millisecond bounds of a calendar day.
 *
 * @param dateStr Day as "YYYY-MM-DD".
 * @returns `start` at 00:00:00.000Z and `end` at 23:59:59.999Z of that day
 *          (both NaN when the string cannot be parsed).
 */
function dayBounds(dateStr: string): { start: number; end: number } {
  return {
    start: Date.parse(`${dateStr}T00:00:00.000Z`),
    end: Date.parse(`${dateStr}T23:59:59.999Z`),
  };
}
25
+
26
/**
 * Return the calendar day after `dateStr`, as "YYYY-MM-DD" in UTC.
 *
 * The computation is anchored at noon UTC and advanced by exactly one day.
 *
 * @param dateStr Day as "YYYY-MM-DD".
 * @throws RangeError (from `toISOString`) when `dateStr` cannot be parsed.
 */
function nextDay(dateStr: string): string {
  const oneDayMs = 24 * 60 * 60 * 1000;
  const noonUtc = Date.parse(`${dateStr}T12:00:00.000Z`);
  const following = new Date(noonUtc + oneDayMs);
  return following.toISOString().slice(0, 10);
}
31
+
32
/** Current date in UTC, formatted as "YYYY-MM-DD". */
function todayUTC(): string {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
35
+
36
/**
 * Select the messages whose timestamp lies within `[start, end]` (inclusive).
 *
 * @param messages Messages to filter; the input array is not modified.
 * @param start    Lower bound in epoch milliseconds.
 * @param end      Upper bound in epoch milliseconds.
 * @returns A new array, in original order. Messages with an unparseable
 *          timestamp are excluded because NaN fails both comparisons.
 */
function messagesForDay(messages: Message[], start: number, end: number): Message[] {
  const fallsWithinDay = (message: Message): boolean => {
    const sentAt = new Date(message.timestamp).getTime();
    return sentAt >= start && sentAt <= end;
  };
  return messages.filter(fallsWithinDay);
}
42
+
43
/**
 * True when at least one message has a falsy `t` or a falsy `p` field.
 *
 * NOTE(review): `t` / `p` look like per-message "topic scanned" / "person
 * scanned" markers — confirm against the Message type.
 */
function hasUnprocessed(messages: Message[]): boolean {
  const allProcessed = messages.every(message => message.t && message.p);
  return !allProcessed;
}
46
+
47
/**
 * Queue topic and person scans for ONE day of historical messages, then
 * advance the persisted resume point.
 *
 * Each call handles a single UTC day:
 * - the day after `settings.last_queued_date` when set, otherwise
 *   `settings.start_date`, otherwise the earliest persona message date,
 *   otherwise today;
 * - for every non-archived persona and every room in storage state, the day's
 *   messages are queued for scanning if at least one is still unprocessed;
 * - up to 20 messages preceding the day are passed along as context.
 *
 * When the processed day is today (or later), the import is marked complete
 * and subsequent calls return immediately.
 *
 * @param options Carries the `stateManager` used for all reads and writes.
 * @returns Counts for this call and whether the import is complete.
 */
export async function importPersonaHistory(
  options: PersonaHistoryImporterOptions
): Promise<PersonaHistoryImportResult> {
  const { stateManager } = options;
  const human = stateManager.getHuman();
  const settings = human.settings?.personaHistory;

  const result: PersonaHistoryImportResult = {
    daysQueued: 0,
    personasProcessed: 0,
    scansQueued: 0,
    complete: false,
  };

  if (settings?.complete) {
    result.complete = true;
    return result;
  }

  const personas = stateManager.persona_getAll().filter(p => !p.is_archived);
  const today = todayUTC();

  // "YYYY-MM-DD" strings compare correctly with the plain string operators used below.
  const currentDate = settings?.last_queued_date
    ? nextDay(settings.last_queued_date)
    : settings?.start_date ?? findEarliestMessageDate(stateManager) ?? today;

  if (currentDate > today) {
    markComplete(stateManager);
    result.complete = true;
    return result;
  }

  console.log(`[PersonaHistory] Queuing day: ${currentDate}`);

  const { start, end } = dayBounds(currentDate);
  const extractionModel = settings?.extraction_model;

  for (const persona of personas) {
    const queued = queueChannelDayScans(
      stateManager,
      { id: persona.id, displayName: persona.display_name, messages: stateManager.messages_get(persona.id) },
      start,
      end,
      extractionModel
    );
    if (!queued) continue;

    result.personasProcessed++;
    result.scansQueued += 2;
  }

  // Rooms are read straight from the storage state; the cast mirrors the
  // untyped access this code has always used.
  for (const room of Object.values((stateManager.getStorageState() as any).rooms ?? {})) {
    const r = room as { id: string; display_name: string; messages?: Message[] };
    if (!r.messages || r.messages.length === 0) continue;

    const queued = queueChannelDayScans(
      stateManager,
      { id: r.id, displayName: r.display_name, messages: r.messages, roomId: r.id },
      start,
      end,
      extractionModel
    );
    if (!queued) continue;

    // Rooms add to the scan count but not to `personasProcessed`.
    result.scansQueued += 2;
  }

  result.daysQueued = 1;

  const isLastDay = currentDate >= today;
  advanceProgress(stateManager, currentDate, isLastDay);

  if (isLastDay) {
    result.complete = true;
    console.log(`[PersonaHistory] All days queued — marking complete`);
  } else {
    console.log(`[PersonaHistory] Day ${currentDate} queued (${result.scansQueued} scans), next: ${nextDay(currentDate)}`);
  }

  return result;
}

/**
 * Queue a topic scan and a person scan for one channel (persona or room) for
 * the day bounded by `[start, end]`.
 *
 * @returns `true` when scans were queued; `false` when the channel has no
 *          messages that day or all of them are already processed.
 */
function queueChannelDayScans(
  stateManager: StateManager,
  channel: {
    id: ExtractionContext["personaId"];
    displayName: ExtractionContext["channelDisplayName"];
    messages: Message[];
    roomId?: string;
  },
  start: number,
  end: number,
  extractionModel: string | undefined
): boolean {
  const dayMessages = messagesForDay(channel.messages, start, end);
  if (dayMessages.length === 0) return false;
  if (!hasUnprocessed(dayMessages)) return false;

  // Context = up to 20 messages immediately before the first message at/after day start.
  const firstDayIdx = channel.messages.findIndex(m => new Date(m.timestamp).getTime() >= start);
  const contextMsgs =
    firstDayIdx > 0 ? channel.messages.slice(Math.max(0, firstDayIdx - 20), firstDayIdx) : [];

  const context: ExtractionContext = {
    personaId: channel.id,
    channelDisplayName: channel.displayName,
    messages_context: contextMsgs,
    messages_analyze: dayMessages,
    // Only room channels carry a roomId; persona contexts omit the key entirely.
    ...(channel.roomId !== undefined ? { roomId: channel.roomId } : {}),
  };

  queueTopicScan(context, stateManager, { extraction_model: extractionModel });
  queuePersonScan(context, stateManager, { extraction_model: extractionModel });
  return true;
}
154
+
155
/**
 * Find the UTC date ("YYYY-MM-DD") of the oldest message across all personas.
 *
 * Messages whose timestamp cannot be parsed are ignored; previously a single
 * unparseable timestamp seen first would poison the minimum with NaN and make
 * `toISOString()` throw a RangeError.
 *
 * NOTE(review): only persona messages are inspected, while the importer also
 * scans room messages — confirm that rooms never predate persona history.
 *
 * @param stateManager Source of personas and their messages.
 * @returns The earliest date, or `null` when no message has a valid timestamp.
 */
function findEarliestMessageDate(stateManager: StateManager): string | null {
  let earliest = Number.POSITIVE_INFINITY;

  for (const persona of stateManager.persona_getAll()) {
    for (const message of stateManager.messages_get(persona.id)) {
      const ts = new Date(message.timestamp).getTime();
      if (Number.isNaN(ts)) continue;
      if (ts < earliest) earliest = ts;
    }
  }

  return Number.isFinite(earliest) ? new Date(earliest).toISOString().slice(0, 10) : null;
}
169
+
170
/**
 * Persist the import resume point.
 *
 * Writes `date` to `settings.personaHistory.last_queued_date`, keeping every
 * other setting as-is, and additionally sets `complete: true` when `complete`
 * is true. The stored human record is replaced with a shallow copy.
 */
function advanceProgress(stateManager: StateManager, date: string, complete: boolean): void {
  const current = stateManager.getHuman();

  const personaHistory = {
    ...current.settings?.personaHistory,
    last_queued_date: date,
  };
  if (complete) {
    personaHistory.complete = true;
  }

  const settings = { ...current.settings, personaHistory };
  stateManager.setHuman({ ...current, settings });
}
184
+
185
/**
 * Flag the persona-history import as finished by setting
 * `settings.personaHistory.complete` to true; all other settings are kept.
 * The stored human record is replaced with a shallow copy.
 */
function markComplete(stateManager: StateManager): void {
  const current = stateManager.getHuman();
  const personaHistory = { ...current.settings?.personaHistory, complete: true };
  const settings = { ...current.settings, personaHistory };
  stateManager.setHuman({ ...current, settings });
}
@@ -0,0 +1,3 @@
1
+ export { importPersonaHistory } from "./importer.js";
2
+ export type { PersonaHistoryImportResult, PersonaHistoryImporterOptions } from "./importer.js";
3
+ export type { PersonaHistorySettings } from "./types.js";
@@ -0,0 +1,7 @@
1
/** Persisted settings and progress for the persona-history import. */
export interface PersonaHistorySettings {
  /** NOTE(review): presumably toggles the integration on/off — confirm with the caller. */
  integration?: boolean;
  /** Model identifier passed to the queued scans as `extraction_model`. */
  extraction_model?: string;
  /** ISO date string "YYYY-MM-DD", defaults to earliest message found. */
  start_date?: string;
  /** ISO date of last day fully queued — resume point if interrupted. */
  last_queued_date?: string;
  /** Set true when all days have been queued; prevents re-runs. */
  complete?: boolean;
}
@@ -89,7 +89,7 @@ ${buildRecordFormatExamples(data.itemType)}
89
89
 
90
90
  ### Rules:
91
91
  - Do NOT invent information. Only redistribute what exists in the cluster.
92
- - Descriptions should be concise—ideally under 300 characters, never over 500.
92
+ - Descriptions should be concise ideally under 300 characters, never over 500 for regular topics. Technical topics (category: "Technical") may go up to 900 characters — preserve their specific gotchas, decisions, and open questions.
93
93
  - Preserve all numeric values (sentiment, strength, confidence, exposure, etc.) from source records. When merging, take the HIGHER value for strength/confidence, AVERAGE for sentiment.
94
94
  - Every removed record MUST have "replaced_by" pointing to the canonical record that absorbed its data.
95
95
  - The "update" array should contain AT LEAST ONE record (the canonical/merged one), even if all others are removed.
@@ -165,6 +165,8 @@ Similarity of meaning is not the same as identity. "Concern about job security"
165
165
 
166
166
  Ask yourself: *If a persona referenced the established record in conversation, would the newcomer feel like a repeat? Or would it feel like something different being said?*
167
167
 
168
+ **Default to keeping both.** Merge only when you are certain these describe the same concept — thematic overlap, shared vocabulary, or similar domain are not sufficient. A false merge destroys information permanently; a false keep is harmless.
169
+
168
170
  If they are the same thing: **merge**. Preserve every unique detail from both. The newcomer's description is synthesized and current — weight it, but don't discard what the established record learned first.
169
171
 
170
172
  If they are distinct: **keep both**. Return them both in \`update\` unchanged. Leave \`remove\` and \`add\` empty.
@@ -183,7 +185,8 @@ Rules:
183
185
  - \`add\` is always empty here. We are not creating new records from this decision.
184
186
  - If merging: the merged record goes in \`update\`, the absorbed record goes in \`remove\`.
185
187
  - If keeping both: return both in \`update\` exactly as received. Do not modify either.
186
- - Descriptions must stay concise — under 300 characters, never over 500. Synthesize; don't concatenate.
188
+ - Descriptions must stay concise — under 300 characters, never over 500 for regular topics. **Technical topics** (category: "Technical") may go up to 900 characters — they are knowledge bases, not summaries. Synthesize regular topics; preserve detail in Technical ones.
189
+ - For Technical topics: two records about the same technology but different aspects (e.g., "Uniform composition model" vs "Uniform preview setup") are **NOT duplicates** — keep both. Only merge if they are genuinely the same concept described twice.
187
190
  - When merging numeric fields: take the HIGHER value for \`exposure_current\`, \`exposure_desired\`, \`strength\`, \`confidence\`. Average \`sentiment\`.
188
191
  - Do NOT invent information. Only what exists in these two records.
189
192
 
@@ -297,7 +300,7 @@ function buildTopicExamples(): string {
297
300
  "name": "Software Architecture", // REQUIRED
298
301
  "description": "System design patterns, microservices, event-driven architecture. Passionate about scalability and maintainability.", // REQUIRED
299
302
  "sentiment": 0.8, // -1.0 to 1.0 (average when merging)
300
- "category": "Interest", // REQUIRED - Interest, Goal, Dream, Conflict, Concern, Fear, Hope, Plan, Project (pick most common)
303
+ "category": "Interest", // REQUIRED - Interest, Goal, Dream, Conflict, Concern, Fear, Hope, Plan, Project, Event, Technical (pick most common)
301
304
  "exposure_current": 0.6, // 0.0 to 1.0, how recently discussed (take HIGHER when merging)
302
305
  "exposure_desired": 0.9, // 0.0 to 1.0, how much they want to discuss (take HIGHER when merging)
303
306
  "last_ei_asked": "2024-03-10T08:00:00Z", // OPTIONAL - ISO timestamp or null
@@ -330,6 +333,7 @@ CATEGORIES explained:
330
333
  - Goal: Things they want to achieve
331
334
  - Concern/Fear: Things that worry them
332
335
  - Plan/Project: Active work or intentions
336
+ - Technical: Tools, platforms, frameworks, or technical concepts being learned or used — knowledge base entries, NOT summaries
333
337
 
334
338
  GOOD vs BAD descriptions:
335
339
  ✅ GOOD: "Functional programming paradigm. Loves immutability and pure functions. Uses in side projects."
@@ -1,4 +1,5 @@
1
- export { buildRewriteScanPrompt, buildRewritePrompt } from "./rewrite.js";
1
+ export { buildPersonRewriteScanPrompt, buildPersonRewriteSplitPrompt } from "./people-rewrite.js";
2
+ export { buildTopicRewriteScanPrompt, buildTopicRewriteSplitPrompt } from "./topic-rewrite.js";
2
3
  export { buildDedupPrompt, buildValidatePrompt } from "./dedup.js";
3
4
  export { buildUserDedupPrompt } from "./user-dedup.js";
4
5
  export type {
@@ -0,0 +1,190 @@
1
+ import type { RewriteScanPromptData, RewritePromptData } from "./types.js";
2
+
3
+ // =============================================================================
4
+ // What belongs in a Person record (shared reference for both prompts)
5
+ // =============================================================================
6
+ //
7
+ // A Person record is a RELATIONSHIP PROFILE — who this person is, how they
8
+ // relate to the human user, and anything a persona would use to meaningfully
9
+ // reference them in conversation 6+ months from now.
10
+ //
11
+ // A Person record is NOT:
12
+ // - A project status log
13
+ // - A record of ticket numbers, PR numbers, or sprint assignments
14
+ // - A biography of their personal habits and hobbies
15
+ // - A shared-interest tracker (those are Topics)
16
+ //
17
+ // The test: "Would this still be true and useful if you ran into this person
18
+ // at a coffee shop, unrelated to any current project?"
19
+
20
/**
 * Shared definition of what a Person record should and should not contain.
 * Embedded verbatim in both the scan and the split prompt.
 */
const PERSON_CONTRACT = [
  `A Person record is a **relationship profile** — who this person IS, how they relate to the human user, their character and communication style, and anything that makes them recognizable across time and context.`,
  ``,
  `It is NOT:`,
  `- A project status log (ticket numbers, PR references, sprint assignments)`,
  `- A record of shared interests that could stand alone as a Topic`,
  `- Personal biography unrelated to the relationship (commute, hobbies, hometown)`,
  `- Technical knowledge attributed to them rather than about them`,
  ``,
  `**The test**: Would this detail still be true and useful if you ran into this person at a coffee shop, unrelated to any current project, in six months?`,
].join("\n");
29
+
30
+ // =============================================================================
31
+ // PHASE 1: SCAN — Identify subjects that don't belong in a Person record
32
+ // =============================================================================
33
+
34
/**
 * Build the phase-1 (scan) prompt for a Person record.
 *
 * The system prompt embeds `PERSON_CONTRACT` and asks the model to list, as a
 * raw JSON array of short phrases, the subjects in the description that do not
 * belong in a Person record. The user prompt carries the record's `name`,
 * `description` and `relationship` as pretty-printed JSON.
 *
 * @param data Scan input; only `data.item` is read.
 * @returns The `system` and `user` prompt strings.
 */
export function buildPersonRewriteScanPrompt(data: RewriteScanPromptData): { system: string; user: string } {
  const ruleLines = [
    `Rules:`,
    `- Do NOT include the relationship profile itself — who they are, their role, how you know them, their character`,
    `- Be specific: "React performance patterns" beats "technical stuff"`,
    `- If the record is clean — everything in it passes the test — return an empty array`,
  ];

  const exampleLines = [
    `Example — a Person named "Nicholas" whose description includes sprint ticket numbers:`,
    `["CMIDP sprint ticket assignments", "ASU Data Lake access provisioning details"]`,
  ];

  // Sections are separated by one blank line each.
  const system = [
    `You are auditing a Person record in a personal knowledge base.`,
    PERSON_CONTRACT,
    `Your job: identify **subjects buried in this description that fail the test above**.`,
    `For each subject that doesn't belong, return a short phrase (3-8 words) that describes it — specific enough to search for matching records. These phrases will be used to find existing Topics this content might belong in.`,
    ruleLines.join("\n"),
    `Return a raw JSON array of strings. No markdown fencing, no commentary.`,
    exampleLines.join("\n"),
  ].join("\n\n");

  const record = {
    name: (data.item as { name?: string }).name,
    description: data.item.description,
    relationship: (data.item as { relationship?: string }).relationship,
  };
  const payload = JSON.stringify(record, null, 2);

  const user = [
    payload,
    `---`,
    `Return a raw JSON array of subject phrases found in this Person record that don't belong there. Return [] if the record is clean.`,
  ].join("\n\n");

  return { system, user };
}
67
+
68
+ // =============================================================================
69
+ // PHASE 2: SPLIT — Slim the Person, redistribute subjects to Topics
70
+ // =============================================================================
71
+
72
/** Example of a slimmed Person record, shown to the model as the expected output shape. */
function buildPersonExistingExample(): string {
  const exampleLines = [
    `{`,
    `"id": "existing-uuid",`,
    `"type": "person",`,
    `"name": "Nicholas",`,
    `"description": "Backend engineer on the CMIDP team. Thoughtful code reviewer who flags architectural concerns — specifically around concurrency and queue isolation. Direct point of contact for Data Lake access provisioning.",`,
    `"relationship": "coworker"`,
    `}`,
  ];
  return exampleLines.join("\n");
}
81
+
82
/** Example of a new Topic record created from content extracted out of a Person. */
function buildPersonNewTopicExample(): string {
  const exampleLines = [
    `{`,
    `"type": "topic",`,
    `"name": "CMIDP Sprint 86 work",`,
    `"description": "Nicholas owns 4 tickets in Sprint 86 including course list ordering bugs (CMIDP-2604, CMIDP-2441, CMIDP-2686) and course sequencing (CMIDP-2624).",`,
    `"sentiment": 0.5,`,
    `"category": "Project"`,
    `}`,
  ];
  return exampleLines.join("\n");
}
91
+
92
/**
 * Build the phase-2 (split) prompt for a Person record.
 *
 * The system prompt embeds `PERSON_CONTRACT`, both example records and the
 * person's id, and instructs the model to slim the Person and move the
 * scanned subjects into existing or new Topics. The user prompt carries the
 * original person plus, per subject, the search term and the candidate
 * matches (id, name, description, category), followed by a JSON schema
 * reminder.
 *
 * @param data Split input; reads `data.item` and `data.subjects`.
 * @returns The `system` and `user` prompt strings.
 */
export function buildPersonRewriteSplitPrompt(data: RewritePromptData): { system: string; user: string } {
  const personId = data.item.id;
  const personExample = buildPersonExistingExample();
  const topicExample = buildPersonNewTopicExample();

  const system = `You are reorganizing a Person record in a personal knowledge base.

${PERSON_CONTRACT}

An earlier scan identified subjects in this Person record that don't belong there. For each subject, we searched the knowledge base for existing Topics that might already cover it.

Your job:
1. **Slim the Person** — remove the identified subjects AND any other content that fails the relationship profile test (personal trivia, lifestyle details, biographical facts unrelated to the relationship). Keep only: who they are, their role, their character, how the human user knows and works with them.
2. **Redistribute each identified subject** — if a matching Topic exists in the search results, move the content there. If not, create a new Topic.
3. **Discard what isn't worth a Topic** — personal trivia (hobbies, commute, hometown) that has no standalone value doesn't need to become a Topic. Just remove it from the Person.
4. **Lose NO relationship data** — everything about how this person relates to the human user must survive.

Record format for the Person (MUST keep "id", type stays "person"):
${personExample}

Record format for a new Topic created from extracted content:
${topicExample}

Rules:
- The original Person record (id: "${personId}") MUST appear in "existing", slimmed down
- Person description after slimming: 2-4 sentences, relationship profile only. **If it still contains city, commute, hobbies, or lifestyle details after slimming — remove them.** Those are not relationship data.
- Topics created from person content: use the most appropriate category (Technical, Project, Interest, etc.)
- People MUST include "relationship"
- Topics MUST include "category"
- Do NOT invent information — only redistribute what exists in the original record
- Do NOT remove the person's relationship, role, character, or how the human user knows them — only the non-person content

**What to KEEP in the Person description**: role, expertise, *why* the human user works with them (their operational function in the relationship), how they communicate, character traits, how the human user knows them.
**What to REMOVE from the Person description**: current project status, ticket/PR numbers, shared interests (→ Topic), city/commute/hobbies (→ discard).

The distinction:
- "Data Lake bucket owner responsible for access provisioning" → KEEP (operational role in the relationship)
- "Currently owns 4 tickets in Sprint 86" → REMOVE (current sprint status, not who they are)
- "Left detailed comments on PR #1644 identifying architectural concerns around concurrency" → KEEP the insight, DROP the PR reference: "Flags architectural concerns around concurrency and queue isolation" belongs in the description; "PR #1644" does not.

Return raw JSON with exactly two keys:
{
"existing": [ /* slimmed Person + any existing Topics being updated */ ],
"new": [ /* new Topics for subjects with no existing match */ ]
}

No markdown fencing, no commentary.`;

  // Reduce each subject to the fields the model needs to pick a destination Topic.
  const subjectsToExtract = data.subjects.map(subject => {
    const matches = subject.matches.map(match => ({
      id: (match as { id?: string }).id,
      name: (match as { name?: string }).name,
      description: match.description,
      category: (match as { category?: string }).category,
    }));
    return { search_term: subject.searchTerm, matches };
  });

  const originalPerson = {
    id: data.item.id,
    name: (data.item as { name?: string }).name,
    description: data.item.description,
    relationship: (data.item as { relationship?: string }).relationship,
    sentiment: data.item.sentiment,
  };

  const payload = JSON.stringify(
    { original_person: originalPerson, subjects_to_extract: subjectsToExtract },
    null,
    2
  );

  const schemaReminder = `**Return JSON:**
\`\`\`json
{
"existing": [
{
"id": "uuid-of-person",
"type": "person",
"name": "Person Name",
"description": "Slimmed relationship profile only",
"relationship": "coworker"
}
],
"new": [
{
"type": "topic",
"name": "Subject Name",
"description": "Content extracted from person record",
"sentiment": 0.5,
"category": "Project|Technical|Interest|etc."
}
]
}
\`\`\`

Return raw JSON only.`;

  const user = [payload, `---`, schemaReminder].join("\n\n");

  return { system, user };
}