ei-tui 0.1.25 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +42 -0
  2. package/package.json +2 -1
  3. package/src/README.md +4 -11
  4. package/src/cli/README.md +87 -7
  5. package/src/cli/commands/facts.ts +2 -2
  6. package/src/cli/commands/people.ts +2 -2
  7. package/src/cli/commands/quotes.ts +2 -2
  8. package/src/cli/commands/topics.ts +2 -2
  9. package/src/cli/mcp.ts +94 -0
  10. package/src/cli/retrieval.ts +67 -31
  11. package/src/cli.ts +64 -23
  12. package/src/core/AGENTS.md +1 -1
  13. package/src/core/constants/built-in-facts.ts +49 -0
  14. package/src/core/constants/index.ts +1 -0
  15. package/src/core/context-utils.ts +0 -1
  16. package/src/core/embedding-service.ts +8 -0
  17. package/src/core/handlers/dedup.ts +11 -23
  18. package/src/core/handlers/heartbeat.ts +2 -3
  19. package/src/core/handlers/human-extraction.ts +96 -30
  20. package/src/core/handlers/human-matching.ts +328 -248
  21. package/src/core/handlers/index.ts +8 -6
  22. package/src/core/handlers/persona-generation.ts +8 -8
  23. package/src/core/handlers/rewrite.ts +4 -51
  24. package/src/core/handlers/utils.ts +23 -1
  25. package/src/core/heartbeat-manager.ts +2 -4
  26. package/src/core/human-data-manager.ts +38 -36
  27. package/src/core/message-manager.ts +10 -10
  28. package/src/core/orchestrators/ceremony.ts +49 -44
  29. package/src/core/orchestrators/dedup-phase.ts +2 -4
  30. package/src/core/orchestrators/human-extraction.ts +351 -207
  31. package/src/core/orchestrators/index.ts +6 -4
  32. package/src/core/orchestrators/persona-generation.ts +3 -3
  33. package/src/core/processor.ts +167 -20
  34. package/src/core/prompt-context-builder.ts +4 -6
  35. package/src/core/state/human.ts +1 -26
  36. package/src/core/state/personas.ts +2 -2
  37. package/src/core/state-manager.ts +107 -14
  38. package/src/core/tools/builtin/read-memory.ts +13 -18
  39. package/src/core/types/data-items.ts +3 -4
  40. package/src/core/types/entities.ts +7 -4
  41. package/src/core/types/enums.ts +6 -9
  42. package/src/core/types/llm.ts +2 -2
  43. package/src/core/utils/crossFind.ts +2 -5
  44. package/src/core/utils/event-windows.ts +31 -0
  45. package/src/integrations/claude-code/importer.ts +14 -5
  46. package/src/integrations/claude-code/types.ts +3 -0
  47. package/src/integrations/cursor/importer.ts +282 -0
  48. package/src/integrations/cursor/index.ts +10 -0
  49. package/src/integrations/cursor/reader.ts +209 -0
  50. package/src/integrations/cursor/types.ts +140 -0
  51. package/src/integrations/opencode/importer.ts +14 -4
  52. package/src/prompts/AGENTS.md +73 -1
  53. package/src/prompts/ceremony/dedup.ts +0 -33
  54. package/src/prompts/ceremony/rewrite.ts +6 -41
  55. package/src/prompts/ceremony/types.ts +4 -4
  56. package/src/prompts/generation/descriptions.ts +2 -2
  57. package/src/prompts/generation/types.ts +2 -2
  58. package/src/prompts/heartbeat/types.ts +2 -2
  59. package/src/prompts/human/event-scan.ts +122 -0
  60. package/src/prompts/human/fact-find.ts +106 -0
  61. package/src/prompts/human/fact-scan.ts +0 -2
  62. package/src/prompts/human/index.ts +17 -10
  63. package/src/prompts/human/person-match.ts +65 -0
  64. package/src/prompts/human/person-scan.ts +52 -59
  65. package/src/prompts/human/person-update.ts +241 -0
  66. package/src/prompts/human/topic-match.ts +65 -0
  67. package/src/prompts/human/topic-scan.ts +51 -71
  68. package/src/prompts/human/topic-update.ts +295 -0
  69. package/src/prompts/human/types.ts +63 -40
  70. package/src/prompts/index.ts +4 -8
  71. package/src/prompts/persona/topics-update.ts +2 -2
  72. package/src/prompts/persona/traits.ts +2 -2
  73. package/src/prompts/persona/types.ts +3 -3
  74. package/src/prompts/response/index.ts +1 -1
  75. package/src/prompts/response/sections.ts +9 -12
  76. package/src/prompts/response/types.ts +2 -3
  77. package/src/storage/embeddings.ts +1 -1
  78. package/src/storage/index.ts +1 -0
  79. package/src/storage/indexed.ts +174 -0
  80. package/src/storage/merge.ts +67 -2
  81. package/tui/src/commands/me.tsx +5 -14
  82. package/tui/src/commands/settings.tsx +15 -0
  83. package/tui/src/context/ei.tsx +5 -14
  84. package/tui/src/util/yaml-serializers.ts +76 -33
  85. package/src/cli/commands/traits.ts +0 -25
  86. package/src/prompts/human/item-match.ts +0 -74
  87. package/src/prompts/human/item-update.ts +0 -364
  88. package/src/prompts/human/trait-scan.ts +0 -115
@@ -1,239 +1,314 @@
1
1
  import {
2
- ValidationLevel,
3
2
  type LLMResponse,
4
3
  type Message,
5
- type Trait,
6
4
  type Topic,
7
- type Fact,
8
5
  type Person,
9
6
  type Quote,
10
- type DataItemType,
11
- type DataItemBase,
12
7
  } from "../types.js";
13
8
  import type { StateManager } from "../state-manager.js";
14
- import type { ItemMatchResult, ItemUpdateResult, ExposureImpact } from "../../prompts/human/types.js";
15
- import { queueItemUpdate, type ExtractionContext } from "../orchestrators/index.js";
16
- import { getEmbeddingService, getItemEmbeddingText } from "../embedding-service.js";
17
- import { crossFind } from "../utils/index.js";
18
- import { splitMessagesByTimestamp, getMessageText } from "./utils.js";
9
+ import type { ItemMatchResult, ExposureImpact, TopicUpdateResult, PersonUpdateResult } from "../../prompts/human/types.js";
10
+ import { queueTopicUpdate, queuePersonUpdate, type ExtractionContext } from "../orchestrators/index.js";
11
+ import { getEmbeddingService, getTopicEmbeddingText, getPersonEmbeddingText } from "../embedding-service.js";
19
12
 
20
- export function handleHumanItemMatch(response: LLMResponse, state: StateManager): void {
13
+ function mergeGroups(personaGroup: string | null, isNewItem: boolean, existing: string[] | undefined): string[] | undefined {
14
+ if (!personaGroup) return existing;
15
+ if (isNewItem) return [personaGroup];
16
+ const groups = new Set(existing ?? []);
17
+ groups.add(personaGroup);
18
+ return Array.from(groups);
19
+ }
20
+ import { resolveMessageWindow, getMessageText } from "./utils.js";
21
+
22
+ export function handleTopicMatch(response: LLMResponse, state: StateManager): void {
21
23
  const result = response.parsed as ItemMatchResult | undefined;
22
24
  if (!result) {
23
- console.error("[handleHumanItemMatch] No parsed result");
25
+ console.error("[handleTopicMatch] No parsed result");
24
26
  return;
25
27
  }
26
28
 
27
- const candidateType = response.request.data.candidateType as DataItemType;
28
29
  const personaId = response.request.data.personaId as string;
29
30
  const personaDisplayName = response.request.data.personaDisplayName as string;
30
- const messageIdsToMark = response.request.data.message_ids_to_mark as string[] | undefined;
31
- const allMessages = state.messages_get(personaId);
31
+ const { messages_context, messages_analyze } = resolveMessageWindow(response, state);
32
32
 
33
- let messages_context: Message[];
34
- let messages_analyze: Message[];
35
-
36
- if (messageIdsToMark && messageIdsToMark.length > 0) {
37
- const messageIdSet = new Set(messageIdsToMark);
38
- messages_analyze = allMessages.filter(m => messageIdSet.has(m.id));
39
- const analyzeStartTime = messages_analyze[0]?.timestamp ?? '9999';
40
- messages_context = allMessages.filter(m =>
41
- !messageIdSet.has(m.id) && new Date(m.timestamp).getTime() < new Date(analyzeStartTime).getTime()
42
- );
43
- } else {
44
- // Fallback to existing behavior
45
- const analyzeFrom = response.request.data.analyze_from_timestamp as string | null;
46
- const split = splitMessagesByTimestamp(allMessages, analyzeFrom);
47
- messages_context = split.messages_context;
48
- messages_analyze = split.messages_analyze;
33
+ let matched_guid = result.matched_guid;
34
+ if (matched_guid === "new") {
35
+ matched_guid = null;
36
+ } else if (matched_guid) {
37
+ const human = state.getHuman();
38
+ const found = human.topics.find(t => t.id === matched_guid);
39
+ if (!found) {
40
+ console.warn(`[handleTopicMatch] matched_guid "${matched_guid}" not found in topics — treating as new`);
41
+ matched_guid = null;
42
+ }
49
43
  }
44
+ result.matched_guid = matched_guid;
50
45
 
51
- const context: ExtractionContext & { itemName: string; itemValue: string; itemCategory?: string } = {
46
+ const context: ExtractionContext & {
47
+ candidateName: string;
48
+ candidateDescription: string;
49
+ candidateCategory: string;
50
+ extraction_model?: string;
51
+ } = {
52
52
  personaId,
53
53
  personaDisplayName,
54
54
  messages_context,
55
55
  messages_analyze,
56
- itemName: response.request.data.itemName as string,
57
- itemValue: response.request.data.itemValue as string,
58
- itemCategory: response.request.data.itemCategory as string | undefined,
56
+ candidateName: response.request.data.candidateName as string,
57
+ candidateDescription: response.request.data.candidateDescription as string,
58
+ candidateCategory: response.request.data.candidateCategory as string,
59
+ extraction_model: response.request.data.extraction_model as string | undefined,
59
60
  };
60
61
 
61
- let resolvedType: DataItemType = candidateType;
62
+ queueTopicUpdate(result, context, state);
63
+ const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new topic)";
64
+ console.log(`[handleTopicMatch] topic "${context.candidateName}": ${matched}`);
65
+ }
66
+
67
+ export function handlePersonMatch(response: LLMResponse, state: StateManager): void {
68
+ const result = response.parsed as ItemMatchResult | undefined;
69
+ if (!result) {
70
+ console.error("[handlePersonMatch] No parsed result");
71
+ return;
72
+ }
73
+
74
+ const personaId = response.request.data.personaId as string;
75
+ const personaDisplayName = response.request.data.personaDisplayName as string;
76
+ const { messages_context, messages_analyze } = resolveMessageWindow(response, state);
77
+
62
78
  let matched_guid = result.matched_guid;
63
79
  if (matched_guid === "new") {
64
80
  matched_guid = null;
65
81
  } else if (matched_guid) {
66
- const found = crossFind(matched_guid, state.getHuman());
82
+ const human = state.getHuman();
83
+ const found = human.people.find(p => p.id === matched_guid);
67
84
  if (!found) {
68
- console.warn(`[handleHumanItemMatch] matched_guid "${matched_guid}" not found in human data — treating as new item`);
85
+ console.warn(`[handlePersonMatch] matched_guid "${matched_guid}" not found in people — treating as new`);
69
86
  matched_guid = null;
70
- } else if (found.type === "fact" && found.validated === ValidationLevel.Human) {
71
- console.log(`[handleHumanItemMatch] Skipping locked fact "${found.name}" (human-validated)`);
72
- return;
73
- } else if (!(found.type === "fact" || found.type === "trait" || found.type === "topic" || found.type === "person")) {
74
- console.warn(`[handleHumanItemMatch] matched_guid "${matched_guid}" resolved to non-human type "${found.type}" - Ignoring`);
75
- return;
76
- } else {
77
- resolvedType = found.type;
78
- context.itemName = found.name || context.itemName;
79
- context.itemValue = found.description || context.itemValue;
80
87
  }
81
88
  }
82
89
  result.matched_guid = matched_guid;
83
- queueItemUpdate(resolvedType, result, context, state);
84
- const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new item)";
85
- console.log(`[handleHumanItemMatch] ${resolvedType} "${context.itemName}": ${matched}`);
90
+
91
+ const context: ExtractionContext & {
92
+ candidateName: string;
93
+ candidateDescription: string;
94
+ candidateRelationship: string;
95
+ extraction_model?: string;
96
+ } = {
97
+ personaId,
98
+ personaDisplayName,
99
+ messages_context,
100
+ messages_analyze,
101
+ candidateName: response.request.data.candidateName as string,
102
+ candidateDescription: response.request.data.candidateDescription as string,
103
+ candidateRelationship: response.request.data.candidateRelationship as string,
104
+ extraction_model: response.request.data.extraction_model as string | undefined,
105
+ };
106
+
107
+ queuePersonUpdate(result, context, state);
108
+ const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new person)";
109
+ console.log(`[handlePersonMatch] person "${context.candidateName}": ${matched}`);
86
110
  }
87
111
 
88
- export async function handleHumanItemUpdate(response: LLMResponse, state: StateManager): Promise<void> {
89
- const result = response.parsed as ItemUpdateResult | undefined;
90
-
112
+ export async function handleTopicUpdate(response: LLMResponse, state: StateManager): Promise<void> {
113
+ const result = response.parsed as (TopicUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;
114
+
91
115
  if (!result || Object.keys(result).length === 0) {
92
- console.log("[handleHumanItemUpdate] No changes needed (empty result)");
116
+ console.log("[handleTopicUpdate] No changes needed (empty result)");
93
117
  return;
94
118
  }
95
119
 
96
- const candidateType = response.request.data.candidateType as DataItemType;
97
120
  const isNewItem = response.request.data.isNewItem as boolean;
98
121
  const existingItemId = response.request.data.existingItemId as string | undefined;
99
122
  const personaId = response.request.data.personaId as string;
100
123
  const personaDisplayName = response.request.data.personaDisplayName as string;
124
+ const candidateCategory = response.request.data.candidateCategory as string | undefined;
101
125
 
102
126
  if (!result.name || !result.description || result.sentiment === undefined) {
103
- console.error("[handleHumanItemUpdate] Missing required fields in result");
127
+ console.error("[handleTopicUpdate] Missing required fields in result");
104
128
  return;
105
129
  }
106
130
 
107
131
  const now = new Date().toISOString();
132
+ const human = state.getHuman();
133
+
108
134
  const resolveItemId = (): string => {
109
135
  if (isNewItem || !existingItemId) return crypto.randomUUID();
110
- const h = state.getHuman();
111
- const arr = candidateType === "fact" ? h.facts : candidateType === "trait" ? h.traits : candidateType === "topic" ? h.topics : h.people;
112
- // Guard: if existingItemId isn't in the correct type array, treat as new
113
- // (prevents cross-type ID reuse when LLM matches against a different type's UUID)
114
- return arr.find((x: DataItemBase) => x.id === existingItemId) ? existingItemId : crypto.randomUUID();
136
+ return human.topics.find(t => t.id === existingItemId) ? existingItemId : crypto.randomUUID();
115
137
  };
116
138
  const itemId = resolveItemId();
117
139
 
118
140
  const persona = state.persona_getById(personaId);
119
141
  const personaGroup = persona?.group_primary ?? null;
120
- const isEi = personaDisplayName.toLowerCase() === "ei";
121
142
 
122
- const human = state.getHuman();
123
- const getExistingItem = (): { learned_by?: string; last_changed_by?: string; persona_groups?: string[] } | undefined => {
124
- if (isNewItem) return undefined;
125
- switch (candidateType) {
126
- case "fact": return human.facts.find(f => f.id === existingItemId);
127
- case "trait": return human.traits.find(t => t.id === existingItemId);
128
- case "topic": return human.topics.find(t => t.id === existingItemId);
129
- case "person": return human.people.find(p => p.id === existingItemId);
130
- }
143
+ const existingTopic = isNewItem ? undefined : human.topics.find(t => t.id === existingItemId);
144
+
145
+ let embedding: number[] | undefined;
146
+ try {
147
+ const embeddingService = getEmbeddingService();
148
+ const category = result.category ?? candidateCategory ?? existingTopic?.category;
149
+ const text = getTopicEmbeddingText({ name: result.name, category, description: result.description });
150
+ embedding = await embeddingService.embed(text);
151
+ } catch (err) {
152
+ console.warn(`[handleTopicUpdate] Failed to compute embedding for topic "${result.name}":`, err);
153
+ }
154
+
155
+ const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
156
+ const topic: Topic = {
157
+ id: itemId,
158
+ name: result.name,
159
+ description: result.description,
160
+ sentiment: result.sentiment,
161
+ category: result.category ?? candidateCategory ?? existingTopic?.category,
162
+ exposure_current: calculateExposureCurrent(exposureImpact),
163
+ exposure_desired: result.exposure_desired ?? 0.5,
164
+ last_updated: now,
165
+ last_mentioned: now,
166
+ learned_by: isNewItem ? personaId : existingTopic?.learned_by,
167
+ last_changed_by: personaId,
168
+ persona_groups: mergeGroups(personaGroup, isNewItem, existingTopic?.persona_groups),
169
+ embedding,
131
170
  };
132
- const existingItem = getExistingItem();
133
-
134
- const mergeGroups = (existing: string[] | undefined): string[] | undefined => {
135
- if (!personaGroup) return existing;
136
- if (isNewItem) return [personaGroup];
137
- const groups = new Set(existing ?? []);
138
- groups.add(personaGroup);
139
- return Array.from(groups);
171
+ state.human_topic_upsert(topic);
172
+
173
+ const allMessages = state.messages_get(personaId);
174
+ await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);
175
+
176
+ console.log(`[handleTopicUpdate] ${isNewItem ? "Created" : "Updated"} topic "${result.name}"`);
177
+ }
178
+
179
+ export async function handlePersonUpdate(response: LLMResponse, state: StateManager): Promise<void> {
180
+ const result = response.parsed as (PersonUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;
181
+
182
+ if (!result || Object.keys(result).length === 0) {
183
+ console.log("[handlePersonUpdate] No changes needed (empty result)");
184
+ return;
185
+ }
186
+
187
+ const isNewItem = response.request.data.isNewItem as boolean;
188
+ const existingItemId = response.request.data.existingItemId as string | undefined;
189
+ const personaId = response.request.data.personaId as string;
190
+ const personaDisplayName = response.request.data.personaDisplayName as string;
191
+ const candidateRelationship = response.request.data.candidateRelationship as string | undefined;
192
+
193
+ if (!result.name || !result.description || result.sentiment === undefined) {
194
+ console.error("[handlePersonUpdate] Missing required fields in result");
195
+ return;
196
+ }
197
+
198
+ const now = new Date().toISOString();
199
+ const human = state.getHuman();
200
+
201
+ const resolveItemId = (): string => {
202
+ if (isNewItem || !existingItemId) return crypto.randomUUID();
203
+ return human.people.find(p => p.id === existingItemId) ? existingItemId : crypto.randomUUID();
140
204
  };
205
+ const itemId = resolveItemId();
206
+
207
+ const persona = state.persona_getById(personaId);
208
+ const personaGroup = persona?.group_primary ?? null;
209
+
210
+ const existingPerson = isNewItem ? undefined : human.people.find(p => p.id === existingItemId);
141
211
 
142
212
  let embedding: number[] | undefined;
143
213
  try {
144
214
  const embeddingService = getEmbeddingService();
145
- const text = getItemEmbeddingText({ name: result.name, description: result.description });
215
+ const relationship = result.relationship ?? candidateRelationship ?? existingPerson?.relationship;
216
+ const text = getPersonEmbeddingText({ name: result.name, relationship, description: result.description });
146
217
  embedding = await embeddingService.embed(text);
147
218
  } catch (err) {
148
- console.warn(`[handleHumanItemUpdate] Failed to compute embedding for ${candidateType} "${result.name}":`, err);
219
+ console.warn(`[handlePersonUpdate] Failed to compute embedding for person "${result.name}":`, err);
149
220
  }
150
221
 
151
- switch (candidateType) {
152
- case "fact": {
153
- const fact: Fact = {
154
- id: itemId,
155
- name: result.name,
156
- description: result.description,
157
- sentiment: result.sentiment,
158
- validated: ValidationLevel.None,
159
- validated_date: now,
160
- last_updated: now,
161
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
162
- last_changed_by: personaId,
163
- persona_groups: mergeGroups(existingItem?.persona_groups),
164
- embedding,
165
- };
166
- applyOrValidate(state, "fact", fact, personaDisplayName, isEi, personaGroup);
167
- break;
168
- }
169
- case "trait": {
170
- const trait: Trait = {
171
- id: itemId,
172
- name: result.name,
173
- description: result.description,
174
- sentiment: result.sentiment,
175
- strength: (result as any).strength ?? 0.5,
176
- last_updated: now,
177
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
178
- last_changed_by: personaId,
179
- persona_groups: mergeGroups(existingItem?.persona_groups),
180
- embedding,
181
- };
182
- applyOrValidate(state, "trait", trait, personaDisplayName, isEi, personaGroup);
183
- break;
184
- }
185
- case "topic": {
186
- const exposureImpact = (result as any).exposure_impact as ExposureImpact | undefined;
187
- const itemCategory = response.request.data.itemCategory as string | undefined;
188
- const existingTopic = human.topics.find(t => t.id === existingItemId);
189
- const topic: Topic = {
190
- id: itemId,
191
- name: result.name,
192
- description: result.description,
193
- sentiment: result.sentiment,
194
- category: (result as any).category ?? itemCategory ?? existingTopic?.category,
195
- exposure_current: calculateExposureCurrent(exposureImpact),
196
- exposure_desired: (result as any).exposure_desired ?? 0.5,
197
- last_updated: now,
198
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
199
- last_changed_by: personaId,
200
- persona_groups: mergeGroups(existingItem?.persona_groups),
201
- embedding,
202
- };
203
- applyOrValidate(state, "topic", topic, personaDisplayName, isEi, personaGroup);
204
- break;
205
- }
206
- case "person": {
207
- const exposureImpact = (result as any).exposure_impact as ExposureImpact | undefined;
208
- const person: Person = {
209
- id: itemId,
210
- name: result.name,
211
- description: result.description,
212
- sentiment: result.sentiment,
213
- relationship: (result as any).relationship ?? "Unknown",
214
- exposure_current: calculateExposureCurrent(exposureImpact),
215
- exposure_desired: (result as any).exposure_desired ?? 0.5,
216
- last_updated: now,
217
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
218
- last_changed_by: personaId,
219
- persona_groups: mergeGroups(existingItem?.persona_groups),
220
- embedding,
221
- };
222
- applyOrValidate(state, "person", person, personaDisplayName, isEi, personaGroup);
223
- break;
224
- }
225
- }
222
+ const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
223
+ const person: Person = {
224
+ id: itemId,
225
+ name: result.name,
226
+ description: result.description,
227
+ sentiment: result.sentiment,
228
+ relationship: result.relationship ?? candidateRelationship ?? existingPerson?.relationship ?? "Unknown",
229
+ exposure_current: calculateExposureCurrent(exposureImpact),
230
+ exposure_desired: result.exposure_desired ?? 0.5,
231
+ last_updated: now,
232
+ last_mentioned: now,
233
+ learned_by: isNewItem ? personaId : existingPerson?.learned_by,
234
+ last_changed_by: personaId,
235
+ persona_groups: mergeGroups(personaGroup, isNewItem, existingPerson?.persona_groups),
236
+ embedding,
237
+ };
238
+ state.human_person_upsert(person);
226
239
 
227
240
  const allMessages = state.messages_get(personaId);
228
241
  await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);
229
242
 
230
- console.log(`[handleHumanItemUpdate] ${isNewItem ? "Created" : "Updated"} ${candidateType} "${result.name}"`);
243
+ console.log(`[handlePersonUpdate] ${isNewItem ? "Created" : "Updated"} person "${result.name}"`);
244
+ }
245
+
246
+ function normalizeText(text: string): string {
247
+ return text
248
+ .replace(/[\u201C\u201D]/g, '"') // curly double quotes
249
+ .replace(/[\u2018\u2019\u0060\u00B4]/g, "'") // curly single, backtick, acute accent
250
+ .replace(/[\u2014\u2013\u2012]/g, '-') // em-dash, en-dash, figure dash
251
+ .replace(/\u00A0/g, ' ') // non-breaking space
252
+ .replace(/[\u2000-\u200F]/g, ' ') // unicode space variants
253
+ .replace(/\u2026|\.\.\./g, '\u2026'); // normalize both ellipsis forms → unicode ellipsis (1:1)
231
254
  }
232
255
 
233
- function normalizeQuotes(text: string): string {
256
+ function stripPunctuation(text: string): string {
257
+ // Remove characters LLMs commonly mangle, keep spaces and alphanumeric
258
+ // Strip: punctuation, unicode punctuation variants, curly quotes, dashes, etc.
259
+ // Keep: letters, digits, spaces
234
260
  return text
235
- .replace(/[\u201C\u201D]/g, '"') // Curly double quotes to straight
236
- .replace(/[\u2018\u2019]/g, "'"); // Curly single quotes to straight
261
+ .replace(/[^\w\s]/gu, ' ') // replace non-word, non-space with space
262
+ .replace(/\s+/g, ' ') // collapse multiple spaces
263
+ .trim()
264
+ .toLowerCase();
265
+ }
266
+
267
+ interface WordBoundaryMatch {
268
+ start: number;
269
+ end: number;
270
+ text: string;
271
+ }
272
+
273
+ function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
274
+ const strippedQuote = stripPunctuation(quoteText);
275
+ const quoteWords = strippedQuote.split(' ').filter(w => w.length > 0);
276
+
277
+ if (quoteWords.length < 3) return null; // Too short to trust — require at least 3 words
278
+
279
+ // Build word token list from original message with original positions
280
+ const wordTokens: Array<{ word: string; start: number; end: number }> = [];
281
+ const wordRegex = /\S+/g;
282
+ let match: RegExpExecArray | null;
283
+ while ((match = wordRegex.exec(msgText)) !== null) {
284
+ wordTokens.push({
285
+ word: stripPunctuation(match[0]),
286
+ start: match.index,
287
+ end: match.index + match[0].length,
288
+ });
289
+ }
290
+
291
+ // Find contiguous sequence of words matching the quote words
292
+ for (let i = 0; i <= wordTokens.length - quoteWords.length; i++) {
293
+ let allMatch = true;
294
+ for (let j = 0; j < quoteWords.length; j++) {
295
+ if (wordTokens[i + j].word !== quoteWords[j]) {
296
+ allMatch = false;
297
+ break;
298
+ }
299
+ }
300
+ if (allMatch) {
301
+ const startToken = wordTokens[i];
302
+ const endToken = wordTokens[i + quoteWords.length - 1];
303
+ return {
304
+ start: startToken.start,
305
+ end: endToken.end,
306
+ text: msgText.slice(startToken.start, endToken.end),
307
+ };
308
+ }
309
+ }
310
+
311
+ return null;
237
312
  }
238
313
 
239
314
  async function validateAndStoreQuotes(
@@ -250,88 +325,107 @@ async function validateAndStoreQuotes(
250
325
  let found = false;
251
326
  for (const message of messages) {
252
327
  const msgText = getMessageText(message);
253
- const normalizedMsg = normalizeQuotes(msgText);
254
- const normalizedQuote = normalizeQuotes(candidate.text);
328
+
329
+ // Level 1: normalized exact match
330
+ const normalizedMsg = normalizeText(msgText);
331
+ const normalizedQuote = normalizeText(candidate.text);
255
332
  const start = normalizedMsg.indexOf(normalizedQuote);
333
+
334
+ let matchStart: number;
335
+ let matchEnd: number;
336
+ let matchText: string;
337
+ let matchLevel: string;
338
+
256
339
  if (start !== -1) {
257
- const end = start + candidate.text.length;
258
-
259
- // Check for ANY overlapping quote in this message (not just exact match)
260
- const existing = state.human_quote_getForMessage(message.id);
261
- const overlapping = existing.find(q =>
262
- q.start !== null && q.end !== null &&
263
- start < q.end && end > q.start // ranges overlap
264
- );
265
-
266
- if (overlapping) {
267
- // Merge: expand to the union of both ranges
268
- const mergedStart = Math.min(start, overlapping.start!);
269
- const mergedEnd = Math.max(end, overlapping.end!);
270
- const mergedText = msgText.slice(mergedStart, mergedEnd);
271
-
272
- // Merge data_item_ids and persona_groups (deduplicated)
273
- const mergedDataItemIds = overlapping.data_item_ids.includes(dataItemId)
274
- ? overlapping.data_item_ids
275
- : [...overlapping.data_item_ids, dataItemId];
276
- const group = personaGroup || "General";
277
- const mergedGroups = overlapping.persona_groups.includes(group)
278
- ? overlapping.persona_groups
279
- : [...overlapping.persona_groups, group];
280
-
281
- // Only recompute embedding if the text actually changed
282
- let embedding = overlapping.embedding;
283
- if (mergedText !== overlapping.text) {
284
- try {
285
- const embeddingService = getEmbeddingService();
286
- embedding = await embeddingService.embed(mergedText);
287
- } catch (err) {
288
- console.warn(`[extraction] Failed to recompute embedding for merged quote: "${mergedText.slice(0, 30)}..."`, err);
289
- }
340
+ matchStart = start;
341
+ matchEnd = start + candidate.text.length;
342
+ matchText = candidate.text;
343
+ matchLevel = "exact";
344
+ } else {
345
+ // Level 2: word-boundary fallback
346
+ const wordMatch = findQuoteByWords(candidate.text, msgText);
347
+ if (!wordMatch) continue;
348
+ matchStart = wordMatch.start;
349
+ matchEnd = wordMatch.end;
350
+ matchText = wordMatch.text;
351
+ matchLevel = "word-boundary";
352
+ }
353
+
354
+ const existing = state.human_quote_getForMessage(message.id);
355
+ const overlapping = existing.find(q =>
356
+ q.start !== null && q.end !== null &&
357
+ matchStart < q.end && matchEnd > q.start
358
+ );
359
+
360
+ if (overlapping) {
361
+ const mergedStart = Math.min(matchStart, overlapping.start!);
362
+ const mergedEnd = Math.max(matchEnd, overlapping.end!);
363
+ const mergedText = msgText.slice(mergedStart, mergedEnd);
364
+
365
+ const mergedDataItemIds = overlapping.data_item_ids.includes(dataItemId)
366
+ ? overlapping.data_item_ids
367
+ : [...overlapping.data_item_ids, dataItemId];
368
+ const group = personaGroup || "General";
369
+ const mergedGroups = overlapping.persona_groups.includes(group)
370
+ ? overlapping.persona_groups
371
+ : [...overlapping.persona_groups, group];
372
+
373
+ let embedding = overlapping.embedding;
374
+ if (mergedText !== overlapping.text) {
375
+ try {
376
+ const embeddingService = getEmbeddingService();
377
+ embedding = await embeddingService.embed(mergedText);
378
+ } catch (err) {
379
+ console.warn(`[extraction] Failed to recompute embedding for merged quote: "${mergedText.slice(0, 30)}..."`, err);
290
380
  }
291
-
292
- state.human_quote_update(overlapping.id, {
293
- start: mergedStart,
294
- end: mergedEnd,
295
- text: mergedText,
296
- data_item_ids: mergedDataItemIds,
297
- persona_groups: mergedGroups,
298
- embedding,
299
- });
300
- console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
301
- found = true;
302
- break;
303
381
  }
304
-
305
- let embedding: number[] | undefined;
306
- try {
307
- const embeddingService = getEmbeddingService();
308
- embedding = await embeddingService.embed(candidate.text);
309
- } catch (err) {
310
- console.warn(`[extraction] Failed to compute embedding for quote: "${candidate.text.slice(0, 30)}..."`, err);
311
- }
312
-
313
- const quote: Quote = {
314
- id: crypto.randomUUID(),
315
- message_id: message.id,
316
- data_item_ids: [dataItemId],
317
- persona_groups: [personaGroup || "General"],
318
- text: candidate.text,
319
- speaker: message.role === "human" ? "human" : personaName,
320
- timestamp: message.timestamp,
321
- start: start,
322
- end: end,
323
- created_at: new Date().toISOString(),
324
- created_by: "extraction",
382
+
383
+ state.human_quote_update(overlapping.id, {
384
+ start: mergedStart,
385
+ end: mergedEnd,
386
+ text: mergedText,
387
+ data_item_ids: mergedDataItemIds,
388
+ persona_groups: mergedGroups,
325
389
  embedding,
326
- };
327
- state.human_quote_add(quote);
328
- console.log(`[extraction] Captured quote: "${candidate.text.slice(0, 50)}..."`);
390
+ });
391
+ console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
329
392
  found = true;
330
393
  break;
331
394
  }
395
+
396
+ let embedding: number[] | undefined;
397
+ try {
398
+ const embeddingService = getEmbeddingService();
399
+ embedding = await embeddingService.embed(matchText);
400
+ } catch (err) {
401
+ console.warn(`[extraction] Failed to compute embedding for quote: "${matchText.slice(0, 30)}..."`, err);
402
+ }
403
+
404
+ const quote: Quote = {
405
+ id: crypto.randomUUID(),
406
+ message_id: message.id,
407
+ data_item_ids: [dataItemId],
408
+ persona_groups: [personaGroup || "General"],
409
+ text: matchText,
410
+ speaker: message.role === "human" ? "human" : personaName,
411
+ timestamp: message.timestamp,
412
+ start: matchStart,
413
+ end: matchEnd,
414
+ created_at: new Date().toISOString(),
415
+ created_by: "extraction",
416
+ embedding,
417
+ };
418
+ state.human_quote_add(quote);
419
+ if (matchLevel === "word-boundary") {
420
+ console.log(`[extraction] Captured quote (word-boundary match): "${matchText.slice(0, 50)}..."`);
421
+ } else {
422
+ console.log(`[extraction] Captured quote: "${matchText.slice(0, 50)}..."`);
423
+ }
424
+ found = true;
425
+ break;
332
426
  }
333
427
  if (!found) {
334
- console.log(`[extraction] Quote not found in messages, skipping: "${candidate.text?.slice(0, 50)}..."`);
428
+ console.warn(`[extraction] Quote not found in messages (both levels), skipping: "${candidate.text?.slice(0, 50)}..."`);
335
429
  }
336
430
  }
337
431
  }
@@ -346,18 +440,4 @@ function calculateExposureCurrent(impact: ExposureImpact | undefined): number {
346
440
  }
347
441
  }
348
442
 
349
- function applyOrValidate(
350
- state: StateManager,
351
- dataType: DataItemType,
352
- item: Fact | Trait | Topic | Person,
353
- _personaName: string,
354
- _isEi: boolean,
355
- _personaGroup: string | null
356
- ): void {
357
- switch (dataType) {
358
- case "fact": state.human_fact_upsert(item as Fact); break;
359
- case "trait": state.human_trait_upsert(item as Trait); break;
360
- case "topic": state.human_topic_upsert(item as Topic); break;
361
- case "person": state.human_person_upsert(item as Person); break;
362
- }
363
- }
443
+