ei-tui 0.1.25 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +42 -0
  2. package/package.json +1 -1
  3. package/src/README.md +4 -11
  4. package/src/cli/README.md +4 -5
  5. package/src/cli/retrieval.ts +3 -25
  6. package/src/cli.ts +3 -7
  7. package/src/core/AGENTS.md +1 -1
  8. package/src/core/constants/built-in-facts.ts +49 -0
  9. package/src/core/constants/index.ts +1 -0
  10. package/src/core/context-utils.ts +0 -1
  11. package/src/core/embedding-service.ts +8 -0
  12. package/src/core/handlers/dedup.ts +10 -16
  13. package/src/core/handlers/heartbeat.ts +2 -3
  14. package/src/core/handlers/human-extraction.ts +95 -30
  15. package/src/core/handlers/human-matching.ts +326 -248
  16. package/src/core/handlers/index.ts +8 -6
  17. package/src/core/handlers/persona-generation.ts +8 -8
  18. package/src/core/handlers/rewrite.ts +4 -29
  19. package/src/core/handlers/utils.ts +23 -1
  20. package/src/core/heartbeat-manager.ts +2 -4
  21. package/src/core/human-data-manager.ts +5 -27
  22. package/src/core/message-manager.ts +10 -10
  23. package/src/core/orchestrators/ceremony.ts +50 -39
  24. package/src/core/orchestrators/dedup-phase.ts +0 -1
  25. package/src/core/orchestrators/human-extraction.ts +351 -207
  26. package/src/core/orchestrators/index.ts +6 -4
  27. package/src/core/orchestrators/persona-generation.ts +3 -3
  28. package/src/core/processor.ts +99 -17
  29. package/src/core/prompt-context-builder.ts +4 -6
  30. package/src/core/state/human.ts +1 -26
  31. package/src/core/state/personas.ts +2 -2
  32. package/src/core/state-manager.ts +107 -14
  33. package/src/core/tools/builtin/read-memory.ts +7 -8
  34. package/src/core/types/data-items.ts +2 -4
  35. package/src/core/types/entities.ts +6 -4
  36. package/src/core/types/enums.ts +6 -9
  37. package/src/core/types/llm.ts +2 -2
  38. package/src/core/utils/crossFind.ts +2 -5
  39. package/src/core/utils/event-windows.ts +31 -0
  40. package/src/integrations/claude-code/importer.ts +8 -4
  41. package/src/integrations/claude-code/types.ts +2 -0
  42. package/src/integrations/opencode/importer.ts +7 -3
  43. package/src/prompts/AGENTS.md +73 -1
  44. package/src/prompts/ceremony/rewrite.ts +3 -22
  45. package/src/prompts/ceremony/types.ts +3 -3
  46. package/src/prompts/generation/descriptions.ts +2 -2
  47. package/src/prompts/generation/types.ts +2 -2
  48. package/src/prompts/heartbeat/types.ts +2 -2
  49. package/src/prompts/human/event-scan.ts +122 -0
  50. package/src/prompts/human/fact-find.ts +106 -0
  51. package/src/prompts/human/fact-scan.ts +0 -2
  52. package/src/prompts/human/index.ts +17 -10
  53. package/src/prompts/human/person-match.ts +65 -0
  54. package/src/prompts/human/person-scan.ts +52 -59
  55. package/src/prompts/human/person-update.ts +241 -0
  56. package/src/prompts/human/topic-match.ts +65 -0
  57. package/src/prompts/human/topic-scan.ts +51 -71
  58. package/src/prompts/human/topic-update.ts +295 -0
  59. package/src/prompts/human/types.ts +63 -40
  60. package/src/prompts/index.ts +4 -8
  61. package/src/prompts/persona/topics-update.ts +2 -2
  62. package/src/prompts/persona/traits.ts +2 -2
  63. package/src/prompts/persona/types.ts +3 -3
  64. package/src/prompts/response/index.ts +1 -1
  65. package/src/prompts/response/sections.ts +9 -12
  66. package/src/prompts/response/types.ts +2 -3
  67. package/src/storage/embeddings.ts +1 -1
  68. package/src/storage/index.ts +1 -0
  69. package/src/storage/indexed.ts +174 -0
  70. package/src/storage/merge.ts +67 -2
  71. package/tui/src/commands/me.tsx +5 -14
  72. package/tui/src/commands/settings.tsx +15 -0
  73. package/tui/src/context/ei.tsx +5 -14
  74. package/tui/src/util/yaml-serializers.ts +48 -33
  75. package/src/cli/commands/traits.ts +0 -25
  76. package/src/prompts/human/item-match.ts +0 -74
  77. package/src/prompts/human/item-update.ts +0 -364
  78. package/src/prompts/human/trait-scan.ts +0 -115
@@ -1,239 +1,312 @@
1
1
  import {
2
- ValidationLevel,
3
2
  type LLMResponse,
4
3
  type Message,
5
- type Trait,
6
4
  type Topic,
7
- type Fact,
8
5
  type Person,
9
6
  type Quote,
10
- type DataItemType,
11
- type DataItemBase,
12
7
  } from "../types.js";
13
8
  import type { StateManager } from "../state-manager.js";
14
- import type { ItemMatchResult, ItemUpdateResult, ExposureImpact } from "../../prompts/human/types.js";
15
- import { queueItemUpdate, type ExtractionContext } from "../orchestrators/index.js";
16
- import { getEmbeddingService, getItemEmbeddingText } from "../embedding-service.js";
17
- import { crossFind } from "../utils/index.js";
18
- import { splitMessagesByTimestamp, getMessageText } from "./utils.js";
9
+ import type { ItemMatchResult, ExposureImpact, TopicUpdateResult, PersonUpdateResult } from "../../prompts/human/types.js";
10
+ import { queueTopicUpdate, queuePersonUpdate, type ExtractionContext } from "../orchestrators/index.js";
11
+ import { getEmbeddingService, getTopicEmbeddingText, getPersonEmbeddingText } from "../embedding-service.js";
19
12
 
20
- export function handleHumanItemMatch(response: LLMResponse, state: StateManager): void {
13
+ function mergeGroups(personaGroup: string | null, isNewItem: boolean, existing: string[] | undefined): string[] | undefined {
14
+ if (!personaGroup) return existing;
15
+ if (isNewItem) return [personaGroup];
16
+ const groups = new Set(existing ?? []);
17
+ groups.add(personaGroup);
18
+ return Array.from(groups);
19
+ }
20
+ import { resolveMessageWindow, getMessageText } from "./utils.js";
21
+
22
+ export function handleTopicMatch(response: LLMResponse, state: StateManager): void {
21
23
  const result = response.parsed as ItemMatchResult | undefined;
22
24
  if (!result) {
23
- console.error("[handleHumanItemMatch] No parsed result");
25
+ console.error("[handleTopicMatch] No parsed result");
24
26
  return;
25
27
  }
26
28
 
27
- const candidateType = response.request.data.candidateType as DataItemType;
28
29
  const personaId = response.request.data.personaId as string;
29
30
  const personaDisplayName = response.request.data.personaDisplayName as string;
30
- const messageIdsToMark = response.request.data.message_ids_to_mark as string[] | undefined;
31
- const allMessages = state.messages_get(personaId);
31
+ const { messages_context, messages_analyze } = resolveMessageWindow(response, state);
32
32
 
33
- let messages_context: Message[];
34
- let messages_analyze: Message[];
35
-
36
- if (messageIdsToMark && messageIdsToMark.length > 0) {
37
- const messageIdSet = new Set(messageIdsToMark);
38
- messages_analyze = allMessages.filter(m => messageIdSet.has(m.id));
39
- const analyzeStartTime = messages_analyze[0]?.timestamp ?? '9999';
40
- messages_context = allMessages.filter(m =>
41
- !messageIdSet.has(m.id) && new Date(m.timestamp).getTime() < new Date(analyzeStartTime).getTime()
42
- );
43
- } else {
44
- // Fallback to existing behavior
45
- const analyzeFrom = response.request.data.analyze_from_timestamp as string | null;
46
- const split = splitMessagesByTimestamp(allMessages, analyzeFrom);
47
- messages_context = split.messages_context;
48
- messages_analyze = split.messages_analyze;
33
+ let matched_guid = result.matched_guid;
34
+ if (matched_guid === "new") {
35
+ matched_guid = null;
36
+ } else if (matched_guid) {
37
+ const human = state.getHuman();
38
+ const found = human.topics.find(t => t.id === matched_guid);
39
+ if (!found) {
40
+ console.warn(`[handleTopicMatch] matched_guid "${matched_guid}" not found in topics — treating as new`);
41
+ matched_guid = null;
42
+ }
49
43
  }
44
+ result.matched_guid = matched_guid;
50
45
 
51
- const context: ExtractionContext & { itemName: string; itemValue: string; itemCategory?: string } = {
46
+ const context: ExtractionContext & {
47
+ candidateName: string;
48
+ candidateDescription: string;
49
+ candidateCategory: string;
50
+ extraction_model?: string;
51
+ } = {
52
52
  personaId,
53
53
  personaDisplayName,
54
54
  messages_context,
55
55
  messages_analyze,
56
- itemName: response.request.data.itemName as string,
57
- itemValue: response.request.data.itemValue as string,
58
- itemCategory: response.request.data.itemCategory as string | undefined,
56
+ candidateName: response.request.data.candidateName as string,
57
+ candidateDescription: response.request.data.candidateDescription as string,
58
+ candidateCategory: response.request.data.candidateCategory as string,
59
+ extraction_model: response.request.data.extraction_model as string | undefined,
59
60
  };
60
61
 
61
- let resolvedType: DataItemType = candidateType;
62
+ queueTopicUpdate(result, context, state);
63
+ const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new topic)";
64
+ console.log(`[handleTopicMatch] topic "${context.candidateName}": ${matched}`);
65
+ }
66
+
67
+ export function handlePersonMatch(response: LLMResponse, state: StateManager): void {
68
+ const result = response.parsed as ItemMatchResult | undefined;
69
+ if (!result) {
70
+ console.error("[handlePersonMatch] No parsed result");
71
+ return;
72
+ }
73
+
74
+ const personaId = response.request.data.personaId as string;
75
+ const personaDisplayName = response.request.data.personaDisplayName as string;
76
+ const { messages_context, messages_analyze } = resolveMessageWindow(response, state);
77
+
62
78
  let matched_guid = result.matched_guid;
63
79
  if (matched_guid === "new") {
64
80
  matched_guid = null;
65
81
  } else if (matched_guid) {
66
- const found = crossFind(matched_guid, state.getHuman());
82
+ const human = state.getHuman();
83
+ const found = human.people.find(p => p.id === matched_guid);
67
84
  if (!found) {
68
- console.warn(`[handleHumanItemMatch] matched_guid "${matched_guid}" not found in human data — treating as new item`);
85
+ console.warn(`[handlePersonMatch] matched_guid "${matched_guid}" not found in people — treating as new`);
69
86
  matched_guid = null;
70
- } else if (found.type === "fact" && found.validated === ValidationLevel.Human) {
71
- console.log(`[handleHumanItemMatch] Skipping locked fact "${found.name}" (human-validated)`);
72
- return;
73
- } else if (!(found.type === "fact" || found.type === "trait" || found.type === "topic" || found.type === "person")) {
74
- console.warn(`[handleHumanItemMatch] matched_guid "${matched_guid}" resolved to non-human type "${found.type}" - Ignoring`);
75
- return;
76
- } else {
77
- resolvedType = found.type;
78
- context.itemName = found.name || context.itemName;
79
- context.itemValue = found.description || context.itemValue;
80
87
  }
81
88
  }
82
89
  result.matched_guid = matched_guid;
83
- queueItemUpdate(resolvedType, result, context, state);
84
- const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new item)";
85
- console.log(`[handleHumanItemMatch] ${resolvedType} "${context.itemName}": ${matched}`);
90
+
91
+ const context: ExtractionContext & {
92
+ candidateName: string;
93
+ candidateDescription: string;
94
+ candidateRelationship: string;
95
+ extraction_model?: string;
96
+ } = {
97
+ personaId,
98
+ personaDisplayName,
99
+ messages_context,
100
+ messages_analyze,
101
+ candidateName: response.request.data.candidateName as string,
102
+ candidateDescription: response.request.data.candidateDescription as string,
103
+ candidateRelationship: response.request.data.candidateRelationship as string,
104
+ extraction_model: response.request.data.extraction_model as string | undefined,
105
+ };
106
+
107
+ queuePersonUpdate(result, context, state);
108
+ const matched = matched_guid ? `matched GUID "${matched_guid}"` : "no match (new person)";
109
+ console.log(`[handlePersonMatch] person "${context.candidateName}": ${matched}`);
86
110
  }
87
111
 
88
- export async function handleHumanItemUpdate(response: LLMResponse, state: StateManager): Promise<void> {
89
- const result = response.parsed as ItemUpdateResult | undefined;
90
-
112
+ export async function handleTopicUpdate(response: LLMResponse, state: StateManager): Promise<void> {
113
+ const result = response.parsed as (TopicUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;
114
+
91
115
  if (!result || Object.keys(result).length === 0) {
92
- console.log("[handleHumanItemUpdate] No changes needed (empty result)");
116
+ console.log("[handleTopicUpdate] No changes needed (empty result)");
93
117
  return;
94
118
  }
95
119
 
96
- const candidateType = response.request.data.candidateType as DataItemType;
97
120
  const isNewItem = response.request.data.isNewItem as boolean;
98
121
  const existingItemId = response.request.data.existingItemId as string | undefined;
99
122
  const personaId = response.request.data.personaId as string;
100
123
  const personaDisplayName = response.request.data.personaDisplayName as string;
124
+ const candidateCategory = response.request.data.candidateCategory as string | undefined;
101
125
 
102
126
  if (!result.name || !result.description || result.sentiment === undefined) {
103
- console.error("[handleHumanItemUpdate] Missing required fields in result");
127
+ console.error("[handleTopicUpdate] Missing required fields in result");
104
128
  return;
105
129
  }
106
130
 
107
131
  const now = new Date().toISOString();
132
+ const human = state.getHuman();
133
+
108
134
  const resolveItemId = (): string => {
109
135
  if (isNewItem || !existingItemId) return crypto.randomUUID();
110
- const h = state.getHuman();
111
- const arr = candidateType === "fact" ? h.facts : candidateType === "trait" ? h.traits : candidateType === "topic" ? h.topics : h.people;
112
- // Guard: if existingItemId isn't in the correct type array, treat as new
113
- // (prevents cross-type ID reuse when LLM matches against a different type's UUID)
114
- return arr.find((x: DataItemBase) => x.id === existingItemId) ? existingItemId : crypto.randomUUID();
136
+ return human.topics.find(t => t.id === existingItemId) ? existingItemId : crypto.randomUUID();
115
137
  };
116
138
  const itemId = resolveItemId();
117
139
 
118
140
  const persona = state.persona_getById(personaId);
119
141
  const personaGroup = persona?.group_primary ?? null;
120
- const isEi = personaDisplayName.toLowerCase() === "ei";
121
142
 
122
- const human = state.getHuman();
123
- const getExistingItem = (): { learned_by?: string; last_changed_by?: string; persona_groups?: string[] } | undefined => {
124
- if (isNewItem) return undefined;
125
- switch (candidateType) {
126
- case "fact": return human.facts.find(f => f.id === existingItemId);
127
- case "trait": return human.traits.find(t => t.id === existingItemId);
128
- case "topic": return human.topics.find(t => t.id === existingItemId);
129
- case "person": return human.people.find(p => p.id === existingItemId);
130
- }
143
+ const existingTopic = isNewItem ? undefined : human.topics.find(t => t.id === existingItemId);
144
+
145
+ let embedding: number[] | undefined;
146
+ try {
147
+ const embeddingService = getEmbeddingService();
148
+ const category = result.category ?? candidateCategory ?? existingTopic?.category;
149
+ const text = getTopicEmbeddingText({ name: result.name, category, description: result.description });
150
+ embedding = await embeddingService.embed(text);
151
+ } catch (err) {
152
+ console.warn(`[handleTopicUpdate] Failed to compute embedding for topic "${result.name}":`, err);
153
+ }
154
+
155
+ const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
156
+ const topic: Topic = {
157
+ id: itemId,
158
+ name: result.name,
159
+ description: result.description,
160
+ sentiment: result.sentiment,
161
+ category: result.category ?? candidateCategory ?? existingTopic?.category,
162
+ exposure_current: calculateExposureCurrent(exposureImpact),
163
+ exposure_desired: result.exposure_desired ?? 0.5,
164
+ last_updated: now,
165
+ learned_by: isNewItem ? personaId : existingTopic?.learned_by,
166
+ last_changed_by: personaId,
167
+ persona_groups: mergeGroups(personaGroup, isNewItem, existingTopic?.persona_groups),
168
+ embedding,
131
169
  };
132
- const existingItem = getExistingItem();
133
-
134
- const mergeGroups = (existing: string[] | undefined): string[] | undefined => {
135
- if (!personaGroup) return existing;
136
- if (isNewItem) return [personaGroup];
137
- const groups = new Set(existing ?? []);
138
- groups.add(personaGroup);
139
- return Array.from(groups);
170
+ state.human_topic_upsert(topic);
171
+
172
+ const allMessages = state.messages_get(personaId);
173
+ await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);
174
+
175
+ console.log(`[handleTopicUpdate] ${isNewItem ? "Created" : "Updated"} topic "${result.name}"`);
176
+ }
177
+
178
+ export async function handlePersonUpdate(response: LLMResponse, state: StateManager): Promise<void> {
179
+ const result = response.parsed as (PersonUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;
180
+
181
+ if (!result || Object.keys(result).length === 0) {
182
+ console.log("[handlePersonUpdate] No changes needed (empty result)");
183
+ return;
184
+ }
185
+
186
+ const isNewItem = response.request.data.isNewItem as boolean;
187
+ const existingItemId = response.request.data.existingItemId as string | undefined;
188
+ const personaId = response.request.data.personaId as string;
189
+ const personaDisplayName = response.request.data.personaDisplayName as string;
190
+ const candidateRelationship = response.request.data.candidateRelationship as string | undefined;
191
+
192
+ if (!result.name || !result.description || result.sentiment === undefined) {
193
+ console.error("[handlePersonUpdate] Missing required fields in result");
194
+ return;
195
+ }
196
+
197
+ const now = new Date().toISOString();
198
+ const human = state.getHuman();
199
+
200
+ const resolveItemId = (): string => {
201
+ if (isNewItem || !existingItemId) return crypto.randomUUID();
202
+ return human.people.find(p => p.id === existingItemId) ? existingItemId : crypto.randomUUID();
140
203
  };
204
+ const itemId = resolveItemId();
205
+
206
+ const persona = state.persona_getById(personaId);
207
+ const personaGroup = persona?.group_primary ?? null;
208
+
209
+ const existingPerson = isNewItem ? undefined : human.people.find(p => p.id === existingItemId);
141
210
 
142
211
  let embedding: number[] | undefined;
143
212
  try {
144
213
  const embeddingService = getEmbeddingService();
145
- const text = getItemEmbeddingText({ name: result.name, description: result.description });
214
+ const relationship = result.relationship ?? candidateRelationship ?? existingPerson?.relationship;
215
+ const text = getPersonEmbeddingText({ name: result.name, relationship, description: result.description });
146
216
  embedding = await embeddingService.embed(text);
147
217
  } catch (err) {
148
- console.warn(`[handleHumanItemUpdate] Failed to compute embedding for ${candidateType} "${result.name}":`, err);
218
+ console.warn(`[handlePersonUpdate] Failed to compute embedding for person "${result.name}":`, err);
149
219
  }
150
220
 
151
- switch (candidateType) {
152
- case "fact": {
153
- const fact: Fact = {
154
- id: itemId,
155
- name: result.name,
156
- description: result.description,
157
- sentiment: result.sentiment,
158
- validated: ValidationLevel.None,
159
- validated_date: now,
160
- last_updated: now,
161
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
162
- last_changed_by: personaId,
163
- persona_groups: mergeGroups(existingItem?.persona_groups),
164
- embedding,
165
- };
166
- applyOrValidate(state, "fact", fact, personaDisplayName, isEi, personaGroup);
167
- break;
168
- }
169
- case "trait": {
170
- const trait: Trait = {
171
- id: itemId,
172
- name: result.name,
173
- description: result.description,
174
- sentiment: result.sentiment,
175
- strength: (result as any).strength ?? 0.5,
176
- last_updated: now,
177
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
178
- last_changed_by: personaId,
179
- persona_groups: mergeGroups(existingItem?.persona_groups),
180
- embedding,
181
- };
182
- applyOrValidate(state, "trait", trait, personaDisplayName, isEi, personaGroup);
183
- break;
184
- }
185
- case "topic": {
186
- const exposureImpact = (result as any).exposure_impact as ExposureImpact | undefined;
187
- const itemCategory = response.request.data.itemCategory as string | undefined;
188
- const existingTopic = human.topics.find(t => t.id === existingItemId);
189
- const topic: Topic = {
190
- id: itemId,
191
- name: result.name,
192
- description: result.description,
193
- sentiment: result.sentiment,
194
- category: (result as any).category ?? itemCategory ?? existingTopic?.category,
195
- exposure_current: calculateExposureCurrent(exposureImpact),
196
- exposure_desired: (result as any).exposure_desired ?? 0.5,
197
- last_updated: now,
198
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
199
- last_changed_by: personaId,
200
- persona_groups: mergeGroups(existingItem?.persona_groups),
201
- embedding,
202
- };
203
- applyOrValidate(state, "topic", topic, personaDisplayName, isEi, personaGroup);
204
- break;
205
- }
206
- case "person": {
207
- const exposureImpact = (result as any).exposure_impact as ExposureImpact | undefined;
208
- const person: Person = {
209
- id: itemId,
210
- name: result.name,
211
- description: result.description,
212
- sentiment: result.sentiment,
213
- relationship: (result as any).relationship ?? "Unknown",
214
- exposure_current: calculateExposureCurrent(exposureImpact),
215
- exposure_desired: (result as any).exposure_desired ?? 0.5,
216
- last_updated: now,
217
- learned_by: isNewItem ? personaId : existingItem?.learned_by,
218
- last_changed_by: personaId,
219
- persona_groups: mergeGroups(existingItem?.persona_groups),
220
- embedding,
221
- };
222
- applyOrValidate(state, "person", person, personaDisplayName, isEi, personaGroup);
223
- break;
224
- }
225
- }
221
+ const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
222
+ const person: Person = {
223
+ id: itemId,
224
+ name: result.name,
225
+ description: result.description,
226
+ sentiment: result.sentiment,
227
+ relationship: result.relationship ?? candidateRelationship ?? existingPerson?.relationship ?? "Unknown",
228
+ exposure_current: calculateExposureCurrent(exposureImpact),
229
+ exposure_desired: result.exposure_desired ?? 0.5,
230
+ last_updated: now,
231
+ learned_by: isNewItem ? personaId : existingPerson?.learned_by,
232
+ last_changed_by: personaId,
233
+ persona_groups: mergeGroups(personaGroup, isNewItem, existingPerson?.persona_groups),
234
+ embedding,
235
+ };
236
+ state.human_person_upsert(person);
226
237
 
227
238
  const allMessages = state.messages_get(personaId);
228
239
  await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);
229
240
 
230
- console.log(`[handleHumanItemUpdate] ${isNewItem ? "Created" : "Updated"} ${candidateType} "${result.name}"`);
241
+ console.log(`[handlePersonUpdate] ${isNewItem ? "Created" : "Updated"} person "${result.name}"`);
242
+ }
243
+
244
+ function normalizeText(text: string): string {
245
+ return text
246
+ .replace(/[\u201C\u201D]/g, '"') // curly double quotes
247
+ .replace(/[\u2018\u2019\u0060\u00B4]/g, "'") // curly single, backtick, acute accent
248
+ .replace(/[\u2014\u2013\u2012]/g, '-') // em-dash, en-dash, figure dash
249
+ .replace(/\u00A0/g, ' ') // non-breaking space
250
+ .replace(/[\u2000-\u200F]/g, ' ') // unicode space variants
251
+ .replace(/\u2026|\.\.\./g, '\u2026'); // normalize both ellipsis forms → unicode ellipsis (1:1)
231
252
  }
232
253
 
233
- function normalizeQuotes(text: string): string {
254
+ function stripPunctuation(text: string): string {
255
+ // Remove characters LLMs commonly mangle, keep spaces and alphanumeric
256
+ // Strip: punctuation, unicode punctuation variants, curly quotes, dashes, etc.
257
+ // Keep: letters, digits, spaces
234
258
  return text
235
- .replace(/[\u201C\u201D]/g, '"') // Curly double quotes to straight
236
- .replace(/[\u2018\u2019]/g, "'"); // Curly single quotes to straight
259
+ .replace(/[^\w\s]/gu, ' ') // replace non-word, non-space with space
260
+ .replace(/\s+/g, ' ') // collapse multiple spaces
261
+ .trim()
262
+ .toLowerCase();
263
+ }
264
+
265
+ interface WordBoundaryMatch {
266
+ start: number;
267
+ end: number;
268
+ text: string;
269
+ }
270
+
271
+ function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
272
+ const strippedQuote = stripPunctuation(quoteText);
273
+ const quoteWords = strippedQuote.split(' ').filter(w => w.length > 0);
274
+
275
+ if (quoteWords.length < 3) return null; // Too short to trust — require at least 3 words
276
+
277
+ // Build word token list from original message with original positions
278
+ const wordTokens: Array<{ word: string; start: number; end: number }> = [];
279
+ const wordRegex = /\S+/g;
280
+ let match: RegExpExecArray | null;
281
+ while ((match = wordRegex.exec(msgText)) !== null) {
282
+ wordTokens.push({
283
+ word: stripPunctuation(match[0]),
284
+ start: match.index,
285
+ end: match.index + match[0].length,
286
+ });
287
+ }
288
+
289
+ // Find contiguous sequence of words matching the quote words
290
+ for (let i = 0; i <= wordTokens.length - quoteWords.length; i++) {
291
+ let allMatch = true;
292
+ for (let j = 0; j < quoteWords.length; j++) {
293
+ if (wordTokens[i + j].word !== quoteWords[j]) {
294
+ allMatch = false;
295
+ break;
296
+ }
297
+ }
298
+ if (allMatch) {
299
+ const startToken = wordTokens[i];
300
+ const endToken = wordTokens[i + quoteWords.length - 1];
301
+ return {
302
+ start: startToken.start,
303
+ end: endToken.end,
304
+ text: msgText.slice(startToken.start, endToken.end),
305
+ };
306
+ }
307
+ }
308
+
309
+ return null;
237
310
  }
238
311
 
239
312
  async function validateAndStoreQuotes(
@@ -250,88 +323,107 @@ async function validateAndStoreQuotes(
250
323
  let found = false;
251
324
  for (const message of messages) {
252
325
  const msgText = getMessageText(message);
253
- const normalizedMsg = normalizeQuotes(msgText);
254
- const normalizedQuote = normalizeQuotes(candidate.text);
326
+
327
+ // Level 1: normalized exact match
328
+ const normalizedMsg = normalizeText(msgText);
329
+ const normalizedQuote = normalizeText(candidate.text);
255
330
  const start = normalizedMsg.indexOf(normalizedQuote);
331
+
332
+ let matchStart: number;
333
+ let matchEnd: number;
334
+ let matchText: string;
335
+ let matchLevel: string;
336
+
256
337
  if (start !== -1) {
257
- const end = start + candidate.text.length;
258
-
259
- // Check for ANY overlapping quote in this message (not just exact match)
260
- const existing = state.human_quote_getForMessage(message.id);
261
- const overlapping = existing.find(q =>
262
- q.start !== null && q.end !== null &&
263
- start < q.end && end > q.start // ranges overlap
264
- );
265
-
266
- if (overlapping) {
267
- // Merge: expand to the union of both ranges
268
- const mergedStart = Math.min(start, overlapping.start!);
269
- const mergedEnd = Math.max(end, overlapping.end!);
270
- const mergedText = msgText.slice(mergedStart, mergedEnd);
271
-
272
- // Merge data_item_ids and persona_groups (deduplicated)
273
- const mergedDataItemIds = overlapping.data_item_ids.includes(dataItemId)
274
- ? overlapping.data_item_ids
275
- : [...overlapping.data_item_ids, dataItemId];
276
- const group = personaGroup || "General";
277
- const mergedGroups = overlapping.persona_groups.includes(group)
278
- ? overlapping.persona_groups
279
- : [...overlapping.persona_groups, group];
280
-
281
- // Only recompute embedding if the text actually changed
282
- let embedding = overlapping.embedding;
283
- if (mergedText !== overlapping.text) {
284
- try {
285
- const embeddingService = getEmbeddingService();
286
- embedding = await embeddingService.embed(mergedText);
287
- } catch (err) {
288
- console.warn(`[extraction] Failed to recompute embedding for merged quote: "${mergedText.slice(0, 30)}..."`, err);
289
- }
338
+ matchStart = start;
339
+ matchEnd = start + candidate.text.length;
340
+ matchText = candidate.text;
341
+ matchLevel = "exact";
342
+ } else {
343
+ // Level 2: word-boundary fallback
344
+ const wordMatch = findQuoteByWords(candidate.text, msgText);
345
+ if (!wordMatch) continue;
346
+ matchStart = wordMatch.start;
347
+ matchEnd = wordMatch.end;
348
+ matchText = wordMatch.text;
349
+ matchLevel = "word-boundary";
350
+ }
351
+
352
+ const existing = state.human_quote_getForMessage(message.id);
353
+ const overlapping = existing.find(q =>
354
+ q.start !== null && q.end !== null &&
355
+ matchStart < q.end && matchEnd > q.start
356
+ );
357
+
358
+ if (overlapping) {
359
+ const mergedStart = Math.min(matchStart, overlapping.start!);
360
+ const mergedEnd = Math.max(matchEnd, overlapping.end!);
361
+ const mergedText = msgText.slice(mergedStart, mergedEnd);
362
+
363
+ const mergedDataItemIds = overlapping.data_item_ids.includes(dataItemId)
364
+ ? overlapping.data_item_ids
365
+ : [...overlapping.data_item_ids, dataItemId];
366
+ const group = personaGroup || "General";
367
+ const mergedGroups = overlapping.persona_groups.includes(group)
368
+ ? overlapping.persona_groups
369
+ : [...overlapping.persona_groups, group];
370
+
371
+ let embedding = overlapping.embedding;
372
+ if (mergedText !== overlapping.text) {
373
+ try {
374
+ const embeddingService = getEmbeddingService();
375
+ embedding = await embeddingService.embed(mergedText);
376
+ } catch (err) {
377
+ console.warn(`[extraction] Failed to recompute embedding for merged quote: "${mergedText.slice(0, 30)}..."`, err);
290
378
  }
291
-
292
- state.human_quote_update(overlapping.id, {
293
- start: mergedStart,
294
- end: mergedEnd,
295
- text: mergedText,
296
- data_item_ids: mergedDataItemIds,
297
- persona_groups: mergedGroups,
298
- embedding,
299
- });
300
- console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
301
- found = true;
302
- break;
303
379
  }
304
-
305
- let embedding: number[] | undefined;
306
- try {
307
- const embeddingService = getEmbeddingService();
308
- embedding = await embeddingService.embed(candidate.text);
309
- } catch (err) {
310
- console.warn(`[extraction] Failed to compute embedding for quote: "${candidate.text.slice(0, 30)}..."`, err);
311
- }
312
-
313
- const quote: Quote = {
314
- id: crypto.randomUUID(),
315
- message_id: message.id,
316
- data_item_ids: [dataItemId],
317
- persona_groups: [personaGroup || "General"],
318
- text: candidate.text,
319
- speaker: message.role === "human" ? "human" : personaName,
320
- timestamp: message.timestamp,
321
- start: start,
322
- end: end,
323
- created_at: new Date().toISOString(),
324
- created_by: "extraction",
380
+
381
+ state.human_quote_update(overlapping.id, {
382
+ start: mergedStart,
383
+ end: mergedEnd,
384
+ text: mergedText,
385
+ data_item_ids: mergedDataItemIds,
386
+ persona_groups: mergedGroups,
325
387
  embedding,
326
- };
327
- state.human_quote_add(quote);
328
- console.log(`[extraction] Captured quote: "${candidate.text.slice(0, 50)}..."`);
388
+ });
389
+ console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
329
390
  found = true;
330
391
  break;
331
392
  }
393
+
394
+ let embedding: number[] | undefined;
395
+ try {
396
+ const embeddingService = getEmbeddingService();
397
+ embedding = await embeddingService.embed(matchText);
398
+ } catch (err) {
399
+ console.warn(`[extraction] Failed to compute embedding for quote: "${matchText.slice(0, 30)}..."`, err);
400
+ }
401
+
402
+ const quote: Quote = {
403
+ id: crypto.randomUUID(),
404
+ message_id: message.id,
405
+ data_item_ids: [dataItemId],
406
+ persona_groups: [personaGroup || "General"],
407
+ text: matchText,
408
+ speaker: message.role === "human" ? "human" : personaName,
409
+ timestamp: message.timestamp,
410
+ start: matchStart,
411
+ end: matchEnd,
412
+ created_at: new Date().toISOString(),
413
+ created_by: "extraction",
414
+ embedding,
415
+ };
416
+ state.human_quote_add(quote);
417
+ if (matchLevel === "word-boundary") {
418
+ console.log(`[extraction] Captured quote (word-boundary match): "${matchText.slice(0, 50)}..."`);
419
+ } else {
420
+ console.log(`[extraction] Captured quote: "${matchText.slice(0, 50)}..."`);
421
+ }
422
+ found = true;
423
+ break;
332
424
  }
333
425
  if (!found) {
334
- console.log(`[extraction] Quote not found in messages, skipping: "${candidate.text?.slice(0, 50)}..."`);
426
+ console.warn(`[extraction] Quote not found in messages (both levels), skipping: "${candidate.text?.slice(0, 50)}..."`);
335
427
  }
336
428
  }
337
429
  }
@@ -346,18 +438,4 @@ function calculateExposureCurrent(impact: ExposureImpact | undefined): number {
346
438
  }
347
439
  }
348
440
 
349
- function applyOrValidate(
350
- state: StateManager,
351
- dataType: DataItemType,
352
- item: Fact | Trait | Topic | Person,
353
- _personaName: string,
354
- _isEi: boolean,
355
- _personaGroup: string | null
356
- ): void {
357
- switch (dataType) {
358
- case "fact": state.human_fact_upsert(item as Fact); break;
359
- case "trait": state.human_trait_upsert(item as Trait); break;
360
- case "topic": state.human_topic_upsert(item as Topic); break;
361
- case "person": state.human_person_upsert(item as Person); break;
362
- }
363
- }
441
+