ei-tui 0.1.25 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/package.json +1 -1
- package/src/README.md +4 -11
- package/src/cli/README.md +4 -5
- package/src/cli/retrieval.ts +3 -25
- package/src/cli.ts +3 -7
- package/src/core/AGENTS.md +1 -1
- package/src/core/constants/built-in-facts.ts +49 -0
- package/src/core/constants/index.ts +1 -0
- package/src/core/context-utils.ts +0 -1
- package/src/core/embedding-service.ts +8 -0
- package/src/core/handlers/dedup.ts +10 -16
- package/src/core/handlers/heartbeat.ts +2 -3
- package/src/core/handlers/human-extraction.ts +95 -30
- package/src/core/handlers/human-matching.ts +326 -248
- package/src/core/handlers/index.ts +8 -6
- package/src/core/handlers/persona-generation.ts +8 -8
- package/src/core/handlers/rewrite.ts +4 -29
- package/src/core/handlers/utils.ts +23 -1
- package/src/core/heartbeat-manager.ts +2 -4
- package/src/core/human-data-manager.ts +5 -27
- package/src/core/message-manager.ts +10 -10
- package/src/core/orchestrators/ceremony.ts +50 -39
- package/src/core/orchestrators/dedup-phase.ts +0 -1
- package/src/core/orchestrators/human-extraction.ts +351 -207
- package/src/core/orchestrators/index.ts +6 -4
- package/src/core/orchestrators/persona-generation.ts +3 -3
- package/src/core/processor.ts +99 -17
- package/src/core/prompt-context-builder.ts +4 -6
- package/src/core/state/human.ts +1 -26
- package/src/core/state/personas.ts +2 -2
- package/src/core/state-manager.ts +107 -14
- package/src/core/tools/builtin/read-memory.ts +7 -8
- package/src/core/types/data-items.ts +2 -4
- package/src/core/types/entities.ts +6 -4
- package/src/core/types/enums.ts +6 -9
- package/src/core/types/llm.ts +2 -2
- package/src/core/utils/crossFind.ts +2 -5
- package/src/core/utils/event-windows.ts +31 -0
- package/src/integrations/claude-code/importer.ts +8 -4
- package/src/integrations/claude-code/types.ts +2 -0
- package/src/integrations/opencode/importer.ts +7 -3
- package/src/prompts/AGENTS.md +73 -1
- package/src/prompts/ceremony/rewrite.ts +3 -22
- package/src/prompts/ceremony/types.ts +3 -3
- package/src/prompts/generation/descriptions.ts +2 -2
- package/src/prompts/generation/types.ts +2 -2
- package/src/prompts/heartbeat/types.ts +2 -2
- package/src/prompts/human/event-scan.ts +122 -0
- package/src/prompts/human/fact-find.ts +106 -0
- package/src/prompts/human/fact-scan.ts +0 -2
- package/src/prompts/human/index.ts +17 -10
- package/src/prompts/human/person-match.ts +65 -0
- package/src/prompts/human/person-scan.ts +52 -59
- package/src/prompts/human/person-update.ts +241 -0
- package/src/prompts/human/topic-match.ts +65 -0
- package/src/prompts/human/topic-scan.ts +51 -71
- package/src/prompts/human/topic-update.ts +295 -0
- package/src/prompts/human/types.ts +63 -40
- package/src/prompts/index.ts +4 -8
- package/src/prompts/persona/topics-update.ts +2 -2
- package/src/prompts/persona/traits.ts +2 -2
- package/src/prompts/persona/types.ts +3 -3
- package/src/prompts/response/index.ts +1 -1
- package/src/prompts/response/sections.ts +9 -12
- package/src/prompts/response/types.ts +2 -3
- package/src/storage/embeddings.ts +1 -1
- package/src/storage/index.ts +1 -0
- package/src/storage/indexed.ts +174 -0
- package/src/storage/merge.ts +67 -2
- package/tui/src/commands/me.tsx +5 -14
- package/tui/src/commands/settings.tsx +15 -0
- package/tui/src/context/ei.tsx +5 -14
- package/tui/src/util/yaml-serializers.ts +48 -33
- package/src/cli/commands/traits.ts +0 -25
- package/src/prompts/human/item-match.ts +0 -74
- package/src/prompts/human/item-update.ts +0 -364
- package/src/prompts/human/trait-scan.ts +0 -115
|
@@ -1,239 +1,312 @@
|
|
|
1
1
|
import {
|
|
2
|
-
ValidationLevel,
|
|
3
2
|
type LLMResponse,
|
|
4
3
|
type Message,
|
|
5
|
-
type Trait,
|
|
6
4
|
type Topic,
|
|
7
|
-
type Fact,
|
|
8
5
|
type Person,
|
|
9
6
|
type Quote,
|
|
10
|
-
type DataItemType,
|
|
11
|
-
type DataItemBase,
|
|
12
7
|
} from "../types.js";
|
|
13
8
|
import type { StateManager } from "../state-manager.js";
|
|
14
|
-
import type { ItemMatchResult,
|
|
15
|
-
import {
|
|
16
|
-
import { getEmbeddingService,
|
|
17
|
-
import { crossFind } from "../utils/index.js";
|
|
18
|
-
import { splitMessagesByTimestamp, getMessageText } from "./utils.js";
|
|
9
|
+
import type { ItemMatchResult, ExposureImpact, TopicUpdateResult, PersonUpdateResult } from "../../prompts/human/types.js";
|
|
10
|
+
import { queueTopicUpdate, queuePersonUpdate, type ExtractionContext } from "../orchestrators/index.js";
|
|
11
|
+
import { getEmbeddingService, getTopicEmbeddingText, getPersonEmbeddingText } from "../embedding-service.js";
|
|
19
12
|
|
|
20
|
-
|
|
13
|
+
/**
 * Computes the `persona_groups` value for an item being written.
 *
 * - No persona group (null or empty string): `existing` is returned untouched
 *   (same reference, possibly `undefined`).
 * - New item: the list is just the persona's group; `existing` is ignored.
 * - Existing item: `existing` is de-duplicated and the persona's group is
 *   appended if it is not already present. Original ordering is kept.
 *
 * @param personaGroup Group of the persona making the change, or null.
 * @param isNewItem    Whether the item is being created rather than updated.
 * @param existing     Groups already stored on the item, if any.
 * @returns The group list to store on the item.
 */
function mergeGroups(personaGroup: string | null, isNewItem: boolean, existing: string[] | undefined): string[] | undefined {
  if (!personaGroup) {
    return existing;
  }
  if (isNewItem) {
    return [personaGroup];
  }
  // A Set preserves insertion order, so prior groups stay first.
  return [...new Set<string>([...(existing ?? []), personaGroup])];
}
|
|
20
|
+
import { resolveMessageWindow, getMessageText } from "./utils.js";
|
|
21
|
+
|
|
22
|
+
/**
 * Handles the LLM response of a topic-match request.
 *
 * Normalizes `matched_guid` on the parsed result (the literal "new" and any
 * GUID not present in `human.topics` both become `null`), writes the
 * normalized value back onto the parsed result, then queues a topic update
 * carrying the candidate fields copied from the originating request.
 *
 * @param response LLM response whose `parsed` payload is an ItemMatchResult.
 * @param state    State manager used to look up topics and queue follow-up work.
 */
export function handleTopicMatch(response: LLMResponse, state: StateManager): void {
  const parsed = response.parsed as ItemMatchResult | undefined;
  if (!parsed) {
    console.error("[handleTopicMatch] No parsed result");
    return;
  }

  const requestData = response.request.data;
  const personaId = requestData.personaId as string;
  const personaDisplayName = requestData.personaDisplayName as string;
  const { messages_context, messages_analyze } = resolveMessageWindow(response, state);

  let resolvedGuid = parsed.matched_guid;
  if (resolvedGuid === "new") {
    resolvedGuid = null;
  } else if (resolvedGuid) {
    // Guard against GUIDs that do not refer to a stored topic.
    const isKnownTopic = state.getHuman().topics.some(topic => topic.id === resolvedGuid);
    if (!isKnownTopic) {
      console.warn(`[handleTopicMatch] matched_guid "${resolvedGuid}" not found in topics — treating as new`);
      resolvedGuid = null;
    }
  }
  parsed.matched_guid = resolvedGuid;

  const context: ExtractionContext & {
    candidateName: string;
    candidateDescription: string;
    candidateCategory: string;
    extraction_model?: string;
  } = {
    personaId,
    personaDisplayName,
    messages_context,
    messages_analyze,
    candidateName: requestData.candidateName as string,
    candidateDescription: requestData.candidateDescription as string,
    candidateCategory: requestData.candidateCategory as string,
    extraction_model: requestData.extraction_model as string | undefined,
  };

  queueTopicUpdate(parsed, context, state);

  const outcome = resolvedGuid ? `matched GUID "${resolvedGuid}"` : "no match (new topic)";
  console.log(`[handleTopicMatch] topic "${context.candidateName}": ${outcome}`);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Handles the LLM response of a person-match request.
 *
 * Normalizes `matched_guid` on the parsed result (the literal "new" and any
 * GUID not present in `human.people` both become `null`), writes the
 * normalized value back onto the parsed result, then queues a person update
 * carrying the candidate fields copied from the originating request.
 *
 * @param response LLM response whose `parsed` payload is an ItemMatchResult.
 * @param state    State manager used to look up people and queue follow-up work.
 */
export function handlePersonMatch(response: LLMResponse, state: StateManager): void {
  const parsed = response.parsed as ItemMatchResult | undefined;
  if (!parsed) {
    console.error("[handlePersonMatch] No parsed result");
    return;
  }

  const requestData = response.request.data;
  const personaId = requestData.personaId as string;
  const personaDisplayName = requestData.personaDisplayName as string;
  const { messages_context, messages_analyze } = resolveMessageWindow(response, state);

  let resolvedGuid = parsed.matched_guid;
  if (resolvedGuid === "new") {
    resolvedGuid = null;
  } else if (resolvedGuid) {
    // Guard against GUIDs that do not refer to a stored person.
    const isKnownPerson = state.getHuman().people.some(person => person.id === resolvedGuid);
    if (!isKnownPerson) {
      console.warn(`[handlePersonMatch] matched_guid "${resolvedGuid}" not found in people — treating as new`);
      resolvedGuid = null;
    }
  }
  parsed.matched_guid = resolvedGuid;

  const context: ExtractionContext & {
    candidateName: string;
    candidateDescription: string;
    candidateRelationship: string;
    extraction_model?: string;
  } = {
    personaId,
    personaDisplayName,
    messages_context,
    messages_analyze,
    candidateName: requestData.candidateName as string,
    candidateDescription: requestData.candidateDescription as string,
    candidateRelationship: requestData.candidateRelationship as string,
    extraction_model: requestData.extraction_model as string | undefined,
  };

  queuePersonUpdate(parsed, context, state);

  const outcome = resolvedGuid ? `matched GUID "${resolvedGuid}"` : "no match (new person)";
  console.log(`[handlePersonMatch] person "${context.candidateName}": ${outcome}`);
}
|
|
87
111
|
|
|
88
|
-
export async function
|
|
89
|
-
const result = response.parsed as
|
|
90
|
-
|
|
112
|
+
/**
 * Handles the LLM response of a topic-update request: builds a Topic from the
 * parsed result, upserts it, and stores any quotes the LLM attached.
 *
 * An empty parsed result means "no changes" and is a no-op. A result missing
 * `name`, `description` or `sentiment` is rejected with an error log.
 *
 * The topic is treated as an update only when the request is not flagged as
 * new AND `existingItemId` resolves to a stored topic. In every other case a
 * fresh UUID is minted and the topic is attributed to the current persona via
 * `learned_by`, so a stale or unknown `existingItemId` no longer produces a
 * new topic with no `learned_by` that is logged as "Updated".
 *
 * Embedding failures are logged and the topic is written with
 * `embedding: undefined`.
 *
 * @param response LLM response; `parsed` is a TopicUpdateResult optionally
 *                 carrying `quotes`, and `request.data` carries the request context.
 * @param state    State manager used for lookups and persistence.
 */
export async function handleTopicUpdate(response: LLMResponse, state: StateManager): Promise<void> {
  const result = response.parsed as (TopicUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;

  if (!result || Object.keys(result).length === 0) {
    console.log("[handleTopicUpdate] No changes needed (empty result)");
    return;
  }

  const isNewItem = response.request.data.isNewItem as boolean;
  const existingItemId = response.request.data.existingItemId as string | undefined;
  const personaId = response.request.data.personaId as string;
  const personaDisplayName = response.request.data.personaDisplayName as string;
  const candidateCategory = response.request.data.candidateCategory as string | undefined;

  if (!result.name || !result.description || result.sentiment === undefined) {
    console.error("[handleTopicUpdate] Missing required fields in result");
    return;
  }

  const now = new Date().toISOString();
  const human = state.getHuman();

  // Single lookup: only a topic that really exists counts as "existing".
  const existingTopic =
    isNewItem || !existingItemId ? undefined : human.topics.find(t => t.id === existingItemId);
  const createsNew = existingTopic === undefined;
  const itemId = existingTopic ? existingTopic.id : crypto.randomUUID();

  const persona = state.persona_getById(personaId);
  const personaGroup = persona?.group_primary ?? null;

  // Precedence: LLM-provided category, then the scan candidate's, then the stored one.
  const category = result.category ?? candidateCategory ?? existingTopic?.category;

  let embedding: number[] | undefined;
  try {
    const embeddingService = getEmbeddingService();
    const text = getTopicEmbeddingText({ name: result.name, category, description: result.description });
    embedding = await embeddingService.embed(text);
  } catch (err) {
    console.warn(`[handleTopicUpdate] Failed to compute embedding for topic "${result.name}":`, err);
  }

  const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
  const topic: Topic = {
    id: itemId,
    name: result.name,
    description: result.description,
    sentiment: result.sentiment,
    category,
    exposure_current: calculateExposureCurrent(exposureImpact),
    exposure_desired: result.exposure_desired ?? 0.5,
    last_updated: now,
    learned_by: existingTopic ? existingTopic.learned_by : personaId,
    last_changed_by: personaId,
    persona_groups: mergeGroups(personaGroup, createsNew, existingTopic?.persona_groups),
    embedding,
  };
  state.human_topic_upsert(topic);

  const allMessages = state.messages_get(personaId);
  await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);

  console.log(`[handleTopicUpdate] ${createsNew ? "Created" : "Updated"} topic "${result.name}"`);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Handles the LLM response of a person-update request: builds a Person from
 * the parsed result, upserts it, and stores any quotes the LLM attached.
 *
 * An empty parsed result means "no changes" and is a no-op. A result missing
 * `name`, `description` or `sentiment` is rejected with an error log.
 *
 * The person is treated as an update only when the request is not flagged as
 * new AND `existingItemId` resolves to a stored person. In every other case a
 * fresh UUID is minted and the person is attributed to the current persona via
 * `learned_by`, so a stale or unknown `existingItemId` no longer produces a
 * new person with no `learned_by` that is logged as "Updated".
 *
 * Embedding failures are logged and the person is written with
 * `embedding: undefined`.
 *
 * @param response LLM response; `parsed` is a PersonUpdateResult optionally
 *                 carrying `quotes`, and `request.data` carries the request context.
 * @param state    State manager used for lookups and persistence.
 */
export async function handlePersonUpdate(response: LLMResponse, state: StateManager): Promise<void> {
  const result = response.parsed as (PersonUpdateResult & { quotes?: Array<{ text: string; reason: string }> }) | undefined;

  if (!result || Object.keys(result).length === 0) {
    console.log("[handlePersonUpdate] No changes needed (empty result)");
    return;
  }

  const isNewItem = response.request.data.isNewItem as boolean;
  const existingItemId = response.request.data.existingItemId as string | undefined;
  const personaId = response.request.data.personaId as string;
  const personaDisplayName = response.request.data.personaDisplayName as string;
  const candidateRelationship = response.request.data.candidateRelationship as string | undefined;

  if (!result.name || !result.description || result.sentiment === undefined) {
    console.error("[handlePersonUpdate] Missing required fields in result");
    return;
  }

  const now = new Date().toISOString();
  const human = state.getHuman();

  // Single lookup: only a person that really exists counts as "existing".
  const existingPerson =
    isNewItem || !existingItemId ? undefined : human.people.find(p => p.id === existingItemId);
  const createsNew = existingPerson === undefined;
  const itemId = existingPerson ? existingPerson.id : crypto.randomUUID();

  const persona = state.persona_getById(personaId);
  const personaGroup = persona?.group_primary ?? null;

  // Precedence: LLM-provided relationship, then the scan candidate's, then the stored one.
  const relationship = result.relationship ?? candidateRelationship ?? existingPerson?.relationship;

  let embedding: number[] | undefined;
  try {
    const embeddingService = getEmbeddingService();
    const text = getPersonEmbeddingText({ name: result.name, relationship, description: result.description });
    embedding = await embeddingService.embed(text);
  } catch (err) {
    console.warn(`[handlePersonUpdate] Failed to compute embedding for person "${result.name}":`, err);
  }

  const exposureImpact = result.exposure_impact as ExposureImpact | undefined;
  const person: Person = {
    id: itemId,
    name: result.name,
    description: result.description,
    sentiment: result.sentiment,
    // The stored record always has a relationship; the embedding text above may omit it.
    relationship: relationship ?? "Unknown",
    exposure_current: calculateExposureCurrent(exposureImpact),
    exposure_desired: result.exposure_desired ?? 0.5,
    last_updated: now,
    learned_by: existingPerson ? existingPerson.learned_by : personaId,
    last_changed_by: personaId,
    persona_groups: mergeGroups(personaGroup, createsNew, existingPerson?.persona_groups),
    embedding,
  };
  state.human_person_upsert(person);

  const allMessages = state.messages_get(personaId);
  await validateAndStoreQuotes(result.quotes, allMessages, itemId, personaDisplayName, personaGroup, state);

  console.log(`[handlePersonUpdate] ${createsNew ? "Created" : "Updated"} person "${result.name}"`);
}
|
|
243
|
+
|
|
244
|
+
/**
 * Canonicalizes typographic variants so that two renderings of the same
 * sentence compare equal: curly quotes, backtick and acute accent, dashes,
 * non-breaking and U+2000–U+200F characters, and both ellipsis forms.
 *
 * Every rule is one-character-for-one-character except the ellipsis rule,
 * which turns a three-character "..." into a single "\u2026". The output can
 * therefore be shorter than the input, and offsets into it do not necessarily
 * line up with offsets into the original string.
 *
 * @param text Raw text.
 * @returns The text with the substitutions applied, in the order listed.
 */
function normalizeText(text: string): string {
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/[\u201C\u201D]/g, '"'],              // curly double quotes
    [/[\u2018\u2019\u0060\u00B4]/g, "'"],  // curly single, backtick, acute accent
    [/[\u2014\u2013\u2012]/g, '-'],        // em-dash, en-dash, figure dash
    [/\u00A0/g, ' '],                      // non-breaking space
    [/[\u2000-\u200F]/g, ' '],             // unicode space variants
    [/\u2026|\.\.\./g, '\u2026'],          // both ellipsis forms → unicode ellipsis
  ];

  let normalized = text;
  for (const [pattern, replacement] of substitutions) {
    normalized = normalized.replace(pattern, replacement);
  }
  return normalized;
}
|
|
232
253
|
|
|
233
|
-
function
|
|
254
|
+
/**
 * Reduces text to lowercase words separated by single spaces.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is replaced by a space; whitespace runs are then collapsed and
 * the result is trimmed and lowercased.
 *
 * Uses Unicode property escapes instead of `\w`, which stays ASCII-only even
 * under the `u` flag and would otherwise strip accented and non-Latin letters
 * as if they were punctuation ("Café" → "caf").
 *
 * @param text Raw text.
 * @returns Lowercased words joined by single spaces (possibly the empty string).
 */
function stripPunctuation(text: string): string {
  return text
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ') // anything that is not a word character or space → space
    .replace(/\s+/g, ' ')                    // collapse multiple spaces
    .trim()
    .toLowerCase();
}

/** A span of the original message located by word-level matching. */
interface WordBoundaryMatch {
  /** Offset in the message where the first matched token begins. */
  start: number;
  /** Offset just past the last matched token (exclusive). */
  end: number;
  /** `msgText.slice(start, end)` — the message's own wording and punctuation. */
  text: string;
}

/**
 * Locates a quote inside a message by comparing punctuation-stripped,
 * lowercased words, for quotes whose punctuation differs from the message.
 *
 * The quote must contain at least three words. The returned span always
 * starts and ends on whitespace-delimited token boundaries of the message,
 * so `text` carries the message's original punctuation.
 *
 * Message tokens are split into the same sub-words as the quote: "don't"
 * contributes "don" and "t", and a stand-alone "—" contributes nothing.
 * Stripping each token whole would leave "don t" or "" as a single word,
 * which can never equal any quote word, so any quote spanning a contraction,
 * hyphenated word or lone dash would fail to match.
 *
 * @param quoteText Quote text to look for.
 * @param msgText   Original message text to search in.
 * @returns The first matching span, or null if the quote is too short or absent.
 */
function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
  const quoteWords = stripPunctuation(quoteText).split(' ').filter(w => w.length > 0);

  if (quoteWords.length < 3) return null; // Too short to trust — require at least 3 words

  // One entry per sub-word; each keeps the bounds of the whitespace-delimited
  // token it came from so a match can be mapped back onto the original text.
  const wordTokens: Array<{ word: string; start: number; end: number }> = [];
  for (const tokenMatch of msgText.matchAll(/\S+/g)) {
    const start = tokenMatch.index ?? 0;
    const end = start + tokenMatch[0].length;
    for (const word of stripPunctuation(tokenMatch[0]).split(' ')) {
      if (word.length > 0) wordTokens.push({ word, start, end });
    }
  }

  // Find the first contiguous run of message words equal to the quote words.
  const lastCandidate = wordTokens.length - quoteWords.length;
  for (let i = 0; i <= lastCandidate; i++) {
    const isMatch = quoteWords.every((quoteWord, j) => wordTokens[i + j]?.word === quoteWord);
    if (!isMatch) continue;

    const first = wordTokens[i];
    const last = wordTokens[i + quoteWords.length - 1];
    if (!first || !last) continue;

    return {
      start: first.start,
      end: last.end,
      text: msgText.slice(first.start, last.end),
    };
  }

  return null;
}
|
|
238
311
|
|
|
239
312
|
async function validateAndStoreQuotes(
|
|
@@ -250,88 +323,107 @@ async function validateAndStoreQuotes(
|
|
|
250
323
|
let found = false;
|
|
251
324
|
for (const message of messages) {
|
|
252
325
|
const msgText = getMessageText(message);
|
|
253
|
-
|
|
254
|
-
|
|
326
|
+
|
|
327
|
+
// Level 1: normalized exact match
|
|
328
|
+
const normalizedMsg = normalizeText(msgText);
|
|
329
|
+
const normalizedQuote = normalizeText(candidate.text);
|
|
255
330
|
const start = normalizedMsg.indexOf(normalizedQuote);
|
|
331
|
+
|
|
332
|
+
let matchStart: number;
|
|
333
|
+
let matchEnd: number;
|
|
334
|
+
let matchText: string;
|
|
335
|
+
let matchLevel: string;
|
|
336
|
+
|
|
256
337
|
if (start !== -1) {
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
);
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
338
|
+
matchStart = start;
|
|
339
|
+
matchEnd = start + candidate.text.length;
|
|
340
|
+
matchText = candidate.text;
|
|
341
|
+
matchLevel = "exact";
|
|
342
|
+
} else {
|
|
343
|
+
// Level 2: word-boundary fallback
|
|
344
|
+
const wordMatch = findQuoteByWords(candidate.text, msgText);
|
|
345
|
+
if (!wordMatch) continue;
|
|
346
|
+
matchStart = wordMatch.start;
|
|
347
|
+
matchEnd = wordMatch.end;
|
|
348
|
+
matchText = wordMatch.text;
|
|
349
|
+
matchLevel = "word-boundary";
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
const existing = state.human_quote_getForMessage(message.id);
|
|
353
|
+
const overlapping = existing.find(q =>
|
|
354
|
+
q.start !== null && q.end !== null &&
|
|
355
|
+
matchStart < q.end && matchEnd > q.start
|
|
356
|
+
);
|
|
357
|
+
|
|
358
|
+
if (overlapping) {
|
|
359
|
+
const mergedStart = Math.min(matchStart, overlapping.start!);
|
|
360
|
+
const mergedEnd = Math.max(matchEnd, overlapping.end!);
|
|
361
|
+
const mergedText = msgText.slice(mergedStart, mergedEnd);
|
|
362
|
+
|
|
363
|
+
const mergedDataItemIds = overlapping.data_item_ids.includes(dataItemId)
|
|
364
|
+
? overlapping.data_item_ids
|
|
365
|
+
: [...overlapping.data_item_ids, dataItemId];
|
|
366
|
+
const group = personaGroup || "General";
|
|
367
|
+
const mergedGroups = overlapping.persona_groups.includes(group)
|
|
368
|
+
? overlapping.persona_groups
|
|
369
|
+
: [...overlapping.persona_groups, group];
|
|
370
|
+
|
|
371
|
+
let embedding = overlapping.embedding;
|
|
372
|
+
if (mergedText !== overlapping.text) {
|
|
373
|
+
try {
|
|
374
|
+
const embeddingService = getEmbeddingService();
|
|
375
|
+
embedding = await embeddingService.embed(mergedText);
|
|
376
|
+
} catch (err) {
|
|
377
|
+
console.warn(`[extraction] Failed to recompute embedding for merged quote: "${mergedText.slice(0, 30)}..."`, err);
|
|
290
378
|
}
|
|
291
|
-
|
|
292
|
-
state.human_quote_update(overlapping.id, {
|
|
293
|
-
start: mergedStart,
|
|
294
|
-
end: mergedEnd,
|
|
295
|
-
text: mergedText,
|
|
296
|
-
data_item_ids: mergedDataItemIds,
|
|
297
|
-
persona_groups: mergedGroups,
|
|
298
|
-
embedding,
|
|
299
|
-
});
|
|
300
|
-
console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
|
|
301
|
-
found = true;
|
|
302
|
-
break;
|
|
303
379
|
}
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
const quote: Quote = {
|
|
314
|
-
id: crypto.randomUUID(),
|
|
315
|
-
message_id: message.id,
|
|
316
|
-
data_item_ids: [dataItemId],
|
|
317
|
-
persona_groups: [personaGroup || "General"],
|
|
318
|
-
text: candidate.text,
|
|
319
|
-
speaker: message.role === "human" ? "human" : personaName,
|
|
320
|
-
timestamp: message.timestamp,
|
|
321
|
-
start: start,
|
|
322
|
-
end: end,
|
|
323
|
-
created_at: new Date().toISOString(),
|
|
324
|
-
created_by: "extraction",
|
|
380
|
+
|
|
381
|
+
state.human_quote_update(overlapping.id, {
|
|
382
|
+
start: mergedStart,
|
|
383
|
+
end: mergedEnd,
|
|
384
|
+
text: mergedText,
|
|
385
|
+
data_item_ids: mergedDataItemIds,
|
|
386
|
+
persona_groups: mergedGroups,
|
|
325
387
|
embedding,
|
|
326
|
-
};
|
|
327
|
-
|
|
328
|
-
console.log(`[extraction] Captured quote: "${candidate.text.slice(0, 50)}..."`);
|
|
388
|
+
});
|
|
389
|
+
console.log(`[extraction] Merged overlapping quote: "${mergedText.slice(0, 50)}..." (${mergedStart}-${mergedEnd})`);
|
|
329
390
|
found = true;
|
|
330
391
|
break;
|
|
331
392
|
}
|
|
393
|
+
|
|
394
|
+
let embedding: number[] | undefined;
|
|
395
|
+
try {
|
|
396
|
+
const embeddingService = getEmbeddingService();
|
|
397
|
+
embedding = await embeddingService.embed(matchText);
|
|
398
|
+
} catch (err) {
|
|
399
|
+
console.warn(`[extraction] Failed to compute embedding for quote: "${matchText.slice(0, 30)}..."`, err);
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
const quote: Quote = {
|
|
403
|
+
id: crypto.randomUUID(),
|
|
404
|
+
message_id: message.id,
|
|
405
|
+
data_item_ids: [dataItemId],
|
|
406
|
+
persona_groups: [personaGroup || "General"],
|
|
407
|
+
text: matchText,
|
|
408
|
+
speaker: message.role === "human" ? "human" : personaName,
|
|
409
|
+
timestamp: message.timestamp,
|
|
410
|
+
start: matchStart,
|
|
411
|
+
end: matchEnd,
|
|
412
|
+
created_at: new Date().toISOString(),
|
|
413
|
+
created_by: "extraction",
|
|
414
|
+
embedding,
|
|
415
|
+
};
|
|
416
|
+
state.human_quote_add(quote);
|
|
417
|
+
if (matchLevel === "word-boundary") {
|
|
418
|
+
console.log(`[extraction] Captured quote (word-boundary match): "${matchText.slice(0, 50)}..."`);
|
|
419
|
+
} else {
|
|
420
|
+
console.log(`[extraction] Captured quote: "${matchText.slice(0, 50)}..."`);
|
|
421
|
+
}
|
|
422
|
+
found = true;
|
|
423
|
+
break;
|
|
332
424
|
}
|
|
333
425
|
if (!found) {
|
|
334
|
-
console.
|
|
426
|
+
console.warn(`[extraction] Quote not found in messages (both levels), skipping: "${candidate.text?.slice(0, 50)}..."`);
|
|
335
427
|
}
|
|
336
428
|
}
|
|
337
429
|
}
|
|
@@ -346,18 +438,4 @@ function calculateExposureCurrent(impact: ExposureImpact | undefined): number {
|
|
|
346
438
|
}
|
|
347
439
|
}
|
|
348
440
|
|
|
349
|
-
|
|
350
|
-
state: StateManager,
|
|
351
|
-
dataType: DataItemType,
|
|
352
|
-
item: Fact | Trait | Topic | Person,
|
|
353
|
-
_personaName: string,
|
|
354
|
-
_isEi: boolean,
|
|
355
|
-
_personaGroup: string | null
|
|
356
|
-
): void {
|
|
357
|
-
switch (dataType) {
|
|
358
|
-
case "fact": state.human_fact_upsert(item as Fact); break;
|
|
359
|
-
case "trait": state.human_trait_upsert(item as Trait); break;
|
|
360
|
-
case "topic": state.human_topic_upsert(item as Topic); break;
|
|
361
|
-
case "person": state.human_person_upsert(item as Person); break;
|
|
362
|
-
}
|
|
363
|
-
}
|
|
441
|
+
|