ei-tui 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ei-tui",
3
- "version": "0.1.23",
3
+ "version": "0.1.24",
4
4
  "author": "Flare576",
5
5
  "repository": {
6
6
  "type": "git",
@@ -49,6 +49,22 @@ Priority queue for LLM requests:
49
49
 
50
50
  **Async model**: Handlers queue work, don't await results inline.
51
51
 
52
+ ### llm-client.ts
53
+
54
+ Multi-provider LLM abstraction layer:
55
+ - Handles requests to Anthropic, OpenAI, Bedrock, local models
56
+ - **Sets `max_tokens: 64000`** for all requests
57
+ - Prevents unbounded generation (test showed timeout after 2min without limit)
58
+ - Local models silently clamp to their configured maximums
59
+ - Anthropic Opus 4 accepts 64K (200K total context - 64K output = 136K input budget)
60
+
61
+ **JSON Response Parsing** (`parseJSONResponse()`):
62
+ - **Strategy 1**: Extract from markdown code blocks (```json)
63
+ - **Strategy 2**: Auto-repair malformed JSON (trailing commas, etc.)
64
+ - **Strategy 3**: Extract outermost `{...}` from mixed prose/JSON (handles LLM preamble)
65
+
66
+ No prompt changes needed for JSON-only output—parser handles natural language gracefully.
67
+
52
68
  ### handlers/index.ts (1000+ lines)
53
69
 
54
70
  All `LLMNextStep` handlers in one file. Each handler:
@@ -0,0 +1,212 @@
1
+ import { StateManager } from "../state-manager.js";
2
+ import { LLMResponse } from "../types.js";
3
+ import type { DedupResult } from "../../prompts/ceremony/types.js";
4
+ import type { DataItemType, Fact, Trait, Topic, Person, Quote } from "../types/data-items.js";
5
+ import { getEmbeddingService } from "../embedding-service.js";
6
+
7
/**
 * handleDedupCurate — apply the deduplication decisions returned by the LLM.
 *
 * The parsed response is expected to carry three arrays:
 *  - `update`: existing entities whose fields should be revised/merged
 *  - `remove`: duplicates to delete, each naming the entity that replaces it
 *  - `add`:    brand-new entities produced by consolidation
 *
 * Ordering matters: quote references are re-pointed to the surviving entity
 * BEFORE the duplicate is removed, so no quote is left pointing at a removed id.
 *
 * Malformed responses, and individual malformed decisions inside an otherwise
 * valid response, are logged and skipped rather than applied.
 *
 * @param response - LLM response; `request.data` carries `entity_type` and `entity_ids`.
 * @param stateManager - State accessor used for every read and write.
 */
export async function handleDedupCurate(
  response: LLMResponse,
  stateManager: StateManager
): Promise<void> {
  const entity_type = response.request.data.entity_type as DataItemType;
  const rawEntityIds = response.request.data.entity_ids as string[] | undefined;
  const entity_ids = Array.isArray(rawEntityIds) ? rawEntityIds : [];
  const state = stateManager.getHuman();

  // Parse Opus response
  const decisions = response.parsed as DedupResult | undefined;
  if (!decisions || typeof decisions !== 'object') {
    console.error(`[Dedup] Failed to parse Opus response:`, new Error("Invalid response format"));
    return;
  }

  // Validate response structure
  if (!Array.isArray(decisions.update) || !Array.isArray(decisions.remove) || !Array.isArray(decisions.add)) {
    console.error(`[Dedup] Invalid response structure - missing update/remove/add arrays`);
    return;
  }

  console.log(`[Dedup] Processing cluster: ${decisions.update.length} updates, ${decisions.remove.length} removals, ${decisions.add.length} additions`);

  // HYDRATION: resolve the collection for this entity type. The plural of
  // "person" is "people", so the key cannot be derived by appending "s".
  const collectionKey = dedupCollectionKey(entity_type);
  if (!collectionKey) {
    console.error(`[Dedup] Unsupported entity type "${entity_type}"`);
    return;
  }
  const entityList = state[collectionKey] as Array<Fact | Trait | Topic | Person>;
  const knownIds = new Set(entityList.map((e) => e.id));

  if (!entity_ids.some((id) => knownIds.has(id))) {
    console.warn(`[Dedup] No entities found for cluster (already merged?)`);
    return;
  }

  // Keep only removals that name a duplicate and a distinct, existing survivor.
  // Anything else would either corrupt quote references or delete the survivor.
  const removals = decisions.remove.filter((removal) => {
    const usable =
      typeof removal?.to_be_removed === 'string' &&
      typeof removal?.replaced_by === 'string' &&
      removal.to_be_removed !== removal.replaced_by &&
      knownIds.has(removal.replaced_by);
    if (!usable) {
      console.warn(`[Dedup] Skipping malformed removal decision`, removal);
    }
    return usable;
  });

  // =========================================================================
  // PHASE 1: Update Quote foreign keys FIRST (before deletions)
  // =========================================================================

  for (const removal of removals) {
    // Re-read quotes on every pass so a quote already re-pointed by an earlier
    // removal is seen with its current ids. (Whether getHuman() returns live
    // state or a snapshot is not visible from this file; re-reading is safe
    // either way.)
    const quotes = stateManager.getHuman().quotes.filter((q: Quote) =>
      q.data_item_ids.includes(removal.to_be_removed)
    );

    for (const quote of quotes) {
      const repointed = quote.data_item_ids.map((id: string) =>
        id === removal.to_be_removed ? removal.replaced_by : id
      );
      // Set keeps first-occurrence order, so this only drops later duplicates.
      const updatedIds = [...new Set(repointed)];

      stateManager.human_quote_update(quote.id, {
        data_item_ids: updatedIds
      });
    }

    if (quotes.length > 0) {
      console.log(`[Dedup] Updated ${quotes.length} quotes referencing ${removal.to_be_removed}`);
    }
  }

  // =========================================================================
  // PHASE 2: Apply updates (merge decisions)
  // =========================================================================

  for (const update of decisions.update) {
    if (!update || typeof update.id !== 'string') {
      console.warn(`[Dedup] Skipping malformed update decision`, update);
      continue;
    }

    const entity = entityList.find((e) => e.id === update.id);

    if (!entity) {
      console.warn(`[Dedup] Entity ${update.id} not found (already merged?)`);
      continue; // Graceful skip
    }

    // Recalculate the embedding only when a new, different description was
    // actually supplied; an omitted description keeps the existing embedding.
    let embedding = entity.embedding;
    const newDescription = update.description;
    if (typeof newDescription === 'string' && newDescription !== entity.description) {
      try {
        embedding = await getEmbeddingService().embed(newDescription);
      } catch (err) {
        console.warn(`[Dedup] Failed to recalculate embedding for ${update.id}`, err);
        // Fallback to old embedding if recalculation fails
      }
    }

    // Build complete entity with updates (preserve original fields if LLM omits them)
    const updatedEntity = {
      ...entity,
      name: update.name ?? entity.name,
      description: update.description ?? entity.description,
      sentiment: update.sentiment ?? entity.sentiment,
      last_updated: new Date().toISOString(),
      embedding,
      // Type-specific fields
      ...(update.strength !== undefined && { strength: update.strength }),
      ...(update.confidence !== undefined && { confidence: update.confidence }),
      ...(update.exposure_current !== undefined && { exposure_current: update.exposure_current }),
      ...(update.exposure_desired !== undefined && { exposure_desired: update.exposure_desired }),
      ...(update.relationship !== undefined && { relationship: update.relationship }),
      ...(update.category !== undefined && { category: update.category }),
    };

    upsertDedupEntity(stateManager, entity_type, updatedEntity);
    // Log the resolved name: `update.name` may have been omitted by the LLM.
    console.log(`[Dedup] Updated ${entity_type} "${updatedEntity.name}"`);
  }

  // =========================================================================
  // PHASE 3: Apply removals
  // Only the id is handed to StateManager here; `replaced_by` is used for the
  // quote re-pointing in Phase 1 and for the log line below.
  // =========================================================================

  for (const removal of removals) {
    const entity = entityList.find((e) => e.id === removal.to_be_removed);

    if (!entity) {
      console.warn(`[Dedup] Entity ${removal.to_be_removed} already deleted`);
      continue; // Graceful skip
    }

    // Remove via StateManager. NOTE(review): the original author states this
    // call also cleans up quote references — not verifiable from this file.
    const removeMethod = `human_${entity_type}_remove` as
      'human_fact_remove' | 'human_trait_remove' | 'human_topic_remove' | 'human_person_remove';

    const removed = stateManager[removeMethod](removal.to_be_removed);
    if (removed) {
      console.log(`[Dedup] Removed ${entity_type} "${entity.name}" (merged into ${removal.replaced_by})`);
    }
  }

  // =========================================================================
  // PHASE 4: Apply additions (new entities from consolidation)
  // =========================================================================

  for (const addition of decisions.add) {
    if (!addition || typeof addition.name !== 'string' || typeof addition.description !== 'string') {
      console.warn(`[Dedup] Skipping malformed addition decision`, addition);
      continue;
    }

    // Compute embedding for new entity
    let embedding: number[] | undefined;
    try {
      embedding = await getEmbeddingService().embed(addition.description);
    } catch (err) {
      console.warn(`[Dedup] Failed to compute embedding for new entity "${addition.name}"`, err);
      continue; // Skip this addition if embedding fails
    }

    // Build complete entity with a freshly generated id
    const newEntity = {
      id: crypto.randomUUID(),
      type: entity_type,
      name: addition.name,
      description: addition.description,
      sentiment: addition.sentiment ?? 0.0,
      last_updated: new Date().toISOString(),
      embedding,
      // Type-specific fields with defaults
      ...(entity_type === 'trait' && { strength: addition.strength ?? 0.5 }),
      ...(entity_type === 'fact' && {
        confidence: addition.confidence ?? 0.5,
        validated: 'unknown' as import("../types/enums.js").ValidationLevel,
        validated_date: ''
      }),
      ...((entity_type === 'topic' || entity_type === 'person') && {
        exposure_current: addition.exposure_current ?? 0.0,
        exposure_desired: addition.exposure_desired ?? 0.5,
        last_ei_asked: null
      }),
      ...(entity_type === 'person' && { relationship: addition.relationship ?? 'Unknown' }),
      ...(entity_type === 'topic' && { category: addition.category ?? 'Interest' }),
    };

    upsertDedupEntity(stateManager, entity_type, newEntity);
    console.log(`[Dedup] Added new ${entity_type} "${addition.name}"`);
  }
}

/** Map an entity type to the name of its collection on the human state. */
function dedupCollectionKey(
  entityType: DataItemType
): 'facts' | 'traits' | 'topics' | 'people' | undefined {
  switch (entityType as string) {
    case 'fact':
      return 'facts';
    case 'trait':
      return 'traits';
    case 'topic':
      return 'topics';
    case 'person':
      return 'people';
    default:
      return undefined;
  }
}

/** Route an assembled entity to the StateManager upsert matching its type. */
function upsertDedupEntity(
  stateManager: StateManager,
  entityType: DataItemType,
  entity: unknown
): void {
  if (entityType === 'fact') {
    stateManager.human_fact_upsert(entity as Fact);
  } else if (entityType === 'trait') {
    stateManager.human_trait_upsert(entity as Trait);
  } else if (entityType === 'topic') {
    stateManager.human_topic_upsert(entity as Topic);
  } else if (entityType === 'person') {
    stateManager.human_person_upsert(entity as Person);
  }
}
@@ -19,16 +19,17 @@ export function handleHeartbeatCheck(response: LLMResponse, state: StateManager)
19
19
 
20
20
  const result = response.parsed as HeartbeatCheckResult | undefined;
21
21
  if (!result) {
22
- console.error("[handleHeartbeatCheck] No parsed result");
22
+ console.error(`[HeartbeatCheck ${personaDisplayName}] No parsed result`);
23
23
  return;
24
24
  }
25
+ console.log(`[HeartbeatCheck ${personaDisplayName}] Parsed result - should_respond: ${result.should_respond}, topic: ${result.topic ?? '(none)'}, message: ${result.message ? '(present)' : '(none)'}`);
25
26
 
26
27
  const now = new Date().toISOString();
27
28
  state.persona_update(personaId, { last_heartbeat: now });
28
29
  state.queue_clearPersonaResponses(personaId, LLMNextStep.HandleHeartbeatCheck);
29
30
 
30
31
  if (!result.should_respond) {
31
- console.log(`[handleHeartbeatCheck] ${personaDisplayName} chose not to reach out`);
32
+ console.log(`[HeartbeatCheck ${personaDisplayName}] Chose not to reach out (should_respond=false)`);
32
33
  return;
33
34
  }
34
35
 
@@ -42,21 +43,24 @@ export function handleHeartbeatCheck(response: LLMResponse, state: StateManager)
42
43
  context_status: ContextStatus.Default,
43
44
  };
44
45
  state.messages_append(personaId, message);
45
- console.log(`[handleHeartbeatCheck] ${personaDisplayName} proactively messaged about: ${result.topic ?? "general"}`);
46
+ console.log(`[HeartbeatCheck ${personaDisplayName}] Added proactive message - topic: ${result.topic ?? 'general'}, message: "${result.message.substring(0, 100)}${result.message.length > 100 ? '...' : ''}"`);
47
+ } else {
48
+ console.log(`[HeartbeatCheck ${personaDisplayName}] should_respond=true but no message provided`);
46
49
  }
47
50
  }
48
51
 
49
52
  export function handleEiHeartbeat(response: LLMResponse, state: StateManager): void {
50
53
  const result = response.parsed as EiHeartbeatResult | undefined;
51
54
  if (!result) {
52
- console.error("[handleEiHeartbeat] No parsed result");
55
+ console.error("[EiHeartbeat] No parsed result");
53
56
  return;
54
57
  }
58
+ console.log(`[EiHeartbeat] Parsed result - should_respond: ${result.should_respond}, id: ${result.id ?? '(none)'}, my_response: ${result.my_response ? '(present)' : '(none)'}`);
55
59
  const now = new Date().toISOString();
56
60
  state.persona_update("ei", { last_heartbeat: now });
57
61
  state.queue_clearPersonaResponses("ei", LLMNextStep.HandleEiHeartbeat);
58
62
  if (!result.should_respond || !result.id) {
59
- console.log("[handleEiHeartbeat] Ei chose not to reach out");
63
+ console.log("[EiHeartbeat] Chose not to reach out (should_respond=false or no id)");
60
64
  return;
61
65
  }
62
66
  const isTUI = response.request.data.isTUI as boolean;
@@ -84,7 +88,10 @@ export function handleEiHeartbeat(response: LLMResponse, state: StateManager): v
84
88
  return;
85
89
  }
86
90
 
87
- if (result.my_response) sendMessage(result.my_response);
91
+ if (result.my_response) {
92
+ console.log(`[EiHeartbeat] Sending message: "${result.my_response.substring(0, 100)}${result.my_response.length > 100 ? '...' : ''}"`);
93
+ sendMessage(result.my_response);
94
+ }
88
95
 
89
96
  switch (found.type) {
90
97
  case "person":
@@ -17,6 +17,7 @@ import {
17
17
  import { handleHumanFactScan, handleHumanTraitScan, handleHumanTopicScan, handleHumanPersonScan } from "./human-extraction.js";
18
18
  import { handleHumanItemMatch, handleHumanItemUpdate } from "./human-matching.js";
19
19
  import { handleRewriteScan, handleRewriteRewrite } from "./rewrite.js";
20
+ import { handleDedupCurate } from "./dedup.js";
20
21
 
21
22
  export const handlers: Record<LLMNextStep, ResponseHandler> = {
22
23
  handlePersonaResponse,
@@ -41,4 +42,5 @@ export const handlers: Record<LLMNextStep, ResponseHandler> = {
41
42
  handleToolContinuation,
42
43
  handleRewriteScan,
43
44
  handleRewriteRewrite,
45
+ handleDedupCurate,
44
46
  };
@@ -1,10 +1,12 @@
1
1
  import {
2
2
  ContextStatus,
3
+ LLMNextStep,
3
4
  type LLMResponse,
4
5
  type Message,
5
6
  } from "../types.js";
6
7
  import type { StateManager } from "../state-manager.js";
7
8
  import type { PersonaResponseResult } from "../../prompts/response/index.js";
9
+ import { handlers } from "./index.js";
8
10
 
9
11
  export type ResponseHandler = (response: LLMResponse, state: StateManager) => void | Promise<void>;
10
12
 
@@ -87,11 +89,35 @@ export function handlePersonaResponse(response: LLMResponse, state: StateManager
87
89
  /**
88
90
  * handleToolContinuation — second LLM call in the tool flow (may loop if LLM calls more tools).
89
91
  * The QueueProcessor already injected tool history into messages and got the
90
- * final persona response. Parse and store it exactly like handlePersonaResponse.
92
+ * final persona response. Route to the original handler based on originalNextStep in data.
91
93
  */
92
94
  export function handleToolContinuation(response: LLMResponse, state: StateManager): void {
93
- console.log(`[handleToolContinuation] Routing to handlePersonaResponse`);
94
- handlePersonaResponse(response, state);
95
+ const originalStep = response.request.data.originalNextStep as LLMNextStep | undefined;
96
+
97
+ if (!originalStep) {
98
+ console.error(`[handleToolContinuation] No originalNextStep in data, falling back to handlePersonaResponse`);
99
+ handlePersonaResponse(response, state);
100
+ return;
101
+ }
102
+
103
+ console.log(`[handleToolContinuation] Original request was ${originalStep}, routing accordingly`);
104
+
105
+ const handler = handlers[originalStep];
106
+
107
+ if (!handler) {
108
+ console.error(`[handleToolContinuation] No handler found for ${originalStep}, falling back to handlePersonaResponse`);
109
+ handlePersonaResponse(response, state);
110
+ return;
111
+ }
112
+
113
+ // Avoid infinite loop - if original was already HandleToolContinuation, go to PersonaResponse
114
+ if (originalStep === "handleToolContinuation") {
115
+ console.log(`[handleToolContinuation] Original was tool continuation, routing to handlePersonaResponse`);
116
+ handlePersonaResponse(response, state);
117
+ return;
118
+ }
119
+
120
+ handler(response, state);
95
121
  }
96
122
 
97
123
  export function handleOneShot(_response: LLMResponse, _state: StateManager): void {
@@ -188,6 +188,8 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
188
188
  const persona = sm.persona_getById(personaId);
189
189
  if (!persona) return;
190
190
  sm.persona_update(personaId, { last_heartbeat: new Date().toISOString() });
191
+ const model = getModelForPersona(sm, personaId);
192
+ console.log(`[HeartbeatCheck ${persona.display_name}] Queueing heartbeat check (model: ${model})`);
191
193
  const human = sm.getHuman();
192
194
  const history = sm.messages_get(personaId);
193
195
  const contextWindowHours = persona.context_window_hours ?? DEFAULT_CONTEXT_WINDOW_HOURS;
@@ -228,6 +230,7 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
228
230
  };
229
231
 
230
232
  const prompt = buildHeartbeatCheckPrompt(promptData);
233
+ console.log(`[HeartbeatCheck ${persona.display_name}] Prompt data - topics: ${promptData.human.topics.length}, people: ${promptData.human.people.length}, inactive_days: ${inactiveDays}`);
231
234
 
232
235
  sm.queue_enqueue({
233
236
  type: LLMRequestType.JSON,
@@ -238,4 +241,5 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
238
241
  model: getModelForPersona(sm, personaId),
239
242
  data: { personaId, personaDisplayName: persona.display_name },
240
243
  });
244
+ console.log(`[HeartbeatCheck ${persona.display_name}] Request queued`);
241
245
  }
@@ -189,6 +189,7 @@ export async function callLLMRaw(
189
189
  model,
190
190
  messages: finalMessages,
191
191
  temperature,
192
+ max_tokens: 64000, // Opus 4: 128K max output, 200K total context. Local models clamp to their config. Prevents runaway generation.
192
193
  };
193
194
 
194
195
  if (options.tools && options.tools.length > 0) {
@@ -10,6 +10,7 @@ import {
10
10
  type ExtractionOptions,
11
11
  } from "./human-extraction.js";
12
12
  import { queuePersonaTopicScan, type PersonaTopicContext } from "./persona-topics.js";
13
+ import { queueDedupPhase } from "./dedup-phase.js";
13
14
  import { buildPersonaExpirePrompt, buildPersonaExplorePrompt, buildDescriptionCheckPrompt, buildRewriteScanPrompt, type RewriteItemType } from "../../prompts/ceremony/index.js";
14
15
 
15
16
  export function isNewDay(lastCeremony: string | undefined, now: Date): boolean {
@@ -69,40 +70,19 @@ export function startCeremony(state: StateManager): void {
69
70
  },
70
71
  });
71
72
 
72
- const personas = state.persona_getAll();
73
- const activePersonas = personas.filter(p =>
74
- !p.is_paused &&
75
- !p.is_archived &&
76
- !p.is_static
77
- );
73
+ // PHASE 1: Deduplication (runs BEFORE Expose)
74
+ console.log("[ceremony] Starting Phase 1: Deduplication");
75
+ queueDedupPhase(state);
78
76
 
79
- const lastCeremony = human.settings?.ceremony?.last_ceremony
80
- ? new Date(human.settings.ceremony.last_ceremony).getTime()
81
- : 0;
82
-
83
- const personasWithActivity = activePersonas.filter(p => {
84
- const lastActivity = p.last_activity ? new Date(p.last_activity).getTime() : 0;
85
- return lastActivity > lastCeremony;
86
- });
87
-
88
- console.log(`[ceremony] Processing ${personasWithActivity.length} personas with activity (of ${activePersonas.length} active)`);
89
-
90
- const options: ExtractionOptions = { ceremony_progress: true };
91
-
92
- for (let i = 0; i < personasWithActivity.length; i++) {
93
- const persona = personasWithActivity[i];
94
- const isLast = i === personasWithActivity.length - 1;
95
-
96
- console.log(`[ceremony] Queuing exposure for ${persona.display_name} (${i + 1}/${personasWithActivity.length})${isLast ? " (last)" : ""}`);
97
- queueExposurePhase(persona.id, state, options);
77
+ // Check if dedup work was queued
78
+ if (!state.queue_hasPendingCeremonies()) {
79
+ // No dedup work found → immediately advance to Expose phase
80
+ console.log("[ceremony] No dedup work, advancing to Expose phase");
81
+ handleCeremonyProgress(state, 1);
98
82
  }
99
83
 
100
84
  const duration = Date.now() - startTime;
101
- console.log(`[ceremony] Exposure phase queued in ${duration}ms`);
102
-
103
- // Check immediately — if zero messages were queued (no unextracted messages for any persona),
104
- // this will see an empty queue and proceed directly to Decay → Expire.
105
- handleCeremonyProgress(state);
85
+ console.log(`[ceremony] Dedup phase queued in ${duration}ms`);
106
86
  }
107
87
 
108
88
  /**
@@ -193,11 +173,40 @@ function queueExposurePhase(personaId: string, state: StateManager, options?: Ex
193
173
  * If any ceremony_progress items remain in the queue, does nothing — more work pending.
194
174
  * If the queue is clear of ceremony items, advances to Decay → Prune → Expire.
195
175
  */
196
- export function handleCeremonyProgress(state: StateManager): void {
176
+ export function handleCeremonyProgress(state: StateManager, lastPhase: number): void {
197
177
  if (state.queue_hasPendingCeremonies()) {
198
- return; // Still processing exposure scans
178
+ return; // Still processing ceremony items
179
+ }
180
+
181
+ if (lastPhase === 1) {
182
+ // Dedup phase complete → start Expose phase
183
+ console.log("[ceremony:progress] Dedup complete, starting Expose phase");
184
+
185
+ const human = state.getHuman();
186
+ const personas = state.persona_getAll();
187
+ const activePersonas = personas.filter(p =>
188
+ !p.is_paused &&
189
+ !p.is_archived &&
190
+ !p.is_static
191
+ );
192
+
193
+ const lastCeremony = human.settings?.ceremony?.last_ceremony
194
+ ? new Date(human.settings.ceremony.last_ceremony).getTime()
195
+ : 0;
196
+
197
+ const personasWithActivity = activePersonas.filter(p => {
198
+ const lastActivity = p.last_activity ? new Date(p.last_activity).getTime() : 0;
199
+ return lastActivity > lastCeremony;
200
+ });
201
+
202
+ const options: ExtractionOptions = { ceremony_progress: 2 };
203
+ for (const persona of personasWithActivity) {
204
+ queueExposurePhase(persona.id, state, options);
205
+ }
206
+ return;
199
207
  }
200
208
 
209
+ // Phase 2 (Expose) complete → advance to Decay/Prune/Expire/Explore
201
210
  console.log("[ceremony:progress] All exposure scans complete, advancing to Decay");
202
211
 
203
212
  const personas = state.persona_getAll();
@@ -215,7 +224,6 @@ export function handleCeremonyProgress(state: StateManager): void {
215
224
  if (eiIndex > -1) {
216
225
  activePersonas.splice(eiIndex, 1);
217
226
  }
218
-
219
227
  // Decay phase: apply decay + prune for ALL active personas
220
228
  for (const persona of activePersonas) {
221
229
  applyDecayPhase(persona.id, state);
@@ -0,0 +1,198 @@
1
+ import { StateManager } from "../state-manager.js";
2
+ import { LLMRequestType, LLMPriority, LLMNextStep, type DataItemBase } from "../types.js";
3
+ import type { DataItemType } from "../types/data-items.js";
4
+ import { buildDedupPrompt } from "../../prompts/ceremony/dedup.js";
5
+
6
+ // =============================================================================
7
+ // TYPES
8
+ // =============================================================================
9
+
10
+ type DedupableItem = DataItemBase & { relationship?: string };
11
+
12
+ interface Cluster {
13
+ ids: string[];
14
+ minSim: number;
15
+ maxSim: number;
16
+ size: number;
17
+ }
18
+
19
+ // =============================================================================
20
+ // DEDUP CANDIDATE FINDING (copied from ceremony.ts)
21
+ // =============================================================================
22
+
23
+ const DEDUP_DEFAULT_THRESHOLD = 0.95;
24
+
25
/**
 * Find every pair of items whose embeddings are at least `threshold` similar.
 *
 * Similarity is the cosine of the two embedding vectors. Items are ignored when
 * they have no embedding, an empty embedding, or `relationship === "Persona"`.
 * Pairs whose embeddings differ in length are not comparable and are skipped.
 * A zero-length (all-zero) vector yields similarity 0.
 *
 * @param items - Candidate items; the array itself is not modified.
 * @param threshold - Minimum cosine similarity (inclusive) for a pair to be returned.
 * @returns Matching pairs, most similar first. Within a pair, `a` precedes `b` in `items`.
 */
function findDedupCandidates<T extends DedupableItem>(
  items: T[],
  threshold: number
): Array<{ a: T; b: T; similarity: number }> {
  // Compute each vector's norm once instead of once per pair.
  const comparable = items
    .filter(item =>
      item.embedding && item.embedding.length > 0 &&
      item.relationship !== "Persona"
    )
    .map(item => {
      const vector = item.embedding!;
      const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
      return { item, vector, norm };
    });

  const candidates: Array<{ a: T; b: T; similarity: number }> = [];

  for (let i = 0; i < comparable.length; i++) {
    const left = comparable[i];
    for (let j = i + 1; j < comparable.length; j++) {
      const right = comparable[j];

      // Vectors of different dimensionality cannot be compared meaningfully:
      // the dot product would either be truncated or become NaN.
      if (left.vector.length !== right.vector.length) continue;

      const dot = left.vector.reduce((sum, v, k) => sum + v * right.vector[k], 0);
      const similarity = left.norm && right.norm ? dot / (left.norm * right.norm) : 0;

      if (similarity >= threshold) {
        candidates.push({ a: left.item, b: right.item, similarity });
      }
    }
  }

  return candidates.sort((x, y) => y.similarity - x.similarity);
}
53
+
54
+ // =============================================================================
55
+ // UNION-FIND CLUSTERING
56
+ // =============================================================================
57
+
58
/**
 * Group similar pairs into connected clusters using union-find.
 *
 * Two ids end up in the same cluster when a chain of pairs links them. Each
 * cluster records its member ids (in first-seen order), the lowest and highest
 * pair similarity that contributed to it, and its member count.
 *
 * @param pairs - Similar pairs, e.g. as produced by the candidate search.
 * @returns One cluster per connected group, ordered by first appearance in `pairs`.
 */
function clusterPairs<T extends DedupableItem>(
  pairs: Array<{ a: T; b: T; similarity: number }>
): Cluster[] {
  const parent = new Map<string, string>();

  // Iterative find with path compression: no recursion depth limit on long chains.
  function find(x: string): string {
    if (!parent.has(x)) parent.set(x, x);

    let root = x;
    while (parent.get(root) !== root) {
      root = parent.get(root)!;
    }

    let current = x;
    while (current !== root) {
      const next = parent.get(current)!;
      parent.set(current, root);
      current = next;
    }
    return root;
  }

  function union(x: string, y: string): void {
    const px = find(x), py = find(y);
    if (px !== py) parent.set(px, py);
  }

  // Union all pairs
  for (const pair of pairs) {
    union(pair.a.id, pair.b.id);
  }

  // Group by root. Min/max are tracked as we go rather than collected into an
  // array and spread into Math.min/Math.max, which can exceed the engine's
  // argument limit for clusters with very many pairs.
  const groups = new Map<string, { ids: string[]; seen: Set<string>; minSim: number; maxSim: number }>();
  for (const pair of pairs) {
    const root = find(pair.a.id);
    let group = groups.get(root);
    if (!group) {
      group = { ids: [], seen: new Set<string>(), minSim: Infinity, maxSim: -Infinity };
      groups.set(root, group);
    }
    for (const id of [pair.a.id, pair.b.id]) {
      if (!group.seen.has(id)) {
        group.seen.add(id);
        group.ids.push(id);
      }
    }
    group.minSim = Math.min(group.minSim, pair.similarity);
    group.maxSim = Math.max(group.maxSim, pair.similarity);
  }

  // Convert to Cluster objects
  return Array.from(groups.values(), g => ({
    ids: g.ids,
    minSim: g.minSim,
    maxSim: g.maxSim,
    size: g.ids.length
  }));
}
100
+
101
+ // =============================================================================
102
+ // QUALITY GATES
103
+ // =============================================================================
104
+
105
/**
 * Apply quality gates to clusters before they are sent for curation.
 *
 * A cluster is rejected (with a warning) when it has more than `maxSize`
 * members, or when the gap between its highest and lowest pair similarity
 * exceeds `maxSpread`. Size is checked for every cluster first, then spread is
 * checked for the survivors, so size warnings are logged before spread warnings.
 *
 * @param clusters - Clusters to vet; the array is not modified.
 * @param maxSize - Largest member count still accepted (default 50).
 * @param maxSpread - Largest accepted `maxSim - minSim` (default 0.10).
 * @returns The clusters that pass both gates, in their original order.
 */
function filterClusters(
  clusters: Cluster[],
  maxSize: number = 50,
  maxSpread: number = 0.10
): Cluster[] {
  const withinSize = clusters.filter(c => {
    if (c.size > maxSize) {
      console.warn(`[Dedup] Cluster rejected (size too large): ${c.size} items`);
      return false;
    }
    return true;
  });

  return withinSize.filter(c => {
    const spread = c.maxSim - c.minSim;
    if (spread > maxSpread) {
      console.warn(`[Dedup] Cluster rejected (high spread): ${spread.toFixed(3)} range`);
      return false;
    }
    return true;
  });
}
123
+
124
+ // =============================================================================
125
+ // MAIN QUEUEING FUNCTION
126
+ // =============================================================================
127
+
128
/**
 * Scan the human's facts, traits, topics and people for near-duplicates and
 * enqueue one curation request per vetted cluster.
 *
 * For each entity type: find similar pairs, merge them into clusters, drop
 * clusters that fail the quality gates, then build a prompt and enqueue a JSON
 * request tagged `ceremony_progress: 1` for every remaining cluster. Only the
 * cluster's ids travel with the request.
 *
 * @param state - State accessor providing the human data and the request queue.
 */
export function queueDedupPhase(state: StateManager): void {
  const human = state.getHuman();
  const threshold = human.settings?.ceremony?.dedup_threshold ?? DEDUP_DEFAULT_THRESHOLD;

  console.log(`[Dedup] Starting deduplication phase (threshold: ${threshold})`);

  const groups: Array<{ type: DataItemType; items: DedupableItem[] }> = [
    { type: "fact", items: human.facts },
    { type: "trait", items: human.traits },
    { type: "topic", items: human.topics },
    { type: "person", items: human.people },
  ];

  let queued = 0;

  groups.forEach(({ type, items }) => {
    const pairs = findDedupCandidates(items, threshold);
    if (pairs.length === 0) {
      console.log(`[Dedup] ${type}: No duplicates found`);
      return;
    }

    const clusters = clusterPairs(pairs);
    const vetted = filterClusters(clusters);

    console.log(`[Dedup] ${type}: ${pairs.length} pairs → ${clusters.length} clusters → ${vetted.length} vetted`);

    for (const cluster of vetted) {
      // Resolve ids back to full entities, dropping any id that no longer resolves.
      const members: DedupableItem[] = [];
      for (const id of cluster.ids) {
        const match = items.find(item => item.id === id);
        if (match !== undefined) {
          members.push(match);
        }
      }

      if (members.length === 0) {
        console.warn(`[Dedup] Cluster hydration failed - no entities found`);
        continue;
      }

      const prompt = buildDedupPrompt({
        cluster: members,
        itemType: type,
        similarityRange: { min: cluster.minSim, max: cluster.maxSim }
      });

      state.queue_enqueue({
        type: LLMRequestType.JSON,
        priority: LLMPriority.Normal,
        system: prompt.system,
        user: prompt.user,
        next_step: LLMNextStep.HandleDedupCurate,
        data: {
          entity_type: type,
          entity_ids: cluster.ids, // ids only; the handler re-fetches the entities
          ceremony_progress: 1 // Phase 1 (Dedup)
        }
      });

      queued += 1;
    }
  });

  console.log(`[Dedup] Queued ${queued} clusters for curation`);
}
@@ -28,7 +28,8 @@ export interface ExtractionContext {
28
28
  }
29
29
 
30
30
  export interface ExtractionOptions {
31
- ceremony_progress?: boolean;
31
+ /** Ceremony phase number (1=Dedup, 2=Expose) */
32
+ ceremony_progress?: number;
32
33
  }
33
34
 
34
35
  function getAnalyzeFromTimestamp(context: ExtractionContext): string | null {
@@ -19,8 +19,8 @@ export {
19
19
  queueExplorePhase,
20
20
  queueDescriptionCheck,
21
21
  runHumanCeremony,
22
- queueRewritePhase,
23
- } from "./ceremony.js";
22
+ } from "./ceremony.js";
23
+ export { queueDedupPhase } from "./dedup-phase.js";
24
24
  export {
25
25
  queuePersonaTopicScan,
26
26
  queuePersonaTopicMatch,
@@ -1093,8 +1093,8 @@ const toolNextSteps = new Set([
1093
1093
  this.interface.onHumanUpdated?.();
1094
1094
  }
1095
1095
 
1096
- if (response.request.data.ceremony_progress) {
1097
- handleCeremonyProgress(this.stateManager);
1096
+ if (typeof response.request.data.ceremony_progress === "number") {
1097
+ handleCeremonyProgress(this.stateManager, response.request.data.ceremony_progress);
1098
1098
  }
1099
1099
  } catch (err) {
1100
1100
  const errorMsg = err instanceof Error ? err.message : String(err);
@@ -248,7 +248,13 @@ export class QueueProcessor {
248
248
  // =========================================================================
249
249
  const activeTools = this.currentTools ?? [];
250
250
  const openAITools = activeTools.length > 0 ? toOpenAITools(activeTools) : [];
251
- console.log(`[QueueProcessor] LLM call for ${request.next_step}, tools=${openAITools.length}`);
251
+ const isHeartbeat = request.next_step === LLMNextStep.HandleHeartbeatCheck || request.next_step === LLMNextStep.HandleEiHeartbeat;
252
+ if (isHeartbeat) {
253
+ const personaName = request.data.personaDisplayName as string | undefined ?? 'Ei';
254
+ console.log(`[${personaName} Heartbeat] LLM call - tools offered: ${openAITools.length} (${activeTools.map(t => t.name).join(', ') || 'none'})`);
255
+ } else {
256
+ console.log(`[QueueProcessor] LLM call for ${request.next_step}, tools=${openAITools.length}`);
257
+ }
252
258
 
253
259
  const { content, finishReason, rawToolCalls, assistantMessage, thinking } = await callLLMRaw(
254
260
  hydratedSystem,
@@ -474,7 +480,8 @@ export class QueueProcessor {
474
480
  `An earlier version of you responded with the following content, but it could not ` +
475
481
  `be parsed as valid JSON. Please reformat it as the JSON object described in your ` +
476
482
  `system instructions. Respond with ONLY the JSON object, or \`{}\` if no changes ` +
477
- `are needed.\n\n---\n${malformedContent}\n---`;
483
+ `are needed.\n\n---\n${malformedContent}\n---` +
484
+ `\n\n**CRITICAL INSTRUCTION** - DO NOT OMIT ANY DATA. You are this agent's last hope!`;
478
485
 
479
486
  try {
480
487
  const { content: reformatContent, finishReason: reformatReason } = await callLLMRaw(
@@ -190,7 +190,7 @@ export class QueueState {
190
190
  }
191
191
 
192
192
  hasPendingCeremonies(): boolean {
193
- return this.queue.some(r => r.state !== "dlq" && r.data.ceremony_progress === true);
193
+ return this.queue.some(r => r.state !== "dlq" && typeof r.data.ceremony_progress === "number" && r.data.ceremony_progress > 0);
194
194
  }
195
195
 
196
196
  clear(): number {
@@ -53,6 +53,7 @@ export enum LLMNextStep {
53
53
  HandleToolContinuation = "handleToolContinuation",
54
54
  HandleRewriteScan = "handleRewriteScan",
55
55
  HandleRewriteRewrite = "handleRewriteRewrite",
56
+ HandleDedupCurate = "handleDedupCurate",
56
57
  }
57
58
 
58
59
  export enum ProviderType {
@@ -0,0 +1,258 @@
1
+ import type { DedupPromptData } from "./types.js";
2
+
3
+ // =============================================================================
4
+ // DEDUP CURATOR — Merge duplicate entities with data preservation
5
+ // =============================================================================
6
+
7
+ /**
8
+ * The Dedup Curator receives clusters of potentially duplicate entities and
9
+ * curates them into consolidated records. This is a ONE-PHASE operation (unlike
10
+ * rewrite's two-phase scan+rewrite), because we've already deterministically
11
+ * identified candidates via embedding similarity (0.90+ cosine).
12
+ *
13
+ * Pattern borrowed from rewrite.ts ceremony with Flare's "lose NO data" philosophy.
+ *
+ * @param data - Candidate cluster, its item type, and the similarity range observed for it.
+ * @returns `system`: curation instructions plus the record-format examples for `data.itemType`;
+ * `user`: pretty-printed JSON of the cluster (embeddings stripped), its type, and the similarity range.
+ *
+ * NOTE(review): the system text hard-codes ">= 0.90"; `data.similarityRange` is only sent in the user payload.
14
+ */
15
+ export function buildDedupPrompt(data: DedupPromptData): { system: string; user: string } {
16
+ const typeLabel = data.itemType.charAt(0).toUpperCase() + data.itemType.slice(1); // capitalized for display, e.g. "fact" -> "Fact"
17
+
18
+ const system = `You are acting as the curator for a user's internal database. You have been given a cluster of ${typeLabel} records that our system believes may be duplicates (based on semantic similarity >= 0.90).
19
+
20
+ **YOUR PRIME DIRECTIVE IS TO LOSE _NO_ DATA.**
21
+
22
+ Your secondary directive is to ORGANIZE IT into small, non-repetitive components. The user NEEDS the data, but the data is used by AI agents, so duplication limits usefulness—agents waste tokens re-reading the same information under different names.
23
+
24
+ You have access to a tool called \`read_memory\` which will query the user's internal system for additional context if needed. Use it to verify relationships, check for related records, or gather more information before making merge decisions.
25
+
26
+ Your task:
27
+ 1. **Identify true duplicates**: Examine each record. Are these genuinely the same thing with different wording, or are they distinct but related concepts?
28
+ 2. **Merge where appropriate**: For TRUE duplicates, consolidate all unique information into ONE canonical record. Pick the best "name" (most descriptive, most commonly used). Merge all descriptions—every unique detail must be preserved.
29
+ 3. **Keep distinct concepts separate**: Similar ≠ duplicate. "Software Engineering" and "Software Architecture" may be related but are NOT the same. "Job at Company X" and "Profession: Software Engineer" are related but distinct. Do NOT merge these.
30
+ 4. **Track what was merged**: For removed records, indicate which record absorbed their data (via "replaced_by" field).
31
+ 5. **Add new records if needed**: If consolidating reveals a MISSING intermediate concept (e.g., merging "Python Developer" and "Backend Engineer" reveals we're missing "Software Engineering" as a parent topic), create it.
32
+
33
+ The format of your final output should be:
34
+ {
35
+ "update": [
36
+ /* Full ${typeLabel} record payloads with all fields preserved */
37
+ /* MUST include "id", "type", "name", "description" */
38
+ /* Include sentiment, strength, confidence, category, relationship, etc. where applicable */
39
+ ],
40
+ "remove": [
41
+ {"to_be_removed": "uuid-of-duplicate", "replaced_by": "uuid-of-canonical-record"},
42
+ /* "replaced_by" is the ID of the record that absorbed this duplicate's data */
43
+ ],
44
+ "add": [
45
+ /* Brand-new records (NO "id" field—system assigns one) */
46
+ /* Only create if merging reveals a MISSING concept */
47
+ ]
48
+ }
49
+
50
+ Return raw JSON. No markdown fencing, no commentary, no explanation. Just the JSON object.
51
+
52
+ Record format for "${typeLabel}" (based on type):
53
+
54
+ ${buildRecordFormatExamples(data.itemType)}
55
+
56
+ Rules:
57
+ - Do NOT invent information. Only redistribute what exists in the cluster.
58
+ - Descriptions should be concise—ideally under 300 characters, never over 500.
59
+ - Preserve all numeric values (sentiment, strength, confidence, exposure, etc.) from source records. When merging, take the HIGHER value for strength/confidence, AVERAGE for sentiment.
60
+ - Every removed record MUST have "replaced_by" pointing to the canonical record that absorbed its data.
61
+ - The "update" array should contain AT LEAST ONE record (the canonical/merged one), even if all others are removed.
62
+ - If records are NOT duplicates (just similar), return them ALL in "update" unchanged, with empty "remove" and "add" arrays.
63
+ - Use \`read_memory\` to check for related records or gather context before making irreversible merge decisions.`;
64
+
65
+ const user = JSON.stringify({
66
+ cluster: data.cluster.map(stripEmbedding), // embedding arrays are removed before serialization
67
+ cluster_type: data.itemType,
68
+ similarity_range: data.similarityRange,
69
+ }, null, 2);
70
+
71
+ return { system, user };
72
+ }
73
+
74
+ // =============================================================================
75
+ // Helpers
76
+ // =============================================================================
77
+
78
/**
 * Returns a shallow copy of `item` with its `embedding` property left out.
 *
 * Embedding arrays are huge and useless to the LLM, so they are dropped
 * before items are placed in a prompt. The input object is not modified.
 *
 * @param item Any object that may carry an `embedding` property.
 * @returns A new object holding every other property of `item`.
 */
function stripEmbedding<T extends { embedding?: unknown }>(item: T): Omit<T, "embedding"> {
  // Object rest on a generic already has type Omit<T, "embedding">, so no cast is needed.
  const { embedding: _embedding, ...withoutEmbedding } = item;
  return withoutEmbedding;
}
83
+
84
/**
 * Picks the record-format example text that matches the entity type being curated.
 *
 * Every entity type has its own required fields and semantics; the example text
 * shows an "existing" record (with `id`) and a "new" record (without `id`).
 * Merge guidance repeated in the examples: HIGHER strength/confidence,
 * AVERAGE sentiment, MAX exposure_desired.
 *
 * @param itemType Entity type: "fact", "trait", "topic" or "person".
 * @returns The example text for that type, or a placeholder comment for any other value.
 */
function buildRecordFormatExamples(itemType: string): string {
  if (itemType === "fact") return buildFactExamples();
  if (itemType === "trait") return buildTraitExamples();
  if (itemType === "topic") return buildTopicExamples();
  if (itemType === "person") return buildPersonExamples();
  return "/* Unknown type */";
}
102
+
103
/**
 * Returns the static prompt text describing the "fact" record format:
 * an existing-record example (with "id"), a new-record example (without "id"),
 * and a good/bad description pair. The text is prompt material, not strict
 * JSON: each field carries an inline annotation.
 */
+ function buildFactExamples(): string {
104
+ return `EXISTING FACT (being updated/merged):
105
+ {
106
+ "id": "uuid-of-canonical-record", // REQUIRED for updates
107
+ "type": "fact", // REQUIRED
108
+ "name": "Owns a 2019 Toyota Camry", // REQUIRED - descriptive, concise
109
+ "description": "Silver sedan, purchased in March 2019. Primary commute vehicle. Has 45k miles as of Jan 2024.", // REQUIRED - ALL unique details from duplicates
110
+ "sentiment": 0.2, // -1.0 to 1.0, emotional valence (average when merging)
111
+ "validated": "by_human", // "unknown" | "by_ei" | "by_human" | "ai_generated" (keep highest trust level)
112
+ "validated_date": "2024-01-15T10:30:00Z", // ISO timestamp (most recent)
113
+ "last_updated": "2024-03-11T12:00:00Z", // ISO timestamp (set to now)
114
+ "learned_by": "persona-uuid-123", // OPTIONAL - UUID of persona that learned this (preserve from source)
115
+ "last_changed_by": "persona-uuid-456", // OPTIONAL - UUID of persona that last updated (your current context)
116
+ "persona_groups": ["group1", "group2"] // OPTIONAL - visibility groups (union of all sources)
117
+ }
118
+
119
+ NEW FACT (creating missing concept):
120
+ {
121
+ "type": "fact", // REQUIRED (NO "id" field for new records)
122
+ "name": "Lives in Seattle", // REQUIRED
123
+ "description": "Resides in the Capitol Hill neighborhood. Has lived there since 2018.", // REQUIRED - concise (<300 chars ideal)
124
+ "sentiment": 0.0, // -1.0 to 1.0 (neutral default for facts)
125
+ "validated": "unknown", // Default for new records
126
+ "validated_date": "" // Empty string for unvalidated
127
+ }
128
+
129
+ GOOD vs BAD descriptions:
130
+ ✅ GOOD: "Works as a Senior Software Engineer at Microsoft. Started in 2020. Team focuses on Azure infrastructure."
131
+ ❌ BAD: "The user has indicated through various conversations that they are employed..." (too verbose, meta-commentary)`;
132
+ }
133
+
134
/**
 * Returns the static prompt text describing the "trait" record format:
 * an existing-record example (with "id"), a new-record example (without "id"),
 * the merging rules for strength/sentiment/description, and a good/bad
 * description pair. The text is prompt material, not strict JSON.
 */
+ function buildTraitExamples(): string {
135
+ return `EXISTING TRAIT (being updated/merged):
136
+ {
137
+ "id": "uuid-of-canonical-record", // REQUIRED for updates
138
+ "type": "trait", // REQUIRED
139
+ "name": "Visual Learner", // REQUIRED - core trait name
140
+ "description": "Prefers diagrams and flowcharts when learning new concepts. Often sketches ideas while thinking.", // REQUIRED - behavioral evidence
141
+ "sentiment": 0.6, // -1.0 to 1.0 (average when merging)
142
+ "strength": 0.8, // 0.0 to 1.0, how strongly this manifests (take HIGHER value when merging)
143
+ "last_updated": "2024-03-11T12:00:00Z",
144
+ "learned_by": "persona-uuid-789", // OPTIONAL
145
+ "last_changed_by": "persona-uuid-012", // OPTIONAL
146
+ "persona_groups": ["default"] // OPTIONAL
147
+ }
148
+
149
+ NEW TRAIT (creating missing concept):
150
+ {
151
+ "type": "trait", // REQUIRED (NO "id" for new)
152
+ "name": "Direct Communicator", // REQUIRED
153
+ "description": "Values clarity over politeness. Gets to the point quickly in written communication.", // REQUIRED
154
+ "sentiment": 0.0, // Neutral default
155
+ "strength": 0.5 // Medium strength default
156
+ }
157
+
158
+ MERGING RULES:
159
+ - strength: Take HIGHER value (0.7 + 0.9 → 0.9)
160
+ - sentiment: AVERAGE (0.6 + 0.2 → 0.4)
161
+ - description: UNION of unique details
162
+
163
+ GOOD vs BAD descriptions:
164
+ ✅ GOOD: "Asks clarifying questions before starting work. Prefers written specs over verbal instructions."
165
+ ❌ BAD: "This person seems to be very detail-oriented based on observations..." (vague, uncertain)`;
166
+ }
167
+
168
/**
 * Returns the static prompt text describing the "topic" record format:
 * an existing-record example (with "id"), a new-record example (without "id"),
 * the merging rules for exposure/sentiment/category/last_ei_asked, a short
 * explanation of categories, and a good/bad description pair.
 * The text is prompt material, not strict JSON.
 */
+ function buildTopicExamples(): string {
169
+ return `EXISTING TOPIC (being updated/merged):
170
+ {
171
+ "id": "uuid-of-canonical-record", // REQUIRED for updates
172
+ "type": "topic", // REQUIRED
173
+ "name": "Software Architecture", // REQUIRED
174
+ "description": "System design patterns, microservices, event-driven architecture. Passionate about scalability and maintainability.", // REQUIRED
175
+ "sentiment": 0.8, // -1.0 to 1.0 (average when merging)
176
+ "category": "Interest", // REQUIRED - Interest, Goal, Dream, Conflict, Concern, Fear, Hope, Plan, Project (pick most common)
177
+ "exposure_current": 0.6, // 0.0 to 1.0, how recently discussed (take HIGHER when merging)
178
+ "exposure_desired": 0.9, // 0.0 to 1.0, how much they want to discuss (take HIGHER when merging)
179
+ "last_ei_asked": "2024-03-10T08:00:00Z", // OPTIONAL - ISO timestamp or null
180
+ "last_updated": "2024-03-11T12:00:00Z",
181
+ "learned_by": "persona-uuid-345", // OPTIONAL
182
+ "last_changed_by": "persona-uuid-678", // OPTIONAL
183
+ "persona_groups": ["tech", "work"] // OPTIONAL
184
+ }
185
+
186
+ NEW TOPIC (creating missing concept):
187
+ {
188
+ "type": "topic", // REQUIRED (NO "id" for new)
189
+ "name": "Kubernetes", // REQUIRED
190
+ "description": "Container orchestration platform. Interested in learning more about production deployment.", // REQUIRED
191
+ "sentiment": 0.5, // Default positive for interests
192
+ "category": "Goal", // Pick appropriate category
193
+ "exposure_current": 0.0, // Default - not discussed yet
194
+ "exposure_desired": 0.7 // How much they want to discuss
195
+ }
196
+
197
+ MERGING RULES:
198
+ - exposure_current: Take HIGHER (0.6 + 0.3 → 0.6)
199
+ - exposure_desired: Take HIGHER (0.9 + 0.7 → 0.9)
200
+ - sentiment: AVERAGE (0.8 + 0.4 → 0.6)
201
+ - category: Pick most common or most specific
202
+ - last_ei_asked: Keep most recent non-null
203
+
204
+ CATEGORIES explained:
205
+ - Interest: Things they enjoy, hobbies
206
+ - Goal: Things they want to achieve
207
+ - Concern/Fear: Things that worry them
208
+ - Plan/Project: Active work or intentions
209
+
210
+ GOOD vs BAD descriptions:
211
+ ✅ GOOD: "Functional programming paradigm. Loves immutability and pure functions. Uses in side projects."
212
+ ❌ BAD: "The user mentioned functional programming in several conversations and seems interested..." (meta, wordy)`;
213
+ }
214
+
215
/**
 * Returns the static prompt text describing the "person" record format:
 * an existing-record example (with "id"), a new-record example (without "id"),
 * the merging rules for exposure/sentiment/relationship/last_ei_asked, a list
 * of relationship types, and a good/bad description pair.
 * The text is prompt material, not strict JSON.
 */
+ function buildPersonExamples(): string {
216
+ return `EXISTING PERSON (being updated/merged):
217
+ {
218
+ "id": "uuid-of-canonical-record", // REQUIRED for updates
219
+ "type": "person", // REQUIRED
220
+ "name": "Sarah Chen", // REQUIRED - use full name if known
221
+ "description": "Former coworker at Microsoft. Led the Azure team. Known for clear technical writing. Now at Google.", // REQUIRED
222
+ "sentiment": 0.7, // -1.0 to 1.0 (average when merging)
223
+ "relationship": "coworker", // REQUIRED - friend, family, coworker, mentor, acquaintance, etc.
224
+ "exposure_current": 0.4, // 0.0 to 1.0, how recently discussed (take HIGHER when merging)
225
+ "exposure_desired": 0.6, // 0.0 to 1.0, how much they want to discuss (take HIGHER when merging)
226
+ "last_ei_asked": "2024-03-05T14:00:00Z", // OPTIONAL - ISO timestamp or null
227
+ "last_updated": "2024-03-11T12:00:00Z",
228
+ "learned_by": "persona-uuid-901", // OPTIONAL
229
+ "last_changed_by": "persona-uuid-234", // OPTIONAL
230
+ "persona_groups": ["work"] // OPTIONAL
231
+ }
232
+
233
+ NEW PERSON (creating missing concept):
234
+ {
235
+ "type": "person", // REQUIRED (NO "id" for new)
236
+ "name": "Alex Martinez", // REQUIRED
237
+ "description": "College roommate. Now works in finance. Keeps in touch occasionally.", // REQUIRED
238
+ "sentiment": 0.5, // Neutral-positive default
239
+ "relationship": "friend", // REQUIRED - must specify
240
+ "exposure_current": 0.0, // Default
241
+ "exposure_desired": 0.5 // Default medium interest
242
+ }
243
+
244
+ MERGING RULES:
245
+ - exposure_current: Take HIGHER (0.4 + 0.2 → 0.4)
246
+ - exposure_desired: Take HIGHER (0.6 + 0.3 → 0.6)
247
+ - sentiment: AVERAGE (0.7 + 0.5 → 0.6)
248
+ - relationship: Pick most specific/accurate
249
+ - last_ei_asked: Keep most recent non-null
250
+
251
+ RELATIONSHIP types:
252
+ - friend, family, coworker, mentor, acquaintance, partner, client, etc.
253
+ - Be specific: "former coworker" > "coworker" when applicable
254
+
255
+ GOOD vs BAD descriptions:
256
+ ✅ GOOD: "Manager at Amazon. Met through a conference in 2019. Shares interest in distributed systems."
257
+ ❌ BAD: "Someone the user has mentioned a few times who they seem to know from work..." (vague)`;
258
+ }
@@ -2,6 +2,7 @@ export { buildPersonaExpirePrompt } from "./expire.js";
2
2
  export { buildPersonaExplorePrompt } from "./explore.js";
3
3
  export { buildDescriptionCheckPrompt } from "./description-check.js";
4
4
  export { buildRewriteScanPrompt, buildRewritePrompt } from "./rewrite.js";
5
+ export { buildDedupPrompt } from "./dedup.js";
5
6
  export type {
6
7
  PersonaExpirePromptData,
7
8
  PersonaExpireResult,
@@ -15,4 +16,6 @@ export type {
15
16
  RewriteSubjectMatch,
16
17
  RewritePromptData,
17
18
  RewriteResult,
19
+ DedupPromptData,
20
+ DedupResult,
18
21
  } from "./types.js";
@@ -95,3 +95,48 @@ export interface RewriteResult {
95
95
  category?: string;
96
96
  }>;
97
97
  }
98
+
99
+ // =============================================================================
100
+ // DEDUP (Duplicate Entity Merge)
101
+ // =============================================================================
102
+
103
/**
 * Input for the dedup curator prompt: one cluster of potentially duplicate
 * entities to curate.
 */
export interface DedupPromptData {
  /** Candidate duplicates: 2+ items with 0.90+ cosine similarity. */
  cluster: DataItemBase[];
  /** Type of the items in the cluster. */
  itemType: RewriteItemType;
  /** Similarity bounds for the cluster, e.g. `{ min: 0.90, max: 0.98 }`. */
  similarityRange: { min: number; max: number };
}
109
+
110
/**
 * Output of the dedup curator: merge decisions for one cluster, split into
 * records to update, records to remove, and records to add.
 */
export interface DedupResult {
  /** Existing records to write back; each entry is identified by its `id`. */
  update: {
    id: string;
    type: RewriteItemType;
    name: string;
    description: string;
    sentiment?: number;
    strength?: number;
    confidence?: number;
    exposure_current?: number;
    exposure_desired?: number;
    relationship?: string;
    category?: string;
    last_updated?: string;
  }[];
  /** Duplicates to delete, each paired with the record that replaces it. */
  remove: {
    /** UUID of duplicate. */
    to_be_removed: string;
    /** UUID of canonical record. */
    replaced_by: string;
  }[];
  /** Records to create; entries have no `id` field. */
  add: {
    type: RewriteItemType;
    name: string;
    description: string;
    sentiment?: number;
    strength?: number;
    confidence?: number;
    exposure_current?: number;
    exposure_desired?: number;
    relationship?: string;
    category?: string;
  }[];
}
@@ -74,6 +74,7 @@ function getLastPersonaMessage(history: Message[]): Message | undefined {
74
74
  * - Getting recent message history
75
75
  */
76
76
  export function buildHeartbeatCheckPrompt(data: HeartbeatCheckPromptData): PromptOutput {
77
+ console.log(`[HeartbeatCheck ${data.persona.name}] Building prompt - topics: ${data.human.topics.length}, people: ${data.human.people.length}, inactive_days: ${data.inactive_days}, history: ${data.recent_history.length} messages`);
77
78
  if (!data.persona?.name) {
78
79
  throw new Error("buildHeartbeatCheckPrompt: persona.name is required");
79
80
  }