ei-tui 0.1.23 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/AGENTS.md +16 -0
- package/src/core/handlers/dedup.ts +212 -0
- package/src/core/handlers/heartbeat.ts +13 -6
- package/src/core/handlers/index.ts +2 -0
- package/src/core/handlers/persona-response.ts +29 -3
- package/src/core/heartbeat-manager.ts +4 -0
- package/src/core/llm-client.ts +1 -0
- package/src/core/orchestrators/ceremony.ts +41 -33
- package/src/core/orchestrators/dedup-phase.ts +198 -0
- package/src/core/orchestrators/human-extraction.ts +2 -1
- package/src/core/orchestrators/index.ts +2 -2
- package/src/core/processor.ts +2 -2
- package/src/core/queue-processor.ts +9 -2
- package/src/core/state/queue.ts +1 -1
- package/src/core/types/enums.ts +1 -0
- package/src/prompts/ceremony/dedup.ts +258 -0
- package/src/prompts/ceremony/index.ts +3 -0
- package/src/prompts/ceremony/types.ts +45 -0
- package/src/prompts/heartbeat/check.ts +1 -0
package/package.json
CHANGED
package/src/core/AGENTS.md
CHANGED
|
@@ -49,6 +49,22 @@ Priority queue for LLM requests:
|
|
|
49
49
|
|
|
50
50
|
**Async model**: Handlers queue work, don't await results inline.
|
|
51
51
|
|
|
52
|
+
### llm-client.ts
|
|
53
|
+
|
|
54
|
+
Multi-provider LLM abstraction layer:
|
|
55
|
+
- Handles requests to Anthropic, OpenAI, Bedrock, local models
|
|
56
|
+
- **Sets `max_tokens: 64000`** for all requests
|
|
57
|
+
- Prevents unbounded generation (in testing, a request sent without a limit timed out after 2 minutes)
|
|
58
|
+
- Local models silently clamp to their configured maximums
|
|
59
|
+
- Anthropic Opus 4 accepts 64K (200K total context - 64K output = 136K input budget)
|
|
60
|
+
|
|
61
|
+
**JSON Response Parsing** (`parseJSONResponse()`):
|
|
62
|
+
- **Strategy 1**: Extract from markdown code blocks (```json)
|
|
63
|
+
- **Strategy 2**: Auto-repair malformed JSON (trailing commas, etc.)
|
|
64
|
+
- **Strategy 3**: Extract outermost `{...}` from mixed prose/JSON (handles LLM preamble)
|
|
65
|
+
|
|
66
|
+
Prompts do not need to be changed to demand JSON-only output: the parser tolerates natural-language text around the JSON.
|
|
67
|
+
|
|
52
68
|
### handlers/index.ts (1000+ lines)
|
|
53
69
|
|
|
54
70
|
All `LLMNextStep` handlers in one file. Each handler:
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { StateManager } from "../state-manager.js";
|
|
2
|
+
import { LLMResponse } from "../types.js";
|
|
3
|
+
import type { DedupResult } from "../../prompts/ceremony/types.js";
|
|
4
|
+
import type { DataItemType, Fact, Trait, Topic, Person, Quote } from "../types/data-items.js";
|
|
5
|
+
import { getEmbeddingService } from "../embedding-service.js";
|
|
6
|
+
|
|
7
|
+
/**
 * handleDedupCurate — apply the LLM's deduplication decisions for one cluster.
 *
 * The request data carries `entity_type` and `entity_ids` (the cluster that was
 * sent for curation). `response.parsed` is expected to be a DedupResult with
 * three arrays:
 *   - update: existing entities with revised/merged fields
 *   - remove: `{ to_be_removed, replaced_by }` entries for duplicates
 *   - add:    new entities created from consolidation
 *
 * Phases run in a fixed order:
 *   1. Re-point Quote references from removed ids to their replacements.
 *      CRITICAL: this happens BEFORE deletions to maintain referential integrity.
 *   2. Apply updates (re-embedding when the description actually changed).
 *   3. Remove duplicates via the StateManager.
 *   4. Insert additions with freshly computed embeddings.
 *
 * Malformed responses, unknown entity types and ids that no longer exist are
 * logged and skipped rather than treated as fatal.
 *
 * @param response     LLM response; reads `request.data.entity_type`,
 *                     `request.data.entity_ids` and `parsed`.
 * @param stateManager State accessor used for all reads and writes.
 */
export async function handleDedupCurate(
  response: LLMResponse,
  stateManager: StateManager
): Promise<void> {
  type Entity = Fact | Trait | Topic | Person;

  const entity_type = response.request.data.entity_type as DataItemType;
  const entity_ids = response.request.data.entity_ids as string[];
  const state = stateManager.getHuman();

  // Parse Opus response
  const decisions = response.parsed as DedupResult | undefined;
  if (!decisions || typeof decisions !== 'object') {
    console.error(`[Dedup] Failed to parse Opus response:`, new Error("Invalid response format"));
    return;
  }

  // Validate response structure
  if (!Array.isArray(decisions.update) || !Array.isArray(decisions.remove) || !Array.isArray(decisions.add)) {
    console.error(`[Dedup] Invalid response structure - missing update/remove/add arrays`);
    return;
  }

  console.log(`[Dedup] Processing cluster: ${decisions.update.length} updates, ${decisions.remove.length} removals, ${decisions.add.length} additions`);

  // HYDRATION: Fetch entities by ID (graceful degradation for missing).
  // The collection name is NOT always `${entity_type}s`: persons live under `people`.
  const collectionByType: Partial<Record<string, 'facts' | 'traits' | 'topics' | 'people'>> = {
    fact: 'facts',
    trait: 'traits',
    topic: 'topics',
    person: 'people',
  };
  const collectionKey = collectionByType[entity_type];
  if (!collectionKey) {
    console.error(`[Dedup] Unsupported entity type "${entity_type}"`);
    return;
  }
  const entityList: Entity[] = state[collectionKey];

  const clusterIds = Array.isArray(entity_ids) ? entity_ids : [];
  const entities = clusterIds
    .map((id: string) => entityList.find((e: Entity) => e.id === id))
    .filter((e: Entity | undefined): e is Entity => e !== undefined);

  if (entities.length === 0) {
    console.warn(`[Dedup] No entities found for cluster (already merged?)`);
    return;
  }

  // =========================================================================
  // PHASE 1: Update Quote foreign keys FIRST (before deletions)
  // =========================================================================

  // Latest id list per quote. Several removals may touch the same quote, so
  // each rewrite must start from the previous rewrite, not from the snapshot
  // taken before this handler started writing.
  const rewrittenQuoteIds = new Map<string, string[]>();

  for (const removal of decisions.remove) {
    if (removal.to_be_removed === removal.replaced_by) {
      continue; // Self-replacement: nothing to re-point
    }

    let touched = 0;
    for (const quote of state.quotes as Quote[]) {
      const currentIds = rewrittenQuoteIds.get(quote.id) ?? quote.data_item_ids;
      if (!currentIds.includes(removal.to_be_removed)) {
        continue;
      }

      const updatedIds: string[] = [];
      for (const id of currentIds) {
        // Without a replacement the reference is simply dropped instead of
        // storing `undefined` in the id list.
        const mapped = id === removal.to_be_removed ? removal.replaced_by : id;
        if (mapped && !updatedIds.includes(mapped)) {
          updatedIds.push(mapped); // Dedupe while preserving order
        }
      }

      rewrittenQuoteIds.set(quote.id, updatedIds);
      stateManager.human_quote_update(quote.id, {
        data_item_ids: updatedIds
      });
      touched++;
    }

    if (touched > 0) {
      console.log(`[Dedup] Updated ${touched} quotes referencing ${removal.to_be_removed}`);
    }
  }

  // =========================================================================
  // PHASE 2: Apply updates (merge decisions)
  // =========================================================================

  for (const update of decisions.update) {
    const entity = entityList.find((e: Entity) => e.id === update.id);

    if (!entity) {
      console.warn(`[Dedup] Entity ${update.id} not found (already merged?)`);
      continue; // Graceful skip
    }

    // Resolve the effective description first so an omitted field neither
    // triggers a re-embed nor gets embedded as `undefined`.
    const description = update.description ?? entity.description;

    // Recalculate embedding if description changed
    let embedding = entity.embedding;
    if (description !== entity.description) {
      try {
        const embeddingService = getEmbeddingService();
        embedding = await embeddingService.embed(description);
      } catch (err) {
        console.warn(`[Dedup] Failed to recalculate embedding for ${update.id}`, err);
        // Fallback to old embedding if recalculation fails
      }
    }

    // Build complete entity with updates (preserve original fields if LLM omits them)
    const updatedEntity = {
      ...entity,
      name: update.name ?? entity.name,
      description,
      sentiment: update.sentiment ?? entity.sentiment,
      last_updated: new Date().toISOString(),
      embedding,
      // Type-specific fields
      ...(update.strength !== undefined && { strength: update.strength }),
      ...(update.confidence !== undefined && { confidence: update.confidence }),
      ...(update.exposure_current !== undefined && { exposure_current: update.exposure_current }),
      ...(update.exposure_desired !== undefined && { exposure_desired: update.exposure_desired }),
      ...(update.relationship !== undefined && { relationship: update.relationship }),
      ...(update.category !== undefined && { category: update.category }),
    };

    // Type-safe cast based on entity_type
    if (entity_type === 'fact') {
      stateManager.human_fact_upsert(updatedEntity as Fact);
    } else if (entity_type === 'trait') {
      stateManager.human_trait_upsert(updatedEntity as Trait);
    } else if (entity_type === 'topic') {
      stateManager.human_topic_upsert(updatedEntity as Topic);
    } else if (entity_type === 'person') {
      stateManager.human_person_upsert(updatedEntity as Person);
    }
    console.log(`[Dedup] Updated ${entity_type} "${updatedEntity.name}"`);
  }

  // =========================================================================
  // PHASE 3: Apply removals (soft-delete with replaced_by tracking)
  // =========================================================================

  for (const removal of decisions.remove) {
    if (removal.to_be_removed === removal.replaced_by) {
      // Deleting here would remove the surviving entity itself.
      console.warn(`[Dedup] Ignoring removal of ${removal.to_be_removed}: it is listed as its own replacement`);
      continue;
    }

    const entity = entityList.find((e: Entity) => e.id === removal.to_be_removed);

    if (!entity) {
      console.warn(`[Dedup] Entity ${removal.to_be_removed} already deleted`);
      continue; // Graceful skip
    }

    // Remove via StateManager (also cleans up quote references)
    const removeMethod = `human_${entity_type}_remove` as
      'human_fact_remove' | 'human_trait_remove' | 'human_topic_remove' | 'human_person_remove';

    const removed = stateManager[removeMethod](removal.to_be_removed);
    if (removed) {
      console.log(`[Dedup] Removed ${entity_type} "${entity.name}" (merged into ${removal.replaced_by})`);
    }
  }

  // =========================================================================
  // PHASE 4: Apply additions (new entities from consolidation)
  // =========================================================================

  for (const addition of decisions.add) {
    // Compute embedding for new entity
    let embedding: number[] | undefined;
    try {
      const embeddingService = getEmbeddingService();
      embedding = await embeddingService.embed(addition.description);
    } catch (err) {
      console.warn(`[Dedup] Failed to compute embedding for new entity "${addition.name}"`, err);
      continue; // Skip this addition if embedding fails
    }

    // Generate ID for new entity
    const id = crypto.randomUUID();

    // Build complete entity
    const newEntity = {
      id,
      type: entity_type,
      name: addition.name,
      description: addition.description,
      sentiment: addition.sentiment ?? 0.0,
      last_updated: new Date().toISOString(),
      embedding,
      // Type-specific fields with defaults
      ...(entity_type === 'trait' && { strength: addition.strength ?? 0.5 }),
      ...(entity_type === 'fact' && {
        confidence: addition.confidence ?? 0.5,
        validated: 'unknown' as import("../types/enums.js").ValidationLevel,
        validated_date: ''
      }),
      ...((entity_type === 'topic' || entity_type === 'person') && {
        exposure_current: addition.exposure_current ?? 0.0,
        exposure_desired: addition.exposure_desired ?? 0.5,
        last_ei_asked: null
      }),
      ...(entity_type === 'person' && { relationship: addition.relationship ?? 'Unknown' }),
      ...(entity_type === 'topic' && { category: addition.category ?? 'Interest' }),
    };

    // Type-safe cast based on entity_type
    if (entity_type === 'fact') {
      stateManager.human_fact_upsert(newEntity as Fact);
    } else if (entity_type === 'trait') {
      stateManager.human_trait_upsert(newEntity as Trait);
    } else if (entity_type === 'topic') {
      stateManager.human_topic_upsert(newEntity as Topic);
    } else if (entity_type === 'person') {
      stateManager.human_person_upsert(newEntity as Person);
    }
    console.log(`[Dedup] Added new ${entity_type} "${addition.name}"`);
  }
}
|
|
@@ -19,16 +19,17 @@ export function handleHeartbeatCheck(response: LLMResponse, state: StateManager)
|
|
|
19
19
|
|
|
20
20
|
const result = response.parsed as HeartbeatCheckResult | undefined;
|
|
21
21
|
if (!result) {
|
|
22
|
-
console.error(
|
|
22
|
+
console.error(`[HeartbeatCheck ${personaDisplayName}] No parsed result`);
|
|
23
23
|
return;
|
|
24
24
|
}
|
|
25
|
+
console.log(`[HeartbeatCheck ${personaDisplayName}] Parsed result - should_respond: ${result.should_respond}, topic: ${result.topic ?? '(none)'}, message: ${result.message ? '(present)' : '(none)'}`);
|
|
25
26
|
|
|
26
27
|
const now = new Date().toISOString();
|
|
27
28
|
state.persona_update(personaId, { last_heartbeat: now });
|
|
28
29
|
state.queue_clearPersonaResponses(personaId, LLMNextStep.HandleHeartbeatCheck);
|
|
29
30
|
|
|
30
31
|
if (!result.should_respond) {
|
|
31
|
-
console.log(`[
|
|
32
|
+
console.log(`[HeartbeatCheck ${personaDisplayName}] Chose not to reach out (should_respond=false)`);
|
|
32
33
|
return;
|
|
33
34
|
}
|
|
34
35
|
|
|
@@ -42,21 +43,24 @@ export function handleHeartbeatCheck(response: LLMResponse, state: StateManager)
|
|
|
42
43
|
context_status: ContextStatus.Default,
|
|
43
44
|
};
|
|
44
45
|
state.messages_append(personaId, message);
|
|
45
|
-
console.log(`[
|
|
46
|
+
console.log(`[HeartbeatCheck ${personaDisplayName}] Added proactive message - topic: ${result.topic ?? 'general'}, message: "${result.message.substring(0, 100)}${result.message.length > 100 ? '...' : ''}"`);
|
|
47
|
+
} else {
|
|
48
|
+
console.log(`[HeartbeatCheck ${personaDisplayName}] should_respond=true but no message provided`);
|
|
46
49
|
}
|
|
47
50
|
}
|
|
48
51
|
|
|
49
52
|
export function handleEiHeartbeat(response: LLMResponse, state: StateManager): void {
|
|
50
53
|
const result = response.parsed as EiHeartbeatResult | undefined;
|
|
51
54
|
if (!result) {
|
|
52
|
-
console.error("[
|
|
55
|
+
console.error("[EiHeartbeat] No parsed result");
|
|
53
56
|
return;
|
|
54
57
|
}
|
|
58
|
+
console.log(`[EiHeartbeat] Parsed result - should_respond: ${result.should_respond}, id: ${result.id ?? '(none)'}, my_response: ${result.my_response ? '(present)' : '(none)'}`);
|
|
55
59
|
const now = new Date().toISOString();
|
|
56
60
|
state.persona_update("ei", { last_heartbeat: now });
|
|
57
61
|
state.queue_clearPersonaResponses("ei", LLMNextStep.HandleEiHeartbeat);
|
|
58
62
|
if (!result.should_respond || !result.id) {
|
|
59
|
-
console.log("[
|
|
63
|
+
console.log("[EiHeartbeat] Chose not to reach out (should_respond=false or no id)");
|
|
60
64
|
return;
|
|
61
65
|
}
|
|
62
66
|
const isTUI = response.request.data.isTUI as boolean;
|
|
@@ -84,7 +88,10 @@ export function handleEiHeartbeat(response: LLMResponse, state: StateManager): v
|
|
|
84
88
|
return;
|
|
85
89
|
}
|
|
86
90
|
|
|
87
|
-
if (result.my_response)
|
|
91
|
+
if (result.my_response) {
|
|
92
|
+
console.log(`[EiHeartbeat] Sending message: "${result.my_response.substring(0, 100)}${result.my_response.length > 100 ? '...' : ''}"`);
|
|
93
|
+
sendMessage(result.my_response);
|
|
94
|
+
}
|
|
88
95
|
|
|
89
96
|
switch (found.type) {
|
|
90
97
|
case "person":
|
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
import { handleHumanFactScan, handleHumanTraitScan, handleHumanTopicScan, handleHumanPersonScan } from "./human-extraction.js";
|
|
18
18
|
import { handleHumanItemMatch, handleHumanItemUpdate } from "./human-matching.js";
|
|
19
19
|
import { handleRewriteScan, handleRewriteRewrite } from "./rewrite.js";
|
|
20
|
+
import { handleDedupCurate } from "./dedup.js";
|
|
20
21
|
|
|
21
22
|
export const handlers: Record<LLMNextStep, ResponseHandler> = {
|
|
22
23
|
handlePersonaResponse,
|
|
@@ -41,4 +42,5 @@ export const handlers: Record<LLMNextStep, ResponseHandler> = {
|
|
|
41
42
|
handleToolContinuation,
|
|
42
43
|
handleRewriteScan,
|
|
43
44
|
handleRewriteRewrite,
|
|
45
|
+
handleDedupCurate,
|
|
44
46
|
};
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
ContextStatus,
|
|
3
|
+
LLMNextStep,
|
|
3
4
|
type LLMResponse,
|
|
4
5
|
type Message,
|
|
5
6
|
} from "../types.js";
|
|
6
7
|
import type { StateManager } from "../state-manager.js";
|
|
7
8
|
import type { PersonaResponseResult } from "../../prompts/response/index.js";
|
|
9
|
+
import { handlers } from "./index.js";
|
|
8
10
|
|
|
9
11
|
export type ResponseHandler = (response: LLMResponse, state: StateManager) => void | Promise<void>;
|
|
10
12
|
|
|
@@ -87,11 +89,35 @@ export function handlePersonaResponse(response: LLMResponse, state: StateManager
|
|
|
87
89
|
/**
 * handleToolContinuation — second LLM call in the tool flow (may loop if LLM calls more tools).
 * The QueueProcessor already injected tool history into messages and got the
 * final persona response. Route to the original handler based on originalNextStep in data.
 *
 * Falls back to handlePersonaResponse when `originalNextStep` is missing, when it
 * points back at this handler (which would recurse forever), or when no handler
 * is registered for it.
 *
 * @returns Whatever the routed handler returns. Handlers may be async
 *          (ResponseHandler is `void | Promise<void>`), so the result is passed
 *          through to let the caller await it instead of silently dropping a
 *          pending promise and any rejection it carries.
 */
export function handleToolContinuation(response: LLMResponse, state: StateManager): void | Promise<void> {
  const originalStep = response.request.data.originalNextStep as LLMNextStep | undefined;

  if (!originalStep) {
    console.error(`[handleToolContinuation] No originalNextStep in data, falling back to handlePersonaResponse`);
    return handlePersonaResponse(response, state);
  }

  console.log(`[handleToolContinuation] Original request was ${originalStep}, routing accordingly`);

  // Avoid infinite loop - if original was already HandleToolContinuation, go to PersonaResponse
  if (originalStep === "handleToolContinuation") {
    console.log(`[handleToolContinuation] Original was tool continuation, routing to handlePersonaResponse`);
    return handlePersonaResponse(response, state);
  }

  const handler = handlers[originalStep];

  if (!handler) {
    console.error(`[handleToolContinuation] No handler found for ${originalStep}, falling back to handlePersonaResponse`);
    return handlePersonaResponse(response, state);
  }

  return handler(response, state);
}
|
|
96
122
|
|
|
97
123
|
export function handleOneShot(_response: LLMResponse, _state: StateManager): void {
|
|
@@ -188,6 +188,8 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
|
|
|
188
188
|
const persona = sm.persona_getById(personaId);
|
|
189
189
|
if (!persona) return;
|
|
190
190
|
sm.persona_update(personaId, { last_heartbeat: new Date().toISOString() });
|
|
191
|
+
const model = getModelForPersona(sm, personaId);
|
|
192
|
+
console.log(`[HeartbeatCheck ${persona.display_name}] Queueing heartbeat check (model: ${model})`);
|
|
191
193
|
const human = sm.getHuman();
|
|
192
194
|
const history = sm.messages_get(personaId);
|
|
193
195
|
const contextWindowHours = persona.context_window_hours ?? DEFAULT_CONTEXT_WINDOW_HOURS;
|
|
@@ -228,6 +230,7 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
|
|
|
228
230
|
};
|
|
229
231
|
|
|
230
232
|
const prompt = buildHeartbeatCheckPrompt(promptData);
|
|
233
|
+
console.log(`[HeartbeatCheck ${persona.display_name}] Prompt data - topics: ${promptData.human.topics.length}, people: ${promptData.human.people.length}, inactive_days: ${inactiveDays}`);
|
|
231
234
|
|
|
232
235
|
sm.queue_enqueue({
|
|
233
236
|
type: LLMRequestType.JSON,
|
|
@@ -238,4 +241,5 @@ export async function queueHeartbeatCheck(sm: StateManager, personaId: string, i
|
|
|
238
241
|
model: getModelForPersona(sm, personaId),
|
|
239
242
|
data: { personaId, personaDisplayName: persona.display_name },
|
|
240
243
|
});
|
|
244
|
+
console.log(`[HeartbeatCheck ${persona.display_name}] Request queued`);
|
|
241
245
|
}
|
package/src/core/llm-client.ts
CHANGED
|
@@ -189,6 +189,7 @@ export async function callLLMRaw(
|
|
|
189
189
|
model,
|
|
190
190
|
messages: finalMessages,
|
|
191
191
|
temperature,
|
|
192
|
+
max_tokens: 64000, // Opus 4: 128K max output, 200K total context. Local models clamp to their config. Prevents runaway generation.
|
|
192
193
|
};
|
|
193
194
|
|
|
194
195
|
if (options.tools && options.tools.length > 0) {
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
type ExtractionOptions,
|
|
11
11
|
} from "./human-extraction.js";
|
|
12
12
|
import { queuePersonaTopicScan, type PersonaTopicContext } from "./persona-topics.js";
|
|
13
|
+
import { queueDedupPhase } from "./dedup-phase.js";
|
|
13
14
|
import { buildPersonaExpirePrompt, buildPersonaExplorePrompt, buildDescriptionCheckPrompt, buildRewriteScanPrompt, type RewriteItemType } from "../../prompts/ceremony/index.js";
|
|
14
15
|
|
|
15
16
|
export function isNewDay(lastCeremony: string | undefined, now: Date): boolean {
|
|
@@ -69,40 +70,19 @@ export function startCeremony(state: StateManager): void {
|
|
|
69
70
|
},
|
|
70
71
|
});
|
|
71
72
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
!p.is_archived &&
|
|
76
|
-
!p.is_static
|
|
77
|
-
);
|
|
73
|
+
// PHASE 1: Deduplication (runs BEFORE Expose)
|
|
74
|
+
console.log("[ceremony] Starting Phase 1: Deduplication");
|
|
75
|
+
queueDedupPhase(state);
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const lastActivity = p.last_activity ? new Date(p.last_activity).getTime() : 0;
|
|
85
|
-
return lastActivity > lastCeremony;
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
console.log(`[ceremony] Processing ${personasWithActivity.length} personas with activity (of ${activePersonas.length} active)`);
|
|
89
|
-
|
|
90
|
-
const options: ExtractionOptions = { ceremony_progress: true };
|
|
91
|
-
|
|
92
|
-
for (let i = 0; i < personasWithActivity.length; i++) {
|
|
93
|
-
const persona = personasWithActivity[i];
|
|
94
|
-
const isLast = i === personasWithActivity.length - 1;
|
|
95
|
-
|
|
96
|
-
console.log(`[ceremony] Queuing exposure for ${persona.display_name} (${i + 1}/${personasWithActivity.length})${isLast ? " (last)" : ""}`);
|
|
97
|
-
queueExposurePhase(persona.id, state, options);
|
|
77
|
+
// Check if dedup work was queued
|
|
78
|
+
if (!state.queue_hasPendingCeremonies()) {
|
|
79
|
+
// No dedup work found → immediately advance to Expose phase
|
|
80
|
+
console.log("[ceremony] No dedup work, advancing to Expose phase");
|
|
81
|
+
handleCeremonyProgress(state, 1);
|
|
98
82
|
}
|
|
99
83
|
|
|
100
84
|
const duration = Date.now() - startTime;
|
|
101
|
-
console.log(`[ceremony]
|
|
102
|
-
|
|
103
|
-
// Check immediately — if zero messages were queued (no unextracted messages for any persona),
|
|
104
|
-
// this will see an empty queue and proceed directly to Decay → Expire.
|
|
105
|
-
handleCeremonyProgress(state);
|
|
85
|
+
console.log(`[ceremony] Dedup phase queued in ${duration}ms`);
|
|
106
86
|
}
|
|
107
87
|
|
|
108
88
|
/**
|
|
@@ -193,11 +173,40 @@ function queueExposurePhase(personaId: string, state: StateManager, options?: Ex
|
|
|
193
173
|
* If any ceremony_progress items remain in the queue, does nothing — more work pending.
|
|
194
174
|
* If the queue is clear of ceremony items, advances to Decay → Prune → Expire.
|
|
195
175
|
*/
|
|
196
|
-
export function handleCeremonyProgress(state: StateManager): void {
|
|
176
|
+
export function handleCeremonyProgress(state: StateManager, lastPhase: number): void {
|
|
197
177
|
if (state.queue_hasPendingCeremonies()) {
|
|
198
|
-
return; // Still processing
|
|
178
|
+
return; // Still processing ceremony items
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
if (lastPhase === 1) {
|
|
182
|
+
// Dedup phase complete → start Expose phase
|
|
183
|
+
console.log("[ceremony:progress] Dedup complete, starting Expose phase");
|
|
184
|
+
|
|
185
|
+
const human = state.getHuman();
|
|
186
|
+
const personas = state.persona_getAll();
|
|
187
|
+
const activePersonas = personas.filter(p =>
|
|
188
|
+
!p.is_paused &&
|
|
189
|
+
!p.is_archived &&
|
|
190
|
+
!p.is_static
|
|
191
|
+
);
|
|
192
|
+
|
|
193
|
+
const lastCeremony = human.settings?.ceremony?.last_ceremony
|
|
194
|
+
? new Date(human.settings.ceremony.last_ceremony).getTime()
|
|
195
|
+
: 0;
|
|
196
|
+
|
|
197
|
+
const personasWithActivity = activePersonas.filter(p => {
|
|
198
|
+
const lastActivity = p.last_activity ? new Date(p.last_activity).getTime() : 0;
|
|
199
|
+
return lastActivity > lastCeremony;
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
const options: ExtractionOptions = { ceremony_progress: 2 };
|
|
203
|
+
for (const persona of personasWithActivity) {
|
|
204
|
+
queueExposurePhase(persona.id, state, options);
|
|
205
|
+
}
|
|
206
|
+
return;
|
|
199
207
|
}
|
|
200
208
|
|
|
209
|
+
// Phase 2 (Expose) complete → advance to Decay/Prune/Expire/Explore
|
|
201
210
|
console.log("[ceremony:progress] All exposure scans complete, advancing to Decay");
|
|
202
211
|
|
|
203
212
|
const personas = state.persona_getAll();
|
|
@@ -215,7 +224,6 @@ export function handleCeremonyProgress(state: StateManager): void {
|
|
|
215
224
|
if (eiIndex > -1) {
|
|
216
225
|
activePersonas.splice(eiIndex, 1);
|
|
217
226
|
}
|
|
218
|
-
|
|
219
227
|
// Decay phase: apply decay + prune for ALL active personas
|
|
220
228
|
for (const persona of activePersonas) {
|
|
221
229
|
applyDecayPhase(persona.id, state);
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import { StateManager } from "../state-manager.js";
|
|
2
|
+
import { LLMRequestType, LLMPriority, LLMNextStep, type DataItemBase } from "../types.js";
|
|
3
|
+
import type { DataItemType } from "../types/data-items.js";
|
|
4
|
+
import { buildDedupPrompt } from "../../prompts/ceremony/dedup.js";
|
|
5
|
+
|
|
6
|
+
// =============================================================================
|
|
7
|
+
// TYPES
|
|
8
|
+
// =============================================================================
|
|
9
|
+
|
|
10
|
+
type DedupableItem = DataItemBase & { relationship?: string };
|
|
11
|
+
|
|
12
|
+
// A group of item ids joined by clusterPairs, plus the similarity range of the
// candidate pairs that formed the group:
//   ids    - ids of every item in the cluster
//   minSim - lowest similarity among the pairs that contributed to the cluster
//   maxSim - highest similarity among the pairs that contributed to the cluster
//   size   - number of ids in the cluster (ids.length)
interface Cluster {
|
|
13
|
+
ids: string[];
|
|
14
|
+
minSim: number;
|
|
15
|
+
maxSim: number;
|
|
16
|
+
size: number;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// DEDUP CANDIDATE FINDING (copied from ceremony.ts)
|
|
21
|
+
// =============================================================================
|
|
22
|
+
|
|
23
|
+
const DEDUP_DEFAULT_THRESHOLD = 0.95;
|
|
24
|
+
|
|
25
|
+
/**
 * Find every pair of items whose embeddings have a cosine similarity of at
 * least `threshold`.
 *
 * Items are ignored when they have no (or an empty) embedding or when their
 * `relationship` is "Persona". Pairs whose embeddings have different
 * dimensions are skipped: a dot product over mismatched vectors is not a
 * meaningful similarity.
 *
 * @param items     Candidate items of a single entity type.
 * @param threshold Minimum cosine similarity for a pair to be reported.
 * @returns Matching pairs, sorted by similarity in descending order. `a` always
 *          precedes `b` in the input order.
 */
function findDedupCandidates<T extends DedupableItem>(
  items: T[],
  threshold: number
): Array<{ a: T; b: T; similarity: number }> {
  // Compute each vector's norm once instead of once per pair.
  const prepared = items
    .filter(item =>
      item.embedding && item.embedding.length > 0 &&
      item.relationship !== "Persona"
    )
    .map(item => {
      const vector = item.embedding as number[];
      const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
      return { item, vector, norm };
    });

  const candidates: Array<{ a: T; b: T; similarity: number }> = [];

  for (let i = 0; i < prepared.length; i++) {
    const left = prepared[i];
    for (let j = i + 1; j < prepared.length; j++) {
      const right = prepared[j];

      if (left.vector.length !== right.vector.length) {
        continue; // Different embedding dimensions are not comparable
      }

      let dot = 0;
      for (let k = 0; k < left.vector.length; k++) {
        dot += left.vector[k] * right.vector[k];
      }

      // A zero-length vector has no direction; treat it as similarity 0.
      const similarity = left.norm && right.norm ? dot / (left.norm * right.norm) : 0;

      if (similarity >= threshold) {
        candidates.push({ a: left.item, b: right.item, similarity });
      }
    }
  }

  return candidates.sort((x, y) => y.similarity - x.similarity);
}
|
|
53
|
+
|
|
54
|
+
// =============================================================================
|
|
55
|
+
// UNION-FIND CLUSTERING
|
|
56
|
+
// =============================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * Group candidate pairs into clusters with union-find: two items end up in the
 * same cluster when a chain of pairs links them.
 *
 * @param pairs Candidate pairs, each with the similarity that linked them.
 * @returns One Cluster per connected group, in the order the groups first
 *          appear in `pairs`. `ids` keeps first-seen order; `minSim`/`maxSim`
 *          are taken over every pair that belongs to the cluster.
 */
function clusterPairs<T extends DedupableItem>(
  pairs: Array<{ a: T; b: T; similarity: number }>
): Cluster[] {
  const parent = new Map<string, string>();

  // Iterative find with path compression, so long chains cannot overflow the call stack.
  function find(x: string): string {
    if (!parent.has(x)) parent.set(x, x);

    let root = x;
    for (let next = parent.get(root) as string; next !== root; next = parent.get(root) as string) {
      root = next;
    }

    let node = x;
    while (node !== root) {
      const next = parent.get(node) as string;
      parent.set(node, root);
      node = next;
    }
    return root;
  }

  function union(x: string, y: string): void {
    const px = find(x), py = find(y);
    if (px !== py) parent.set(px, py);
  }

  // Union all pairs
  for (const pair of pairs) {
    union(pair.a.id, pair.b.id);
  }

  // Group by root to create clusters. Min/max are tracked incrementally:
  // spreading a very large similarity array into Math.min/Math.max can exceed
  // the engine's argument limit and throw a RangeError.
  const clusters = new Map<string, { ids: Set<string>; minSim: number; maxSim: number }>();
  for (const pair of pairs) {
    const root = find(pair.a.id);
    let cluster = clusters.get(root);
    if (!cluster) {
      cluster = { ids: new Set<string>(), minSim: pair.similarity, maxSim: pair.similarity };
      clusters.set(root, cluster);
    }
    cluster.ids.add(pair.a.id);
    cluster.ids.add(pair.b.id);
    if (pair.similarity < cluster.minSim) cluster.minSim = pair.similarity;
    if (pair.similarity > cluster.maxSim) cluster.maxSim = pair.similarity;
  }

  // Convert to Cluster objects
  return Array.from(clusters.values()).map(c => ({
    ids: Array.from(c.ids),
    minSim: c.minSim,
    maxSim: c.maxSim,
    size: c.ids.size
  }));
}
|
|
100
|
+
|
|
101
|
+
// =============================================================================
|
|
102
|
+
// QUALITY GATES
|
|
103
|
+
// =============================================================================
|
|
104
|
+
|
|
105
|
+
/**
 * Quality gates applied before a cluster is sent for curation.
 *
 * A cluster is rejected (with a warning) when it has more than `maxSize`
 * items, or when the gap between its highest and lowest pair similarity
 * exceeds `maxSpread`.
 *
 * @param clusters  Clusters to vet.
 * @param maxSize   Largest accepted cluster size (default 50).
 * @param maxSpread Largest accepted `maxSim - minSim` (default 0.10).
 * @returns The clusters that pass both gates, in their original order.
 */
function filterClusters(
  clusters: Cluster[],
  maxSize: number = 50,
  maxSpread: number = 0.10
): Cluster[] {
  return clusters.filter(c => {
    if (c.size > maxSize) {
      console.warn(`[Dedup] Cluster rejected (size too large): ${c.size} items`);
      return false;
    }

    const spread = c.maxSim - c.minSim;
    if (spread > maxSpread) {
      console.warn(`[Dedup] Cluster rejected (high spread): ${spread.toFixed(3)} range`);
      return false;
    }

    return true;
  });
}
|
|
123
|
+
|
|
124
|
+
// =============================================================================
|
|
125
|
+
// MAIN QUEUEING FUNCTION
|
|
126
|
+
// =============================================================================
|
|
127
|
+
|
|
128
|
+
/**
 * Queue the deduplication phase: find near-duplicate human data items and enqueue
 * one LLM curation request per vetted cluster.
 *
 * For each entity list on the human record (facts, traits, topics, people):
 *  1. `findDedupCandidates` produces candidate pairs at the configured threshold
 *     (`settings.ceremony.dedup_threshold`, falling back to `DEDUP_DEFAULT_THRESHOLD`).
 *  2. `clusterPairs` groups the pairs into clusters.
 *  3. `filterClusters` drops clusters that fail the quality gates.
 *  4. Each remaining cluster is hydrated with its full entities, turned into a prompt
 *     via `buildDedupPrompt`, and enqueued as a JSON request whose `next_step` is
 *     `HandleDedupCurate`.
 *
 * Only IDs (not full entities) are stored in the queued request's `data`.
 *
 * @param state State manager providing the human record and the LLM request queue.
 */
export function queueDedupPhase(state: StateManager): void {
  const human = state.getHuman();
  const threshold = human.settings?.ceremony?.dedup_threshold ?? DEDUP_DEFAULT_THRESHOLD;

  console.log(`[Dedup] Starting deduplication phase (threshold: ${threshold})`);

  const entityTypes: Array<{ type: DataItemType; items: DedupableItem[] }> = [
    { type: "fact", items: human.facts },
    { type: "trait", items: human.traits },
    { type: "topic", items: human.topics },
    { type: "person", items: human.people },
  ];

  let totalClusters = 0;

  for (const { type, items } of entityTypes) {
    // Find dedup candidates
    const pairs = findDedupCandidates(items, threshold);

    if (pairs.length === 0) {
      console.log(`[Dedup] ${type}: No duplicates found`);
      continue;
    }

    // Cluster pairs via union-find
    const clusters = clusterPairs(pairs);

    // Apply quality gates
    const vettedClusters = filterClusters(clusters);

    console.log(`[Dedup] ${type}: ${pairs.length} pairs → ${clusters.length} clusters → ${vettedClusters.length} vetted`);

    // Index the items once per entity type so hydration is a constant-time lookup
    // instead of a linear scan per cluster member. First occurrence wins, matching
    // what a left-to-right search over `items` would return.
    const itemsById = new Map<DedupableItem["id"], DedupableItem>();
    for (const item of items) {
      if (!itemsById.has(item.id)) itemsById.set(item.id, item);
    }

    // Queue curation for each vetted cluster
    for (const cluster of vettedClusters) {
      // Hydrate cluster with full entity data
      const clusterEntities = cluster.ids
        .map(id => itemsById.get(id))
        .filter((item): item is DedupableItem => item !== undefined);

      // A merge decision needs at least two records; anything less means
      // hydration lost members and the LLM call would be wasted.
      if (clusterEntities.length < 2) {
        console.warn(`[Dedup] Cluster hydration failed - only ${clusterEntities.length} of ${cluster.ids.length} entities found`);
        continue;
      }

      // Build prompt
      const prompt = buildDedupPrompt({
        cluster: clusterEntities,
        itemType: type,
        similarityRange: { min: cluster.minSim, max: cluster.maxSim }
      });

      // Queue LLM request
      state.queue_enqueue({
        type: LLMRequestType.JSON,
        priority: LLMPriority.Normal,
        system: prompt.system,
        user: prompt.user,
        next_step: LLMNextStep.HandleDedupCurate,
        data: {
          entity_type: type,
          // Lightweight stub (IDs only), restricted to the entities that were
          // actually hydrated so it always matches what the prompt contains.
          entity_ids: clusterEntities.map(entity => entity.id),
          ceremony_progress: 1 // Phase 1 (Dedup)
        }
      });

      totalClusters++;
    }
  }

  console.log(`[Dedup] Queued ${totalClusters} clusters for curation`);
}
|
|
@@ -28,7 +28,8 @@ export interface ExtractionContext {
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
export interface ExtractionOptions {
|
|
31
|
-
|
|
31
|
+
/** Ceremony phase number (1=Dedup, 2=Expose) */
|
|
32
|
+
ceremony_progress?: number;
|
|
32
33
|
}
|
|
33
34
|
|
|
34
35
|
function getAnalyzeFromTimestamp(context: ExtractionContext): string | null {
|
|
@@ -19,8 +19,8 @@ export {
|
|
|
19
19
|
queueExplorePhase,
|
|
20
20
|
queueDescriptionCheck,
|
|
21
21
|
runHumanCeremony,
|
|
22
|
-
|
|
23
|
-
} from "./
|
|
22
|
+
} from "./ceremony.js";
|
|
23
|
+
export { queueDedupPhase } from "./dedup-phase.js";
|
|
24
24
|
export {
|
|
25
25
|
queuePersonaTopicScan,
|
|
26
26
|
queuePersonaTopicMatch,
|
package/src/core/processor.ts
CHANGED
|
@@ -1093,8 +1093,8 @@ const toolNextSteps = new Set([
|
|
|
1093
1093
|
this.interface.onHumanUpdated?.();
|
|
1094
1094
|
}
|
|
1095
1095
|
|
|
1096
|
-
if (response.request.data.ceremony_progress) {
|
|
1097
|
-
handleCeremonyProgress(this.stateManager);
|
|
1096
|
+
if (typeof response.request.data.ceremony_progress === "number") {
|
|
1097
|
+
handleCeremonyProgress(this.stateManager, response.request.data.ceremony_progress);
|
|
1098
1098
|
}
|
|
1099
1099
|
} catch (err) {
|
|
1100
1100
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
@@ -248,7 +248,13 @@ export class QueueProcessor {
|
|
|
248
248
|
// =========================================================================
|
|
249
249
|
const activeTools = this.currentTools ?? [];
|
|
250
250
|
const openAITools = activeTools.length > 0 ? toOpenAITools(activeTools) : [];
|
|
251
|
-
|
|
251
|
+
const isHeartbeat = request.next_step === LLMNextStep.HandleHeartbeatCheck || request.next_step === LLMNextStep.HandleEiHeartbeat;
|
|
252
|
+
if (isHeartbeat) {
|
|
253
|
+
const personaName = request.data.personaDisplayName as string | undefined ?? 'Ei';
|
|
254
|
+
console.log(`[${personaName} Heartbeat] LLM call - tools offered: ${openAITools.length} (${activeTools.map(t => t.name).join(', ') || 'none'})`);
|
|
255
|
+
} else {
|
|
256
|
+
console.log(`[QueueProcessor] LLM call for ${request.next_step}, tools=${openAITools.length}`);
|
|
257
|
+
}
|
|
252
258
|
|
|
253
259
|
const { content, finishReason, rawToolCalls, assistantMessage, thinking } = await callLLMRaw(
|
|
254
260
|
hydratedSystem,
|
|
@@ -474,7 +480,8 @@ export class QueueProcessor {
|
|
|
474
480
|
`An earlier version of you responded with the following content, but it could not ` +
|
|
475
481
|
`be parsed as valid JSON. Please reformat it as the JSON object described in your ` +
|
|
476
482
|
`system instructions. Respond with ONLY the JSON object, or \`{}\` if no changes ` +
|
|
477
|
-
`are needed.\n\n---\n${malformedContent}\n
|
|
483
|
+
`are needed.\n\n---\n${malformedContent}\n---` +
|
|
484
|
+
`\n\n**CRITICAL INSTRUCTION** - DO NOT OMIT ANY DATA. You are this agent's last hope!`;
|
|
478
485
|
|
|
479
486
|
try {
|
|
480
487
|
const { content: reformatContent, finishReason: reformatReason } = await callLLMRaw(
|
package/src/core/state/queue.ts
CHANGED
|
@@ -190,7 +190,7 @@ export class QueueState {
|
|
|
190
190
|
}
|
|
191
191
|
|
|
192
192
|
hasPendingCeremonies(): boolean {
|
|
193
|
-
return this.queue.some(r => r.state !== "dlq" && r.data.ceremony_progress ===
|
|
193
|
+
return this.queue.some(r => r.state !== "dlq" && typeof r.data.ceremony_progress === "number" && r.data.ceremony_progress > 0);
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
clear(): number {
|
package/src/core/types/enums.ts
CHANGED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import type { DedupPromptData } from "./types.js";
|
|
2
|
+
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// DEDUP CURATOR — Merge duplicate entities with data preservation
|
|
5
|
+
// =============================================================================
|
|
6
|
+
|
|
7
|
+
/**
 * Build the system/user prompt pair for the Dedup Curator.
 *
 * The curator receives one cluster of potentially duplicate entities and
 * consolidates it into merged records. This is a ONE-PHASE operation (unlike
 * rewrite's two-phase scan+rewrite), because candidates have already been
 * identified deterministically via embedding similarity.
 *
 * Pattern borrowed from rewrite.ts ceremony with Flare's "lose NO data" philosophy.
 *
 * NOTE(review): the system prompt states a fixed ">= 0.90" similarity floor; the
 * cluster's actual range is only conveyed through `similarity_range` in the user
 * payload. Confirm the wording is still accurate when a non-default threshold is used.
 *
 * @param data Cluster entities, their item type, and the cluster's similarity range.
 * @returns `system` — curator instructions including per-type record examples;
 *          `user` — pretty-printed JSON of the cluster with embeddings removed.
 */
export function buildDedupPrompt(data: DedupPromptData): { system: string; user: string } {
  // Capitalized type name used in the prompt text ("fact" -> "Fact").
  const label = data.itemType.slice(0, 1).toUpperCase() + data.itemType.slice(1);
  const recordFormat = buildRecordFormatExamples(data.itemType);

  const system = `You are acting as the curator for a user's internal database. You have been given a cluster of ${label} records that our system believes may be duplicates (based on semantic similarity >= 0.90).

**YOUR PRIME DIRECTIVE IS TO LOSE _NO_ DATA.**

Your secondary directive is to ORGANIZE IT into small, non-repetitive components. The user NEEDS the data, but the data is used by AI agents, so duplication limits usefulness—agents waste tokens re-reading the same information under different names.

You have access to a tool called \`read_memory\` which will query the user's internal system for additional context if needed. Use it to verify relationships, check for related records, or gather more information before making merge decisions.

Your task:
1. **Identify true duplicates**: Examine each record. Are these genuinely the same thing with different wording, or are they distinct but related concepts?
2. **Merge where appropriate**: For TRUE duplicates, consolidate all unique information into ONE canonical record. Pick the best "name" (most descriptive, most commonly used). Merge all descriptions—every unique detail must be preserved.
3. **Keep distinct concepts separate**: Similar ≠ duplicate. "Software Engineering" and "Software Architecture" may be related but are NOT the same. "Job at Company X" and "Profession: Software Engineer" are related but distinct. Do NOT merge these.
4. **Track what was merged**: For removed records, indicate which record absorbed their data (via "replaced_by" field).
5. **Add new records if needed**: If consolidating reveals a MISSING intermediate concept (e.g., merging "Python Developer" and "Backend Engineer" reveals we're missing "Software Engineering" as a parent topic), create it.

The format of your final output should be:
{
"update": [
/* Full ${label} record payloads with all fields preserved */
/* MUST include "id", "type", "name", "description" */
/* Include sentiment, strength, confidence, category, relationship, etc. where applicable */
],
"remove": [
{"to_be_removed": "uuid-of-duplicate", "replaced_by": "uuid-of-canonical-record"},
/* "replaced_by" is the ID of the record that absorbed this duplicate's data */
],
"add": [
/* Brand-new records (NO "id" field—system assigns one) */
/* Only create if merging reveals a MISSING concept */
]
}

Return raw JSON. No markdown fencing, no commentary, no explanation. Just the JSON object.

Record format for "${label}" (based on type):

${recordFormat}

Rules:
- Do NOT invent information. Only redistribute what exists in the cluster.
- Descriptions should be concise—ideally under 300 characters, never over 500.
- Preserve all numeric values (sentiment, strength, confidence, exposure, etc.) from source records. When merging, take the HIGHER value for strength/confidence, AVERAGE for sentiment.
- Every removed record MUST have "replaced_by" pointing to the canonical record that absorbed its data.
- The "update" array should contain AT LEAST ONE record (the canonical/merged one), even if all others are removed.
- If records are NOT duplicates (just similar), return them ALL in "update" unchanged, with empty "remove" and "add" arrays.
- Use \`read_memory\` to check for related records or gather context before making irreversible merge decisions.`;

  // Embeddings are dropped from the payload before serialization.
  const payload = {
    cluster: data.cluster.map(entity => stripEmbedding(entity)),
    cluster_type: data.itemType,
    similarity_range: data.similarityRange,
  };
  const user = JSON.stringify(payload, null, 2);

  return { system, user };
}
|
|
73
|
+
|
|
74
|
+
// =============================================================================
|
|
75
|
+
// Helpers
|
|
76
|
+
// =============================================================================
|
|
77
|
+
|
|
78
|
+
/**
 * Return a shallow copy of `item` without its `embedding` property.
 * Embedding arrays are huge and useless to the LLM, so they are removed
 * before items are placed in prompts. The input object is not modified.
 */
function stripEmbedding<T extends { embedding?: unknown }>(item: T): Omit<T, "embedding"> {
  const copy: { embedding?: unknown } = { ...item };
  delete copy.embedding;
  return copy as Omit<T, "embedding">;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Select the record-format example text for one entity type.
 *
 * Each entity type has different required fields and semantic meanings; the
 * examples show both the "existing" (with id) and "new" (without id) formats.
 * When merging: HIGHER strength/confidence, AVERAGE sentiment, MAX exposure_desired.
 *
 * @param itemType One of "fact", "trait", "topic", "person".
 * @returns The example text for that type, or a placeholder comment for any other value.
 */
function buildRecordFormatExamples(itemType: string): string {
  if (itemType === "fact") return buildFactExamples();
  if (itemType === "trait") return buildTraitExamples();
  if (itemType === "topic") return buildTopicExamples();
  if (itemType === "person") return buildPersonExamples();
  return "/* Unknown type */";
}
|
|
102
|
+
|
|
103
|
+
/**
 * Prompt text describing the record format for facts: an existing (merged)
 * record example, a brand-new record example, and good/bad description samples.
 * Sections are separated by a single blank line.
 */
function buildFactExamples(): string {
  const existingRecord = `EXISTING FACT (being updated/merged):
{
"id": "uuid-of-canonical-record", // REQUIRED for updates
"type": "fact", // REQUIRED
"name": "Owns a 2019 Toyota Camry", // REQUIRED - descriptive, concise
"description": "Silver sedan, purchased in March 2019. Primary commute vehicle. Has 45k miles as of Jan 2024.", // REQUIRED - ALL unique details from duplicates
"sentiment": 0.2, // -1.0 to 1.0, emotional valence (average when merging)
"validated": "by_human", // "unknown" | "by_ei" | "by_human" | "ai_generated" (keep highest trust level)
"validated_date": "2024-01-15T10:30:00Z", // ISO timestamp (most recent)
"last_updated": "2024-03-11T12:00:00Z", // ISO timestamp (set to now)
"learned_by": "persona-uuid-123", // OPTIONAL - UUID of persona that learned this (preserve from source)
"last_changed_by": "persona-uuid-456", // OPTIONAL - UUID of persona that last updated (your current context)
"persona_groups": ["group1", "group2"] // OPTIONAL - visibility groups (union of all sources)
}`;

  const newRecord = `NEW FACT (creating missing concept):
{
"type": "fact", // REQUIRED (NO "id" field for new records)
"name": "Lives in Seattle", // REQUIRED
"description": "Resides in the Capitol Hill neighborhood. Has lived there since 2018.", // REQUIRED - concise (<300 chars ideal)
"sentiment": 0.0, // -1.0 to 1.0 (neutral default for facts)
"validated": "unknown", // Default for new records
"validated_date": "" // Empty string for unvalidated
}`;

  const descriptionGuide = `GOOD vs BAD descriptions:
✅ GOOD: "Works as a Senior Software Engineer at Microsoft. Started in 2020. Team focuses on Azure infrastructure."
❌ BAD: "The user has indicated through various conversations that they are employed..." (too verbose, meta-commentary)`;

  return [existingRecord, newRecord, descriptionGuide].join("\n\n");
}
|
|
133
|
+
|
|
134
|
+
/**
 * Prompt text describing the record format for traits: an existing (merged)
 * record example, a brand-new record example, the merging rules, and good/bad
 * description samples. Sections are separated by a single blank line.
 */
function buildTraitExamples(): string {
  const existingRecord = `EXISTING TRAIT (being updated/merged):
{
"id": "uuid-of-canonical-record", // REQUIRED for updates
"type": "trait", // REQUIRED
"name": "Visual Learner", // REQUIRED - core trait name
"description": "Prefers diagrams and flowcharts when learning new concepts. Often sketches ideas while thinking.", // REQUIRED - behavioral evidence
"sentiment": 0.6, // -1.0 to 1.0 (average when merging)
"strength": 0.8, // 0.0 to 1.0, how strongly this manifests (take HIGHER value when merging)
"last_updated": "2024-03-11T12:00:00Z",
"learned_by": "persona-uuid-789", // OPTIONAL
"last_changed_by": "persona-uuid-012", // OPTIONAL
"persona_groups": ["default"] // OPTIONAL
}`;

  const newRecord = `NEW TRAIT (creating missing concept):
{
"type": "trait", // REQUIRED (NO "id" for new)
"name": "Direct Communicator", // REQUIRED
"description": "Values clarity over politeness. Gets to the point quickly in written communication.", // REQUIRED
"sentiment": 0.0, // Neutral default
"strength": 0.5 // Medium strength default
}`;

  const mergingRules = `MERGING RULES:
- strength: Take HIGHER value (0.7 + 0.9 → 0.9)
- sentiment: AVERAGE (0.6 + 0.2 → 0.4)
- description: UNION of unique details`;

  const descriptionGuide = `GOOD vs BAD descriptions:
✅ GOOD: "Asks clarifying questions before starting work. Prefers written specs over verbal instructions."
❌ BAD: "This person seems to be very detail-oriented based on observations..." (vague, uncertain)`;

  return [existingRecord, newRecord, mergingRules, descriptionGuide].join("\n\n");
}
|
|
167
|
+
|
|
168
|
+
/**
 * Prompt text describing the record format for topics: an existing (merged)
 * record example, a brand-new record example, the merging rules, a short
 * category glossary, and good/bad description samples. Sections are separated
 * by a single blank line.
 */
function buildTopicExamples(): string {
  const existingRecord = `EXISTING TOPIC (being updated/merged):
{
"id": "uuid-of-canonical-record", // REQUIRED for updates
"type": "topic", // REQUIRED
"name": "Software Architecture", // REQUIRED
"description": "System design patterns, microservices, event-driven architecture. Passionate about scalability and maintainability.", // REQUIRED
"sentiment": 0.8, // -1.0 to 1.0 (average when merging)
"category": "Interest", // REQUIRED - Interest, Goal, Dream, Conflict, Concern, Fear, Hope, Plan, Project (pick most common)
"exposure_current": 0.6, // 0.0 to 1.0, how recently discussed (take HIGHER when merging)
"exposure_desired": 0.9, // 0.0 to 1.0, how much they want to discuss (take HIGHER when merging)
"last_ei_asked": "2024-03-10T08:00:00Z", // OPTIONAL - ISO timestamp or null
"last_updated": "2024-03-11T12:00:00Z",
"learned_by": "persona-uuid-345", // OPTIONAL
"last_changed_by": "persona-uuid-678", // OPTIONAL
"persona_groups": ["tech", "work"] // OPTIONAL
}`;

  const newRecord = `NEW TOPIC (creating missing concept):
{
"type": "topic", // REQUIRED (NO "id" for new)
"name": "Kubernetes", // REQUIRED
"description": "Container orchestration platform. Interested in learning more about production deployment.", // REQUIRED
"sentiment": 0.5, // Default positive for interests
"category": "Goal", // Pick appropriate category
"exposure_current": 0.0, // Default - not discussed yet
"exposure_desired": 0.7 // How much they want to discuss
}`;

  const mergingRules = `MERGING RULES:
- exposure_current: Take HIGHER (0.6 + 0.3 → 0.6)
- exposure_desired: Take HIGHER (0.9 + 0.7 → 0.9)
- sentiment: AVERAGE (0.8 + 0.4 → 0.6)
- category: Pick most common or most specific
- last_ei_asked: Keep most recent non-null`;

  const categoryGlossary = `CATEGORIES explained:
- Interest: Things they enjoy, hobbies
- Goal: Things they want to achieve
- Concern/Fear: Things that worry them
- Plan/Project: Active work or intentions`;

  const descriptionGuide = `GOOD vs BAD descriptions:
✅ GOOD: "Functional programming paradigm. Loves immutability and pure functions. Uses in side projects."
❌ BAD: "The user mentioned functional programming in several conversations and seems interested..." (meta, wordy)`;

  return [existingRecord, newRecord, mergingRules, categoryGlossary, descriptionGuide].join("\n\n");
}
|
|
214
|
+
|
|
215
|
+
/**
 * Prompt text describing the record format for people: an existing (merged)
 * record example, a brand-new record example, the merging rules, relationship
 * guidance, and good/bad description samples. Sections are separated by a
 * single blank line.
 */
function buildPersonExamples(): string {
  const existingRecord = `EXISTING PERSON (being updated/merged):
{
"id": "uuid-of-canonical-record", // REQUIRED for updates
"type": "person", // REQUIRED
"name": "Sarah Chen", // REQUIRED - use full name if known
"description": "Former coworker at Microsoft. Led the Azure team. Known for clear technical writing. Now at Google.", // REQUIRED
"sentiment": 0.7, // -1.0 to 1.0 (average when merging)
"relationship": "coworker", // REQUIRED - friend, family, coworker, mentor, acquaintance, etc.
"exposure_current": 0.4, // 0.0 to 1.0, how recently discussed (take HIGHER when merging)
"exposure_desired": 0.6, // 0.0 to 1.0, how much they want to discuss (take HIGHER when merging)
"last_ei_asked": "2024-03-05T14:00:00Z", // OPTIONAL - ISO timestamp or null
"last_updated": "2024-03-11T12:00:00Z",
"learned_by": "persona-uuid-901", // OPTIONAL
"last_changed_by": "persona-uuid-234", // OPTIONAL
"persona_groups": ["work"] // OPTIONAL
}`;

  const newRecord = `NEW PERSON (creating missing concept):
{
"type": "person", // REQUIRED (NO "id" for new)
"name": "Alex Martinez", // REQUIRED
"description": "College roommate. Now works in finance. Keeps in touch occasionally.", // REQUIRED
"sentiment": 0.5, // Neutral-positive default
"relationship": "friend", // REQUIRED - must specify
"exposure_current": 0.0, // Default
"exposure_desired": 0.5 // Default medium interest
}`;

  const mergingRules = `MERGING RULES:
- exposure_current: Take HIGHER (0.4 + 0.2 → 0.4)
- exposure_desired: Take HIGHER (0.6 + 0.3 → 0.6)
- sentiment: AVERAGE (0.7 + 0.5 → 0.6)
- relationship: Pick most specific/accurate
- last_ei_asked: Keep most recent non-null`;

  const relationshipGuide = `RELATIONSHIP types:
- friend, family, coworker, mentor, acquaintance, partner, client, etc.
- Be specific: "former coworker" > "coworker" when applicable`;

  const descriptionGuide = `GOOD vs BAD descriptions:
✅ GOOD: "Manager at Amazon. Met through a conference in 2019. Shares interest in distributed systems."
❌ BAD: "Someone the user has mentioned a few times who they seem to know from work..." (vague)`;

  return [existingRecord, newRecord, mergingRules, relationshipGuide, descriptionGuide].join("\n\n");
}
|
|
@@ -2,6 +2,7 @@ export { buildPersonaExpirePrompt } from "./expire.js";
|
|
|
2
2
|
export { buildPersonaExplorePrompt } from "./explore.js";
|
|
3
3
|
export { buildDescriptionCheckPrompt } from "./description-check.js";
|
|
4
4
|
export { buildRewriteScanPrompt, buildRewritePrompt } from "./rewrite.js";
|
|
5
|
+
export { buildDedupPrompt } from "./dedup.js";
|
|
5
6
|
export type {
|
|
6
7
|
PersonaExpirePromptData,
|
|
7
8
|
PersonaExpireResult,
|
|
@@ -15,4 +16,6 @@ export type {
|
|
|
15
16
|
RewriteSubjectMatch,
|
|
16
17
|
RewritePromptData,
|
|
17
18
|
RewriteResult,
|
|
19
|
+
DedupPromptData,
|
|
20
|
+
DedupResult,
|
|
18
21
|
} from "./types.js";
|
|
@@ -95,3 +95,48 @@ export interface RewriteResult {
|
|
|
95
95
|
category?: string;
|
|
96
96
|
}>;
|
|
97
97
|
}
|
|
98
|
+
|
|
99
|
+
// =============================================================================
|
|
100
|
+
// DEDUP (Duplicate Entity Merge)
|
|
101
|
+
// =============================================================================
|
|
102
|
+
|
|
103
|
+
/**
 * Input to the dedup prompt builder: one cluster of potentially duplicate
 * entities to curate.
 */
export interface DedupPromptData {
  /** Candidate duplicates: 2+ items with 0.90+ cosine similarity. */
  cluster: DataItemBase[];
  /** Entity type shared by every item in the cluster. */
  itemType: RewriteItemType;
  /** Lowest and highest similarity seen in the cluster, e.g., { min: 0.90, max: 0.98 }. */
  similarityRange: { min: number; max: number };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Output of the dedup curator: merge decisions expressed as three lists
 * (update / remove / add).
 */
export interface DedupResult {
  /** Existing records to overwrite; each carries the `id` of the record it updates. */
  update: {
    id: string;
    type: RewriteItemType;
    name: string;
    description: string;
    sentiment?: number;
    strength?: number;
    confidence?: number;
    exposure_current?: number;
    exposure_desired?: number;
    relationship?: string;
    category?: string;
    last_updated?: string;
  }[];
  /** Records to delete, each paired with the record that absorbed its data. */
  remove: {
    /** UUID of duplicate */
    to_be_removed: string;
    /** UUID of canonical record */
    replaced_by: string;
  }[];
  /** Brand-new records; these carry no `id`. */
  add: {
    type: RewriteItemType;
    name: string;
    description: string;
    sentiment?: number;
    strength?: number;
    confidence?: number;
    exposure_current?: number;
    exposure_desired?: number;
    relationship?: string;
    category?: string;
  }[];
}
|
|
@@ -74,6 +74,7 @@ function getLastPersonaMessage(history: Message[]): Message | undefined {
|
|
|
74
74
|
* - Getting recent message history
|
|
75
75
|
*/
|
|
76
76
|
export function buildHeartbeatCheckPrompt(data: HeartbeatCheckPromptData): PromptOutput {
|
|
77
|
+
console.log(`[HeartbeatCheck ${data.persona.name}] Building prompt - topics: ${data.human.topics.length}, people: ${data.human.people.length}, inactive_days: ${data.inactive_days}, history: ${data.recent_history.length} messages`);
|
|
77
78
|
if (!data.persona?.name) {
|
|
78
79
|
throw new Error("buildHeartbeatCheckPrompt: persona.name is required");
|
|
79
80
|
}
|