@199-bio/engram 0.8.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +5 -0
- package/boba-prompt.md +107 -0
- package/dist/consolidation/consolidator.d.ts.map +1 -1
- package/dist/consolidation/plan.d.ts.map +1 -0
- package/dist/index.js +62 -17
- package/dist/retrieval/hybrid.d.ts.map +1 -1
- package/dist/storage/database.d.ts.map +1 -1
- package/dist/transport/http.d.ts.map +1 -0
- package/dist/transport/index.d.ts.map +1 -0
- package/dist/web/chat-handler.d.ts.map +1 -1
- package/nixpacks.toml +11 -0
- package/package.json +2 -1
- package/railway.json +13 -0
- package/src/consolidation/consolidator.ts +343 -19
- package/src/consolidation/plan.ts +444 -0
- package/src/index.ts +65 -19
- package/src/retrieval/hybrid.ts +63 -3
- package/src/storage/database.ts +307 -0
- package/src/transport/http.ts +111 -0
- package/src/transport/index.ts +24 -0
- package/src/web/chat-handler.ts +58 -15
- package/src/web/static/app.js +612 -360
- package/src/web/static/index.html +377 -130
- package/src/web/static/style.css +1249 -672
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consolidation Plan
|
|
3
|
+
*
|
|
4
|
+
* Implements a Standard Operating Procedure for safe consolidation of large backlogs.
|
|
5
|
+
* Prevents damage through:
|
|
6
|
+
* - Assessment and recovery mode detection
|
|
7
|
+
* - Prioritization (recent + high importance first)
|
|
8
|
+
* - Rate limiting with delays between API calls
|
|
9
|
+
* - Budget tracking and cost caps
|
|
10
|
+
* - Checkpointing for resume capability
|
|
11
|
+
* - Validation with soft rollback triggers
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { randomUUID } from "crypto";
|
|
15
|
+
import { EngramDatabase, Memory, Episode, ConsolidationCheckpoint } from "../storage/database.js";
|
|
16
|
+
|
|
17
|
+
/**
 * USD price of a single token, keyed by model family.
 *
 * Every rate is written as "dollars per million tokens / 1_000_000" so the
 * per-million figure stays readable next to the value.
 *
 * The table is frozen at the type level (`as const`) so no code path can
 * accidentally reassign a rate at runtime.
 *
 * NOTE(review): these rates are hard-coded — confirm them against the current
 * price list for the exact models the consolidator calls.
 * NOTE(review): `thinking` is priced at the input rate. Verify how the provider
 * bills extended-thinking tokens; if they are billed at the output rate, every
 * Opus estimate derived from this table is too low.
 */
const PRICING = {
  opus: {
    input: 15 / 1_000_000, // $15 per 1M input tokens
    output: 75 / 1_000_000, // $75 per 1M output tokens
    thinking: 15 / 1_000_000, // $15 per 1M thinking tokens (same as input)
  },
  haiku: {
    input: 0.80 / 1_000_000, // $0.80 per 1M input tokens
    output: 4.00 / 1_000_000, // $4.00 per 1M output tokens
  },
} as const;
|
|
29
|
+
|
|
30
|
+
/**
 * Assumed token usage for one API call of each operation type. These are
 * planning figures used to price a run before it starts, not measurements.
 *
 * Declared `as const` so the estimates cannot be mutated at runtime.
 *
 * NOTE(review): `entityProfile` has no reader in this file — confirm it is
 * used elsewhere or remove it.
 */
const TOKEN_ESTIMATES = {
  episodeBatch: {
    input: 2000, // Conversation text + system prompt
    output: 1000, // Extracted memories JSON
  },
  memoryBatch: {
    input: 3000, // Memories + system prompt
    output: 2000, // Digest + contradictions
    thinking: 10000, // Extended thinking budget
  },
  entityProfile: {
    input: 4000,
    output: 3000,
    thinking: 16000,
  },
} as const;
|
|
47
|
+
|
|
48
|
+
/**
 * Snapshot of the pending consolidation work and of the budget available to
 * pay for it, as produced by `ConsolidationPlan.assessBacklog()`.
 */
export interface BacklogAssessment {
  /** Number of memories the database reported as not yet consolidated. */
  unconsolidatedMemories: number;
  /** Number of episodes the database reported as not yet consolidated. */
  unconsolidatedEpisodes: number;
  /** True when either backlog count exceeds the configured recovery threshold. */
  isRecoveryMode: boolean;
  /** Batches needed to clear the whole backlog (episodes + memories). */
  estimatedBatches: number;
  /** Sum of the estimated cost (USD) of every entry in `phases`. */
  estimatedCost: number;
  /** Configured daily spending cap in USD. */
  dailyBudget: number;
  /** Amount the database reports as already spent today, in USD. */
  dailySpent: number;
  /** `dailyBudget - dailySpent`, floored at zero. */
  budgetRemaining: number;
  /** True when `estimatedCost` fits inside `budgetRemaining`. */
  canProceed: boolean;
  /** Suggested number of batches for this run (reduced in recovery mode). */
  recommendedBatches: number;
  /** Per-phase breakdown of the planned work. */
  phases: PhasePlan[];
}
|
|
61
|
+
|
|
62
|
+
/** Planned workload for one phase of a consolidation run. */
export interface PhasePlan {
  /** Which phase this entry describes. */
  phase: "episodes" | "memories" | "decay" | "cleanup";
  /** Number of items the phase is expected to touch. */
  itemCount: number;
  /** Number of batches planned for the phase. */
  batchCount: number;
  /** Estimated cost of the phase in USD. */
  estimatedCost: number;
  /** Estimated duration of the phase in milliseconds. */
  estimatedTimeMs: number;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Caller-facing view of a run's progress, assembled by
 * `ConsolidationPlan.getProgress()` from the stored checkpoint plus the
 * errors recorded on the plan instance.
 */
export interface ConsolidationProgress {
  /** Identifier of the run being reported. */
  runId: string;
  /** Phase recorded on the checkpoint. */
  phase: ConsolidationCheckpoint["phase"];
  /** Batches finished so far. */
  batchesCompleted: number;
  /** Batches planned in total. */
  batchesTotal: number;
  /** Memories processed so far. */
  memoriesProcessed: number;
  /** Episodes processed so far. */
  episodesProcessed: number;
  /** Digests created so far. */
  digestsCreated: number;
  /** Contradictions found so far. */
  contradictionsFound: number;
  /** Tokens consumed so far. */
  tokensUsed: number;
  /** Cost recorded on the checkpoint, in USD. */
  estimatedCost: number;
  /** Error messages recorded during the run. */
  errors: string[];
  /** When the run started. */
  startedAt: Date;
  /** Milliseconds elapsed since `startedAt` at the time of the snapshot. */
  elapsedMs: number;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Result of evaluating one safety check against its threshold.
 *
 * NOTE(review): `"contradiction_rate"` is part of the union, but nothing in
 * this file produces a trigger of that type.
 */
export interface RollbackTrigger {
  /** Which safety check this entry reports on. */
  type: "error_rate" | "empty_digests" | "contradiction_rate" | "budget_exceeded";
  /** Limit the measured value is compared against. */
  threshold: number;
  /** Value measured at evaluation time. */
  current: number;
  /** True when `current` is over `threshold`. */
  triggered: boolean;
  /** Human-readable description of the check. */
  message: string;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Coordinates a single consolidation run: sizes the backlog, orders the work,
 * persists checkpoints through the database, and keeps the failure counters
 * that feed the rollback triggers.
 *
 * Each instance starts with a freshly generated run id; `resumeFrom()` swaps
 * it for the id of an earlier checkpoint.
 */
export class ConsolidationPlan {
  /** Row cap requested from the database when scanning the backlog. */
  private static readonly BACKLOG_SCAN_LIMIT = 10000;
  /** Episodes assumed per episode batch. */
  private static readonly EPISODES_PER_BATCH = 20;
  /** Memories assumed per memory batch. */
  private static readonly MEMORIES_PER_BATCH = 15;
  /** An episode phase is only planned once at least this many episodes wait. */
  private static readonly MIN_EPISODES_FOR_PHASE = 4;
  /** A memory phase is only planned once at least this many memories wait. */
  private static readonly MIN_MEMORIES_FOR_PHASE = 5;
  /** Assumed duration of one episode (Haiku) call, in milliseconds. */
  private static readonly EPISODE_CALL_MS = 2000;
  /** Assumed duration of one memory (Opus) call, in milliseconds. */
  private static readonly MEMORY_CALL_MS = 15000;
  /** Batch cap applied to the recommendation while in recovery mode. */
  private static readonly RECOVERY_MAX_BATCHES = 3;
  /** Rate-based triggers stay silent until this many samples exist. */
  private static readonly MIN_SAMPLES_FOR_RATE = 3;
  /** Access count at which the access component of the priority saturates. */
  private static readonly ACCESS_SATURATION = 4;
  /** Age, in days, at which the recency component reaches zero. */
  private static readonly RECENCY_WINDOW_DAYS = 7;

  private db: EngramDatabase;
  private runId: string;
  private errors: string[] = [];
  private emptyDigests: number = 0;
  private totalDigests: number = 0;
  private apiCalls: number = 0;
  private apiErrors: number = 0;

  /**
   * @param db - Database used for backlog queries, configuration values,
   *   spending figures and checkpoint persistence.
   */
  constructor(db: EngramDatabase) {
    this.db = db;
    this.runId = randomUUID();
  }

  /**
   * Assess the current backlog and create a consolidation plan.
   *
   * Counts unconsolidated memories and episodes (each scan is capped at
   * 10000 rows), reads the budget and batch limits from configuration, and
   * prices the episode and memory phases. Decay and cleanup phases are always
   * appended at zero cost.
   *
   * @returns The assessment, including whether the estimated cost fits into
   *   what is left of today's budget.
   */
  assessBacklog(): BacklogAssessment {
    const scanLimit = ConsolidationPlan.BACKLOG_SCAN_LIMIT;
    const memoryCount = this.db.getUnconsolidatedMemories(undefined, scanLimit).length;
    const episodeCount = this.db.getUnconsolidatedEpisodes(scanLimit).length;

    const recoveryThreshold = this.db.getConfigNumber("recovery_mode_threshold", 100);
    const isRecoveryMode = memoryCount > recoveryThreshold || episodeCount > recoveryThreshold;

    const dailyBudget = this.db.getConfigNumber("daily_budget_usd", 5.0);
    const dailySpent = this.db.getDailySpending();
    const budgetRemaining = Math.max(0, dailyBudget - dailySpent);

    // A misconfigured negative cap must not yield negative batch counts/costs.
    const maxBatchesPerRun = Math.max(0, this.db.getConfigNumber("max_batches_per_run", 5));

    const episodeBatches = Math.ceil(episodeCount / ConsolidationPlan.EPISODES_PER_BATCH);
    const memoryBatches = Math.ceil(memoryCount / ConsolidationPlan.MEMORIES_PER_BATCH);

    const delayMs = this.db.getConfigNumber("delay_between_calls_ms", 2000);
    const phases: PhasePlan[] = [];

    // Episode phase (Haiku - cheap)
    if (episodeCount >= ConsolidationPlan.MIN_EPISODES_FOR_PHASE) {
      const batchCount = Math.min(episodeBatches, maxBatchesPerRun);
      phases.push({
        phase: "episodes",
        itemCount: Math.min(episodeCount, batchCount * ConsolidationPlan.EPISODES_PER_BATCH),
        batchCount,
        estimatedCost: batchCount * this.estimateEpisodeBatchCost(),
        estimatedTimeMs: batchCount * (ConsolidationPlan.EPISODE_CALL_MS + delayMs),
      });
    }

    // Memory phase (Opus with thinking - expensive)
    if (memoryCount >= ConsolidationPlan.MIN_MEMORIES_FOR_PHASE) {
      const batchCount = Math.min(memoryBatches, maxBatchesPerRun);
      phases.push({
        phase: "memories",
        itemCount: Math.min(memoryCount, batchCount * ConsolidationPlan.MEMORIES_PER_BATCH),
        batchCount,
        estimatedCost: batchCount * this.estimateMemoryBatchCost(),
        estimatedTimeMs: batchCount * (ConsolidationPlan.MEMORY_CALL_MS + delayMs),
      });
    }

    // Decay and cleanup phases (no API calls)
    phases.push({ phase: "decay", itemCount: 0, batchCount: 0, estimatedCost: 0, estimatedTimeMs: 100 });
    phases.push({ phase: "cleanup", itemCount: 0, batchCount: 0, estimatedCost: 0, estimatedTimeMs: 100 });

    const estimatedCost = phases.reduce((sum, p) => sum + p.estimatedCost, 0);

    return {
      unconsolidatedMemories: memoryCount,
      unconsolidatedEpisodes: episodeCount,
      isRecoveryMode,
      // Whole backlog, i.e. NOT capped by max_batches_per_run.
      estimatedBatches: episodeBatches + memoryBatches,
      estimatedCost,
      dailyBudget,
      dailySpent,
      budgetRemaining,
      canProceed: estimatedCost <= budgetRemaining,
      // In recovery mode, be more conservative
      recommendedBatches: isRecoveryMode
        ? Math.min(ConsolidationPlan.RECOVERY_MAX_BATCHES, maxBatchesPerRun)
        : maxBatchesPerRun,
      phases,
    };
  }

  /**
   * Get prioritized memories for consolidation.
   *
   * Every unconsolidated memory gets a priority in [0, 1] built from four
   * components, each normalised to [0, 1] before weighting:
   * recency 40%, importance 30%, emotional weight 20%, access frequency 10%.
   * Recency falls linearly from 1 (now) to 0 (seven days old); access
   * frequency saturates at four accesses.
   *
   * @param limit - Maximum number of memories to return; values below zero
   *   are treated as zero.
   * @returns Memories ordered from highest to lowest priority.
   */
  getPrioritizedMemories(limit: number): Memory[] {
    const candidates = this.db.getUnconsolidatedMemories(undefined, ConsolidationPlan.BACKLOG_SCAN_LIMIT);

    const now = Date.now();
    const msPerDay = 24 * 60 * 60 * 1000;

    const ranked = candidates.map(memory => {
      const ageDays = (now - memory.timestamp.getTime()) / msPerDay;

      // Clamp on both sides so a timestamp in the future cannot score above 1.
      const recency = Math.min(1, Math.max(0, 1 - ageDays / ConsolidationPlan.RECENCY_WINDOW_DAYS));

      // Normalised to 0-1 so the 10% weight below is really 10%.
      const access = Math.min(1, memory.access_count / ConsolidationPlan.ACCESS_SATURATION);

      const priority =
        recency * 0.4 +
        memory.importance * 0.3 +
        memory.emotional_weight * 0.2 +
        access * 0.1;

      return { memory, priority };
    });

    // Highest priority first
    ranked.sort((a, b) => b.priority - a.priority);

    return ranked.slice(0, Math.max(0, limit)).map(entry => entry.memory);
  }

  /**
   * Get prioritized episodes for consolidation.
   *
   * @param limit - Maximum number of episodes requested from the database.
   * @returns The fetched episodes ordered oldest first, so they are processed
   *   in the order they happened.
   */
  getPrioritizedEpisodes(limit: number): Episode[] {
    const episodes = this.db.getUnconsolidatedEpisodes(limit);
    episodes.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
    return episodes;
  }

  /**
   * Create the checkpoint for this run.
   *
   * @param phase - Phase the run is starting in.
   * @param batchesTotal - Number of batches planned for the run.
   * @returns The checkpoint as returned by the database.
   */
  createCheckpoint(phase: ConsolidationCheckpoint["phase"], batchesTotal: number): ConsolidationCheckpoint {
    return this.db.createCheckpoint(this.runId, phase, batchesTotal);
  }

  /**
   * Update this run's checkpoint.
   *
   * Translates the camelCase fields used by callers into the snake_case
   * columns of the checkpoint. Fields omitted by the caller are forwarded as
   * `undefined`.
   *
   * @param updates - Any subset of the progress counters.
   */
  updateProgress(updates: Partial<{
    phase: ConsolidationCheckpoint["phase"];
    batchesCompleted: number;
    batchesTotal: number;
    memoriesProcessed: number;
    episodesProcessed: number;
    digestsCreated: number;
    contradictionsFound: number;
    tokensUsed: number;
    estimatedCost: number;
  }>): void {
    this.db.updateCheckpoint(this.runId, {
      phase: updates.phase,
      batches_completed: updates.batchesCompleted,
      batches_total: updates.batchesTotal,
      memories_processed: updates.memoriesProcessed,
      episodes_processed: updates.episodesProcessed,
      digests_created: updates.digestsCreated,
      contradictions_found: updates.contradictionsFound,
      tokens_used: updates.tokensUsed,
      estimated_cost_usd: updates.estimatedCost,
    });
  }

  /** Mark this run's checkpoint as complete. */
  complete(): void {
    this.db.completeCheckpoint(this.runId);
  }

  /**
   * Record a failure on this run's checkpoint.
   *
   * NOTE(review): only the `error` field is written here; confirm that the
   * database layer treats that as "run failed".
   *
   * @param error - Description of what went wrong.
   */
  fail(error: string): void {
    this.db.updateCheckpoint(this.runId, { error });
  }

  /**
   * Get current progress.
   *
   * Reads the checkpoint for this run and falls back to zeroed counters, the
   * "episodes" phase and "started now" when no checkpoint exists yet.
   *
   * @returns A snapshot; `errors` is a copy, so mutating it does not affect
   *   the plan.
   */
  getProgress(): ConsolidationProgress {
    const checkpoint = this.db.getCheckpoint(this.runId);

    return {
      runId: this.runId,
      phase: checkpoint?.phase ?? "episodes",
      batchesCompleted: checkpoint?.batches_completed ?? 0,
      batchesTotal: checkpoint?.batches_total ?? 0,
      memoriesProcessed: checkpoint?.memories_processed ?? 0,
      episodesProcessed: checkpoint?.episodes_processed ?? 0,
      digestsCreated: checkpoint?.digests_created ?? 0,
      contradictionsFound: checkpoint?.contradictions_found ?? 0,
      tokensUsed: checkpoint?.tokens_used ?? 0,
      estimatedCost: checkpoint?.estimated_cost_usd ?? 0,
      errors: [...this.errors],
      startedAt: checkpoint?.started_at ?? new Date(),
      elapsedMs: checkpoint ? Date.now() - checkpoint.started_at.getTime() : 0,
    };
  }

  /**
   * Check if we should resume a previous incomplete run.
   *
   * @returns The incomplete checkpoint reported by the database, or `null`.
   */
  checkForResume(): ConsolidationCheckpoint | null {
    return this.db.getIncompleteCheckpoint();
  }

  /**
   * Resume from a previous checkpoint by adopting its run id.
   *
   * The in-memory counters (API calls, digests, errors) are not restored.
   *
   * @param checkpoint - Checkpoint of the run to continue.
   */
  resumeFrom(checkpoint: ConsolidationCheckpoint): void {
    this.runId = checkpoint.run_id;
  }

  /**
   * Record an API call result for the error-rate trigger.
   *
   * @param success - Whether the call succeeded.
   * @param tokensUsed - Currently ignored; token totals are persisted through
   *   `updateProgress()` instead.
   */
  recordApiCall(success: boolean, tokensUsed?: number): void {
    this.apiCalls++;
    if (!success) {
      this.apiErrors++;
    }
  }

  /**
   * Record a digest creation result for the empty-digest trigger.
   *
   * @param isEmpty - Whether the digest came back empty.
   */
  recordDigest(isEmpty: boolean): void {
    this.totalDigests++;
    if (isEmpty) {
      this.emptyDigests++;
    }
  }

  /**
   * Record an error message; it is surfaced through `getProgress()`.
   *
   * @param error - Description of the error.
   */
  recordError(error: string): void {
    this.errors.push(error);
  }

  /**
   * Evaluate the rollback triggers.
   *
   * Returns one entry per check that could be evaluated — NOT only the ones
   * that fired; inspect `triggered` on each entry. The two rate checks are
   * skipped until at least three samples exist. The budget check is always
   * included. `message` is worded as a violation even when `triggered` is
   * false.
   *
   * @returns The evaluated triggers in the order: error rate, empty digests,
   *   budget.
   */
  checkRollbackTriggers(): RollbackTrigger[] {
    const triggers: RollbackTrigger[] = [];
    const minSamples = ConsolidationPlan.MIN_SAMPLES_FOR_RATE;

    // Error rate threshold
    const errorRateThreshold = this.db.getConfigNumber("error_rate_threshold", 0.3);
    if (this.apiCalls >= minSamples) {
      const errorRate = this.apiErrors / this.apiCalls;
      triggers.push({
        type: "error_rate",
        threshold: errorRateThreshold,
        current: errorRate,
        triggered: errorRate > errorRateThreshold,
        message: `API error rate ${(errorRate * 100).toFixed(1)}% exceeds ${(errorRateThreshold * 100).toFixed(0)}%`,
      });
    }

    // Empty digest threshold
    const emptyDigestThreshold = this.db.getConfigNumber("empty_digest_threshold", 0.2);
    if (this.totalDigests >= minSamples) {
      const emptyRate = this.emptyDigests / this.totalDigests;
      triggers.push({
        type: "empty_digests",
        threshold: emptyDigestThreshold,
        current: emptyRate,
        triggered: emptyRate > emptyDigestThreshold,
        message: `Empty digest rate ${(emptyRate * 100).toFixed(1)}% exceeds ${(emptyDigestThreshold * 100).toFixed(0)}%`,
      });
    }

    // Budget exceeded
    const dailyBudget = this.db.getConfigNumber("daily_budget_usd", 5.0);
    const dailySpent = this.db.getDailySpending();
    triggers.push({
      type: "budget_exceeded",
      threshold: dailyBudget,
      current: dailySpent,
      triggered: dailySpent > dailyBudget,
      message: `Daily spending $${dailySpent.toFixed(2)} exceeds budget $${dailyBudget.toFixed(2)}`,
    });

    return triggers;
  }

  /**
   * Delay between API calls (rate limiting).
   *
   * Waits for the configured `delay_between_calls_ms` (default 2000 ms).
   */
  async delay(): Promise<void> {
    const delayMs = this.db.getConfigNumber("delay_between_calls_ms", 2000);
    await new Promise(resolve => setTimeout(resolve, delayMs));
  }

  /** Estimate the USD cost of one episode batch (Haiku rates). */
  private estimateEpisodeBatchCost(): number {
    const { input, output } = TOKEN_ESTIMATES.episodeBatch;
    return (input * PRICING.haiku.input) + (output * PRICING.haiku.output);
  }

  /** Estimate the USD cost of one memory batch (Opus rates, thinking included). */
  private estimateMemoryBatchCost(): number {
    const { input, output, thinking } = TOKEN_ESTIMATES.memoryBatch;
    return (input * PRICING.opus.input) +
           (output * PRICING.opus.output) +
           (thinking * PRICING.opus.thinking);
  }

  /**
   * Calculate actual cost from token usage.
   *
   * NOTE(review): if the caller's `outputTokens` already contains the
   * thinking tokens, passing `thinkingTokens` as well counts them twice —
   * confirm against the call sites.
   *
   * @param model - Which price table to apply.
   * @param inputTokens - Input tokens consumed.
   * @param outputTokens - Output tokens produced.
   * @param thinkingTokens - Thinking tokens; only priced for `"opus"`.
   * @returns Cost in USD.
   */
  calculateCost(model: "opus" | "haiku", inputTokens: number, outputTokens: number, thinkingTokens?: number): number {
    const pricing = PRICING[model];
    let cost = (inputTokens * pricing.input) + (outputTokens * pricing.output);

    if (model === "opus" && thinkingTokens) {
      cost += thinkingTokens * PRICING.opus.thinking;
    }

    return cost;
  }

  /** Get the id of the run this plan currently writes checkpoints for. */
  getRunId(): string {
    return this.runId;
  }
}
|
package/src/index.ts
CHANGED
|
@@ -16,6 +16,9 @@ import path from "path";
|
|
|
16
16
|
import os from "os";
|
|
17
17
|
import fs from "fs";
|
|
18
18
|
|
|
19
|
+
import { getTransportMode, getHttpPort } from "./transport/index.js";
|
|
20
|
+
import { startHttpServer } from "./transport/http.js";
|
|
21
|
+
|
|
19
22
|
import { EngramDatabase } from "./storage/database.js";
|
|
20
23
|
import { KnowledgeGraph } from "./graph/knowledge-graph.js";
|
|
21
24
|
import { createRetriever } from "./retrieval/colbert.js";
|
|
@@ -112,17 +115,20 @@ process.on("SIGINT", () => {
|
|
|
112
115
|
process.on("exit", cleanup);
|
|
113
116
|
|
|
114
117
|
// Detect when parent process (Claude) dies by monitoring stdin
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
118
|
+
// The stdin watchdog is only installed in stdio mode. Both stream events lead
// to the same shutdown sequence; only the logged message differs.
if (getTransportMode() === "stdio") {
  const stdinShutdownMessages = {
    end: "[Engram] stdin closed, parent process likely died. Shutting down...",
    close: "[Engram] stdin closed, shutting down...",
  };

  // Object.entries keeps insertion order, so "end" is registered before "close".
  for (const [eventName, message] of Object.entries(stdinShutdownMessages)) {
    process.stdin.on(eventName, () => {
      console.error(message);
      cleanup();
      process.exit(0);
    });
  }
}
|
|
126
132
|
|
|
127
133
|
// ============ Initialize Components ============
|
|
128
134
|
|
|
@@ -496,7 +502,29 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
496
502
|
includeGraph: include_graph,
|
|
497
503
|
});
|
|
498
504
|
|
|
505
|
+
// Format digests (synthesized context - these provide broad understanding)
|
|
506
|
+
const digestsFormatted = response.digests.map((d) => ({
|
|
507
|
+
type: "digest" as const,
|
|
508
|
+
id: d.digest.id,
|
|
509
|
+
level: d.digest.level, // 1=session, 2=topic, 3=entity
|
|
510
|
+
topic: d.digest.topic,
|
|
511
|
+
content: d.digest.content,
|
|
512
|
+
source_count: d.digest.source_count,
|
|
513
|
+
period: {
|
|
514
|
+
start: d.digest.period_start.toISOString(),
|
|
515
|
+
end: d.digest.period_end.toISOString(),
|
|
516
|
+
},
|
|
517
|
+
relevance_score: d.score.toFixed(4),
|
|
518
|
+
// Key evidence - specific memories supporting this synthesis
|
|
519
|
+
key_memories: d.key_memories.map((m) => ({
|
|
520
|
+
id: m.id,
|
|
521
|
+
content: m.content,
|
|
522
|
+
timestamp: m.timestamp.toISOString(),
|
|
523
|
+
})),
|
|
524
|
+
}));
|
|
525
|
+
|
|
499
526
|
const formatted = response.results.map((r) => ({
|
|
527
|
+
type: "memory" as const,
|
|
500
528
|
id: r.memory.id,
|
|
501
529
|
content: r.memory.content,
|
|
502
530
|
source: r.memory.source,
|
|
@@ -511,6 +539,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
511
539
|
|
|
512
540
|
// Format connected memories (Hebbian associations)
|
|
513
541
|
const connectedFormatted = response.connected_memories.map((c) => ({
|
|
542
|
+
type: "connected" as const,
|
|
514
543
|
id: c.memory.id,
|
|
515
544
|
content: c.memory.content,
|
|
516
545
|
connected_to: c.connected_to,
|
|
@@ -524,10 +553,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
524
553
|
text: JSON.stringify({
|
|
525
554
|
recall_id: response.recall_id, // For memory_feedback
|
|
526
555
|
query,
|
|
556
|
+
// Digests first - they provide synthesized context
|
|
557
|
+
digests: digestsFormatted,
|
|
558
|
+
digests_count: digestsFormatted.length,
|
|
559
|
+
// Then individual memories for specific evidence
|
|
527
560
|
results: formatted,
|
|
528
561
|
count: formatted.length,
|
|
529
562
|
connected_memories: connectedFormatted,
|
|
530
|
-
hint: formatted.length > 0
|
|
563
|
+
hint: formatted.length > 0 || digestsFormatted.length > 0
|
|
564
|
+
? "Call memory_feedback with useful_memory_ids after answering"
|
|
565
|
+
: undefined,
|
|
531
566
|
}, null, 2),
|
|
532
567
|
},
|
|
533
568
|
],
|
|
@@ -847,16 +882,27 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
847
882
|
// ============ Main ============
|
|
848
883
|
|
|
849
884
|
/**
 * Process entry point.
 *
 * Resolves the transport mode once, performs the stdio-only housekeeping
 * (zombie cleanup and PID file), runs `initialize()`, and finally attaches the
 * MCP server to either the HTTP transport or — for every other mode — stdio.
 */
async function main() {
  const transportMode = getTransportMode();

  // Zombie cleanup only needed in stdio mode (local usage)
  if (transportMode === "stdio") {
    cleanupZombies();
    writePidFile();
  }

  await initialize();

  // HTTP mode - for Railway/remote deployment
  if (transportMode === "http") {
    const port = getHttpPort();
    await startHttpServer({ port, server });
    console.error(`[Engram] MCP server running in HTTP mode (PID ${process.pid})`);
    return;
  }

  // Stdio mode (default) - for local Claude Desktop/Cursor
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error(`[Engram] MCP server running on stdio (PID ${process.pid})`);
}
|
|
861
907
|
|
|
862
908
|
main().catch((error) => {
|
package/src/retrieval/hybrid.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Enhanced with temporal decay and salience scoring
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { EngramDatabase, Memory } from "../storage/database.js";
|
|
7
|
+
import { EngramDatabase, Memory, Digest } from "../storage/database.js";
|
|
8
8
|
import { KnowledgeGraph } from "../graph/knowledge-graph.js";
|
|
9
9
|
import { ColBERTRetriever, SimpleRetriever, SearchResult, Document } from "./colbert.js";
|
|
10
10
|
|
|
@@ -20,8 +20,15 @@ export interface HybridSearchResult {
|
|
|
20
20
|
};
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
/** One digest hit together with the evidence that backs it. */
export interface DigestSearchResult {
  /** The matched digest. */
  digest: Digest;
  /** Relevance score of the hit. */
  score: number;
  /** 2-3 source memories that best support this digest. */
  key_memories: Memory[];
}
|
|
28
|
+
|
|
23
29
|
export interface HybridSearchResponse {
|
|
24
30
|
results: HybridSearchResult[];
|
|
31
|
+
digests: DigestSearchResult[]; // Relevant synthesized context
|
|
25
32
|
recall_id: string; // For LLM feedback
|
|
26
33
|
connected_memories: Array<{
|
|
27
34
|
memory: Memory;
|
|
@@ -179,11 +186,15 @@ export class HybridSearch {
|
|
|
179
186
|
if (allCandidateIds.size === 0) {
|
|
180
187
|
return {
|
|
181
188
|
results: [],
|
|
189
|
+
digests: [],
|
|
182
190
|
recall_id: recallId,
|
|
183
191
|
connected_memories: [],
|
|
184
192
|
};
|
|
185
193
|
}
|
|
186
194
|
|
|
195
|
+
// Search digests via BM25 (top 3 relevant digests)
|
|
196
|
+
const digestResults = this.searchDigests(query, 3);
|
|
197
|
+
|
|
187
198
|
// Create rankings for RRF
|
|
188
199
|
const rankings: Map<string, { bm25?: number; semantic?: number; graph?: number; connected?: number }> = new Map();
|
|
189
200
|
|
|
@@ -335,13 +346,62 @@ export class HybridSearch {
|
|
|
335
346
|
}
|
|
336
347
|
}
|
|
337
348
|
|
|
349
|
+
// TOKEN EFFICIENCY: If digests are returned, reduce memory count
|
|
350
|
+
// Digest provides context (synthesis), memories provide evidence (specifics)
|
|
351
|
+
// Return fewer memories when we have good digest coverage
|
|
352
|
+
let finalResults = results;
|
|
353
|
+
if (digestResults.length > 0) {
|
|
354
|
+
// Get IDs of memories already covered by digests as key_memories
|
|
355
|
+
const coveredByDigests = new Set<string>();
|
|
356
|
+
digestResults.forEach(d => d.key_memories.forEach(m => coveredByDigests.add(m.id)));
|
|
357
|
+
|
|
358
|
+
// Keep memories not already shown as key_memories in digests
|
|
359
|
+
// Also limit to fewer since digests provide the context
|
|
360
|
+
const maxMemoriesWithDigests = Math.max(2, Math.floor(limit / 2));
|
|
361
|
+
finalResults = results
|
|
362
|
+
.filter(r => !coveredByDigests.has(r.memory.id))
|
|
363
|
+
.slice(0, maxMemoriesWithDigests);
|
|
364
|
+
}
|
|
365
|
+
|
|
338
366
|
return {
|
|
339
|
-
results,
|
|
367
|
+
results: finalResults,
|
|
368
|
+
digests: digestResults,
|
|
340
369
|
recall_id: recallId,
|
|
341
370
|
connected_memories: connectedMemories,
|
|
342
371
|
};
|
|
343
372
|
}
|
|
344
373
|
|
|
374
|
+
/**
 * Search digests via BM25 and pair each hit with its strongest source memories.
 *
 * For every digest returned by the database, the digest's source memories are
 * ranked by `importance + 0.1 * access_count` and the best three are kept.
 * A missing importance defaults to 0.5; an explicit importance of 0 is
 * respected.
 *
 * @param query - Text handed to the database's BM25 digest search.
 * @param limit - Maximum number of digests requested from the database.
 * @returns One entry per hit, carrying the absolute value of its BM25 score.
 *   Returns an empty list if the lookup throws.
 */
private searchDigests(query: string, limit: number): DigestSearchResult[] {
  const keyMemoryCount = 3;
  const evidenceRank = (m: Memory): number =>
    (m.importance ?? 0.5) + (m.access_count ?? 0) * 0.1;

  try {
    const digestHits = this.db.searchDigestsBM25(query, limit);

    return digestHits.map(hit => {
      // Sort a copy so the array handed out by the database is left untouched.
      const keyMemories = [...this.db.getDigestSources(hit.id)]
        .sort((a, b) => evidenceRank(b) - evidenceRank(a))
        .slice(0, keyMemoryCount);

      return {
        digest: hit,
        score: Math.abs(hit.score), // BM25 returns negative scores
        key_memories: keyMemories,
      };
    });
  } catch {
    // Best effort: a failing digest lookup degrades to "no digests".
    // NOTE(review): the error is dropped without a trace — consider logging it.
    return [];
  }
}
|
|
404
|
+
|
|
345
405
|
/**
|
|
346
406
|
* Expanded search when LLM needs more memories
|
|
347
407
|
* Relaxes constraints and follows weaker connections
|
|
@@ -355,7 +415,7 @@ export class HybridSearch {
|
|
|
355
415
|
// Get the original retrieval log
|
|
356
416
|
const log = this.db.getRetrievalLog(recallId);
|
|
357
417
|
if (!log) {
|
|
358
|
-
return { results: [], recall_id: recallId, connected_memories: [] };
|
|
418
|
+
return { results: [], digests: [], recall_id: recallId, connected_memories: [] };
|
|
359
419
|
}
|
|
360
420
|
|
|
361
421
|
// Search again with relaxed parameters
|