agent-working-memory 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,445 +1,530 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Sleep Cycle — offline memory consolidation.
5
- *
6
- * Models the brain's consolidation during sleep:
7
- * 1. Replay — find clusters of semantically similar memories
8
- * 2. Strengthen — reinforce edges within clusters + access-weighted boost
9
- * 3. Bridge — create cross-cluster shortcuts between related topic areas
10
- * 4. Decay — weaken unused edges, prune dead ones
11
- * 5. Homeostasis — normalize outgoing edge weights to prevent hub explosion
12
- * 6. Forget — archive/delete memories that were never retrieved (age-gated)
13
- * 7. Sweep — promote or discard uncertain (staging) memories
14
- *
15
- * No artificial "summary nodes" are created. Instead, the associative
16
- * graph gets denser where knowledge overlaps and sparser where it doesn't.
17
- * The beam search graph walk in activation.ts naturally propagates through
18
- * these strengthened pathways.
19
- *
20
- * Run between sessions or on a timer (e.g., every few hours).
21
- */
22
-
23
- import { cosineSimilarity } from '../core/embeddings.js';
24
- import { strengthenAssociation, decayAssociation } from '../core/hebbian.js';
25
- import type { Engram } from '../types/index.js';
26
- import type { EngramStore } from '../storage/sqlite.js';
27
-
28
- /** Cosine similarity threshold for considering two memories related */
29
- const SIMILARITY_THRESHOLD = 0.65;
30
-
31
- /** Lower threshold for cross-cluster bridge edges */
32
- const BRIDGE_THRESHOLD = 0.25;
33
-
34
- /** Minimum edge weight to form a new connection during replay */
35
- const INITIAL_EDGE_WEIGHT = 0.3;
36
-
37
- /** Boost factor for strengthening existing edges between cluster members */
38
- const CONSOLIDATION_SIGNAL = 0.5;
39
-
40
- /** Max new edges to create per sleep cycle (prevent graph explosion) */
41
- const MAX_NEW_EDGES_PER_CYCLE = 50;
42
-
43
- /** Max bridge edges per cycle (cross-cluster shortcuts) */
44
- const MAX_BRIDGE_EDGES_PER_CYCLE = 20;
45
-
46
- /** Edge weight below which we prune during decay */
47
- const PRUNE_THRESHOLD = 0.01;
48
-
49
- /** Target total outgoing edge weight per node (homeostasis) */
50
- const HOMEOSTASIS_TARGET = 10.0;
51
-
52
- /** Grace period before forgetting curve starts (days) */
53
- const FORGET_GRACE_DAYS = 7;
54
-
55
- /** Age at which never-retrieved memories get archived (days) */
56
- const FORGET_ARCHIVE_DAYS = 30;
57
-
58
- /** Age at which archived, never-retrieved, unconnected memories get deleted (days) */
59
- const FORGET_DELETE_DAYS = 90;
60
-
61
- /** Cosine similarity above which two low-confidence memories are considered redundant */
62
- const REDUNDANCY_THRESHOLD = 0.85;
63
-
64
- /** Max redundant memories to prune per cycle (gradual, not sudden) */
65
- const MAX_REDUNDANCY_PRUNE_PER_CYCLE = 10;
66
-
67
- export interface ConsolidationResult {
68
- clustersFound: number;
69
- edgesStrengthened: number;
70
- edgesCreated: number;
71
- bridgesCreated: number;
72
- edgesDecayed: number;
73
- edgesPruned: number;
74
- edgesNormalized: number;
75
- memoriesForgotten: number;
76
- memoriesArchived: number;
77
- redundancyPruned: number;
78
- stagingPromoted: number;
79
- stagingDiscarded: number;
80
- engramsProcessed: number;
81
- }
82
-
83
- export class ConsolidationEngine {
84
- private store: EngramStore;
85
-
86
- constructor(store: EngramStore) {
87
- this.store = store;
88
- }
89
-
90
- /**
91
- * Run a full sleep cycle for an agent.
92
- *
93
- * Phase 1: Replay — find clusters of semantically similar memories
94
- * Phase 2: Strengthen — reinforce edges within clusters (access-weighted)
95
- * Phase 3: Bridge — create cross-cluster shortcuts
96
- * Phase 4: Decay — weaken unused edges, prune dead ones
97
- * Phase 5: Homeostasis — normalize outgoing edge weights per node
98
- * Phase 6: Forget — archive/delete memories never retrieved (age-gated)
99
- * Phase 7: Sweep — check staging buffer for resonance
100
- */
101
- consolidate(agentId: string): ConsolidationResult {
102
- const result: ConsolidationResult = {
103
- clustersFound: 0,
104
- edgesStrengthened: 0,
105
- edgesCreated: 0,
106
- bridgesCreated: 0,
107
- edgesDecayed: 0,
108
- edgesPruned: 0,
109
- edgesNormalized: 0,
110
- memoriesForgotten: 0,
111
- memoriesArchived: 0,
112
- redundancyPruned: 0,
113
- stagingPromoted: 0,
114
- stagingDiscarded: 0,
115
- engramsProcessed: 0,
116
- };
117
-
118
- // --- Phase 1: Replay ---
119
- // Get all active engrams with embeddings
120
- const engrams = this.store.getEngramsByAgent(agentId, 'active')
121
- .filter(e => e.embedding && e.embedding.length > 0);
122
-
123
- result.engramsProcessed = engrams.length;
124
- if (engrams.length < 2) return result;
125
-
126
- // Find clusters of related memories
127
- const clusters = this.findClusters(engrams);
128
- result.clustersFound = clusters.length;
129
-
130
- // --- Phase 2: Strengthen (access-weighted) ---
131
- // Memories that are retrieved more often get stronger consolidation.
132
- // This mirrors how the brain preferentially consolidates practiced memories.
133
- let newEdges = 0;
134
- for (const cluster of clusters) {
135
- for (let i = 0; i < cluster.length; i++) {
136
- for (let j = i + 1; j < cluster.length; j++) {
137
- const a = cluster[i];
138
- const b = cluster[j];
139
-
140
- // Access-weighted signal: more retrieved = stronger consolidation
141
- const accessFactor = Math.min(
142
- 1.0,
143
- 0.3 + 0.7 * Math.log1p(a.accessCount + b.accessCount) / Math.log1p(20),
144
- );
145
-
146
- const existing = this.store.getAssociation(a.id, b.id);
147
- if (existing) {
148
- const newWeight = strengthenAssociation(
149
- existing.weight, CONSOLIDATION_SIGNAL * accessFactor, 0.25,
150
- );
151
- this.store.upsertAssociation(
152
- a.id, b.id, newWeight, existing.type, existing.confidence,
153
- );
154
- result.edgesStrengthened++;
155
- } else if (newEdges < MAX_NEW_EDGES_PER_CYCLE) {
156
- this.store.upsertAssociation(
157
- a.id, b.id, INITIAL_EDGE_WEIGHT * accessFactor, 'connection',
158
- );
159
- newEdges++;
160
- result.edgesCreated++;
161
- }
162
- }
163
- }
164
- }
165
-
166
- // --- Phase 3: Cross-cluster bridge edges ---
167
- // For each pair of clusters, compute centroid similarity. If moderate
168
- // similarity exists but no direct edge, create a low-weight bridge.
169
- // This is what enables cross-topic retrieval to improve over time.
170
- if (clusters.length >= 2) {
171
- let bridges = 0;
172
- const centroids = clusters.map(cluster => this.computeCentroid(cluster));
173
-
174
- for (let i = 0; i < clusters.length && bridges < MAX_BRIDGE_EDGES_PER_CYCLE; i++) {
175
- for (let j = i + 1; j < clusters.length && bridges < MAX_BRIDGE_EDGES_PER_CYCLE; j++) {
176
- const sim = cosineSimilarity(centroids[i], centroids[j]);
177
- if (sim < BRIDGE_THRESHOLD || sim >= SIMILARITY_THRESHOLD) continue;
178
-
179
- // Find the best representative from each cluster (highest accessCount)
180
- const repA = clusters[i].reduce((best, e) => e.accessCount > best.accessCount ? e : best);
181
- const repB = clusters[j].reduce((best, e) => e.accessCount > best.accessCount ? e : best);
182
-
183
- const existing = this.store.getAssociation(repA.id, repB.id);
184
- if (!existing) {
185
- // Bridge weight proportional to inter-cluster similarity
186
- const bridgeWeight = 0.15 + 0.15 * ((sim - BRIDGE_THRESHOLD) / (SIMILARITY_THRESHOLD - BRIDGE_THRESHOLD));
187
- this.store.upsertAssociation(repA.id, repB.id, bridgeWeight, 'bridge');
188
- bridges++;
189
- result.bridgesCreated++;
190
- }
191
- }
192
- }
193
- }
194
-
195
- // --- Phase 4: Decay (confidence-modulated) ---
196
- // High-confidence edges decay slower. This means edges between memories
197
- // that received positive feedback are more durable — just like how
198
- // practiced memories are more resistant to forgetting in the brain.
199
- // Base half-life: 7 days. High-confidence (0.8+) gets up to 30 days.
200
- const engramConfMap = new Map(engrams.map(e => [e.id, e.confidence]));
201
- const associations = this.store.getAllAssociations(agentId);
202
- for (const assoc of associations) {
203
- const daysSince =
204
- (Date.now() - assoc.lastActivated.getTime()) / (1000 * 60 * 60 * 24);
205
- if (daysSince < 0.5) continue; // Skip recently activated
206
-
207
- // Confidence-modulated half-life: higher confidence = slower decay (capped at 3x)
208
- // Base: 7 days. Conf 0.5 → 7 days. Conf 0.8 → ~15 days. Conf 1.0 → 21 days (3x).
209
- // Cap prevents any edge from becoming immortal.
210
- const fromConf = engramConfMap.get(assoc.fromEngramId) ?? 0.5;
211
- const toConf = engramConfMap.get(assoc.toEngramId) ?? 0.5;
212
- const maxConf = Math.max(fromConf, toConf);
213
- const halfLifeDays = Math.min(7 * (1 + 2 * Math.max(0, (maxConf - 0.5) / 0.5)), 21);
214
-
215
- const newWeight = decayAssociation(assoc.weight, daysSince, halfLifeDays);
216
- if (newWeight < PRUNE_THRESHOLD) {
217
- this.store.deleteAssociation(assoc.id);
218
- result.edgesPruned++;
219
- } else if (Math.abs(newWeight - assoc.weight) > 0.001) {
220
- this.store.upsertAssociation(
221
- assoc.fromEngramId, assoc.toEngramId,
222
- newWeight, assoc.type, assoc.confidence,
223
- );
224
- result.edgesDecayed++;
225
- }
226
- }
227
-
228
- // --- Phase 5: Synaptic homeostasis ---
229
- // Normalize total outgoing edge weight per node to prevent hub explosion.
230
- // Nodes with many strong edges get scaled down so relative weights stay meaningful.
231
- const engramIds = new Set(engrams.map(e => e.id));
232
- for (const id of engramIds) {
233
- const outgoing = this.store.getOutgoingAssociations(id);
234
- const totalWeight = outgoing.reduce((sum, a) => sum + a.weight, 0);
235
- if (totalWeight > HOMEOSTASIS_TARGET) {
236
- const scale = HOMEOSTASIS_TARGET / totalWeight;
237
- for (const edge of outgoing) {
238
- const newWeight = edge.weight * scale;
239
- if (newWeight < PRUNE_THRESHOLD) {
240
- this.store.deleteAssociation(edge.id);
241
- result.edgesPruned++;
242
- } else {
243
- this.store.upsertAssociation(
244
- edge.fromEngramId, edge.toEngramId,
245
- newWeight, edge.type, edge.confidence,
246
- );
247
- }
248
- }
249
- result.edgesNormalized++;
250
- }
251
- }
252
-
253
- // --- Phase 6: Forgetting (age-gated) ---
254
- // Models how human memory actually works:
255
- // - New memories get a grace period (too new to judge)
256
- // - Retrieval acts as rehearsal — resets the forgetting clock
257
- // - Well-connected memories persist (edges = integration into knowledge)
258
- // - Old, isolated, unretrieved memories fade to archive (not deleted)
259
- // - Archived memories can still be recovered via deep search
260
- // - Only truly orphaned, ancient memories get deleted
261
- //
262
- // Key insight: outdated memories still have value as historical context.
263
- // "We used to use X" helps explain why we now use Y.
264
- for (const engram of engrams) {
265
- const ageDays = (Date.now() - engram.createdAt.getTime()) / (1000 * 60 * 60 * 24);
266
- if (ageDays < FORGET_GRACE_DAYS) continue; // Grace period — too new to judge
267
-
268
- const edgeCount = this.store.countAssociationsFor(engram.id);
269
-
270
- // Connections keep memories alive — well-integrated knowledge persists.
271
- // High-confidence memories (feedback-confirmed) need fewer edges to survive.
272
- // Models a senior dev who remembers standalone important facts.
273
- // Default: 3 edges. At conf 0.7: 2 edges. At conf 0.8+: 1 edge.
274
- const edgeProtectionThreshold = engram.confidence > 0.7
275
- ? Math.max(1, Math.round(3 - 4 * (engram.confidence - 0.5)))
276
- : 3;
277
- if (edgeCount > edgeProtectionThreshold) continue;
278
-
279
- // Compute effective forgetting threshold based on memory strength signals.
280
- // Rehearsal (access + feedback) extends protection but NEVER makes immortal.
281
- // Models a sharp 20-year senior dev: confirmed knowledge persists for months/years.
282
- // - Base: FORGET_ARCHIVE_DAYS (30 days)
283
- // - Access extends by log-scaled factor: 5 accesses ≈ 2x, 10 ≈ 2.5x
284
- // - Confidence modulates up to 4x (0.5→1x, 0.7→2.6x, 0.8→3.4x, 1.0→4x)
285
- // - Hard cap: 12x base (360 days) — even the sharpest memory fades after a year
286
- const accessFactor = 1 + Math.log1p(engram.accessCount) * 0.6;
287
- const confFactor = 1 + 3 * Math.max(0, (engram.confidence - 0.5) / 0.5);
288
- const effectiveArchiveDays = Math.min(
289
- FORGET_ARCHIVE_DAYS * accessFactor * confFactor,
290
- FORGET_ARCHIVE_DAYS * 12, // Hard cap: 12x base (360 days)
291
- );
292
-
293
- const daysSinceAccess = (Date.now() - engram.lastAccessed.getTime()) / (1000 * 60 * 60 * 24);
294
-
295
- if (engram.accessCount === 0 && ageDays > FORGET_ARCHIVE_DAYS) {
296
- // Never retrieved, old, weakly connected — archive
297
- this.store.updateStage(engram.id, 'archived');
298
- result.memoriesArchived++;
299
- } else if (engram.accessCount > 0 && daysSinceAccess > effectiveArchiveDays) {
300
- // Accessed before but not recently enough given its strength — archive
301
- this.store.updateStage(engram.id, 'archived');
302
- result.memoriesArchived++;
303
- }
304
- }
305
-
306
- // Check archived memories for deletion — only truly orphaned ancient ones
307
- const archived = this.store.getEngramsByAgent(agentId, 'archived');
308
- for (const engram of archived) {
309
- const ageDays = (Date.now() - engram.createdAt.getTime()) / (1000 * 60 * 60 * 24);
310
- const edgeCount = this.store.countAssociationsFor(engram.id);
311
-
312
- if (engram.accessCount === 0 && ageDays > FORGET_DELETE_DAYS && edgeCount === 0) {
313
- // Very old, never accessed, completely isolated — truly forgotten
314
- this.store.deleteEngram(engram.id);
315
- result.memoriesForgotten++;
316
- }
317
- // Otherwise: stay archived — still searchable, just not in active recall
318
- }
319
-
320
- // --- Phase 6.5: Redundancy pruning ---
321
- // A senior dev doesn't store 30 nearly-identical memories. When multiple
322
- // low-confidence memories are semantically redundant (cosine > 0.85), keep
323
- // only the one with highest accessCount + confidence and archive the rest.
324
- // This naturally defeats volume-based attacks (narcissistic interference,
325
- // spam) while improving signal-to-noise ratio for linked memories.
326
- // High-confidence memories (feedback-confirmed) are never pruned — they
327
- // represent verified knowledge worth keeping even if similar.
328
- // Only consider memories that are both low-confidence AND rarely accessed.
329
- // Memories retrieved 3+ times have proven useful — they stay even if similar
330
- // to others. This prevents pruning seed memories that match bulk templates.
331
- const lowConfEngrams = engrams.filter(e =>
332
- e.confidence < 0.6 && e.accessCount < 3 && e.embedding && e.embedding.length > 0);
333
- const pruned = new Set<string>();
334
- let redundancyCount = 0;
335
-
336
- // Sort by quality: highest accessCount + confidence first (survivors)
337
- const sortedLow = [...lowConfEngrams].sort((a, b) =>
338
- (b.accessCount + b.confidence * 10) - (a.accessCount + a.confidence * 10));
339
-
340
- for (let i = 0; i < sortedLow.length && redundancyCount < MAX_REDUNDANCY_PRUNE_PER_CYCLE; i++) {
341
- if (pruned.has(sortedLow[i].id)) continue;
342
- for (let j = i + 1; j < sortedLow.length && redundancyCount < MAX_REDUNDANCY_PRUNE_PER_CYCLE; j++) {
343
- if (pruned.has(sortedLow[j].id)) continue;
344
- if (!sortedLow[i].embedding || !sortedLow[j].embedding) continue;
345
-
346
- const sim = cosineSimilarity(sortedLow[i].embedding!, sortedLow[j].embedding!);
347
- if (sim >= REDUNDANCY_THRESHOLD) {
348
- // Archive the lower-quality duplicate
349
- this.store.updateStage(sortedLow[j].id, 'archived');
350
- pruned.add(sortedLow[j].id);
351
- redundancyCount++;
352
- }
353
- }
354
- }
355
- result.redundancyPruned = redundancyCount;
356
-
357
- // --- Phase 7: Sweep staging ---
358
- const staging = this.store.getEngramsByAgent(agentId, 'staging')
359
- .filter(e => e.embedding && e.embedding.length > 0);
360
-
361
- for (const staged of staging) {
362
- const ageMs = Date.now() - staged.createdAt.getTime();
363
-
364
- // Check if this staging memory resonates with any active memory
365
- let maxSim = 0;
366
- for (const active of engrams) {
367
- if (!active.embedding || !staged.embedding) continue;
368
- const sim = cosineSimilarity(staged.embedding, active.embedding);
369
- if (sim > maxSim) maxSim = sim;
370
- }
371
-
372
- if (maxSim >= 0.6) {
373
- // Resonates — promote to active
374
- this.store.updateStage(staged.id, 'active');
375
- result.stagingPromoted++;
376
- } else if (ageMs > 24 * 60 * 60 * 1000) {
377
- // Over 24h and no resonance — discard
378
- this.store.deleteEngram(staged.id);
379
- result.stagingDiscarded++;
380
- }
381
- // Otherwise: leave in staging, maybe next cycle
382
- }
383
-
384
- return result;
385
- }
386
-
387
- /**
388
- * Find clusters of semantically similar memories.
389
- * Greedy agglomerative — each memory belongs to at most one cluster.
390
- * Clusters of size 2+ are returned (pairs count — they link).
391
- */
392
- private findClusters(engrams: Engram[]): Engram[][] {
393
- const assigned = new Set<string>();
394
- const clusters: Engram[][] = [];
395
-
396
- // Seed clusters from most-accessed memories (strongest traces)
397
- const sorted = [...engrams].sort((a, b) => b.accessCount - a.accessCount);
398
-
399
- for (const seed of sorted) {
400
- if (assigned.has(seed.id)) continue;
401
-
402
- const cluster: Engram[] = [seed];
403
- assigned.add(seed.id);
404
-
405
- for (const candidate of sorted) {
406
- if (assigned.has(candidate.id)) continue;
407
- if (!seed.embedding || !candidate.embedding) continue;
408
-
409
- const sim = cosineSimilarity(seed.embedding, candidate.embedding);
410
- if (sim >= SIMILARITY_THRESHOLD) {
411
- cluster.push(candidate);
412
- assigned.add(candidate.id);
413
- }
414
- }
415
-
416
- if (cluster.length >= 2) {
417
- clusters.push(cluster);
418
- } else {
419
- for (const e of cluster) assigned.delete(e.id);
420
- }
421
- }
422
-
423
- return clusters;
424
- }
425
-
426
- /**
427
- * Compute the centroid (average embedding) of a cluster.
428
- */
429
- private computeCentroid(cluster: Engram[]): number[] {
430
- const withEmbed = cluster.filter(e => e.embedding && e.embedding.length > 0);
431
- if (withEmbed.length === 0) return [];
432
-
433
- const dim = withEmbed[0].embedding!.length;
434
- const centroid = new Array<number>(dim).fill(0);
435
- for (const e of withEmbed) {
436
- for (let i = 0; i < dim; i++) {
437
- centroid[i] += e.embedding![i];
438
- }
439
- }
440
- for (let i = 0; i < dim; i++) {
441
- centroid[i] /= withEmbed.length;
442
- }
443
- return centroid;
444
- }
445
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Sleep Cycle — offline memory consolidation.
5
+ *
6
+ * Models the brain's consolidation during sleep:
7
+ * 1. Replay — find clusters of semantically similar memories
8
+ * 2. Strengthen — reinforce edges within clusters + access-weighted boost
9
+ * 3. Bridge — create cross-cluster shortcuts between related topic areas
10
+ * 4. Decay — weaken unused edges, prune dead ones
11
+ * 5. Homeostasis — normalize outgoing edge weights to prevent hub explosion
12
+ * 6. Forget — archive/delete memories that were never retrieved (age-gated)
13
+ * 7. Sweep — promote or discard uncertain (staging) memories
14
+ *
15
+ * No artificial "summary nodes" are created. Instead, the associative
16
+ * graph gets denser where knowledge overlaps and sparser where it doesn't.
17
+ * The beam search graph walk in activation.ts naturally propagates through
18
+ * these strengthened pathways.
19
+ *
20
+ * Run between sessions or on a timer (e.g., every few hours).
21
+ */
22
+
23
+ import { cosineSimilarity } from '../core/embeddings.js';
24
+ import { strengthenAssociation, decayAssociation } from '../core/hebbian.js';
25
+ import type { Engram } from '../types/index.js';
26
+ import type { EngramStore } from '../storage/sqlite.js';
27
+
28
+ /** Cosine similarity threshold for considering two memories related */
29
+ const SIMILARITY_THRESHOLD = 0.65;
30
+
31
+ /** Lower threshold for cross-cluster bridge edges */
32
+ const BRIDGE_THRESHOLD = 0.25;
33
+
34
+ /** Minimum edge weight to form a new connection during replay */
35
+ const INITIAL_EDGE_WEIGHT = 0.3;
36
+
37
+ /** Boost factor for strengthening existing edges between cluster members */
38
+ const CONSOLIDATION_SIGNAL = 0.5;
39
+
40
+ /** Max new edges to create per sleep cycle (prevent graph explosion) */
41
+ const MAX_NEW_EDGES_PER_CYCLE = 50;
42
+
43
+ /** Max bridge edges per cycle (cross-cluster shortcuts) */
44
+ const MAX_BRIDGE_EDGES_PER_CYCLE = 20;
45
+
46
+ /** Edge weight below which we prune during decay */
47
+ const PRUNE_THRESHOLD = 0.01;
48
+
49
+ /** Target total outgoing edge weight per node (homeostasis) */
50
+ const HOMEOSTASIS_TARGET = 10.0;
51
+
52
+ /** Grace period before forgetting curve starts (days) */
53
+ const FORGET_GRACE_DAYS = 7;
54
+
55
+ /** Consolidation cycles before 0-access memories get archived */
56
+ const FORGET_CYCLE_THRESHOLD = 5;
57
+
58
+ /** Percentile of edge count distribution used for forgetting protection (0-1) */
59
+ const EDGE_PROTECTION_PERCENTILE = 0.25;
60
+
61
+ /** Age at which never-retrieved memories get archived (days) */
62
+ const FORGET_ARCHIVE_DAYS = 30;
63
+
64
+ /** Age at which archived, never-retrieved, unconnected memories get deleted (days) */
65
+ const FORGET_DELETE_DAYS = 90;
66
+
67
+ /** Cosine similarity above which two low-confidence memories are considered redundant */
68
+ const REDUNDANCY_THRESHOLD = 0.85;
69
+
70
+ /** Max redundant memories to prune per cycle (gradual, not sudden) */
71
+ const MAX_REDUNDANCY_PRUNE_PER_CYCLE = 10;
72
+
73
+ /** Max confidence drift per consolidation cycle (prevents runaway) */
74
+ const CONFIDENCE_DRIFT_CAP = 0.03;
75
+
76
+ /** Days without recall before confidence starts drifting down */
77
+ const CONFIDENCE_NEGLECT_DAYS = 30;
78
+
79
+ export interface ConsolidationResult {
80
+ clustersFound: number;
81
+ edgesStrengthened: number;
82
+ edgesCreated: number;
83
+ bridgesCreated: number;
84
+ edgesDecayed: number;
85
+ edgesPruned: number;
86
+ edgesNormalized: number;
87
+ memoriesForgotten: number;
88
+ memoriesArchived: number;
89
+ redundancyPruned: number;
90
+ confidenceAdjusted: number;
91
+ stagingPromoted: number;
92
+ stagingDiscarded: number;
93
+ engramsProcessed: number;
94
+ }
95
+
96
+ export class ConsolidationEngine {
97
+ private store: EngramStore;
98
+
99
+ constructor(store: EngramStore) {
100
+ this.store = store;
101
+ }
102
+
103
+ /**
104
+ * Run a full sleep cycle for an agent.
105
+ *
106
+ * Phase 1: Replay — find clusters of semantically similar memories
107
+ * Phase 2: Strengthen — reinforce edges within clusters (access-weighted)
108
+ * Phase 3: Bridge — create cross-cluster shortcuts
109
+ * Phase 4: Decay — weaken unused edges, prune dead ones
110
+ * Phase 5: Homeostasis — normalize outgoing edge weights per node
111
+ * Phase 6: Forget — archive/delete memories never retrieved (age-gated)
112
+ * Phase 6.7: Confidence drift — adjust confidence based on structural signals
113
+ * Phase 7: Sweep — check staging buffer for resonance
114
+ */
115
+ consolidate(agentId: string): ConsolidationResult {
116
+ const result: ConsolidationResult = {
117
+ clustersFound: 0,
118
+ edgesStrengthened: 0,
119
+ edgesCreated: 0,
120
+ bridgesCreated: 0,
121
+ edgesDecayed: 0,
122
+ edgesPruned: 0,
123
+ edgesNormalized: 0,
124
+ memoriesForgotten: 0,
125
+ memoriesArchived: 0,
126
+ redundancyPruned: 0,
127
+ confidenceAdjusted: 0,
128
+ stagingPromoted: 0,
129
+ stagingDiscarded: 0,
130
+ engramsProcessed: 0,
131
+ };
132
+
133
+ // --- Phase 1: Replay ---
134
+ // Get all active engrams with embeddings
135
+ const engrams = this.store.getEngramsByAgent(agentId, 'active')
136
+ .filter(e => e.embedding && e.embedding.length > 0);
137
+
138
+ result.engramsProcessed = engrams.length;
139
+ if (engrams.length < 2) return result;
140
+
141
+ // Find clusters of related memories
142
+ const clusters = this.findClusters(engrams);
143
+ result.clustersFound = clusters.length;
144
+
145
+ // --- Phase 2: Strengthen (access-weighted) ---
146
+ // Memories that are retrieved more often get stronger consolidation.
147
+ // This mirrors how the brain preferentially consolidates practiced memories.
148
+ let newEdges = 0;
149
+ for (const cluster of clusters) {
150
+ for (let i = 0; i < cluster.length; i++) {
151
+ for (let j = i + 1; j < cluster.length; j++) {
152
+ const a = cluster[i];
153
+ const b = cluster[j];
154
+
155
+ // Access-weighted signal: more retrieved = stronger consolidation
156
+ const accessFactor = Math.min(
157
+ 1.0,
158
+ 0.3 + 0.7 * Math.log1p(a.accessCount + b.accessCount) / Math.log1p(20),
159
+ );
160
+
161
+ const existing = this.store.getAssociation(a.id, b.id);
162
+ if (existing) {
163
+ const newWeight = strengthenAssociation(
164
+ existing.weight, CONSOLIDATION_SIGNAL * accessFactor, 0.25,
165
+ );
166
+ this.store.upsertAssociation(
167
+ a.id, b.id, newWeight, existing.type, existing.confidence,
168
+ );
169
+ result.edgesStrengthened++;
170
+ } else if (newEdges < MAX_NEW_EDGES_PER_CYCLE) {
171
+ this.store.upsertAssociation(
172
+ a.id, b.id, INITIAL_EDGE_WEIGHT * accessFactor, 'connection',
173
+ );
174
+ newEdges++;
175
+ result.edgesCreated++;
176
+ }
177
+ }
178
+ }
179
+ }
180
+
181
+ // --- Phase 3: Cross-cluster bridge edges ---
182
+ // For each pair of clusters, compute centroid similarity. If moderate
183
+ // similarity exists but no direct edge, create a low-weight bridge.
184
+ // This is what enables cross-topic retrieval to improve over time.
185
+ if (clusters.length >= 2) {
186
+ let bridges = 0;
187
+ const centroids = clusters.map(cluster => this.computeCentroid(cluster));
188
+
189
+ for (let i = 0; i < clusters.length && bridges < MAX_BRIDGE_EDGES_PER_CYCLE; i++) {
190
+ for (let j = i + 1; j < clusters.length && bridges < MAX_BRIDGE_EDGES_PER_CYCLE; j++) {
191
+ const sim = cosineSimilarity(centroids[i], centroids[j]);
192
+ if (sim < BRIDGE_THRESHOLD || sim >= SIMILARITY_THRESHOLD) continue;
193
+
194
+ // Find the best representative from each cluster (highest accessCount)
195
+ const repA = clusters[i].reduce((best, e) => e.accessCount > best.accessCount ? e : best);
196
+ const repB = clusters[j].reduce((best, e) => e.accessCount > best.accessCount ? e : best);
197
+
198
+ const existing = this.store.getAssociation(repA.id, repB.id);
199
+ if (!existing) {
200
+ // Bridge weight proportional to inter-cluster similarity
201
+ const bridgeWeight = 0.15 + 0.15 * ((sim - BRIDGE_THRESHOLD) / (SIMILARITY_THRESHOLD - BRIDGE_THRESHOLD));
202
+ this.store.upsertAssociation(repA.id, repB.id, bridgeWeight, 'bridge');
203
+ bridges++;
204
+ result.bridgesCreated++;
205
+ }
206
+ }
207
+ }
208
+ }
209
+
210
+ // --- Phase 4: Decay (confidence-modulated) ---
211
+ // High-confidence edges decay slower. This means edges between memories
212
+ // that received positive feedback are more durable — just like how
213
+ // practiced memories are more resistant to forgetting in the brain.
214
+ // Base half-life: 7 days. High-confidence (0.8+) gets up to 30 days.
215
+ const engramConfMap = new Map(engrams.map(e => [e.id, e.confidence]));
216
+ const associations = this.store.getAllAssociations(agentId);
217
+ for (const assoc of associations) {
218
+ const daysSince =
219
+ (Date.now() - assoc.lastActivated.getTime()) / (1000 * 60 * 60 * 24);
220
+ if (daysSince < 0.5) continue; // Skip recently activated
221
+
222
+ // Confidence-modulated half-life: higher confidence = slower decay (capped at 3x)
223
+ // Base: 7 days. Conf 0.5 → 7 days. Conf 0.8 → ~15 days. Conf 1.0 → 21 days (3x).
224
+ // Cap prevents any edge from becoming immortal.
225
+ const fromConf = engramConfMap.get(assoc.fromEngramId) ?? 0.5;
226
+ const toConf = engramConfMap.get(assoc.toEngramId) ?? 0.5;
227
+ const maxConf = Math.max(fromConf, toConf);
228
+ const halfLifeDays = Math.min(7 * (1 + 2 * Math.max(0, (maxConf - 0.5) / 0.5)), 21);
229
+
230
+ const newWeight = decayAssociation(assoc.weight, daysSince, halfLifeDays);
231
+ if (newWeight < PRUNE_THRESHOLD) {
232
+ this.store.deleteAssociation(assoc.id);
233
+ result.edgesPruned++;
234
+ } else if (Math.abs(newWeight - assoc.weight) > 0.001) {
235
+ this.store.upsertAssociation(
236
+ assoc.fromEngramId, assoc.toEngramId,
237
+ newWeight, assoc.type, assoc.confidence,
238
+ );
239
+ result.edgesDecayed++;
240
+ }
241
+ }
242
+
243
+ // --- Phase 5: Synaptic homeostasis ---
244
+ // Normalize total outgoing edge weight per node to prevent hub explosion.
245
+ // Nodes with many strong edges get scaled down so relative weights stay meaningful.
246
+ const engramIds = new Set(engrams.map(e => e.id));
247
+ for (const id of engramIds) {
248
+ const outgoing = this.store.getOutgoingAssociations(id);
249
+ const totalWeight = outgoing.reduce((sum, a) => sum + a.weight, 0);
250
+ if (totalWeight > HOMEOSTASIS_TARGET) {
251
+ const scale = HOMEOSTASIS_TARGET / totalWeight;
252
+ for (const edge of outgoing) {
253
+ const newWeight = edge.weight * scale;
254
+ if (newWeight < PRUNE_THRESHOLD) {
255
+ this.store.deleteAssociation(edge.id);
256
+ result.edgesPruned++;
257
+ } else {
258
+ this.store.upsertAssociation(
259
+ edge.fromEngramId, edge.toEngramId,
260
+ newWeight, edge.type, edge.confidence,
261
+ );
262
+ }
263
+ }
264
+ result.edgesNormalized++;
265
+ }
266
+ }
267
+
268
+ // --- Phase 6: Forgetting (age-gated) ---
269
+ // Models how human memory actually works:
270
+ // - New memories get a grace period (too new to judge)
271
+ // - Retrieval acts as rehearsal — resets the forgetting clock
272
+ // - Well-connected memories persist (edges = integration into knowledge)
273
+ // - Old, isolated, unretrieved memories fade to archive (not deleted)
274
+ // - Archived memories can still be recovered via deep search
275
+ // - Only truly orphaned, ancient memories get deleted
276
+ //
277
+ // Key insight: outdated memories still have value as historical context.
278
+ // "We used to use X" helps explain why we now use Y.
279
+ // Compute edge count percentile for relative protection threshold.
280
+ // With avg 12 edges/node, an absolute threshold of 3 protects everything.
281
+ // Use 25th percentile so "weakly connected" is relative to actual graph density.
282
+ const edgeCounts = engrams.map(e => this.store.countAssociationsFor(e.id));
283
+ edgeCounts.sort((a, b) => a - b);
284
+ const percentileIdx = Math.floor(edgeCounts.length * EDGE_PROTECTION_PERCENTILE);
285
+ const baseEdgeThreshold = edgeCounts.length > 0 ? edgeCounts[percentileIdx] : 3;
286
+
287
+ // Get consolidation cycle count for cycle-based archiving
288
+ const cycleCount = this.store.getConsolidationCycleCount(agentId);
289
+
290
+ for (const engram of engrams) {
291
+ const ageDays = (Date.now() - engram.createdAt.getTime()) / (1000 * 60 * 60 * 24);
292
+ if (ageDays < FORGET_GRACE_DAYS) continue; // Grace period — too new to judge
293
+
294
+ const edgeCount = this.store.countAssociationsFor(engram.id);
295
+
296
+ // Use relative threshold (percentile-based) instead of absolute.
297
+ // High-confidence memories need fewer edges to survive.
298
+ const confReduction = engram.confidence > 0.7
299
+ ? Math.min(0.6, (engram.confidence - 0.7) * 2)
300
+ : 0;
301
+ const edgeProtectionThreshold = Math.max(1, Math.round(baseEdgeThreshold * (1 - confReduction)));
302
+ if (edgeCount > edgeProtectionThreshold) continue;
303
+
304
+ // Cycle-based archive: 0-access memories archived after N cycles
305
+ // regardless of age. Handles small pools where time thresholds are too generous.
306
+ if (engram.accessCount === 0 && cycleCount >= FORGET_CYCLE_THRESHOLD) {
307
+ this.store.updateStage(engram.id, 'archived');
308
+ result.memoriesArchived++;
309
+ continue;
310
+ }
311
+
312
+ // Compute effective forgetting threshold based on memory strength signals.
313
+ // Rehearsal (access + feedback) extends protection but NEVER makes immortal.
314
+ // Models a sharp 20-year senior dev: confirmed knowledge persists for months/years.
315
+ // - Base: FORGET_ARCHIVE_DAYS (30 days)
316
+ // - Access extends by log-scaled factor: 5 accesses ≈ 2x, 10 ≈ 2.5x
317
+ // - Confidence modulates up to 4x (0.5→1x, 0.7→2.2x, 0.8→2.8x, 1.0→4x)
318
+ // - Hard cap: 12x base (360 days) — even the sharpest memory fades after a year
319
+ const accessFactor = 1 + Math.log1p(engram.accessCount) * 0.6;
320
+ const confFactor = 1 + 3 * Math.max(0, (engram.confidence - 0.5) / 0.5);
321
+ const effectiveArchiveDays = Math.min(
322
+ FORGET_ARCHIVE_DAYS * accessFactor * confFactor,
323
+ FORGET_ARCHIVE_DAYS * 12, // Hard cap: 12x base (360 days)
324
+ );
325
+
326
+ const daysSinceAccess = (Date.now() - engram.lastAccessed.getTime()) / (1000 * 60 * 60 * 24);
327
+
328
+ if (engram.accessCount === 0 && ageDays > FORGET_ARCHIVE_DAYS) {
329
+ // Never retrieved, old, weakly connected — archive
330
+ this.store.updateStage(engram.id, 'archived');
331
+ result.memoriesArchived++;
332
+ } else if (engram.accessCount > 0 && daysSinceAccess > effectiveArchiveDays) {
333
+ // Accessed before but not recently enough given its strength — archive
334
+ this.store.updateStage(engram.id, 'archived');
335
+ result.memoriesArchived++;
336
+ }
337
+ }
338
+
339
+ // Check archived memories for deletion — only truly orphaned ancient ones
340
+ const archived = this.store.getEngramsByAgent(agentId, 'archived');
341
+ for (const engram of archived) {
342
+ const ageDays = (Date.now() - engram.createdAt.getTime()) / (1000 * 60 * 60 * 24);
343
+ const edgeCount = this.store.countAssociationsFor(engram.id);
344
+
345
+ if (engram.accessCount === 0 && ageDays > FORGET_DELETE_DAYS && edgeCount === 0) {
346
+ // Very old, never accessed, completely isolated → truly forgotten
347
+ this.store.deleteEngram(engram.id);
348
+ result.memoriesForgotten++;
349
+ }
350
+ // Otherwise: stay archived — still searchable, just not in active recall
351
+ }
352
+
353
+ // --- Phase 6.5: Redundancy pruning ---
354
+ // A senior dev doesn't store 30 nearly-identical memories. When multiple
355
+ // low-confidence memories are semantically redundant (cosine > 0.85), keep
356
+ // only the one with highest accessCount + confidence and archive the rest.
357
+ // This naturally defeats volume-based attacks (narcissistic interference,
358
+ // spam) while improving signal-to-noise ratio for linked memories.
359
+ // High-confidence memories (feedback-confirmed) are never pruned — they
360
+ // represent verified knowledge worth keeping even if similar.
361
+ // Only consider memories that are both low-confidence AND rarely accessed.
362
+ // Memories retrieved 3+ times have proven useful — they stay even if similar
363
+ // to others. This prevents pruning seed memories that match bulk templates.
364
+ const lowConfEngrams = engrams.filter(e =>
365
+ e.confidence < 0.6 && e.accessCount < 3 && e.embedding && e.embedding.length > 0);
366
+ const pruned = new Set<string>();
367
+ let redundancyCount = 0;
368
+
369
+ // Sort by quality: highest accessCount + confidence first (survivors)
370
+ const sortedLow = [...lowConfEngrams].sort((a, b) =>
371
+ (b.accessCount + b.confidence * 10) - (a.accessCount + a.confidence * 10));
372
+
373
+ for (let i = 0; i < sortedLow.length && redundancyCount < MAX_REDUNDANCY_PRUNE_PER_CYCLE; i++) {
374
+ if (pruned.has(sortedLow[i].id)) continue;
375
+ for (let j = i + 1; j < sortedLow.length && redundancyCount < MAX_REDUNDANCY_PRUNE_PER_CYCLE; j++) {
376
+ if (pruned.has(sortedLow[j].id)) continue;
377
+ if (!sortedLow[i].embedding || !sortedLow[j].embedding) continue;
378
+
379
+ const sim = cosineSimilarity(sortedLow[i].embedding!, sortedLow[j].embedding!);
380
+ if (sim >= REDUNDANCY_THRESHOLD) {
381
+ // Archive the lower-quality duplicate
382
+ this.store.updateStage(sortedLow[j].id, 'archived');
383
+ pruned.add(sortedLow[j].id);
384
+ redundancyCount++;
385
+ }
386
+ }
387
+ }
388
+ result.redundancyPruned = redundancyCount;
389
+
390
+ // --- Phase 6.7: Confidence drift ---
391
+ // Adjust confidence based on structural signals that emerge from the graph.
392
+ // This makes confidence evolve over time without explicit feedback calls.
393
+ //
394
+ // Three signals:
395
+ // 1. Well-clustered memories (appeared in 1+ clusters) get a small boost
396
+ // — they're integrated into the knowledge graph, likely valuable.
397
+ // 2. Isolated memories (0 edges after consolidation) get a small penalty
398
+ // — nothing connects to them, possibly noise.
399
+ // 3. Neglected memories (not recalled in 30+ days) drift toward 0.3
400
+ // — if the system never needs them, they're probably not important.
401
+ //
402
+ // All adjustments are capped at ±0.03 per cycle to prevent runaway.
403
+ // Confidence is floored at 0.15 (never reaches 0 — retraction handles that).
404
+ // Confidence is capped at 0.85 (only explicit feedback can push above).
405
+ const clusteredIds = new Set<string>();
406
+ for (const cluster of clusters) {
407
+ for (const e of cluster) clusteredIds.add(e.id);
408
+ }
409
+
410
+ for (const engram of engrams) {
411
+ let drift = 0;
412
+ const edgeCount = this.store.countAssociationsFor(engram.id);
413
+ const daysSinceAccess = (Date.now() - engram.lastAccessed.getTime()) / (1000 * 60 * 60 * 24);
414
+
415
+ // Signal 1: Cluster membership → small boost
416
+ if (clusteredIds.has(engram.id)) {
417
+ drift += 0.01;
418
+ }
419
+
420
+ // Signal 2: Zero edges → small penalty
421
+ if (edgeCount === 0) {
422
+ drift -= 0.02;
423
+ }
424
+
425
+ // Signal 3: Long neglect → drift toward 0.3
426
+ if (daysSinceAccess > CONFIDENCE_NEGLECT_DAYS && engram.confidence > 0.3) {
427
+ drift -= 0.01;
428
+ }
429
+
430
+ // Apply with cap
431
+ if (Math.abs(drift) > 0.001) {
432
+ drift = Math.max(-CONFIDENCE_DRIFT_CAP, Math.min(CONFIDENCE_DRIFT_CAP, drift));
433
+ const newConf = Math.max(0.15, Math.min(0.85, engram.confidence + drift));
434
+ if (Math.abs(newConf - engram.confidence) > 0.001) {
435
+ this.store.updateConfidence(engram.id, newConf);
436
+ result.confidenceAdjusted++;
437
+ }
438
+ }
439
+ }
440
+
441
+ // --- Phase 7: Sweep staging ---
442
+ const staging = this.store.getEngramsByAgent(agentId, 'staging')
443
+ .filter(e => e.embedding && e.embedding.length > 0);
444
+
445
+ for (const staged of staging) {
446
+ const ageMs = Date.now() - staged.createdAt.getTime();
447
+
448
+ // Check if this staging memory resonates with any active memory
449
+ let maxSim = 0;
450
+ for (const active of engrams) {
451
+ if (!active.embedding || !staged.embedding) continue;
452
+ const sim = cosineSimilarity(staged.embedding, active.embedding);
453
+ if (sim > maxSim) maxSim = sim;
454
+ }
455
+
456
+ if (maxSim >= 0.6) {
457
+ // Resonates — promote to active with low confidence (barely made it)
458
+ this.store.updateStage(staged.id, 'active');
459
+ this.store.updateConfidence(staged.id, 0.40);
460
+ result.stagingPromoted++;
461
+ } else if (ageMs > 24 * 60 * 60 * 1000) {
462
+ // Over 24h and no resonance — discard
463
+ this.store.deleteEngram(staged.id);
464
+ result.stagingDiscarded++;
465
+ }
466
+ // Otherwise: leave in staging, maybe next cycle
467
+ }
468
+
469
+ return result;
470
+ }
471
+
472
+ /**
473
+ * Find clusters of semantically similar memories.
474
+ * Greedy agglomerative — each memory belongs to at most one cluster.
475
+ * Clusters of size 2+ are returned (pairs count — they link).
476
+ */
477
+ private findClusters(engrams: Engram[]): Engram[][] {
478
+ const assigned = new Set<string>();
479
+ const clusters: Engram[][] = [];
480
+
481
+ // Seed clusters from most-accessed memories (strongest traces)
482
+ const sorted = [...engrams].sort((a, b) => b.accessCount - a.accessCount);
483
+
484
+ for (const seed of sorted) {
485
+ if (assigned.has(seed.id)) continue;
486
+
487
+ const cluster: Engram[] = [seed];
488
+ assigned.add(seed.id);
489
+
490
+ for (const candidate of sorted) {
491
+ if (assigned.has(candidate.id)) continue;
492
+ if (!seed.embedding || !candidate.embedding) continue;
493
+
494
+ const sim = cosineSimilarity(seed.embedding, candidate.embedding);
495
+ if (sim >= SIMILARITY_THRESHOLD) {
496
+ cluster.push(candidate);
497
+ assigned.add(candidate.id);
498
+ }
499
+ }
500
+
501
+ if (cluster.length >= 2) {
502
+ clusters.push(cluster);
503
+ } else {
504
+ for (const e of cluster) assigned.delete(e.id);
505
+ }
506
+ }
507
+
508
+ return clusters;
509
+ }
510
+
511
+ /**
512
+ * Compute the centroid (average embedding) of a cluster.
513
+ */
514
+ private computeCentroid(cluster: Engram[]): number[] {
515
+ const withEmbed = cluster.filter(e => e.embedding && e.embedding.length > 0);
516
+ if (withEmbed.length === 0) return [];
517
+
518
+ const dim = withEmbed[0].embedding!.length;
519
+ const centroid = new Array<number>(dim).fill(0);
520
+ for (const e of withEmbed) {
521
+ for (let i = 0; i < dim; i++) {
522
+ centroid[i] += e.embedding![i];
523
+ }
524
+ }
525
+ for (let i = 0; i < dim; i++) {
526
+ centroid[i] /= withEmbed.length;
527
+ }
528
+ return centroid;
529
+ }
530
+ }