agent-working-memory 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +87 -46
  2. package/dist/api/routes.d.ts.map +1 -1
  3. package/dist/api/routes.js +21 -5
  4. package/dist/api/routes.js.map +1 -1
  5. package/dist/cli.js +67 -67
  6. package/dist/coordination/index.d.ts +11 -0
  7. package/dist/coordination/index.d.ts.map +1 -0
  8. package/dist/coordination/index.js +39 -0
  9. package/dist/coordination/index.js.map +1 -0
  10. package/dist/coordination/mcp-tools.d.ts +8 -0
  11. package/dist/coordination/mcp-tools.d.ts.map +1 -0
  12. package/dist/coordination/mcp-tools.js +216 -0
  13. package/dist/coordination/mcp-tools.js.map +1 -0
  14. package/dist/coordination/routes.d.ts +9 -0
  15. package/dist/coordination/routes.d.ts.map +1 -0
  16. package/dist/coordination/routes.js +434 -0
  17. package/dist/coordination/routes.js.map +1 -0
  18. package/dist/coordination/schema.d.ts +12 -0
  19. package/dist/coordination/schema.d.ts.map +1 -0
  20. package/dist/coordination/schema.js +91 -0
  21. package/dist/coordination/schema.js.map +1 -0
  22. package/dist/coordination/schemas.d.ts +208 -0
  23. package/dist/coordination/schemas.d.ts.map +1 -0
  24. package/dist/coordination/schemas.js +109 -0
  25. package/dist/coordination/schemas.js.map +1 -0
  26. package/dist/coordination/stale.d.ts +25 -0
  27. package/dist/coordination/stale.d.ts.map +1 -0
  28. package/dist/coordination/stale.js +53 -0
  29. package/dist/coordination/stale.js.map +1 -0
  30. package/dist/index.js +21 -3
  31. package/dist/index.js.map +1 -1
  32. package/dist/mcp.js +90 -79
  33. package/dist/mcp.js.map +1 -1
  34. package/dist/storage/sqlite.d.ts +3 -0
  35. package/dist/storage/sqlite.d.ts.map +1 -1
  36. package/dist/storage/sqlite.js +285 -281
  37. package/dist/storage/sqlite.js.map +1 -1
  38. package/package.json +55 -55
  39. package/src/api/index.ts +3 -3
  40. package/src/api/routes.ts +551 -536
  41. package/src/cli.ts +397 -397
  42. package/src/coordination/index.ts +47 -0
  43. package/src/coordination/mcp-tools.ts +313 -0
  44. package/src/coordination/routes.ts +656 -0
  45. package/src/coordination/schema.ts +94 -0
  46. package/src/coordination/schemas.ts +136 -0
  47. package/src/coordination/stale.ts +89 -0
  48. package/src/core/decay.ts +63 -63
  49. package/src/core/embeddings.ts +88 -88
  50. package/src/core/hebbian.ts +93 -93
  51. package/src/core/index.ts +5 -5
  52. package/src/core/logger.ts +36 -36
  53. package/src/core/query-expander.ts +66 -66
  54. package/src/core/reranker.ts +101 -101
  55. package/src/engine/activation.ts +656 -656
  56. package/src/engine/connections.ts +103 -103
  57. package/src/engine/consolidation-scheduler.ts +125 -125
  58. package/src/engine/eval.ts +102 -102
  59. package/src/engine/eviction.ts +101 -101
  60. package/src/engine/index.ts +8 -8
  61. package/src/engine/retraction.ts +100 -100
  62. package/src/engine/staging.ts +74 -74
  63. package/src/index.ts +137 -121
  64. package/src/mcp.ts +1024 -1013
  65. package/src/storage/index.ts +3 -3
  66. package/src/storage/sqlite.ts +968 -963
  67. package/src/types/agent.ts +67 -67
  68. package/src/types/checkpoint.ts +46 -46
  69. package/src/types/engram.ts +217 -217
  70. package/src/types/eval.ts +100 -100
  71. package/src/types/index.ts +6 -6
@@ -1,103 +1,103 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Connection Engine — discovers links between memories.
5
- *
6
- * Runs asynchronously. When a new engram is written, the connection
7
- * engine checks it against existing memories and forms association
8
- * edges where resonance exceeds a threshold.
9
- *
10
- * Connection memories are first-class engrams — they can themselves
11
- * activate and form higher-order connections, producing emergent
12
- * associative structure over time.
13
- */
14
-
15
- import type { EngramStore } from '../storage/sqlite.js';
16
- import type { ActivationEngine } from './activation.js';
17
- import type { Engram } from '../types/index.js';
18
-
19
- export class ConnectionEngine {
20
- private store: EngramStore;
21
- private engine: ActivationEngine;
22
- private threshold: number;
23
- private queue: string[] = [];
24
- private processing = false;
25
-
26
- constructor(
27
- store: EngramStore,
28
- engine: ActivationEngine,
29
- threshold: number = 0.7
30
- ) {
31
- this.store = store;
32
- this.engine = engine;
33
- this.threshold = threshold;
34
- }
35
-
36
- /**
37
- * Queue a newly written engram for connection discovery.
38
- */
39
- enqueue(engramId: string): void {
40
- this.queue.push(engramId);
41
- if (!this.processing) {
42
- this.processQueue();
43
- }
44
- }
45
-
46
- private async processQueue(): Promise<void> {
47
- this.processing = true;
48
-
49
- while (this.queue.length > 0) {
50
- const engramId = this.queue.shift()!;
51
- const engram = this.store.getEngram(engramId);
52
- if (!engram || engram.stage !== 'active') continue;
53
-
54
- try {
55
- await this.findConnections(engram);
56
- } catch {
57
- // Connection discovery is best-effort — don't crash the server
58
- }
59
- }
60
-
61
- this.processing = false;
62
- }
63
-
64
- /**
65
- * Find and create connections for a given engram.
66
- */
67
- private async findConnections(engram: Engram): Promise<void> {
68
- const results = await this.engine.activate({
69
- agentId: engram.agentId,
70
- context: `${engram.concept} ${engram.content}`,
71
- limit: 5,
72
- minScore: this.threshold,
73
- internal: true,
74
- });
75
-
76
- // Filter out self and already-connected engrams
77
- const existing = this.store.getAssociationsFor(engram.id);
78
- const existingIds = new Set(existing.map(a =>
79
- a.fromEngramId === engram.id ? a.toEngramId : a.fromEngramId
80
- ));
81
-
82
- for (const result of results) {
83
- if (result.engram.id === engram.id) continue;
84
- if (existingIds.has(result.engram.id)) continue;
85
-
86
- // Create a connection association
87
- this.store.upsertAssociation(
88
- engram.id,
89
- result.engram.id,
90
- result.score,
91
- 'connection'
92
- );
93
-
94
- // Bidirectional
95
- this.store.upsertAssociation(
96
- result.engram.id,
97
- engram.id,
98
- result.score,
99
- 'connection'
100
- );
101
- }
102
- }
103
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Connection Engine — discovers links between memories.
5
+ *
6
+ * Runs asynchronously. When a new engram is written, the connection
7
+ * engine checks it against existing memories and forms association
8
+ * edges where resonance exceeds a threshold.
9
+ *
10
+ * Connection memories are first-class engrams — they can themselves
11
+ * activate and form higher-order connections, producing emergent
12
+ * associative structure over time.
13
+ */
14
+
15
+ import type { EngramStore } from '../storage/sqlite.js';
16
+ import type { ActivationEngine } from './activation.js';
17
+ import type { Engram } from '../types/index.js';
18
+
19
+ export class ConnectionEngine {
20
+ private store: EngramStore;
21
+ private engine: ActivationEngine;
22
+ private threshold: number;
23
+ private queue: string[] = [];
24
+ private processing = false;
25
+
26
+ constructor(
27
+ store: EngramStore,
28
+ engine: ActivationEngine,
29
+ threshold: number = 0.7
30
+ ) {
31
+ this.store = store;
32
+ this.engine = engine;
33
+ this.threshold = threshold;
34
+ }
35
+
36
+ /**
37
+ * Queue a newly written engram for connection discovery.
38
+ */
39
+ enqueue(engramId: string): void {
40
+ this.queue.push(engramId);
41
+ if (!this.processing) {
42
+ this.processQueue();
43
+ }
44
+ }
45
+
46
+ private async processQueue(): Promise<void> {
47
+ this.processing = true;
48
+
49
+ while (this.queue.length > 0) {
50
+ const engramId = this.queue.shift()!;
51
+ const engram = this.store.getEngram(engramId);
52
+ if (!engram || engram.stage !== 'active') continue;
53
+
54
+ try {
55
+ await this.findConnections(engram);
56
+ } catch {
57
+ // Connection discovery is best-effort — don't crash the server
58
+ }
59
+ }
60
+
61
+ this.processing = false;
62
+ }
63
+
64
+ /**
65
+ * Find and create connections for a given engram.
66
+ */
67
+ private async findConnections(engram: Engram): Promise<void> {
68
+ const results = await this.engine.activate({
69
+ agentId: engram.agentId,
70
+ context: `${engram.concept} ${engram.content}`,
71
+ limit: 5,
72
+ minScore: this.threshold,
73
+ internal: true,
74
+ });
75
+
76
+ // Filter out self and already-connected engrams
77
+ const existing = this.store.getAssociationsFor(engram.id);
78
+ const existingIds = new Set(existing.map(a =>
79
+ a.fromEngramId === engram.id ? a.toEngramId : a.fromEngramId
80
+ ));
81
+
82
+ for (const result of results) {
83
+ if (result.engram.id === engram.id) continue;
84
+ if (existingIds.has(result.engram.id)) continue;
85
+
86
+ // Create a connection association
87
+ this.store.upsertAssociation(
88
+ engram.id,
89
+ result.engram.id,
90
+ result.score,
91
+ 'connection'
92
+ );
93
+
94
+ // Bidirectional
95
+ this.store.upsertAssociation(
96
+ result.engram.id,
97
+ engram.id,
98
+ result.score,
99
+ 'connection'
100
+ );
101
+ }
102
+ }
103
+ }
@@ -1,125 +1,125 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Consolidation Scheduler — automatically triggers sleep cycles.
5
- *
6
- * Four triggers:
7
- * 1. Idle — agent inactive >10min → full consolidation
8
- * 2. Volume — 50+ writes since last consolidation → full consolidation
9
- * 3. Time — 30min since last consolidation → full consolidation
10
- * 4. Adaptive — retrieval precision <0.4 → full consolidation
11
- *
12
- * Also provides mini-consolidation for restore (fire-and-forget, lightweight).
13
- * Checks every 30 seconds across all active agents.
14
- */
15
-
16
- import type { EngramStore } from '../storage/sqlite.js';
17
- import type { ConsolidationEngine } from './consolidation.js';
18
-
19
- const TICK_INTERVAL_MS = 30_000; // Check every 30s
20
- const IDLE_THRESHOLD_MS = 10 * 60_000; // 10 minutes
21
- const VOLUME_THRESHOLD = 50; // 50 writes
22
- const TIME_THRESHOLD_MS = 30 * 60_000; // 30 minutes
23
- const PRECISION_THRESHOLD = 0.4; // Below this triggers consolidation
24
-
25
- export class ConsolidationScheduler {
26
- private timer: ReturnType<typeof setInterval> | null = null;
27
- private running = false;
28
-
29
- constructor(
30
- private store: EngramStore,
31
- private consolidationEngine: ConsolidationEngine,
32
- ) {}
33
-
34
- start(): void {
35
- if (this.timer) return;
36
- this.timer = setInterval(() => this.tick(), TICK_INTERVAL_MS);
37
- console.log('ConsolidationScheduler started (30s tick)');
38
- }
39
-
40
- stop(): void {
41
- if (this.timer) {
42
- clearInterval(this.timer);
43
- this.timer = null;
44
- }
45
- console.log('ConsolidationScheduler stopped');
46
- }
47
-
48
- /**
49
- * Mini-consolidation — lightweight, called from restore path.
50
- * Only runs replay + strengthen (phases 1-2), skips heavy phases.
51
- */
52
- async runMiniConsolidation(agentId: string): Promise<void> {
53
- if (this.running) return;
54
- this.running = true;
55
- try {
56
- console.log(`[scheduler] mini-consolidation for ${agentId}`);
57
- await this.consolidationEngine.consolidate(agentId);
58
- this.store.markConsolidation(agentId, true);
59
- } catch (err) {
60
- console.error(`[scheduler] mini-consolidation failed for ${agentId}:`, err);
61
- } finally {
62
- this.running = false;
63
- }
64
- }
65
-
66
- private tick(): void {
67
- if (this.running) return;
68
-
69
- const agents = this.store.getActiveAgents();
70
- const now = Date.now();
71
-
72
- for (const agent of agents) {
73
- const idleMs = now - agent.lastActivityAt.getTime();
74
- const sinceConsolidation = agent.lastConsolidationAt
75
- ? now - agent.lastConsolidationAt.getTime()
76
- : Infinity;
77
-
78
- let trigger: string | null = null;
79
-
80
- // 1. Idle trigger — agent stopped writing/recalling >10min ago
81
- if (idleMs > IDLE_THRESHOLD_MS && sinceConsolidation > IDLE_THRESHOLD_MS) {
82
- trigger = `idle (${Math.round(idleMs / 60_000)}min)`;
83
- }
84
-
85
- // 2. Volume trigger — many writes accumulated
86
- if (!trigger && agent.writeCount >= VOLUME_THRESHOLD) {
87
- trigger = `volume (${agent.writeCount} writes)`;
88
- }
89
-
90
- // 3. Time trigger — been too long since last consolidation
91
- if (!trigger && sinceConsolidation > TIME_THRESHOLD_MS) {
92
- trigger = `time (${Math.round(sinceConsolidation / 60_000)}min)`;
93
- }
94
-
95
- // 4. Adaptive trigger — precision is low
96
- if (!trigger) {
97
- try {
98
- const precision = this.store.getRetrievalPrecision(agent.agentId, 1);
99
- if (precision > 0 && precision < PRECISION_THRESHOLD) {
100
- trigger = `adaptive (precision ${(precision * 100).toFixed(0)}%)`;
101
- }
102
- } catch { /* precision check is non-fatal */ }
103
- }
104
-
105
- if (trigger) {
106
- this.runFullConsolidation(agent.agentId, trigger);
107
- return; // One consolidation per tick to avoid overload
108
- }
109
- }
110
- }
111
-
112
- private async runFullConsolidation(agentId: string, reason: string): Promise<void> {
113
- this.running = true;
114
- try {
115
- console.log(`[scheduler] full consolidation for ${agentId} — trigger: ${reason}`);
116
- const result = await this.consolidationEngine.consolidate(agentId);
117
- this.store.markConsolidation(agentId, false);
118
- console.log(`[scheduler] consolidation done: ${result.edgesStrengthened} strengthened, ${result.memoriesForgotten} forgotten`);
119
- } catch (err) {
120
- console.error(`[scheduler] consolidation failed for ${agentId}:`, err);
121
- } finally {
122
- this.running = false;
123
- }
124
- }
125
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Consolidation Scheduler — automatically triggers sleep cycles.
5
+ *
6
+ * Four triggers:
7
+ * 1. Idle — agent inactive >10min → full consolidation
8
+ * 2. Volume — 50+ writes since last consolidation → full consolidation
9
+ * 3. Time — 30min since last consolidation → full consolidation
10
+ * 4. Adaptive — retrieval precision <0.4 → full consolidation
11
+ *
12
+ * Also provides mini-consolidation for restore (fire-and-forget, lightweight).
13
+ * Checks every 30 seconds across all active agents.
14
+ */
15
+
16
+ import type { EngramStore } from '../storage/sqlite.js';
17
+ import type { ConsolidationEngine } from './consolidation.js';
18
+
19
+ const TICK_INTERVAL_MS = 30_000; // Check every 30s
20
+ const IDLE_THRESHOLD_MS = 10 * 60_000; // 10 minutes
21
+ const VOLUME_THRESHOLD = 50; // 50 writes
22
+ const TIME_THRESHOLD_MS = 30 * 60_000; // 30 minutes
23
+ const PRECISION_THRESHOLD = 0.4; // Below this triggers consolidation
24
+
25
+ export class ConsolidationScheduler {
26
+ private timer: ReturnType<typeof setInterval> | null = null;
27
+ private running = false;
28
+
29
+ constructor(
30
+ private store: EngramStore,
31
+ private consolidationEngine: ConsolidationEngine,
32
+ ) {}
33
+
34
+ start(): void {
35
+ if (this.timer) return;
36
+ this.timer = setInterval(() => this.tick(), TICK_INTERVAL_MS);
37
+ console.log('ConsolidationScheduler started (30s tick)');
38
+ }
39
+
40
+ stop(): void {
41
+ if (this.timer) {
42
+ clearInterval(this.timer);
43
+ this.timer = null;
44
+ }
45
+ console.log('ConsolidationScheduler stopped');
46
+ }
47
+
48
+ /**
49
+ * Mini-consolidation — lightweight, called from restore path.
50
+ * Only runs replay + strengthen (phases 1-2), skips heavy phases.
51
+ */
52
+ async runMiniConsolidation(agentId: string): Promise<void> {
53
+ if (this.running) return;
54
+ this.running = true;
55
+ try {
56
+ console.log(`[scheduler] mini-consolidation for ${agentId}`);
57
+ await this.consolidationEngine.consolidate(agentId);
58
+ this.store.markConsolidation(agentId, true);
59
+ } catch (err) {
60
+ console.error(`[scheduler] mini-consolidation failed for ${agentId}:`, err);
61
+ } finally {
62
+ this.running = false;
63
+ }
64
+ }
65
+
66
+ private tick(): void {
67
+ if (this.running) return;
68
+
69
+ const agents = this.store.getActiveAgents();
70
+ const now = Date.now();
71
+
72
+ for (const agent of agents) {
73
+ const idleMs = now - agent.lastActivityAt.getTime();
74
+ const sinceConsolidation = agent.lastConsolidationAt
75
+ ? now - agent.lastConsolidationAt.getTime()
76
+ : Infinity;
77
+
78
+ let trigger: string | null = null;
79
+
80
+ // 1. Idle trigger — agent stopped writing/recalling >10min ago
81
+ if (idleMs > IDLE_THRESHOLD_MS && sinceConsolidation > IDLE_THRESHOLD_MS) {
82
+ trigger = `idle (${Math.round(idleMs / 60_000)}min)`;
83
+ }
84
+
85
+ // 2. Volume trigger — many writes accumulated
86
+ if (!trigger && agent.writeCount >= VOLUME_THRESHOLD) {
87
+ trigger = `volume (${agent.writeCount} writes)`;
88
+ }
89
+
90
+ // 3. Time trigger — been too long since last consolidation
91
+ if (!trigger && sinceConsolidation > TIME_THRESHOLD_MS) {
92
+ trigger = `time (${Math.round(sinceConsolidation / 60_000)}min)`;
93
+ }
94
+
95
+ // 4. Adaptive trigger — precision is low
96
+ if (!trigger) {
97
+ try {
98
+ const precision = this.store.getRetrievalPrecision(agent.agentId, 1);
99
+ if (precision > 0 && precision < PRECISION_THRESHOLD) {
100
+ trigger = `adaptive (precision ${(precision * 100).toFixed(0)}%)`;
101
+ }
102
+ } catch { /* precision check is non-fatal */ }
103
+ }
104
+
105
+ if (trigger) {
106
+ this.runFullConsolidation(agent.agentId, trigger);
107
+ return; // One consolidation per tick to avoid overload
108
+ }
109
+ }
110
+ }
111
+
112
+ private async runFullConsolidation(agentId: string, reason: string): Promise<void> {
113
+ this.running = true;
114
+ try {
115
+ console.log(`[scheduler] full consolidation for ${agentId} — trigger: ${reason}`);
116
+ const result = await this.consolidationEngine.consolidate(agentId);
117
+ this.store.markConsolidation(agentId, false);
118
+ console.log(`[scheduler] consolidation done: ${result.edgesStrengthened} strengthened, ${result.memoriesForgotten} forgotten`);
119
+ } catch (err) {
120
+ console.error(`[scheduler] consolidation failed for ${agentId}:`, err);
121
+ } finally {
122
+ this.running = false;
123
+ }
124
+ }
125
+ }
@@ -1,102 +1,102 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Evaluation Engine — measures whether memory actually helps.
5
- *
6
- * Four dimensions (from Codex):
7
- * 1. Retrieval quality — precision@k, latency
8
- * 2. Connection quality — edge utility, stability
9
- * 3. Staging accuracy — promotion precision, discard regret
10
- * 4. Memory health — contamination tracking, confidence distribution
11
- *
12
- * Task impact (with/without memory) is measured externally via TaskTrial records.
13
- */
14
-
15
- import type { EngramStore } from '../storage/sqlite.js';
16
- import type { EvalMetrics } from '../types/index.js';
17
-
18
- export class EvalEngine {
19
- private store: EngramStore;
20
-
21
- constructor(store: EngramStore) {
22
- this.store = store;
23
- }
24
-
25
- /**
26
- * Compute aggregate metrics for an agent over a time window.
27
- */
28
- computeMetrics(agentId: string, windowHours: number = 24): EvalMetrics {
29
- const window = windowHours <= 24 ? '24h' : `${Math.round(windowHours / 24)}d`;
30
-
31
- // Retrieval quality
32
- const precision = this.store.getRetrievalPrecision(agentId, windowHours);
33
-
34
- // Staging accuracy
35
- const stagingMetrics = this.store.getStagingMetrics(agentId);
36
- const totalStaged = stagingMetrics.promoted + stagingMetrics.discarded + stagingMetrics.expired;
37
- const promotionPrecision = totalStaged > 0 ? stagingMetrics.promoted / totalStaged : 0;
38
-
39
- // Memory health
40
- const activeEngrams = this.store.getEngramsByAgent(agentId, 'active');
41
- const stagingEngrams = this.store.getEngramsByAgent(agentId, 'staging');
42
- const retractedEngrams = this.store.getEngramsByAgent(agentId, undefined, true)
43
- .filter(e => e.retracted);
44
- const allAssociations = this.store.getAllAssociations(agentId);
45
-
46
- const avgConfidence = activeEngrams.length > 0
47
- ? activeEngrams.reduce((sum, e) => sum + e.confidence, 0) / activeEngrams.length
48
- : 0;
49
-
50
- // Edge utility — % of edges that have been used in activation
51
- const usedEdges = allAssociations.filter(a => a.activationCount > 0);
52
- const edgeUtility = allAssociations.length > 0
53
- ? usedEdges.length / allAssociations.length
54
- : 0;
55
-
56
- // Edge survival — average age of edges that are still above minimum weight
57
- const livingEdges = allAssociations.filter(a => a.weight > 0.01);
58
- const avgSurvival = livingEdges.length > 0
59
- ? livingEdges.reduce((sum, a) =>
60
- sum + (Date.now() - a.createdAt.getTime()) / (1000 * 60 * 60 * 24), 0
61
- ) / livingEdges.length
62
- : 0;
63
-
64
- // Activation performance stats
65
- const activationStats = this.store.getActivationStats(agentId, windowHours);
66
-
67
- // Consolidated count
68
- const consolidatedCount = this.store.getConsolidatedCount(agentId);
69
-
70
- return {
71
- agentId,
72
- timestamp: new Date(),
73
- window,
74
-
75
- activationCount: activationStats.count,
76
- avgPrecisionAtK: precision,
77
- avgLatencyMs: activationStats.avgLatencyMs,
78
- p95LatencyMs: activationStats.p95LatencyMs,
79
-
80
- totalEdges: allAssociations.length,
81
- edgesUsedInActivation: usedEdges.length,
82
- edgeUtilityRate: edgeUtility,
83
- avgEdgeSurvivalDays: avgSurvival,
84
-
85
- totalStaged: totalStaged,
86
- promotedCount: stagingMetrics.promoted,
87
- discardedCount: stagingMetrics.discarded,
88
- promotionPrecision,
89
- discardRegret: 0, // Requires tracking discarded-then-rediscovered items
90
-
91
- activeEngramCount: activeEngrams.length,
92
- stagingEngramCount: stagingEngrams.length,
93
- retractedCount: retractedEngrams.length,
94
- consolidatedCount,
95
- avgConfidence,
96
-
97
- staleUsageCount: 0, // Requires per-activation age/confidence tracking
98
- retractionRate: retractedEngrams.length /
99
- Math.max(activeEngrams.length + retractedEngrams.length, 1),
100
- };
101
- }
102
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Evaluation Engine — measures whether memory actually helps.
5
+ *
6
+ * Four dimensions (from Codex):
7
+ * 1. Retrieval quality — precision@k, latency
8
+ * 2. Connection quality — edge utility, stability
9
+ * 3. Staging accuracy — promotion precision, discard regret
10
+ * 4. Memory health — contamination tracking, confidence distribution
11
+ *
12
+ * Task impact (with/without memory) is measured externally via TaskTrial records.
13
+ */
14
+
15
+ import type { EngramStore } from '../storage/sqlite.js';
16
+ import type { EvalMetrics } from '../types/index.js';
17
+
18
+ export class EvalEngine {
19
+ private store: EngramStore;
20
+
21
+ constructor(store: EngramStore) {
22
+ this.store = store;
23
+ }
24
+
25
+ /**
26
+ * Compute aggregate metrics for an agent over a time window.
27
+ */
28
+ computeMetrics(agentId: string, windowHours: number = 24): EvalMetrics {
29
+ const window = windowHours <= 24 ? '24h' : `${Math.round(windowHours / 24)}d`;
30
+
31
+ // Retrieval quality
32
+ const precision = this.store.getRetrievalPrecision(agentId, windowHours);
33
+
34
+ // Staging accuracy
35
+ const stagingMetrics = this.store.getStagingMetrics(agentId);
36
+ const totalStaged = stagingMetrics.promoted + stagingMetrics.discarded + stagingMetrics.expired;
37
+ const promotionPrecision = totalStaged > 0 ? stagingMetrics.promoted / totalStaged : 0;
38
+
39
+ // Memory health
40
+ const activeEngrams = this.store.getEngramsByAgent(agentId, 'active');
41
+ const stagingEngrams = this.store.getEngramsByAgent(agentId, 'staging');
42
+ const retractedEngrams = this.store.getEngramsByAgent(agentId, undefined, true)
43
+ .filter(e => e.retracted);
44
+ const allAssociations = this.store.getAllAssociations(agentId);
45
+
46
+ const avgConfidence = activeEngrams.length > 0
47
+ ? activeEngrams.reduce((sum, e) => sum + e.confidence, 0) / activeEngrams.length
48
+ : 0;
49
+
50
+ // Edge utility — % of edges that have been used in activation
51
+ const usedEdges = allAssociations.filter(a => a.activationCount > 0);
52
+ const edgeUtility = allAssociations.length > 0
53
+ ? usedEdges.length / allAssociations.length
54
+ : 0;
55
+
56
+ // Edge survival — average age of edges that are still above minimum weight
57
+ const livingEdges = allAssociations.filter(a => a.weight > 0.01);
58
+ const avgSurvival = livingEdges.length > 0
59
+ ? livingEdges.reduce((sum, a) =>
60
+ sum + (Date.now() - a.createdAt.getTime()) / (1000 * 60 * 60 * 24), 0
61
+ ) / livingEdges.length
62
+ : 0;
63
+
64
+ // Activation performance stats
65
+ const activationStats = this.store.getActivationStats(agentId, windowHours);
66
+
67
+ // Consolidated count
68
+ const consolidatedCount = this.store.getConsolidatedCount(agentId);
69
+
70
+ return {
71
+ agentId,
72
+ timestamp: new Date(),
73
+ window,
74
+
75
+ activationCount: activationStats.count,
76
+ avgPrecisionAtK: precision,
77
+ avgLatencyMs: activationStats.avgLatencyMs,
78
+ p95LatencyMs: activationStats.p95LatencyMs,
79
+
80
+ totalEdges: allAssociations.length,
81
+ edgesUsedInActivation: usedEdges.length,
82
+ edgeUtilityRate: edgeUtility,
83
+ avgEdgeSurvivalDays: avgSurvival,
84
+
85
+ totalStaged: totalStaged,
86
+ promotedCount: stagingMetrics.promoted,
87
+ discardedCount: stagingMetrics.discarded,
88
+ promotionPrecision,
89
+ discardRegret: 0, // Requires tracking discarded-then-rediscovered items
90
+
91
+ activeEngramCount: activeEngrams.length,
92
+ stagingEngramCount: stagingEngrams.length,
93
+ retractedCount: retractedEngrams.length,
94
+ consolidatedCount,
95
+ avgConfidence,
96
+
97
+ staleUsageCount: 0, // Requires per-activation age/confidence tracking
98
+ retractionRate: retractedEngrams.length /
99
+ Math.max(activeEngrams.length + retractedEngrams.length, 1),
100
+ };
101
+ }
102
+ }