agent-working-memory 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +428 -399
  2. package/dist/api/routes.d.ts.map +1 -1
  3. package/dist/api/routes.js +60 -5
  4. package/dist/api/routes.js.map +1 -1
  5. package/dist/cli.js +468 -68
  6. package/dist/cli.js.map +1 -1
  7. package/dist/coordination/index.d.ts +11 -0
  8. package/dist/coordination/index.d.ts.map +1 -0
  9. package/dist/coordination/index.js +39 -0
  10. package/dist/coordination/index.js.map +1 -0
  11. package/dist/coordination/mcp-tools.d.ts +8 -0
  12. package/dist/coordination/mcp-tools.d.ts.map +1 -0
  13. package/dist/coordination/mcp-tools.js +221 -0
  14. package/dist/coordination/mcp-tools.js.map +1 -0
  15. package/dist/coordination/routes.d.ts +9 -0
  16. package/dist/coordination/routes.d.ts.map +1 -0
  17. package/dist/coordination/routes.js +573 -0
  18. package/dist/coordination/routes.js.map +1 -0
  19. package/dist/coordination/schema.d.ts +12 -0
  20. package/dist/coordination/schema.d.ts.map +1 -0
  21. package/dist/coordination/schema.js +125 -0
  22. package/dist/coordination/schema.js.map +1 -0
  23. package/dist/coordination/schemas.d.ts +227 -0
  24. package/dist/coordination/schemas.d.ts.map +1 -0
  25. package/dist/coordination/schemas.js +125 -0
  26. package/dist/coordination/schemas.js.map +1 -0
  27. package/dist/coordination/stale.d.ts +27 -0
  28. package/dist/coordination/stale.d.ts.map +1 -0
  29. package/dist/coordination/stale.js +58 -0
  30. package/dist/coordination/stale.js.map +1 -0
  31. package/dist/engine/activation.d.ts.map +1 -1
  32. package/dist/engine/activation.js +119 -23
  33. package/dist/engine/activation.js.map +1 -1
  34. package/dist/engine/consolidation.d.ts.map +1 -1
  35. package/dist/engine/consolidation.js +27 -6
  36. package/dist/engine/consolidation.js.map +1 -1
  37. package/dist/index.js +100 -4
  38. package/dist/index.js.map +1 -1
  39. package/dist/mcp.js +149 -80
  40. package/dist/mcp.js.map +1 -1
  41. package/dist/storage/sqlite.d.ts +21 -0
  42. package/dist/storage/sqlite.d.ts.map +1 -1
  43. package/dist/storage/sqlite.js +331 -282
  44. package/dist/storage/sqlite.js.map +1 -1
  45. package/dist/types/engram.d.ts +24 -0
  46. package/dist/types/engram.d.ts.map +1 -1
  47. package/dist/types/engram.js.map +1 -1
  48. package/package.json +57 -55
  49. package/src/api/index.ts +3 -3
  50. package/src/api/routes.ts +600 -536
  51. package/src/cli.ts +850 -397
  52. package/src/coordination/index.ts +47 -0
  53. package/src/coordination/mcp-tools.ts +318 -0
  54. package/src/coordination/routes.ts +846 -0
  55. package/src/coordination/schema.ts +120 -0
  56. package/src/coordination/schemas.ts +155 -0
  57. package/src/coordination/stale.ts +97 -0
  58. package/src/core/decay.ts +63 -63
  59. package/src/core/embeddings.ts +88 -88
  60. package/src/core/hebbian.ts +93 -93
  61. package/src/core/index.ts +5 -5
  62. package/src/core/logger.ts +36 -36
  63. package/src/core/query-expander.ts +66 -66
  64. package/src/core/reranker.ts +101 -101
  65. package/src/engine/activation.ts +758 -656
  66. package/src/engine/connections.ts +103 -103
  67. package/src/engine/consolidation-scheduler.ts +125 -125
  68. package/src/engine/consolidation.ts +29 -6
  69. package/src/engine/eval.ts +102 -102
  70. package/src/engine/eviction.ts +101 -101
  71. package/src/engine/index.ts +8 -8
  72. package/src/engine/retraction.ts +100 -100
  73. package/src/engine/staging.ts +74 -74
  74. package/src/index.ts +208 -121
  75. package/src/mcp.ts +1093 -1013
  76. package/src/storage/index.ts +3 -3
  77. package/src/storage/sqlite.ts +1017 -963
  78. package/src/types/agent.ts +67 -67
  79. package/src/types/checkpoint.ts +46 -46
  80. package/src/types/engram.ts +245 -217
  81. package/src/types/eval.ts +100 -100
  82. package/src/types/index.ts +6 -6
@@ -1,217 +1,245 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Engram — the fundamental unit of agent memory.
5
- *
6
- * An engram represents a single memory trace with salience metadata,
7
- * staging lifecycle, retraction support, and optional task management.
8
- */
9
-
10
- export interface Engram {
11
- id: string;
12
- agentId: string;
13
- concept: string;
14
- content: string;
15
- embedding: number[] | null;
16
-
17
- // Cognitive scores
18
- confidence: number; // 0-1 Bayesian posterior — updated on retrieval feedback
19
- salience: number; // Write-time importance score
20
- accessCount: number; // For ACT-R decay calculation
21
- lastAccessed: Date;
22
- createdAt: Date;
23
-
24
- // Salience audit trail
25
- salienceFeatures: SalienceFeatures;
26
- reasonCodes: string[];
27
-
28
- // Lifecycle
29
- stage: EngramStage;
30
- ttl: number | null; // Milliseconds — only for staging buffer entries
31
-
32
- // Negative memory
33
- retracted: boolean;
34
- retractedBy: string | null; // ID of the engram that invalidated this one
35
- retractedAt: Date | null;
36
-
37
- // Tags for concept-based retrieval
38
- tags: string[];
39
-
40
- // Episode grouping
41
- episodeId: string | null;
42
-
43
- // Memory class
44
- memoryClass: MemoryClass;
45
-
46
- // Supersession "this replaces that" (not retraction — original wasn't wrong, just outdated)
47
- supersededBy: string | null; // ID of the engram that replaced this one
48
- supersedes: string | null; // ID of the engram this one replaces
49
-
50
- // Task management (null = not a task)
51
- taskStatus: TaskStatus | null;
52
- taskPriority: TaskPriority | null;
53
- blockedBy: string | null; // ID of blocking engram/task
54
- }
55
-
56
- export type EngramStage = 'staging' | 'active' | 'consolidated' | 'archived';
57
-
58
- export type TaskStatus = 'open' | 'in_progress' | 'blocked' | 'done';
59
- export type TaskPriority = 'urgent' | 'high' | 'medium' | 'low';
60
-
61
- /**
62
- * Memory class controls salience floor and recall priority.
63
- *
64
- * canonical: Source-of-truth facts (current state, decisions, architecture).
65
- * Never goes to staging. Minimum salience 0.7.
66
- * working: Normal observations, learnings, context (default).
67
- * Standard salience rules apply.
68
- * ephemeral: Temporary context (debugging traces, session-specific notes).
69
- * Stronger time decay, lower recall priority.
70
- */
71
- export type MemoryClass = 'canonical' | 'working' | 'ephemeral';
72
-
73
- /**
74
- * Raw feature scores that produced the salience score.
75
- * Persisted for auditability and tuning.
76
- */
77
- export interface SalienceFeatures {
78
- surprise: number;
79
- decisionMade: boolean;
80
- causalDepth: number;
81
- resolutionEffort: number;
82
- eventType: string;
83
- }
84
-
85
- export interface EngramCreate {
86
- agentId: string;
87
- concept: string;
88
- content: string;
89
- tags?: string[];
90
- embedding?: number[];
91
- salience?: number;
92
- confidence?: number;
93
- salienceFeatures?: SalienceFeatures;
94
- reasonCodes?: string[];
95
- episodeId?: string;
96
- ttl?: number;
97
- memoryClass?: MemoryClass;
98
- supersedes?: string;
99
- taskStatus?: TaskStatus;
100
- taskPriority?: TaskPriority;
101
- blockedBy?: string;
102
- }
103
-
104
- /**
105
- * Association — weighted edge between two engrams.
106
- * Strengthened by Hebbian co-activation, decays when unused.
107
- * Capped at MAX_EDGES_PER_ENGRAM to prevent graph explosion.
108
- */
109
- export interface Association {
110
- id: string;
111
- fromEngramId: string;
112
- toEngramId: string;
113
- weight: number; // Log-space, updated via Hebbian rule
114
- confidence: number; // Edge-level confidence (separate from node)
115
- type: AssociationType;
116
- activationCount: number; // How many times this edge contributed to retrieval
117
- createdAt: Date;
118
- lastActivated: Date;
119
- }
120
-
121
- export type AssociationType = 'hebbian' | 'connection' | 'causal' | 'temporal' | 'invalidation' | 'bridge';
122
-
123
- export const MAX_EDGES_PER_ENGRAM = 20;
124
-
125
- /**
126
- * Activation result — returned from the activation pipeline.
127
- */
128
- export interface ActivationResult {
129
- engram: Engram;
130
- score: number;
131
- phaseScores: PhaseScores; // Per-phase breakdown for explainability
132
- why: string; // Human-readable explanation
133
- associations: Association[];
134
- }
135
-
136
- /**
137
- * Per-phase scoring breakdown — full audit of how each phase contributed.
138
- */
139
- export interface PhaseScores {
140
- textMatch: number;
141
- vectorMatch: number;
142
- decayScore: number;
143
- hebbianBoost: number;
144
- graphBoost: number;
145
- confidenceGate: number;
146
- composite: number;
147
- rerankerScore: number; // Cross-encoder relevance (0-1), 0 if reranker disabled
148
- }
149
-
150
- export interface ActivationQuery {
151
- agentId: string;
152
- context: string;
153
- limit?: number;
154
- minScore?: number;
155
- includeStaging?: boolean;
156
- includeRetracted?: boolean;
157
- useReranker?: boolean; // Enable cross-encoder re-ranking (default: true)
158
- useExpansion?: boolean; // Enable query expansion (default: true)
159
- abstentionThreshold?: number; // Min reranker score to return results (default: 0)
160
- internal?: boolean; // Skip access count increment, Hebbian update, and event logging (for system calls)
161
- }
162
-
163
- /**
164
- * Search query — deterministic retrieval for diagnostics and debugging.
165
- * Separate from activation (which is cognitive/associative).
166
- */
167
- export interface SearchQuery {
168
- agentId: string;
169
- text?: string; // Exact or partial text match
170
- concept?: string; // Exact concept match
171
- tags?: string[]; // Tag filter (AND)
172
- stage?: EngramStage;
173
- retracted?: boolean;
174
- limit?: number;
175
- offset?: number;
176
- }
177
-
178
- /**
179
- * Retrieval feedback — agent reports whether a memory was useful.
180
- * Used to update confidence scores and eval metrics.
181
- */
182
- export interface RetrievalFeedback {
183
- engramId: string;
184
- useful: boolean;
185
- context: string; // What the agent was doing when it judged usefulness
186
- }
187
-
188
- /**
189
- * Retraction — marks a memory as invalid/wrong.
190
- */
191
- export interface Retraction {
192
- targetEngramId: string;
193
- reason: string;
194
- counterContent?: string; // Optional: what the correct information is
195
- agentId: string;
196
- }
197
-
198
- /**
199
- * Episode a temporal grouping of engrams from a session or time window.
200
- * Enables episode-first retrieval: find relevant episodes, then drill into engrams.
201
- */
202
- export interface Episode {
203
- id: string;
204
- agentId: string;
205
- label: string; // Short description (e.g., "Express migration session")
206
- embedding: number[] | null; // Centroid of member engram embeddings
207
- engramCount: number;
208
- startTime: Date;
209
- endTime: Date;
210
- createdAt: Date;
211
- }
212
-
213
- export interface EpisodeCreate {
214
- agentId: string;
215
- label: string;
216
- embedding?: number[];
217
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Engram — the fundamental unit of agent memory.
5
+ *
6
+ * An engram represents a single memory trace with salience metadata,
7
+ * staging lifecycle, retraction support, and optional task management.
8
+ */
9
+
10
+ export interface Engram {
11
+ id: string;
12
+ agentId: string;
13
+ concept: string;
14
+ content: string;
15
+ embedding: number[] | null;
16
+
17
+ // Cognitive scores
18
+ confidence: number; // 0-1 Bayesian posterior — updated on retrieval feedback
19
+ salience: number; // Write-time importance score
20
+ accessCount: number; // For ACT-R decay calculation
21
+ lastAccessed: Date;
22
+ createdAt: Date;
23
+
24
+ // Salience audit trail
25
+ salienceFeatures: SalienceFeatures;
26
+ reasonCodes: string[];
27
+
28
+ // Lifecycle
29
+ stage: EngramStage;
30
+ ttl: number | null; // Milliseconds — only for staging buffer entries
31
+
32
+ // Negative memory
33
+ retracted: boolean;
34
+ retractedBy: string | null; // ID of the engram that invalidated this one
35
+ retractedAt: Date | null;
36
+
37
+ // Tags for concept-based retrieval
38
+ tags: string[];
39
+
40
+ // Episode grouping
41
+ episodeId: string | null;
42
+
43
+ // Memory class
44
+ memoryClass: MemoryClass;
45
+
46
+ // Memory type (content classification)
47
+ memoryType: MemoryType;
48
+
49
+ // Supersession — "this replaces that" (not retraction — original wasn't wrong, just outdated)
50
+ supersededBy: string | null; // ID of the engram that replaced this one
51
+ supersedes: string | null; // ID of the engram this one replaces
52
+
53
+ // Task management (null = not a task)
54
+ taskStatus: TaskStatus | null;
55
+ taskPriority: TaskPriority | null;
56
+ blockedBy: string | null; // ID of blocking engram/task
57
+ }
58
+
59
+ export type EngramStage = 'staging' | 'active' | 'consolidated' | 'archived';
60
+
61
+ export type TaskStatus = 'open' | 'in_progress' | 'blocked' | 'done';
62
+ export type TaskPriority = 'urgent' | 'high' | 'medium' | 'low';
63
+
64
+ /**
65
+ * Memory class controls salience floor and recall priority.
66
+ *
67
+ * canonical: Source-of-truth facts (current state, decisions, architecture).
68
+ * Never goes to staging. Minimum salience 0.7.
69
+ * working: Normal observations, learnings, context (default).
70
+ * Standard salience rules apply.
71
+ * ephemeral: Temporary context (debugging traces, session-specific notes).
72
+ * Stronger time decay, lower recall priority.
73
+ */
74
+ export type MemoryClass = 'canonical' | 'working' | 'ephemeral';
75
+
76
+ /**
77
+ * Memory type — content classification for retrieval routing.
78
+ *
79
+ * episodic: Events, incidents, debugging sessions ("we did X because Y").
80
+ * semantic: Facts, decisions, patterns ("X is true", "we use Y for Z").
81
+ * procedural: How-to, steps, processes ("to deploy, run X then Y").
82
+ * unclassified: Default for backwards compatibility.
83
+ */
84
+ export type MemoryType = 'episodic' | 'semantic' | 'procedural' | 'unclassified';
85
+
86
+ /**
87
+ * Raw feature scores that produced the salience score.
88
+ * Persisted for auditability and tuning.
89
+ */
90
+ export interface SalienceFeatures {
91
+ surprise: number;
92
+ decisionMade: boolean;
93
+ causalDepth: number;
94
+ resolutionEffort: number;
95
+ eventType: string;
96
+ }
97
+
98
+ export interface EngramCreate {
99
+ agentId: string;
100
+ concept: string;
101
+ content: string;
102
+ tags?: string[];
103
+ embedding?: number[];
104
+ salience?: number;
105
+ confidence?: number;
106
+ salienceFeatures?: SalienceFeatures;
107
+ reasonCodes?: string[];
108
+ episodeId?: string;
109
+ ttl?: number;
110
+ memoryClass?: MemoryClass;
111
+ memoryType?: MemoryType;
112
+ supersedes?: string;
113
+ taskStatus?: TaskStatus;
114
+ taskPriority?: TaskPriority;
115
+ blockedBy?: string;
116
+ }
117
+
118
+ /**
119
+ * Association — weighted edge between two engrams.
120
+ * Strengthened by Hebbian co-activation, decays when unused.
121
+ * Capped at MAX_EDGES_PER_ENGRAM to prevent graph explosion.
122
+ */
123
+ export interface Association {
124
+ id: string;
125
+ fromEngramId: string;
126
+ toEngramId: string;
127
+ weight: number; // Log-space, updated via Hebbian rule
128
+ confidence: number; // Edge-level confidence (separate from node)
129
+ type: AssociationType;
130
+ activationCount: number; // How many times this edge contributed to retrieval
131
+ createdAt: Date;
132
+ lastActivated: Date;
133
+ }
134
+
135
+ export type AssociationType = 'hebbian' | 'connection' | 'causal' | 'temporal' | 'invalidation' | 'bridge';
136
+
137
+ export const MAX_EDGES_PER_ENGRAM = 20;
138
+
139
+ /**
140
+ * Activation result — returned from the activation pipeline.
141
+ */
142
+ export interface ActivationResult {
143
+ engram: Engram;
144
+ score: number;
145
+ phaseScores: PhaseScores; // Per-phase breakdown for explainability
146
+ why: string; // Human-readable explanation
147
+ associations: Association[];
148
+ }
149
+
150
+ /**
151
+ * Per-phase scoring breakdown — full audit of how each phase contributed.
152
+ */
153
+ export interface PhaseScores {
154
+ textMatch: number;
155
+ vectorMatch: number;
156
+ decayScore: number;
157
+ hebbianBoost: number;
158
+ graphBoost: number;
159
+ confidenceGate: number;
160
+ composite: number;
161
+ rerankerScore: number; // Cross-encoder relevance (0-1), 0 if reranker disabled
162
+ }
163
+
164
+ /**
165
+ * Query mode — controls how the activation pipeline weights its signals.
166
+ *
167
+ * targeted: Query has identifiers, ticket IDs, specific names. Boost BM25,
168
+ * narrow graph beam, stronger decay, stricter vector z-gate.
169
+ * exploratory: Vague/conceptual query. Boost vector/semantic signals, wider
170
+ * graph beam, weaker decay, relaxed z-gate.
171
+ * balanced: Default weights (current behavior).
172
+ * auto: Classify automatically based on query characteristics.
173
+ */
174
+ export type QueryMode = 'targeted' | 'exploratory' | 'balanced' | 'auto';
175
+
176
+ export interface ActivationQuery {
177
+ agentId: string;
178
+ context: string;
179
+ limit?: number;
180
+ minScore?: number;
181
+ includeStaging?: boolean;
182
+ includeRetracted?: boolean;
183
+ useReranker?: boolean; // Enable cross-encoder re-ranking (default: true)
184
+ useExpansion?: boolean; // Enable query expansion (default: true)
185
+ abstentionThreshold?: number; // Min reranker score to return results (default: 0)
186
+ internal?: boolean; // Skip access count increment, Hebbian update, and event logging (for system calls)
187
+ memoryType?: MemoryType; // Filter by memory type (episodic, semantic, procedural)
188
+ mode?: QueryMode; // Pipeline mode — 'auto' by default
189
+ }
190
+
191
+ /**
192
+ * Search query — deterministic retrieval for diagnostics and debugging.
193
+ * Separate from activation (which is cognitive/associative).
194
+ */
195
+ export interface SearchQuery {
196
+ agentId: string;
197
+ text?: string; // Exact or partial text match
198
+ concept?: string; // Exact concept match
199
+ tags?: string[]; // Tag filter (AND)
200
+ stage?: EngramStage;
201
+ retracted?: boolean;
202
+ limit?: number;
203
+ offset?: number;
204
+ }
205
+
206
+ /**
207
+ * Retrieval feedback — agent reports whether a memory was useful.
208
+ * Used to update confidence scores and eval metrics.
209
+ */
210
+ export interface RetrievalFeedback {
211
+ engramId: string;
212
+ useful: boolean;
213
+ context: string; // What the agent was doing when it judged usefulness
214
+ }
215
+
216
+ /**
217
+ * Retraction — marks a memory as invalid/wrong.
218
+ */
219
+ export interface Retraction {
220
+ targetEngramId: string;
221
+ reason: string;
222
+ counterContent?: string; // Optional: what the correct information is
223
+ agentId: string;
224
+ }
225
+
226
+ /**
227
+ * Episode — a temporal grouping of engrams from a session or time window.
228
+ * Enables episode-first retrieval: find relevant episodes, then drill into engrams.
229
+ */
230
+ export interface Episode {
231
+ id: string;
232
+ agentId: string;
233
+ label: string; // Short description (e.g., "Express migration session")
234
+ embedding: number[] | null; // Centroid of member engram embeddings
235
+ engramCount: number;
236
+ startTime: Date;
237
+ endTime: Date;
238
+ createdAt: Date;
239
+ }
240
+
241
+ export interface EpisodeCreate {
242
+ agentId: string;
243
+ label: string;
244
+ embedding?: number[];
245
+ }
package/src/types/eval.ts CHANGED
@@ -1,100 +1,100 @@
1
- // Copyright 2026 Robert Winter / Complete Ideas
2
- // SPDX-License-Identifier: Apache-2.0
3
- /**
4
- * Evaluation types — measuring whether memory actually helps.
5
- *
6
- * Four measurement dimensions:
7
- * 1. Retrieval quality (precision, recall, latency)
8
- * 2. Connection quality (edge utility, stability)
9
- * 3. Staging accuracy (promotion precision, discard regret)
10
- * 4. Task impact (with/without memory comparison)
11
- */
12
-
13
- /**
14
- * Single activation event record — logged for offline analysis.
15
- */
16
- export interface ActivationEvent {
17
- id: string;
18
- agentId: string;
19
- timestamp: Date;
20
- context: string;
21
- resultsReturned: number;
22
- topScore: number;
23
- latencyMs: number;
24
- engramIds: string[];
25
- feedback?: RetrievalFeedbackEvent[];
26
- }
27
-
28
- export interface RetrievalFeedbackEvent {
29
- engramId: string;
30
- useful: boolean;
31
- timestamp: Date;
32
- }
33
-
34
- /**
35
- * Staging lifecycle event — tracks promote/discard decisions.
36
- */
37
- export interface StagingEvent {
38
- engramId: string;
39
- agentId: string;
40
- action: 'promoted' | 'discarded' | 'expired';
41
- resonanceScore: number | null;
42
- timestamp: Date;
43
- ageMs: number; // How long it lived in staging
44
- }
45
-
46
- /**
47
- * Aggregate metrics snapshot — computed periodically.
48
- */
49
- export interface EvalMetrics {
50
- agentId: string;
51
- timestamp: Date;
52
- window: string; // e.g., "24h", "7d"
53
-
54
- // Retrieval quality
55
- activationCount: number;
56
- avgPrecisionAtK: number; // Of returned results, % judged useful
57
- avgLatencyMs: number;
58
- p95LatencyMs: number;
59
-
60
- // Connection quality
61
- totalEdges: number;
62
- edgesUsedInActivation: number;
63
- edgeUtilityRate: number; // % of edges that contributed to retrieval
64
- avgEdgeSurvivalDays: number;
65
-
66
- // Staging accuracy
67
- totalStaged: number;
68
- promotedCount: number;
69
- discardedCount: number;
70
- promotionPrecision: number; // % of promoted items later used
71
- discardRegret: number; // % of discarded items agent re-introduced
72
-
73
- // Memory health
74
- activeEngramCount: number;
75
- stagingEngramCount: number;
76
- retractedCount: number;
77
- consolidatedCount: number;
78
- avgConfidence: number;
79
-
80
- // Contamination tracking
81
- staleUsageCount: number; // Activations using outdated engrams
82
- retractionRate: number; // Rate of memories being invalidated
83
- }
84
-
85
- /**
86
- * Task trial — for with/without memory comparison.
87
- */
88
- export interface TaskTrial {
89
- id: string;
90
- agentId: string;
91
- taskDescription: string;
92
- memoryEnabled: boolean;
93
- startedAt: Date;
94
- completedAt: Date | null;
95
- success: boolean | null;
96
- stepsToCompletion: number;
97
- errorsEncountered: number;
98
- memoriesActivated: number;
99
- userCorrections: number;
100
- }
1
+ // Copyright 2026 Robert Winter / Complete Ideas
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * Evaluation types — measuring whether memory actually helps.
5
+ *
6
+ * Four measurement dimensions:
7
+ * 1. Retrieval quality (precision, recall, latency)
8
+ * 2. Connection quality (edge utility, stability)
9
+ * 3. Staging accuracy (promotion precision, discard regret)
10
+ * 4. Task impact (with/without memory comparison)
11
+ */
12
+
13
+ /**
14
+ * Single activation event record — logged for offline analysis.
15
+ */
16
+ export interface ActivationEvent {
17
+ id: string;
18
+ agentId: string;
19
+ timestamp: Date;
20
+ context: string;
21
+ resultsReturned: number;
22
+ topScore: number;
23
+ latencyMs: number;
24
+ engramIds: string[];
25
+ feedback?: RetrievalFeedbackEvent[];
26
+ }
27
+
28
+ export interface RetrievalFeedbackEvent {
29
+ engramId: string;
30
+ useful: boolean;
31
+ timestamp: Date;
32
+ }
33
+
34
+ /**
35
+ * Staging lifecycle event — tracks promote/discard decisions.
36
+ */
37
+ export interface StagingEvent {
38
+ engramId: string;
39
+ agentId: string;
40
+ action: 'promoted' | 'discarded' | 'expired';
41
+ resonanceScore: number | null;
42
+ timestamp: Date;
43
+ ageMs: number; // How long it lived in staging
44
+ }
45
+
46
+ /**
47
+ * Aggregate metrics snapshot — computed periodically.
48
+ */
49
+ export interface EvalMetrics {
50
+ agentId: string;
51
+ timestamp: Date;
52
+ window: string; // e.g., "24h", "7d"
53
+
54
+ // Retrieval quality
55
+ activationCount: number;
56
+ avgPrecisionAtK: number; // Of returned results, % judged useful
57
+ avgLatencyMs: number;
58
+ p95LatencyMs: number;
59
+
60
+ // Connection quality
61
+ totalEdges: number;
62
+ edgesUsedInActivation: number;
63
+ edgeUtilityRate: number; // % of edges that contributed to retrieval
64
+ avgEdgeSurvivalDays: number;
65
+
66
+ // Staging accuracy
67
+ totalStaged: number;
68
+ promotedCount: number;
69
+ discardedCount: number;
70
+ promotionPrecision: number; // % of promoted items later used
71
+ discardRegret: number; // % of discarded items agent re-introduced
72
+
73
+ // Memory health
74
+ activeEngramCount: number;
75
+ stagingEngramCount: number;
76
+ retractedCount: number;
77
+ consolidatedCount: number;
78
+ avgConfidence: number;
79
+
80
+ // Contamination tracking
81
+ staleUsageCount: number; // Activations using outdated engrams
82
+ retractionRate: number; // Rate of memories being invalidated
83
+ }
84
+
85
+ /**
86
+ * Task trial — for with/without memory comparison.
87
+ */
88
+ export interface TaskTrial {
89
+ id: string;
90
+ agentId: string;
91
+ taskDescription: string;
92
+ memoryEnabled: boolean;
93
+ startedAt: Date;
94
+ completedAt: Date | null;
95
+ success: boolean | null;
96
+ stepsToCompletion: number;
97
+ errorsEncountered: number;
98
+ memoriesActivated: number;
99
+ userCorrections: number;
100
+ }