agent-working-memory 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +428 -399
- package/dist/api/routes.d.ts.map +1 -1
- package/dist/api/routes.js +60 -5
- package/dist/api/routes.js.map +1 -1
- package/dist/cli.js +468 -68
- package/dist/cli.js.map +1 -1
- package/dist/coordination/index.d.ts +11 -0
- package/dist/coordination/index.d.ts.map +1 -0
- package/dist/coordination/index.js +39 -0
- package/dist/coordination/index.js.map +1 -0
- package/dist/coordination/mcp-tools.d.ts +8 -0
- package/dist/coordination/mcp-tools.d.ts.map +1 -0
- package/dist/coordination/mcp-tools.js +221 -0
- package/dist/coordination/mcp-tools.js.map +1 -0
- package/dist/coordination/routes.d.ts +9 -0
- package/dist/coordination/routes.d.ts.map +1 -0
- package/dist/coordination/routes.js +573 -0
- package/dist/coordination/routes.js.map +1 -0
- package/dist/coordination/schema.d.ts +12 -0
- package/dist/coordination/schema.d.ts.map +1 -0
- package/dist/coordination/schema.js +125 -0
- package/dist/coordination/schema.js.map +1 -0
- package/dist/coordination/schemas.d.ts +227 -0
- package/dist/coordination/schemas.d.ts.map +1 -0
- package/dist/coordination/schemas.js +125 -0
- package/dist/coordination/schemas.js.map +1 -0
- package/dist/coordination/stale.d.ts +27 -0
- package/dist/coordination/stale.d.ts.map +1 -0
- package/dist/coordination/stale.js +58 -0
- package/dist/coordination/stale.js.map +1 -0
- package/dist/engine/activation.d.ts.map +1 -1
- package/dist/engine/activation.js +119 -23
- package/dist/engine/activation.js.map +1 -1
- package/dist/engine/consolidation.d.ts.map +1 -1
- package/dist/engine/consolidation.js +27 -6
- package/dist/engine/consolidation.js.map +1 -1
- package/dist/index.js +100 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +149 -80
- package/dist/mcp.js.map +1 -1
- package/dist/storage/sqlite.d.ts +21 -0
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +331 -282
- package/dist/storage/sqlite.js.map +1 -1
- package/dist/types/engram.d.ts +24 -0
- package/dist/types/engram.d.ts.map +1 -1
- package/dist/types/engram.js.map +1 -1
- package/package.json +57 -55
- package/src/api/index.ts +3 -3
- package/src/api/routes.ts +600 -536
- package/src/cli.ts +850 -397
- package/src/coordination/index.ts +47 -0
- package/src/coordination/mcp-tools.ts +318 -0
- package/src/coordination/routes.ts +846 -0
- package/src/coordination/schema.ts +120 -0
- package/src/coordination/schemas.ts +155 -0
- package/src/coordination/stale.ts +97 -0
- package/src/core/decay.ts +63 -63
- package/src/core/embeddings.ts +88 -88
- package/src/core/hebbian.ts +93 -93
- package/src/core/index.ts +5 -5
- package/src/core/logger.ts +36 -36
- package/src/core/query-expander.ts +66 -66
- package/src/core/reranker.ts +101 -101
- package/src/engine/activation.ts +758 -656
- package/src/engine/connections.ts +103 -103
- package/src/engine/consolidation-scheduler.ts +125 -125
- package/src/engine/consolidation.ts +29 -6
- package/src/engine/eval.ts +102 -102
- package/src/engine/eviction.ts +101 -101
- package/src/engine/index.ts +8 -8
- package/src/engine/retraction.ts +100 -100
- package/src/engine/staging.ts +74 -74
- package/src/index.ts +208 -121
- package/src/mcp.ts +1093 -1013
- package/src/storage/index.ts +3 -3
- package/src/storage/sqlite.ts +1017 -963
- package/src/types/agent.ts +67 -67
- package/src/types/checkpoint.ts +46 -46
- package/src/types/engram.ts +245 -217
- package/src/types/eval.ts +100 -100
- package/src/types/index.ts +6 -6
package/src/types/engram.ts
CHANGED
|
@@ -1,217 +1,245 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Engram — the fundamental unit of agent memory.
|
|
5
|
-
*
|
|
6
|
-
* An engram represents a single memory trace with salience metadata,
|
|
7
|
-
* staging lifecycle, retraction support, and optional task management.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
export interface Engram {
|
|
11
|
-
id: string;
|
|
12
|
-
agentId: string;
|
|
13
|
-
concept: string;
|
|
14
|
-
content: string;
|
|
15
|
-
embedding: number[] | null;
|
|
16
|
-
|
|
17
|
-
// Cognitive scores
|
|
18
|
-
confidence: number; // 0-1 Bayesian posterior — updated on retrieval feedback
|
|
19
|
-
salience: number; // Write-time importance score
|
|
20
|
-
accessCount: number; // For ACT-R decay calculation
|
|
21
|
-
lastAccessed: Date;
|
|
22
|
-
createdAt: Date;
|
|
23
|
-
|
|
24
|
-
// Salience audit trail
|
|
25
|
-
salienceFeatures: SalienceFeatures;
|
|
26
|
-
reasonCodes: string[];
|
|
27
|
-
|
|
28
|
-
// Lifecycle
|
|
29
|
-
stage: EngramStage;
|
|
30
|
-
ttl: number | null; // Milliseconds — only for staging buffer entries
|
|
31
|
-
|
|
32
|
-
// Negative memory
|
|
33
|
-
retracted: boolean;
|
|
34
|
-
retractedBy: string | null; // ID of the engram that invalidated this one
|
|
35
|
-
retractedAt: Date | null;
|
|
36
|
-
|
|
37
|
-
// Tags for concept-based retrieval
|
|
38
|
-
tags: string[];
|
|
39
|
-
|
|
40
|
-
// Episode grouping
|
|
41
|
-
episodeId: string | null;
|
|
42
|
-
|
|
43
|
-
// Memory class
|
|
44
|
-
memoryClass: MemoryClass;
|
|
45
|
-
|
|
46
|
-
//
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
export type
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
export
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
*
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Engram — the fundamental unit of agent memory.
|
|
5
|
+
*
|
|
6
|
+
* An engram represents a single memory trace with salience metadata,
|
|
7
|
+
* staging lifecycle, retraction support, and optional task management.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export interface Engram {
|
|
11
|
+
id: string;
|
|
12
|
+
agentId: string;
|
|
13
|
+
concept: string;
|
|
14
|
+
content: string;
|
|
15
|
+
embedding: number[] | null;
|
|
16
|
+
|
|
17
|
+
// Cognitive scores
|
|
18
|
+
confidence: number; // 0-1 Bayesian posterior — updated on retrieval feedback
|
|
19
|
+
salience: number; // Write-time importance score
|
|
20
|
+
accessCount: number; // For ACT-R decay calculation
|
|
21
|
+
lastAccessed: Date;
|
|
22
|
+
createdAt: Date;
|
|
23
|
+
|
|
24
|
+
// Salience audit trail
|
|
25
|
+
salienceFeatures: SalienceFeatures;
|
|
26
|
+
reasonCodes: string[];
|
|
27
|
+
|
|
28
|
+
// Lifecycle
|
|
29
|
+
stage: EngramStage;
|
|
30
|
+
ttl: number | null; // Milliseconds — only for staging buffer entries
|
|
31
|
+
|
|
32
|
+
// Negative memory
|
|
33
|
+
retracted: boolean;
|
|
34
|
+
retractedBy: string | null; // ID of the engram that invalidated this one
|
|
35
|
+
retractedAt: Date | null;
|
|
36
|
+
|
|
37
|
+
// Tags for concept-based retrieval
|
|
38
|
+
tags: string[];
|
|
39
|
+
|
|
40
|
+
// Episode grouping
|
|
41
|
+
episodeId: string | null;
|
|
42
|
+
|
|
43
|
+
// Memory class
|
|
44
|
+
memoryClass: MemoryClass;
|
|
45
|
+
|
|
46
|
+
// Memory type (content classification)
|
|
47
|
+
memoryType: MemoryType;
|
|
48
|
+
|
|
49
|
+
// Supersession — "this replaces that" (not retraction — original wasn't wrong, just outdated)
|
|
50
|
+
supersededBy: string | null; // ID of the engram that replaced this one
|
|
51
|
+
supersedes: string | null; // ID of the engram this one replaces
|
|
52
|
+
|
|
53
|
+
// Task management (null = not a task)
|
|
54
|
+
taskStatus: TaskStatus | null;
|
|
55
|
+
taskPriority: TaskPriority | null;
|
|
56
|
+
blockedBy: string | null; // ID of blocking engram/task
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export type EngramStage = 'staging' | 'active' | 'consolidated' | 'archived';
|
|
60
|
+
|
|
61
|
+
export type TaskStatus = 'open' | 'in_progress' | 'blocked' | 'done';
|
|
62
|
+
export type TaskPriority = 'urgent' | 'high' | 'medium' | 'low';
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Memory class — controls salience floor and recall priority.
|
|
66
|
+
*
|
|
67
|
+
* canonical: Source-of-truth facts (current state, decisions, architecture).
|
|
68
|
+
* Never goes to staging. Minimum salience 0.7.
|
|
69
|
+
* working: Normal observations, learnings, context (default).
|
|
70
|
+
* Standard salience rules apply.
|
|
71
|
+
* ephemeral: Temporary context (debugging traces, session-specific notes).
|
|
72
|
+
* Stronger time decay, lower recall priority.
|
|
73
|
+
*/
|
|
74
|
+
export type MemoryClass = 'canonical' | 'working' | 'ephemeral';
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Memory type — content classification for retrieval routing.
|
|
78
|
+
*
|
|
79
|
+
* episodic: Events, incidents, debugging sessions ("we did X because Y").
|
|
80
|
+
* semantic: Facts, decisions, patterns ("X is true", "we use Y for Z").
|
|
81
|
+
* procedural: How-to, steps, processes ("to deploy, run X then Y").
|
|
82
|
+
* unclassified: Default for backwards compatibility.
|
|
83
|
+
*/
|
|
84
|
+
export type MemoryType = 'episodic' | 'semantic' | 'procedural' | 'unclassified';
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Raw feature scores that produced the salience score.
|
|
88
|
+
* Persisted for auditability and tuning.
|
|
89
|
+
*/
|
|
90
|
+
export interface SalienceFeatures {
|
|
91
|
+
surprise: number;
|
|
92
|
+
decisionMade: boolean;
|
|
93
|
+
causalDepth: number;
|
|
94
|
+
resolutionEffort: number;
|
|
95
|
+
eventType: string;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export interface EngramCreate {
|
|
99
|
+
agentId: string;
|
|
100
|
+
concept: string;
|
|
101
|
+
content: string;
|
|
102
|
+
tags?: string[];
|
|
103
|
+
embedding?: number[];
|
|
104
|
+
salience?: number;
|
|
105
|
+
confidence?: number;
|
|
106
|
+
salienceFeatures?: SalienceFeatures;
|
|
107
|
+
reasonCodes?: string[];
|
|
108
|
+
episodeId?: string;
|
|
109
|
+
ttl?: number;
|
|
110
|
+
memoryClass?: MemoryClass;
|
|
111
|
+
memoryType?: MemoryType;
|
|
112
|
+
supersedes?: string;
|
|
113
|
+
taskStatus?: TaskStatus;
|
|
114
|
+
taskPriority?: TaskPriority;
|
|
115
|
+
blockedBy?: string;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Association — weighted edge between two engrams.
|
|
120
|
+
* Strengthened by Hebbian co-activation, decays when unused.
|
|
121
|
+
* Capped at MAX_EDGES_PER_ENGRAM to prevent graph explosion.
|
|
122
|
+
*/
|
|
123
|
+
export interface Association {
|
|
124
|
+
id: string;
|
|
125
|
+
fromEngramId: string;
|
|
126
|
+
toEngramId: string;
|
|
127
|
+
weight: number; // Log-space, updated via Hebbian rule
|
|
128
|
+
confidence: number; // Edge-level confidence (separate from node)
|
|
129
|
+
type: AssociationType;
|
|
130
|
+
activationCount: number; // How many times this edge contributed to retrieval
|
|
131
|
+
createdAt: Date;
|
|
132
|
+
lastActivated: Date;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
export type AssociationType = 'hebbian' | 'connection' | 'causal' | 'temporal' | 'invalidation' | 'bridge';
|
|
136
|
+
|
|
137
|
+
export const MAX_EDGES_PER_ENGRAM = 20;
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Activation result — returned from the activation pipeline.
|
|
141
|
+
*/
|
|
142
|
+
export interface ActivationResult {
|
|
143
|
+
engram: Engram;
|
|
144
|
+
score: number;
|
|
145
|
+
phaseScores: PhaseScores; // Per-phase breakdown for explainability
|
|
146
|
+
why: string; // Human-readable explanation
|
|
147
|
+
associations: Association[];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Per-phase scoring breakdown — full audit of how each phase contributed.
|
|
152
|
+
*/
|
|
153
|
+
export interface PhaseScores {
|
|
154
|
+
textMatch: number;
|
|
155
|
+
vectorMatch: number;
|
|
156
|
+
decayScore: number;
|
|
157
|
+
hebbianBoost: number;
|
|
158
|
+
graphBoost: number;
|
|
159
|
+
confidenceGate: number;
|
|
160
|
+
composite: number;
|
|
161
|
+
rerankerScore: number; // Cross-encoder relevance (0-1), 0 if reranker disabled
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Query mode — controls how the activation pipeline weights its signals.
|
|
166
|
+
*
|
|
167
|
+
* targeted: Query has identifiers, ticket IDs, specific names. Boost BM25,
|
|
168
|
+
* narrow graph beam, stronger decay, stricter vector z-gate.
|
|
169
|
+
* exploratory: Vague/conceptual query. Boost vector/semantic signals, wider
|
|
170
|
+
* graph beam, weaker decay, relaxed z-gate.
|
|
171
|
+
* balanced: Default weights (current behavior).
|
|
172
|
+
* auto: Classify automatically based on query characteristics.
|
|
173
|
+
*/
|
|
174
|
+
export type QueryMode = 'targeted' | 'exploratory' | 'balanced' | 'auto';
|
|
175
|
+
|
|
176
|
+
export interface ActivationQuery {
|
|
177
|
+
agentId: string;
|
|
178
|
+
context: string;
|
|
179
|
+
limit?: number;
|
|
180
|
+
minScore?: number;
|
|
181
|
+
includeStaging?: boolean;
|
|
182
|
+
includeRetracted?: boolean;
|
|
183
|
+
useReranker?: boolean; // Enable cross-encoder re-ranking (default: true)
|
|
184
|
+
useExpansion?: boolean; // Enable query expansion (default: true)
|
|
185
|
+
abstentionThreshold?: number; // Min reranker score to return results (default: 0)
|
|
186
|
+
internal?: boolean; // Skip access count increment, Hebbian update, and event logging (for system calls)
|
|
187
|
+
memoryType?: MemoryType; // Filter by memory type (episodic, semantic, procedural)
|
|
188
|
+
mode?: QueryMode; // Pipeline mode — 'auto' by default
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* Search query — deterministic retrieval for diagnostics and debugging.
|
|
193
|
+
* Separate from activation (which is cognitive/associative).
|
|
194
|
+
*/
|
|
195
|
+
export interface SearchQuery {
|
|
196
|
+
agentId: string;
|
|
197
|
+
text?: string; // Exact or partial text match
|
|
198
|
+
concept?: string; // Exact concept match
|
|
199
|
+
tags?: string[]; // Tag filter (AND)
|
|
200
|
+
stage?: EngramStage;
|
|
201
|
+
retracted?: boolean;
|
|
202
|
+
limit?: number;
|
|
203
|
+
offset?: number;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Retrieval feedback — agent reports whether a memory was useful.
|
|
208
|
+
* Used to update confidence scores and eval metrics.
|
|
209
|
+
*/
|
|
210
|
+
export interface RetrievalFeedback {
|
|
211
|
+
engramId: string;
|
|
212
|
+
useful: boolean;
|
|
213
|
+
context: string; // What the agent was doing when it judged usefulness
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Retraction — marks a memory as invalid/wrong.
|
|
218
|
+
*/
|
|
219
|
+
export interface Retraction {
|
|
220
|
+
targetEngramId: string;
|
|
221
|
+
reason: string;
|
|
222
|
+
counterContent?: string; // Optional: what the correct information is
|
|
223
|
+
agentId: string;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Episode — a temporal grouping of engrams from a session or time window.
|
|
228
|
+
* Enables episode-first retrieval: find relevant episodes, then drill into engrams.
|
|
229
|
+
*/
|
|
230
|
+
export interface Episode {
|
|
231
|
+
id: string;
|
|
232
|
+
agentId: string;
|
|
233
|
+
label: string; // Short description (e.g., "Express migration session")
|
|
234
|
+
embedding: number[] | null; // Centroid of member engram embeddings
|
|
235
|
+
engramCount: number;
|
|
236
|
+
startTime: Date;
|
|
237
|
+
endTime: Date;
|
|
238
|
+
createdAt: Date;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
export interface EpisodeCreate {
|
|
242
|
+
agentId: string;
|
|
243
|
+
label: string;
|
|
244
|
+
embedding?: number[];
|
|
245
|
+
}
|
package/src/types/eval.ts
CHANGED
|
@@ -1,100 +1,100 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Evaluation types — measuring whether memory actually helps.
|
|
5
|
-
*
|
|
6
|
-
* Four measurement dimensions:
|
|
7
|
-
* 1. Retrieval quality (precision, recall, latency)
|
|
8
|
-
* 2. Connection quality (edge utility, stability)
|
|
9
|
-
* 3. Staging accuracy (promotion precision, discard regret)
|
|
10
|
-
* 4. Task impact (with/without memory comparison)
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Single activation event record — logged for offline analysis.
|
|
15
|
-
*/
|
|
16
|
-
export interface ActivationEvent {
|
|
17
|
-
id: string;
|
|
18
|
-
agentId: string;
|
|
19
|
-
timestamp: Date;
|
|
20
|
-
context: string;
|
|
21
|
-
resultsReturned: number;
|
|
22
|
-
topScore: number;
|
|
23
|
-
latencyMs: number;
|
|
24
|
-
engramIds: string[];
|
|
25
|
-
feedback?: RetrievalFeedbackEvent[];
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export interface RetrievalFeedbackEvent {
|
|
29
|
-
engramId: string;
|
|
30
|
-
useful: boolean;
|
|
31
|
-
timestamp: Date;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Staging lifecycle event — tracks promote/discard decisions.
|
|
36
|
-
*/
|
|
37
|
-
export interface StagingEvent {
|
|
38
|
-
engramId: string;
|
|
39
|
-
agentId: string;
|
|
40
|
-
action: 'promoted' | 'discarded' | 'expired';
|
|
41
|
-
resonanceScore: number | null;
|
|
42
|
-
timestamp: Date;
|
|
43
|
-
ageMs: number; // How long it lived in staging
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
/**
|
|
47
|
-
* Aggregate metrics snapshot — computed periodically.
|
|
48
|
-
*/
|
|
49
|
-
export interface EvalMetrics {
|
|
50
|
-
agentId: string;
|
|
51
|
-
timestamp: Date;
|
|
52
|
-
window: string; // e.g., "24h", "7d"
|
|
53
|
-
|
|
54
|
-
// Retrieval quality
|
|
55
|
-
activationCount: number;
|
|
56
|
-
avgPrecisionAtK: number; // Of returned results, % judged useful
|
|
57
|
-
avgLatencyMs: number;
|
|
58
|
-
p95LatencyMs: number;
|
|
59
|
-
|
|
60
|
-
// Connection quality
|
|
61
|
-
totalEdges: number;
|
|
62
|
-
edgesUsedInActivation: number;
|
|
63
|
-
edgeUtilityRate: number; // % of edges that contributed to retrieval
|
|
64
|
-
avgEdgeSurvivalDays: number;
|
|
65
|
-
|
|
66
|
-
// Staging accuracy
|
|
67
|
-
totalStaged: number;
|
|
68
|
-
promotedCount: number;
|
|
69
|
-
discardedCount: number;
|
|
70
|
-
promotionPrecision: number; // % of promoted items later used
|
|
71
|
-
discardRegret: number; // % of discarded items agent re-introduced
|
|
72
|
-
|
|
73
|
-
// Memory health
|
|
74
|
-
activeEngramCount: number;
|
|
75
|
-
stagingEngramCount: number;
|
|
76
|
-
retractedCount: number;
|
|
77
|
-
consolidatedCount: number;
|
|
78
|
-
avgConfidence: number;
|
|
79
|
-
|
|
80
|
-
// Contamination tracking
|
|
81
|
-
staleUsageCount: number; // Activations using outdated engrams
|
|
82
|
-
retractionRate: number; // Rate of memories being invalidated
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
/**
|
|
86
|
-
* Task trial — for with/without memory comparison.
|
|
87
|
-
*/
|
|
88
|
-
export interface TaskTrial {
|
|
89
|
-
id: string;
|
|
90
|
-
agentId: string;
|
|
91
|
-
taskDescription: string;
|
|
92
|
-
memoryEnabled: boolean;
|
|
93
|
-
startedAt: Date;
|
|
94
|
-
completedAt: Date | null;
|
|
95
|
-
success: boolean | null;
|
|
96
|
-
stepsToCompletion: number;
|
|
97
|
-
errorsEncountered: number;
|
|
98
|
-
memoriesActivated: number;
|
|
99
|
-
userCorrections: number;
|
|
100
|
-
}
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Evaluation types — measuring whether memory actually helps.
|
|
5
|
+
*
|
|
6
|
+
* Four measurement dimensions:
|
|
7
|
+
* 1. Retrieval quality (precision, recall, latency)
|
|
8
|
+
* 2. Connection quality (edge utility, stability)
|
|
9
|
+
* 3. Staging accuracy (promotion precision, discard regret)
|
|
10
|
+
* 4. Task impact (with/without memory comparison)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Single activation event record — logged for offline analysis.
|
|
15
|
+
*/
|
|
16
|
+
export interface ActivationEvent {
|
|
17
|
+
id: string;
|
|
18
|
+
agentId: string;
|
|
19
|
+
timestamp: Date;
|
|
20
|
+
context: string;
|
|
21
|
+
resultsReturned: number;
|
|
22
|
+
topScore: number;
|
|
23
|
+
latencyMs: number;
|
|
24
|
+
engramIds: string[];
|
|
25
|
+
feedback?: RetrievalFeedbackEvent[];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface RetrievalFeedbackEvent {
|
|
29
|
+
engramId: string;
|
|
30
|
+
useful: boolean;
|
|
31
|
+
timestamp: Date;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Staging lifecycle event — tracks promote/discard decisions.
|
|
36
|
+
*/
|
|
37
|
+
export interface StagingEvent {
|
|
38
|
+
engramId: string;
|
|
39
|
+
agentId: string;
|
|
40
|
+
action: 'promoted' | 'discarded' | 'expired';
|
|
41
|
+
resonanceScore: number | null;
|
|
42
|
+
timestamp: Date;
|
|
43
|
+
ageMs: number; // How long it lived in staging
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Aggregate metrics snapshot — computed periodically.
|
|
48
|
+
*/
|
|
49
|
+
export interface EvalMetrics {
|
|
50
|
+
agentId: string;
|
|
51
|
+
timestamp: Date;
|
|
52
|
+
window: string; // e.g., "24h", "7d"
|
|
53
|
+
|
|
54
|
+
// Retrieval quality
|
|
55
|
+
activationCount: number;
|
|
56
|
+
avgPrecisionAtK: number; // Of returned results, % judged useful
|
|
57
|
+
avgLatencyMs: number;
|
|
58
|
+
p95LatencyMs: number;
|
|
59
|
+
|
|
60
|
+
// Connection quality
|
|
61
|
+
totalEdges: number;
|
|
62
|
+
edgesUsedInActivation: number;
|
|
63
|
+
edgeUtilityRate: number; // % of edges that contributed to retrieval
|
|
64
|
+
avgEdgeSurvivalDays: number;
|
|
65
|
+
|
|
66
|
+
// Staging accuracy
|
|
67
|
+
totalStaged: number;
|
|
68
|
+
promotedCount: number;
|
|
69
|
+
discardedCount: number;
|
|
70
|
+
promotionPrecision: number; // % of promoted items later used
|
|
71
|
+
discardRegret: number; // % of discarded items agent re-introduced
|
|
72
|
+
|
|
73
|
+
// Memory health
|
|
74
|
+
activeEngramCount: number;
|
|
75
|
+
stagingEngramCount: number;
|
|
76
|
+
retractedCount: number;
|
|
77
|
+
consolidatedCount: number;
|
|
78
|
+
avgConfidence: number;
|
|
79
|
+
|
|
80
|
+
// Contamination tracking
|
|
81
|
+
staleUsageCount: number; // Activations using outdated engrams
|
|
82
|
+
retractionRate: number; // Rate of memories being invalidated
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Task trial — for with/without memory comparison.
|
|
87
|
+
*/
|
|
88
|
+
export interface TaskTrial {
|
|
89
|
+
id: string;
|
|
90
|
+
agentId: string;
|
|
91
|
+
taskDescription: string;
|
|
92
|
+
memoryEnabled: boolean;
|
|
93
|
+
startedAt: Date;
|
|
94
|
+
completedAt: Date | null;
|
|
95
|
+
success: boolean | null;
|
|
96
|
+
stepsToCompletion: number;
|
|
97
|
+
errorsEncountered: number;
|
|
98
|
+
memoriesActivated: number;
|
|
99
|
+
userCorrections: number;
|
|
100
|
+
}
|