agent-working-memory 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +15 -4
- package/dist/cli.js.map +1 -1
- package/dist/core/salience.d.ts.map +1 -1
- package/dist/core/salience.js +11 -10
- package/dist/core/salience.js.map +1 -1
- package/dist/engine/consolidation.d.ts.map +1 -1
- package/dist/engine/consolidation.js +26 -7
- package/dist/engine/consolidation.js.map +1 -1
- package/dist/mcp.js +90 -82
- package/dist/mcp.js.map +1 -1
- package/dist/storage/sqlite.d.ts +1 -0
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +11 -0
- package/dist/storage/sqlite.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +16 -4
- package/src/core/salience.ts +168 -162
- package/src/engine/consolidation.ts +469 -445
- package/src/hooks/sidecar.ts +289 -289
- package/src/mcp.ts +971 -963
- package/src/storage/sqlite.ts +14 -0
package/src/core/salience.ts
CHANGED
|
@@ -1,162 +1,168 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Salience Filter — decides what's worth remembering.
|
|
5
|
-
*
|
|
6
|
-
* Codex feedback incorporated:
|
|
7
|
-
* - Persists raw feature scores for auditability
|
|
8
|
-
* - Returns reason codes for explainability
|
|
9
|
-
* - Thresholds are tunable per agent
|
|
10
|
-
* - Deterministic heuristics first, LLM augmentation optional
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import type { SalienceFeatures, MemoryClass } from '../types/index.js';
|
|
14
|
-
import type { EngramStore } from '../storage/sqlite.js';
|
|
15
|
-
|
|
16
|
-
export type SalienceEventType = 'decision' | 'friction' | 'surprise' | 'causal' | 'observation';
|
|
17
|
-
|
|
18
|
-
export interface SalienceInput {
|
|
19
|
-
content: string;
|
|
20
|
-
eventType?: SalienceEventType;
|
|
21
|
-
surprise?: number;
|
|
22
|
-
decisionMade?: boolean;
|
|
23
|
-
causalDepth?: number;
|
|
24
|
-
resolutionEffort?: number;
|
|
25
|
-
/** 0 = exact duplicate exists, 1 = completely novel. Computed by caller via BM25 similarity check. */
|
|
26
|
-
novelty?: number;
|
|
27
|
-
/** Memory class — canonical memories get salience floor of 0.7 and never stage. */
|
|
28
|
-
memoryClass?: MemoryClass;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export interface SalienceResult {
|
|
32
|
-
score: number;
|
|
33
|
-
disposition: 'active' | 'staging' | 'discard';
|
|
34
|
-
features: SalienceFeatures;
|
|
35
|
-
reasonCodes: string[];
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* Weights for the salience scoring formula.
|
|
40
|
-
* Novelty is the strongest signal — new information should always be stored.
|
|
41
|
-
* Duplicates get filtered aggressively.
|
|
42
|
-
*/
|
|
43
|
-
const WEIGHTS = {
|
|
44
|
-
surprise: 0.15,
|
|
45
|
-
decision: 0.15,
|
|
46
|
-
causalDepth: 0.15,
|
|
47
|
-
resolutionEffort: 0.1,
|
|
48
|
-
novelty: 0.45,
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
* Rule-based salience scorer with full audit trail.
|
|
53
|
-
*/
|
|
54
|
-
export function evaluateSalience(
|
|
55
|
-
input: SalienceInput,
|
|
56
|
-
activeThreshold: number = 0.4,
|
|
57
|
-
stagingThreshold: number = 0.2
|
|
58
|
-
): SalienceResult {
|
|
59
|
-
const features: SalienceFeatures = {
|
|
60
|
-
surprise: input.surprise ?? 0,
|
|
61
|
-
decisionMade: input.decisionMade ?? false,
|
|
62
|
-
causalDepth: input.causalDepth ?? 0,
|
|
63
|
-
resolutionEffort: input.resolutionEffort ?? 0,
|
|
64
|
-
eventType: input.eventType ?? 'observation',
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
const reasonCodes: string[] = [];
|
|
68
|
-
|
|
69
|
-
// Novelty: 1.0 = completely new info, 0 = exact duplicate exists
|
|
70
|
-
// Default to 0.8 (assume mostly novel) when caller doesn't check
|
|
71
|
-
const novelty = input.novelty ?? 0.8;
|
|
72
|
-
|
|
73
|
-
// Score components
|
|
74
|
-
const surpriseScore = WEIGHTS.surprise * features.surprise;
|
|
75
|
-
const decisionScore = WEIGHTS.decision * (features.decisionMade ? 1.0 : 0);
|
|
76
|
-
const causalScore = WEIGHTS.causalDepth * features.causalDepth;
|
|
77
|
-
const effortScore = WEIGHTS.resolutionEffort * features.resolutionEffort;
|
|
78
|
-
const noveltyScore = WEIGHTS.novelty * novelty;
|
|
79
|
-
|
|
80
|
-
if (features.surprise > 0.5) reasonCodes.push('high_surprise');
|
|
81
|
-
if (features.decisionMade) reasonCodes.push('decision_point');
|
|
82
|
-
if (features.causalDepth > 0.5) reasonCodes.push('causal_insight');
|
|
83
|
-
if (features.resolutionEffort > 0.5) reasonCodes.push('high_effort_resolution');
|
|
84
|
-
if (novelty > 0.7) reasonCodes.push('novel_information');
|
|
85
|
-
if (novelty < 0.3) reasonCodes.push('redundant_information');
|
|
86
|
-
|
|
87
|
-
// Event type bonus
|
|
88
|
-
let typeBonus = 0;
|
|
89
|
-
switch (features.eventType) {
|
|
90
|
-
case 'decision': typeBonus = 0.15; reasonCodes.push('event:decision'); break;
|
|
91
|
-
case 'friction': typeBonus = 0.2; reasonCodes.push('event:friction'); break;
|
|
92
|
-
case 'surprise': typeBonus = 0.25; reasonCodes.push('event:surprise'); break;
|
|
93
|
-
case 'causal': typeBonus = 0.2; reasonCodes.push('event:causal'); break;
|
|
94
|
-
case 'observation': break;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
let score = Math.min(surpriseScore + decisionScore + causalScore + effortScore + noveltyScore + typeBonus, 1.0);
|
|
98
|
-
|
|
99
|
-
// Memory class overrides
|
|
100
|
-
const memoryClass = input.memoryClass ?? 'working';
|
|
101
|
-
|
|
102
|
-
if (memoryClass === 'canonical') {
|
|
103
|
-
// Canonical memories: salience floor of 0.7, never go to staging
|
|
104
|
-
score = Math.max(score, 0.7);
|
|
105
|
-
reasonCodes.push('class:canonical');
|
|
106
|
-
} else if (memoryClass === 'ephemeral') {
|
|
107
|
-
reasonCodes.push('class:ephemeral');
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
let disposition: 'active' | 'staging' | 'discard';
|
|
111
|
-
if (memoryClass === 'canonical') {
|
|
112
|
-
// Canonical always goes active — they represent current truth
|
|
113
|
-
disposition = 'active';
|
|
114
|
-
reasonCodes.push('disposition:active');
|
|
115
|
-
} else if (score >= activeThreshold) {
|
|
116
|
-
disposition = 'active';
|
|
117
|
-
reasonCodes.push('disposition:active');
|
|
118
|
-
} else if (score >= stagingThreshold) {
|
|
119
|
-
disposition = 'staging';
|
|
120
|
-
reasonCodes.push('disposition:staging');
|
|
121
|
-
} else {
|
|
122
|
-
disposition = 'discard';
|
|
123
|
-
reasonCodes.push('disposition:discard');
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
return { score, disposition, features, reasonCodes };
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Compute novelty score by checking how similar the content is to existing memories.
|
|
131
|
-
* Uses BM25 (synchronous, fast) to find the closest existing memory.
|
|
132
|
-
*
|
|
133
|
-
* Returns 0..1 where:
|
|
134
|
-
* 1.0 = nothing similar exists (completely novel)
|
|
135
|
-
* 0.0 = near-exact duplicate exists
|
|
136
|
-
*
|
|
137
|
-
* The check is cheap (~1ms) because BM25 is synchronous SQLite FTS5.
|
|
138
|
-
*/
|
|
139
|
-
export function computeNovelty(store: EngramStore, agentId: string, concept: string, content: string): number {
|
|
140
|
-
try {
|
|
141
|
-
// Search using concept + first 100 chars of content (enough to detect duplicates, fast)
|
|
142
|
-
const contentStr = typeof content === 'string' ? content : '';
|
|
143
|
-
const conceptStr = typeof concept === 'string' ? concept : '';
|
|
144
|
-
const searchText = `${conceptStr} ${contentStr.slice(0, 100)}`;
|
|
145
|
-
|
|
146
|
-
const results = store.searchBM25WithRank(agentId, searchText,
|
|
147
|
-
if (results.length === 0) return 1.0; // Nothing similar — fully novel
|
|
148
|
-
|
|
149
|
-
// searchBM25WithRank normalizes scores to 0..1 via |rank|/(1+|rank|).
|
|
150
|
-
// Higher score = stronger match = less novel.
|
|
151
|
-
const topScore = results[0].bm25Score;
|
|
152
|
-
|
|
153
|
-
if
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
//
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Salience Filter — decides what's worth remembering.
|
|
5
|
+
*
|
|
6
|
+
* Codex feedback incorporated:
|
|
7
|
+
* - Persists raw feature scores for auditability
|
|
8
|
+
* - Returns reason codes for explainability
|
|
9
|
+
* - Thresholds are tunable per agent
|
|
10
|
+
* - Deterministic heuristics first, LLM augmentation optional
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { SalienceFeatures, MemoryClass } from '../types/index.js';
|
|
14
|
+
import type { EngramStore } from '../storage/sqlite.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Category of the event a memory candidate originated from.
 * `evaluateSalience` grants a score bonus for every category except
 * `'observation'`, which is also the default when no category is supplied.
 */
export type SalienceEventType =
  | 'decision'
  | 'friction'
  | 'surprise'
  | 'causal'
  | 'observation';
|
|
17
|
+
|
|
18
|
+
/**
 * Signals describing one memory candidate, as consumed by `evaluateSalience`.
 * Every optional field has a default that the scorer applies when it is omitted.
 */
export interface SalienceInput {
  /** Text of the memory being evaluated (the scorer itself does not inspect it). */
  content: string;

  /** Event category; the scorer falls back to `'observation'` (no bonus). */
  eventType?: SalienceEventType;

  /** Surprise signal; defaults to 0. Values above 0.5 add the `high_surprise` reason code. */
  surprise?: number;

  /** Whether a decision was taken; defaults to `false`. */
  decisionMade?: boolean;

  /** Causal-depth signal; defaults to 0. Values above 0.5 add `causal_insight`. */
  causalDepth?: number;

  /** Resolution-effort signal; defaults to 0. Values above 0.5 add `high_effort_resolution`. */
  resolutionEffort?: number;

  /** 0 = exact duplicate exists, 1 = completely novel. Computed by caller via BM25 similarity check. */
  novelty?: number;

  /** Memory class — canonical memories get salience floor of 0.7 and never stage. */
  memoryClass?: MemoryClass;
}
|
|
30
|
+
|
|
31
|
+
/** Outcome of a salience evaluation, including the audit trail behind it. */
export interface SalienceResult {
  /** Final salience score; the scorer caps it at 1.0. */
  score: number;

  /** Where the memory should go, decided by comparing `score` to the thresholds. */
  disposition: 'active' | 'staging' | 'discard';

  /** Feature values the score was computed from, kept for auditability. */
  features: SalienceFeatures;

  /** Machine-readable explanations of the score and disposition, in the order they were added. */
  reasonCodes: string[];
}
|
|
37
|
+
|
|
38
|
+
/**
 * Weights for the salience scoring formula.
 * Novelty is the strongest signal — new information should always be stored.
 * Duplicates get filtered aggressively.
 */
const WEIGHTS = {
  surprise: 0.15,
  decision: 0.15,
  causalDepth: 0.15,
  resolutionEffort: 0.1,
  novelty: 0.45,
};

/**
 * Coerce a numeric signal into the [0, 1] range the weights assume.
 * Non-numbers and NaN become `fallback`; everything else is clamped.
 */
function clampUnit(value: number, fallback: number): number {
  if (typeof value !== 'number' || Number.isNaN(value)) return fallback;
  return Math.min(1, Math.max(0, value));
}

/**
 * Rule-based salience scorer with full audit trail.
 *
 * The score is the weighted sum of the input signals plus an event-type bonus,
 * capped at 1.0. Numeric signals are clamped to [0, 1] first, so a stray NaN or
 * out-of-range value can neither produce a NaN/negative score nor silently force
 * a `discard`. The returned `features` hold the clamped values actually scored.
 *
 * @param input - Signals for the memory candidate; omitted signals default to
 *   0 / `false` / `'observation'`, and omitted novelty defaults to 0.8.
 * @param activeThreshold - Minimum score for the `active` disposition (default 0.4).
 * @param stagingThreshold - Minimum score for the `staging` disposition (default 0.2).
 * @returns The score, the disposition, the scored features and the reason codes.
 */
export function evaluateSalience(
  input: SalienceInput,
  activeThreshold: number = 0.4,
  stagingThreshold: number = 0.2
): SalienceResult {
  const features: SalienceFeatures = {
    surprise: clampUnit(input.surprise ?? 0, 0),
    decisionMade: input.decisionMade ?? false,
    causalDepth: clampUnit(input.causalDepth ?? 0, 0),
    resolutionEffort: clampUnit(input.resolutionEffort ?? 0, 0),
    eventType: input.eventType ?? 'observation',
  };

  const reasonCodes: string[] = [];

  // Novelty: 1.0 = completely new info, 0 = exact duplicate exists.
  // Default to 0.8 (assume mostly novel) when the caller doesn't check or passes NaN.
  const novelty = clampUnit(input.novelty ?? 0.8, 0.8);

  // Weighted score components.
  const surpriseScore = WEIGHTS.surprise * features.surprise;
  const decisionScore = WEIGHTS.decision * (features.decisionMade ? 1.0 : 0);
  const causalScore = WEIGHTS.causalDepth * features.causalDepth;
  const effortScore = WEIGHTS.resolutionEffort * features.resolutionEffort;
  const noveltyScore = WEIGHTS.novelty * novelty;

  if (features.surprise > 0.5) reasonCodes.push('high_surprise');
  if (features.decisionMade) reasonCodes.push('decision_point');
  if (features.causalDepth > 0.5) reasonCodes.push('causal_insight');
  if (features.resolutionEffort > 0.5) reasonCodes.push('high_effort_resolution');
  if (novelty > 0.7) reasonCodes.push('novel_information');
  if (novelty < 0.3) reasonCodes.push('redundant_information');

  // Event type bonus (observations, and any unrecognised type, get none).
  let typeBonus = 0;
  switch (features.eventType) {
    case 'decision': typeBonus = 0.15; reasonCodes.push('event:decision'); break;
    case 'friction': typeBonus = 0.2; reasonCodes.push('event:friction'); break;
    case 'surprise': typeBonus = 0.25; reasonCodes.push('event:surprise'); break;
    case 'causal': typeBonus = 0.2; reasonCodes.push('event:causal'); break;
    case 'observation': break;
  }

  // Inputs are already in [0, 1], so the sum is non-negative; only the upper cap is needed.
  let score = Math.min(
    surpriseScore + decisionScore + causalScore + effortScore + noveltyScore + typeBonus,
    1.0
  );

  // Memory class overrides.
  const memoryClass = input.memoryClass ?? 'working';

  let disposition: SalienceResult['disposition'];
  if (memoryClass === 'canonical') {
    // Canonical memories represent current truth: salience floor of 0.7 and
    // always active, regardless of the thresholds.
    score = Math.max(score, 0.7);
    reasonCodes.push('class:canonical');
    disposition = 'active';
  } else {
    if (memoryClass === 'ephemeral') reasonCodes.push('class:ephemeral');

    if (score >= activeThreshold) {
      disposition = 'active';
    } else if (score >= stagingThreshold) {
      disposition = 'staging';
    } else {
      disposition = 'discard';
    }
  }
  reasonCodes.push(`disposition:${disposition}`);

  return { score, disposition, features, reasonCodes };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Compute novelty score by checking how similar the content is to existing memories.
 * Uses the store's BM25 search to find the closest existing memories.
 *
 * Return values:
 *   1.0        = the search returned nothing (completely novel)
 *   0.1..0.95  = `1 - topScore`, minus 0.4 when any hit has the same concept
 *   0.8        = the search threw or returned an unusable score (assume novel)
 *
 * @param store - Memory store providing `searchBM25WithRank`.
 * @param agentId - Agent whose memories are searched.
 * @param concept - Concept label of the candidate; compared case-insensitively
 *   and trimmed against the concept of every hit.
 * @param content - Candidate text; only its first 100 characters are searched.
 * @returns A novelty value that is always a finite number in [0.1, 1.0].
 */
export function computeNovelty(store: EngramStore, agentId: string, concept: string, content: string): number {
  try {
    // Search using concept + first 100 chars of content (enough to detect duplicates, fast).
    const contentStr = typeof content === 'string' ? content : '';
    const conceptStr = typeof concept === 'string' ? concept : '';
    const searchText = `${conceptStr} ${contentStr.slice(0, 100)}`;

    const results = store.searchBM25WithRank(agentId, searchText, 5);
    if (results.length === 0) return 1.0; // Nothing similar — fully novel

    // Higher score = stronger match = less novel. The original note states the
    // store normalizes scores to 0..1 via |rank|/(1+|rank|).
    const topScore = results[0].bm25Score;

    // A missing or non-finite score would turn the result into NaN and poison
    // the salience score downstream; treat it like a failed search instead.
    if (!Number.isFinite(topScore)) return 0.8;

    // Penalize exact concept string duplicates — if any result has the same concept,
    // heavily reduce novelty to prevent hub toxicity from repeated task_end summaries.
    // An empty concept carries no identity, so it never counts as a match.
    const conceptLower = conceptStr.toLowerCase().trim();
    const exactConceptMatch =
      conceptLower.length > 0 &&
      results.some(r => r.engram?.concept?.toLowerCase().trim() === conceptLower);
    const conceptPenalty = exactConceptMatch ? 0.4 : 0;

    // Continuous novelty: inversely proportional to BM25 similarity.
    // Floor at 0.1 (never zero — even duplicates might have new context).
    // Ceiling at 0.95 (never 1.0 — always a tiny chance of overlap).
    return Math.max(0.1, Math.min(0.95, 1.0 - topScore - conceptPenalty));
  } catch {
    // If BM25 search fails (e.g., FTS not ready), assume novel
    return 0.8;
  }
}
|