audrey 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +808 -681
- package/mcp-server/config.js +76 -76
- package/mcp-server/index.js +728 -437
- package/package.json +76 -77
- package/src/adaptive.js +53 -53
- package/src/affect.js +64 -64
- package/src/audrey.js +604 -570
- package/src/causal.js +95 -95
- package/src/confidence.js +120 -120
- package/src/consolidate.js +265 -242
- package/src/context.js +15 -15
- package/src/db.js +37 -0
- package/src/decay.js +84 -84
- package/src/embedding.js +256 -256
- package/src/export.js +67 -61
- package/src/forget.js +111 -111
- package/src/import.js +245 -123
- package/src/index.js +27 -20
- package/src/interference.js +51 -51
- package/src/introspect.js +48 -48
- package/src/llm.js +246 -240
- package/src/migrate.js +58 -58
- package/src/prompts.js +223 -223
- package/src/recall.js +352 -329
- package/src/rollback.js +42 -42
- package/src/ulid.js +18 -18
- package/src/utils.js +38 -38
- package/src/validate.js +172 -172
package/src/consolidate.js
CHANGED
|
@@ -1,242 +1,265 @@
|
|
|
1
|
-
import { generateId } from './ulid.js';
|
|
2
|
-
import { buildPrincipleExtractionPrompt } from './prompts.js';
|
|
3
|
-
|
|
4
|
-
function clusterViaKNN(db, episodes, similarityThreshold, minClusterSize) {
|
|
5
|
-
const n = episodes.length;
|
|
6
|
-
const k = Math.min(50, n);
|
|
7
|
-
const idToIndex = new Map(episodes.map((ep, i) => [ep.id, i]));
|
|
8
|
-
|
|
9
|
-
const parent = new Array(n);
|
|
10
|
-
for (let i = 0; i < n; i++) parent[i] = i;
|
|
11
|
-
|
|
12
|
-
function find(x) {
|
|
13
|
-
while (parent[x] !== x) {
|
|
14
|
-
parent[x] = parent[parent[x]];
|
|
15
|
-
x = parent[x];
|
|
16
|
-
}
|
|
17
|
-
return x;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
function union(a, b) {
|
|
21
|
-
const ra = find(a);
|
|
22
|
-
const rb = find(b);
|
|
23
|
-
if (ra !== rb) parent[ra] = rb;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
const getEmbedding = db.prepare('SELECT embedding FROM vec_episodes WHERE id = ?');
|
|
27
|
-
const knnQuery = db.prepare(`
|
|
28
|
-
SELECT id, distance
|
|
29
|
-
FROM vec_episodes
|
|
30
|
-
WHERE embedding MATCH ? AND k = ? AND consolidated = 0
|
|
31
|
-
`);
|
|
32
|
-
|
|
33
|
-
for (let i = 0; i < n; i++) {
|
|
34
|
-
const ep = episodes[i];
|
|
35
|
-
const vecRow = getEmbedding.get(ep.id);
|
|
36
|
-
if (!vecRow) continue;
|
|
37
|
-
|
|
38
|
-
const neighbors = knnQuery.all(vecRow.embedding, k);
|
|
39
|
-
for (const neighbor of neighbors) {
|
|
40
|
-
if (neighbor.id === ep.id) continue;
|
|
41
|
-
const j = idToIndex.get(neighbor.id);
|
|
42
|
-
if (j === undefined) continue;
|
|
43
|
-
const similarity = 1.0 - neighbor.distance;
|
|
44
|
-
if (similarity >= similarityThreshold) {
|
|
45
|
-
union(i, j);
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
const groups = new Map();
|
|
51
|
-
for (let i = 0; i < n; i++) {
|
|
52
|
-
const root = find(i);
|
|
53
|
-
if (!groups.has(root)) groups.set(root, []);
|
|
54
|
-
groups.get(root).push(episodes[i]);
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
const clusters = [];
|
|
58
|
-
for (const group of groups.values()) {
|
|
59
|
-
if (group.length >= minClusterSize) {
|
|
60
|
-
clusters.push(group);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
return clusters;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* @param {import('better-sqlite3').Database} db
|
|
68
|
-
* @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
|
|
69
|
-
* @param {{ similarityThreshold?: number, minClusterSize?: number }} [options]
|
|
70
|
-
* @returns {Array<Array<Object>>}
|
|
71
|
-
*/
|
|
72
|
-
export function clusterEpisodes(db, embeddingProvider, options = {}) {
|
|
73
|
-
const {
|
|
74
|
-
similarityThreshold = 0.85,
|
|
75
|
-
minClusterSize = 3,
|
|
76
|
-
} = options;
|
|
77
|
-
|
|
78
|
-
const episodes = db.prepare(
|
|
79
|
-
'SELECT * FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
|
|
80
|
-
).all();
|
|
81
|
-
|
|
82
|
-
if (episodes.length === 0) return [];
|
|
83
|
-
|
|
84
|
-
return clusterViaKNN(db, episodes, similarityThreshold, minClusterSize);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
function defaultExtractPrinciple(episodes) {
|
|
88
|
-
const uniqueContents = [...new Set(episodes.map(e => e.content))];
|
|
89
|
-
return {
|
|
90
|
-
content: `Recurring pattern: ${uniqueContents.join('; ')}`,
|
|
91
|
-
type: 'semantic',
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
async function llmExtractPrinciple(llmProvider, episodes) {
|
|
96
|
-
const messages = buildPrincipleExtractionPrompt(episodes);
|
|
97
|
-
return llmProvider.json(messages);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* @param {import('better-sqlite3').Database} db
|
|
102
|
-
* @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
|
|
103
|
-
* @param {{ similarityThreshold?: number, minClusterSize?: number, extractPrinciple?: function, llmProvider?: Object }} [options]
|
|
104
|
-
* @returns {Promise<{ runId: string, episodesEvaluated: number, clustersFound: number, principlesExtracted: number }>}
|
|
105
|
-
*/
|
|
106
|
-
export async function runConsolidation(db, embeddingProvider, options = {}) {
|
|
107
|
-
const {
|
|
108
|
-
similarityThreshold = 0.85,
|
|
109
|
-
minClusterSize = 3,
|
|
110
|
-
extractPrinciple,
|
|
111
|
-
llmProvider,
|
|
112
|
-
} = options;
|
|
113
|
-
|
|
114
|
-
const runId = generateId();
|
|
115
|
-
const now = new Date().toISOString();
|
|
116
|
-
|
|
117
|
-
db.prepare(`
|
|
118
|
-
INSERT INTO consolidation_runs (
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
const
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
);
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
1
|
+
import { generateId } from './ulid.js';
|
|
2
|
+
import { buildPrincipleExtractionPrompt } from './prompts.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Groups episodes into clusters by linking each episode to the nearest
 * neighbours returned by the `vec_episodes` KNN query and merging linked
 * episodes with a union-find structure.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {Array<Object>} episodes - candidate rows; each must expose `id`
 * @param {number} similarityThreshold - minimum `1 - distance` needed to link two episodes
 * @param {number} minClusterSize - groups smaller than this are dropped
 * @returns {Array<Array<Object>>} clusters of episode rows
 */
function clusterViaKNN(db, episodes, similarityThreshold, minClusterSize) {
  const total = episodes.length;
  // Never ask for more neighbours than there are candidates (capped at 50).
  const neighborLimit = Math.min(50, total);
  const indexById = new Map(episodes.map((episode, position) => [episode.id, position]));

  // Union-find forest: every episode starts as its own root.
  const parent = Array.from({ length: total }, (_, position) => position);

  const rootOf = (node) => {
    let current = node;
    while (parent[current] !== current) {
      // Re-point the node at its grandparent while walking up to the root.
      parent[current] = parent[parent[current]];
      current = parent[current];
    }
    return current;
  };

  const merge = (left, right) => {
    const leftRoot = rootOf(left);
    const rightRoot = rootOf(right);
    if (leftRoot !== rightRoot) {
      parent[leftRoot] = rightRoot;
    }
  };

  const getEmbedding = db.prepare('SELECT embedding FROM vec_episodes WHERE id = ?');
  const knnQuery = db.prepare(`
    SELECT id, distance
    FROM vec_episodes
    WHERE embedding MATCH ? AND k = ? AND consolidated = 0
  `);

  for (const [position, episode] of episodes.entries()) {
    const stored = getEmbedding.get(episode.id);
    if (!stored) {
      // No vector row for this episode: it stays in a group of its own.
      continue;
    }

    for (const neighbor of knnQuery.all(stored.embedding, neighborLimit)) {
      const isSelf = neighbor.id === episode.id;
      const neighborPosition = isSelf ? undefined : indexById.get(neighbor.id);
      // Skip the episode itself and neighbours outside the candidate set.
      if (neighborPosition === undefined) {
        continue;
      }
      if (1.0 - neighbor.distance >= similarityThreshold) {
        merge(position, neighborPosition);
      }
    }
  }

  // Bucket episodes by their final root, keeping first-seen order.
  const membersByRoot = new Map();
  for (const [position, episode] of episodes.entries()) {
    const root = rootOf(position);
    const members = membersByRoot.get(root);
    if (members) {
      members.push(episode);
    } else {
      membersByRoot.set(root, [episode]);
    }
  }

  return [...membersByRoot.values()].filter((members) => members.length >= minClusterSize);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Loads every episode that is not yet consolidated, not superseded and has an
 * embedding, then clusters those episodes by vector similarity.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider - accepted for signature compatibility; not read here
 * @param {{ similarityThreshold?: number, minClusterSize?: number }} [options]
 * @returns {Array<Array<Object>>} clusters of episode rows; empty when nothing is eligible
 */
export function clusterEpisodes(db, embeddingProvider, options = {}) {
  const {
    similarityThreshold: threshold = 0.85,
    minClusterSize: smallestCluster = 3,
  } = options;

  const eligible = db
    .prepare('SELECT * FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL')
    .all();

  return eligible.length === 0
    ? []
    : clusterViaKNN(db, eligible, threshold, smallestCluster);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Fallback principle extractor used when neither a custom extractor nor an
 * LLM provider is supplied: joins the distinct episode contents into one
 * semantic statement.
 *
 * @param {Array<{ content: string }>} episodes
 * @returns {{ content: string, type: 'semantic' }}
 */
function defaultExtractPrinciple(episodes) {
  // A Set keeps first-seen order while dropping repeated contents.
  const distinct = new Set();
  episodes.forEach((episode) => {
    distinct.add(episode.content);
  });

  const summary = Array.from(distinct).join('; ');
  return {
    content: `Recurring pattern: ${summary}`,
    type: 'semantic',
  };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Asks the LLM provider to derive a principle from a cluster of episodes.
 *
 * @param {Object} llmProvider - must expose `json(messages)`
 * @param {Array<Object>} episodes - the cluster to summarise
 * @returns {Promise<Object>} whatever `llmProvider.json` resolves to
 */
async function llmExtractPrinciple(llmProvider, episodes) {
  return llmProvider.json(buildPrincipleExtractionPrompt(episodes));
}
|
|
99
|
+
|
|
100
|
+
/**
 * Runs one consolidation pass: clusters unconsolidated episodes, derives one
 * principle per cluster, stores it as a semantic or procedural memory, and
 * marks the source episodes as consolidated.
 *
 * The pass records itself in `consolidation_runs` ('running' → 'completed' or
 * 'failed') and writes a `consolidation_metrics` row on success.
 *
 * All asynchronous work (principle extraction, embedding) happens BEFORE the
 * write transaction is opened, so the transaction never spans an event-loop
 * tick and cannot capture unrelated statements issued on the same connection.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
 * @param {{ similarityThreshold?: number, minClusterSize?: number, extractPrinciple?: function, llmProvider?: Object }} [options]
 *   `extractPrinciple(cluster)` may be synchronous or return a Promise; it takes
 *   precedence over `llmProvider`. With neither, a default extractor is used.
 * @returns {Promise<{ runId: string, episodesEvaluated: number, clustersFound: number, principlesExtracted: number, semanticsCreated: number, proceduresCreated: number }>}
 * @throws Rethrows any extraction, embedding or database error after marking the run as 'failed'.
 */
export async function runConsolidation(db, embeddingProvider, options = {}) {
  const {
    similarityThreshold = 0.85,
    minClusterSize = 3,
    extractPrinciple,
    llmProvider,
  } = options;

  const runId = generateId();
  const now = new Date().toISOString();

  // Recorded outside the write transaction so a failed run still leaves a trace.
  db.prepare(`
    INSERT INTO consolidation_runs (
      id, started_at, status, input_episode_ids, output_memory_ids, consolidation_model, checkpoint_cursor
    )
    VALUES (?, ?, 'running', '[]', '[]', ?, ?)
  `).run(runId, now, llmProvider?.modelName || null, now);

  try {
    const clusters = clusterEpisodes(db, embeddingProvider, { similarityThreshold, minClusterSize });

    const episodesEvaluated = db.prepare(
      'SELECT COUNT(*) as count FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
    ).get().count;

    // Prepare every statement up front so a schema problem surfaces before
    // any (potentially slow or costly) LLM / embedding call is made.
    const insertProcedure = db.prepare(`
      INSERT INTO procedures (
        id, content, embedding, state, trigger_conditions,
        evidence_episode_ids, success_count, failure_count,
        embedding_model, embedding_version, created_at, salience
      ) VALUES (?, ?, ?, 'active', ?, ?, 0, 0, ?, ?, ?, ?)
    `);
    const insertVecProcedure = db.prepare('INSERT INTO vec_procedures(id, embedding, state) VALUES (?, ?, ?)');
    const insertSemantic = db.prepare(`
      INSERT INTO semantics (
        id, content, embedding, state, evidence_episode_ids,
        evidence_count, supporting_count, source_type_diversity,
        consolidation_checkpoint, embedding_model, embedding_version,
        consolidation_model, created_at, salience
      ) VALUES (?, ?, ?, 'active', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const insertVecSemantic = db.prepare('INSERT INTO vec_semantics(id, embedding, state) VALUES (?, ?, ?)');
    const markEpisode = db.prepare('UPDATE episodes SET consolidated = 1 WHERE id = ?');
    const markVecEpisode = db.prepare('UPDATE vec_episodes SET consolidated = ? WHERE id = ?');
    const updateRunCompleted = db.prepare(`
      UPDATE consolidation_runs
      SET status = 'completed',
          completed_at = ?,
          input_episode_ids = ?,
          output_memory_ids = ?
      WHERE id = ?
    `);
    const insertMetrics = db.prepare(`
      INSERT INTO consolidation_metrics (id, run_id, min_cluster_size, similarity_threshold,
        episodes_evaluated, clusters_found, principles_extracted, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);

    // Phase 1 (async): derive and embed one principle per cluster. Nothing is
    // written yet, so a failure here leaves the memory tables untouched.
    const drafts = [];
    for (const cluster of clusters) {
      let principle;
      if (extractPrinciple) {
        // Awaited so that async custom extractors work; a plain value passes through.
        principle = await extractPrinciple(cluster);
      } else if (llmProvider) {
        principle = await llmExtractPrinciple(llmProvider, cluster);
      } else {
        principle = defaultExtractPrinciple(cluster);
      }

      // Clusters without a usable principle are skipped and stay unconsolidated.
      if (!principle || !principle.content) continue;

      const vector = await embeddingProvider.embed(principle.content);
      drafts.push({
        cluster,
        principle,
        embeddingBuffer: embeddingProvider.vectorToBuffer(vector),
        memoryId: generateId(),
        createdAt: new Date().toISOString(),
      });
    }

    // Phase 2 (sync): persist everything atomically.
    const allInputIds = [];
    const allOutputIds = [];
    let principlesExtracted = 0;
    let proceduresExtracted = 0;

    db.exec('BEGIN IMMEDIATE');
    try {
      for (const { cluster, principle, embeddingBuffer, memoryId, createdAt } of drafts) {
        const clusterIds = cluster.map(ep => ep.id);
        const sourceTypeDiversity = new Set(cluster.map(ep => ep.source)).size;
        // reduce instead of Math.max(...spread) so very large clusters cannot
        // exceed the engine's argument-count limit.
        const maxSalience = cluster.reduce(
          (highest, ep) => Math.max(highest, ep.salience ?? 0.5),
          -Infinity,
        );

        for (const id of clusterIds) allInputIds.push(id);

        if (principle.type === 'procedural') {
          insertProcedure.run(
            memoryId,
            principle.content,
            embeddingBuffer,
            principle.conditions ? JSON.stringify(principle.conditions) : null,
            JSON.stringify(clusterIds),
            embeddingProvider.modelName,
            embeddingProvider.modelVersion,
            createdAt,
            maxSalience,
          );
          insertVecProcedure.run(memoryId, embeddingBuffer, 'active');
          proceduresExtracted++;
        } else {
          insertSemantic.run(
            memoryId,
            principle.content,
            embeddingBuffer,
            JSON.stringify(clusterIds),
            cluster.length,
            cluster.length,
            sourceTypeDiversity,
            runId,
            embeddingProvider.modelName,
            embeddingProvider.modelVersion,
            llmProvider?.modelName || null,
            createdAt,
            maxSalience,
          );
          insertVecSemantic.run(memoryId, embeddingBuffer, 'active');
        }

        allOutputIds.push(memoryId);
        principlesExtracted++;

        for (const ep of cluster) {
          markEpisode.run(ep.id);
          // The vec table flag is bound as a BigInt, as in the original code.
          markVecEpisode.run(BigInt(1), ep.id);
        }
      }

      const completedAt = new Date().toISOString();
      updateRunCompleted.run(completedAt, JSON.stringify(allInputIds), JSON.stringify(allOutputIds), runId);
      insertMetrics.run(
        generateId(), runId, minClusterSize, similarityThreshold,
        episodesEvaluated, clusters.length, principlesExtracted, completedAt,
      );
      db.exec('COMMIT');
    } catch (err) {
      if (db.inTransaction) {
        db.exec('ROLLBACK');
      }
      throw err;
    }

    return {
      runId,
      episodesEvaluated,
      clustersFound: clusters.length,
      principlesExtracted,
      semanticsCreated: principlesExtracted - proceduresExtracted,
      proceduresCreated: proceduresExtracted,
    };
  } catch (err) {
    // Any failure (clustering, extraction, embedding, or writes) marks the run failed.
    const failedAt = new Date().toISOString();
    db.prepare(`
      UPDATE consolidation_runs
      SET status = 'failed', completed_at = ?
      WHERE id = ?
    `).run(failedAt, runId);
    throw err;
  }
}
|
package/src/context.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
export function contextMatchRatio(encodingContext, retrievalContext) {
|
|
2
|
-
if (!encodingContext || !retrievalContext) return 0;
|
|
3
|
-
const retrievalKeys = Object.keys(retrievalContext);
|
|
4
|
-
if (retrievalKeys.length === 0) return 0;
|
|
5
|
-
const sharedKeys = retrievalKeys.filter(k => k in encodingContext);
|
|
6
|
-
if (sharedKeys.length === 0) return 0;
|
|
7
|
-
const matches = sharedKeys.filter(k => encodingContext[k] === retrievalContext[k]).length;
|
|
8
|
-
return matches / retrievalKeys.length;
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
export function contextModifier(encodingContext, retrievalContext, weight = 0.3) {
|
|
12
|
-
if (!encodingContext || !retrievalContext) return 1.0;
|
|
13
|
-
const ratio = contextMatchRatio(encodingContext, retrievalContext);
|
|
14
|
-
return 1.0 + (weight * ratio);
|
|
15
|
-
}
|
|
1
|
+
/**
 * Computes the fraction of retrieval-context keys whose value is also present,
 * with a strictly equal value, in the encoding context.
 *
 * Only the encoding context's own properties count: inherited members such as
 * `constructor` or `toString` never produce a match. Values are compared with
 * `===`, so nested objects match only when they are the same reference.
 *
 * @param {Object|null|undefined} encodingContext - context stored with the memory
 * @param {Object|null|undefined} retrievalContext - context supplied at recall time
 * @returns {number} a ratio in [0, 1]; 0 when either context is missing or the
 *   retrieval context has no keys
 */
export function contextMatchRatio(encodingContext, retrievalContext) {
  if (!encodingContext || !retrievalContext) return 0;

  const retrievalKeys = Object.keys(retrievalContext);
  if (retrievalKeys.length === 0) return 0;

  // Own-property check instead of `in`, which would also see prototype members.
  const hasOwn = Object.prototype.hasOwnProperty;
  const matches = retrievalKeys.filter(
    (key) => hasOwn.call(encodingContext, key) && encodingContext[key] === retrievalContext[key],
  ).length;

  return matches / retrievalKeys.length;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Turns the context match ratio into a multiplicative boost.
 *
 * @param {Object|null|undefined} encodingContext
 * @param {Object|null|undefined} retrievalContext
 * @param {number} [weight=0.3] - boost applied when every retrieval key matches
 * @returns {number} 1.0 when either context is missing, otherwise `1.0 + weight * ratio`
 */
export function contextModifier(encodingContext, retrievalContext, weight = 0.3) {
  const hasBothContexts = Boolean(encodingContext) && Boolean(retrievalContext);
  if (!hasBothContexts) {
    return 1.0;
  }

  const boost = weight * contextMatchRatio(encodingContext, retrievalContext);
  return 1.0 + boost;
}
|
package/src/db.js
CHANGED
|
@@ -213,6 +213,33 @@ function migrateEmbeddingsToVec0(db, dimensions) {
|
|
|
213
213
|
});
|
|
214
214
|
}
|
|
215
215
|
|
|
216
|
+
/**
 * Counts rows that carry an embedding in the base tables and rows in the
 * matching vec tables, so callers can compare the two sides.
 *
 * @param {import('better-sqlite3').Database} db
 * @returns {{ episodes: number, vecEpisodes: number, semantics: number, vecSemantics: number, procedures: number, vecProcedures: number }}
 *   vec counts stay 0 from the first vec query that throws onwards
 */
function getEmbeddingSyncCounts(db) {
  const countOf = (sql) => db.prepare(sql).get().c;

  const vecCounts = { episodes: 0, semantics: 0, procedures: 0 };
  try {
    vecCounts.episodes = countOf('SELECT COUNT(*) as c FROM vec_episodes');
    vecCounts.semantics = countOf('SELECT COUNT(*) as c FROM vec_semantics');
    vecCounts.procedures = countOf('SELECT COUNT(*) as c FROM vec_procedures');
  } catch {
    // vec tables may not exist yet
  }

  const episodeCount = countOf('SELECT COUNT(*) as c FROM episodes WHERE embedding IS NOT NULL');
  const semanticCount = countOf('SELECT COUNT(*) as c FROM semantics WHERE embedding IS NOT NULL');
  const procedureCount = countOf('SELECT COUNT(*) as c FROM procedures WHERE embedding IS NOT NULL');

  return {
    episodes: episodeCount,
    vecEpisodes: vecCounts.episodes,
    semantics: semanticCount,
    vecSemantics: vecCounts.semantics,
    procedures: procedureCount,
    vecProcedures: vecCounts.procedures,
  };
}
|
|
242
|
+
|
|
216
243
|
function addColumnIfMissing(db, table, column, definition) {
|
|
217
244
|
const columns = db.pragma(`table_info(${table})`);
|
|
218
245
|
const exists = columns.some(col => col.name === column);
|
|
@@ -305,6 +332,16 @@ export function createDatabase(dataDir, options = {}) {
|
|
|
305
332
|
|
|
306
333
|
if (!migrated) {
|
|
307
334
|
migrateEmbeddingsToVec0(db, dimensions);
|
|
335
|
+
const sync = getEmbeddingSyncCounts(db);
|
|
336
|
+
if (
|
|
337
|
+
sync.episodes !== sync.vecEpisodes
|
|
338
|
+
|| sync.semantics !== sync.vecSemantics
|
|
339
|
+
|| sync.procedures !== sync.vecProcedures
|
|
340
|
+
) {
|
|
341
|
+
// Legacy blobs exist but could not be copied cleanly into vec0.
|
|
342
|
+
// Mark the store for lazy re-embedding so the next encode/recall repairs it.
|
|
343
|
+
migrated = true;
|
|
344
|
+
}
|
|
308
345
|
}
|
|
309
346
|
}
|
|
310
347
|
|