audrey 0.15.0 → 0.16.1

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -1,242 +1,265 @@
1
- import { generateId } from './ulid.js';
2
- import { buildPrincipleExtractionPrompt } from './prompts.js';
3
-
4
- function clusterViaKNN(db, episodes, similarityThreshold, minClusterSize) {
5
- const n = episodes.length;
6
- const k = Math.min(50, n);
7
- const idToIndex = new Map(episodes.map((ep, i) => [ep.id, i]));
8
-
9
- const parent = new Array(n);
10
- for (let i = 0; i < n; i++) parent[i] = i;
11
-
12
- function find(x) {
13
- while (parent[x] !== x) {
14
- parent[x] = parent[parent[x]];
15
- x = parent[x];
16
- }
17
- return x;
18
- }
19
-
20
- function union(a, b) {
21
- const ra = find(a);
22
- const rb = find(b);
23
- if (ra !== rb) parent[ra] = rb;
24
- }
25
-
26
- const getEmbedding = db.prepare('SELECT embedding FROM vec_episodes WHERE id = ?');
27
- const knnQuery = db.prepare(`
28
- SELECT id, distance
29
- FROM vec_episodes
30
- WHERE embedding MATCH ? AND k = ? AND consolidated = 0
31
- `);
32
-
33
- for (let i = 0; i < n; i++) {
34
- const ep = episodes[i];
35
- const vecRow = getEmbedding.get(ep.id);
36
- if (!vecRow) continue;
37
-
38
- const neighbors = knnQuery.all(vecRow.embedding, k);
39
- for (const neighbor of neighbors) {
40
- if (neighbor.id === ep.id) continue;
41
- const j = idToIndex.get(neighbor.id);
42
- if (j === undefined) continue;
43
- const similarity = 1.0 - neighbor.distance;
44
- if (similarity >= similarityThreshold) {
45
- union(i, j);
46
- }
47
- }
48
- }
49
-
50
- const groups = new Map();
51
- for (let i = 0; i < n; i++) {
52
- const root = find(i);
53
- if (!groups.has(root)) groups.set(root, []);
54
- groups.get(root).push(episodes[i]);
55
- }
56
-
57
- const clusters = [];
58
- for (const group of groups.values()) {
59
- if (group.length >= minClusterSize) {
60
- clusters.push(group);
61
- }
62
- }
63
- return clusters;
64
- }
65
-
66
- /**
67
- * @param {import('better-sqlite3').Database} db
68
- * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
69
- * @param {{ similarityThreshold?: number, minClusterSize?: number }} [options]
70
- * @returns {Array<Array<Object>>}
71
- */
72
- export function clusterEpisodes(db, embeddingProvider, options = {}) {
73
- const {
74
- similarityThreshold = 0.85,
75
- minClusterSize = 3,
76
- } = options;
77
-
78
- const episodes = db.prepare(
79
- 'SELECT * FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
80
- ).all();
81
-
82
- if (episodes.length === 0) return [];
83
-
84
- return clusterViaKNN(db, episodes, similarityThreshold, minClusterSize);
85
- }
86
-
87
- function defaultExtractPrinciple(episodes) {
88
- const uniqueContents = [...new Set(episodes.map(e => e.content))];
89
- return {
90
- content: `Recurring pattern: ${uniqueContents.join('; ')}`,
91
- type: 'semantic',
92
- };
93
- }
94
-
95
- async function llmExtractPrinciple(llmProvider, episodes) {
96
- const messages = buildPrincipleExtractionPrompt(episodes);
97
- return llmProvider.json(messages);
98
- }
99
-
100
- /**
101
- * @param {import('better-sqlite3').Database} db
102
- * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
103
- * @param {{ similarityThreshold?: number, minClusterSize?: number, extractPrinciple?: function, llmProvider?: Object }} [options]
104
- * @returns {Promise<{ runId: string, episodesEvaluated: number, clustersFound: number, principlesExtracted: number }>}
105
- */
106
- export async function runConsolidation(db, embeddingProvider, options = {}) {
107
- const {
108
- similarityThreshold = 0.85,
109
- minClusterSize = 3,
110
- extractPrinciple,
111
- llmProvider,
112
- } = options;
113
-
114
- const runId = generateId();
115
- const now = new Date().toISOString();
116
-
117
- db.prepare(`
118
- INSERT INTO consolidation_runs (id, started_at, status, input_episode_ids, output_memory_ids, consolidation_model)
119
- VALUES (?, ?, 'running', '[]', '[]', ?)
120
- `).run(runId, now, llmProvider?.modelName || null);
121
-
122
- try {
123
- const clusters = clusterEpisodes(db, embeddingProvider, { similarityThreshold, minClusterSize });
124
-
125
- const episodesEvaluated = db.prepare(
126
- 'SELECT COUNT(*) as count FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
127
- ).get().count;
128
-
129
- const clusterData = [];
130
- for (const cluster of clusters) {
131
- let principle;
132
- if (extractPrinciple) {
133
- principle = extractPrinciple(cluster);
134
- } else if (llmProvider) {
135
- principle = await llmExtractPrinciple(llmProvider, cluster);
136
- } else {
137
- principle = defaultExtractPrinciple(cluster);
138
- }
139
-
140
- if (!principle || !principle.content) continue;
141
-
142
- const clusterIds = cluster.map(ep => ep.id);
143
- const sourceTypes = new Set(cluster.map(ep => ep.source));
144
- const vector = await embeddingProvider.embed(principle.content);
145
- const embeddingBuffer = embeddingProvider.vectorToBuffer(vector);
146
-
147
- clusterData.push({
148
- cluster,
149
- principle,
150
- clusterIds,
151
- sourceTypeDiversity: sourceTypes.size,
152
- embeddingBuffer,
153
- semanticId: generateId(),
154
- semanticNow: new Date().toISOString(),
155
- maxSalience: Math.max(...cluster.map(ep => ep.salience ?? 0.5)),
156
- });
157
- }
158
-
159
- const allInputIds = [];
160
- const allOutputIds = [];
161
- let principlesExtracted = 0;
162
-
163
- const promoteAll = db.transaction(() => {
164
- for (const entry of clusterData) {
165
- allInputIds.push(...entry.clusterIds);
166
-
167
- db.prepare(`
168
- INSERT INTO semantics (
169
- id, content, embedding, state, evidence_episode_ids,
170
- evidence_count, supporting_count, source_type_diversity,
171
- consolidation_checkpoint, embedding_model, embedding_version,
172
- consolidation_model, created_at, salience
173
- ) VALUES (?, ?, ?, 'active', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
174
- `).run(
175
- entry.semanticId,
176
- entry.principle.content,
177
- entry.embeddingBuffer,
178
- JSON.stringify(entry.clusterIds),
179
- entry.cluster.length,
180
- entry.cluster.length,
181
- entry.sourceTypeDiversity,
182
- runId,
183
- embeddingProvider.modelName,
184
- embeddingProvider.modelVersion,
185
- llmProvider?.modelName || null,
186
- entry.semanticNow,
187
- entry.maxSalience,
188
- );
189
-
190
- db.prepare('INSERT INTO vec_semantics(id, embedding, state) VALUES (?, ?, ?)').run(
191
- entry.semanticId, entry.embeddingBuffer, 'active'
192
- );
193
-
194
- allOutputIds.push(entry.semanticId);
195
- principlesExtracted++;
196
-
197
- const markStmt = db.prepare('UPDATE episodes SET consolidated = 1 WHERE id = ?');
198
- const markVecStmt = db.prepare('UPDATE vec_episodes SET consolidated = ? WHERE id = ?');
199
- for (const ep of entry.cluster) {
200
- markStmt.run(ep.id);
201
- markVecStmt.run(BigInt(1), ep.id);
202
- }
203
- }
204
-
205
- const completedAt = new Date().toISOString();
206
- db.prepare(`
207
- UPDATE consolidation_runs
208
- SET status = 'completed',
209
- completed_at = ?,
210
- input_episode_ids = ?,
211
- output_memory_ids = ?
212
- WHERE id = ?
213
- `).run(completedAt, JSON.stringify(allInputIds), JSON.stringify(allOutputIds), runId);
214
- });
215
-
216
- promoteAll();
217
-
218
- db.prepare(`
219
- INSERT INTO consolidation_metrics (id, run_id, min_cluster_size, similarity_threshold,
220
- episodes_evaluated, clusters_found, principles_extracted, created_at)
221
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
222
- `).run(
223
- generateId(), runId, minClusterSize, similarityThreshold,
224
- episodesEvaluated, clusters.length, principlesExtracted, new Date().toISOString(),
225
- );
226
-
227
- return {
228
- runId,
229
- episodesEvaluated,
230
- clustersFound: clusters.length,
231
- principlesExtracted,
232
- };
233
- } catch (err) {
234
- const failedAt = new Date().toISOString();
235
- db.prepare(`
236
- UPDATE consolidation_runs
237
- SET status = 'failed', completed_at = ?
238
- WHERE id = ?
239
- `).run(failedAt, runId);
240
- throw err;
241
- }
242
- }
1
+ import { generateId } from './ulid.js';
2
+ import { buildPrincipleExtractionPrompt } from './prompts.js';
3
+
4
+ function clusterViaKNN(db, episodes, similarityThreshold, minClusterSize) {
5
+ const n = episodes.length;
6
+ const k = Math.min(50, n);
7
+ const idToIndex = new Map(episodes.map((ep, i) => [ep.id, i]));
8
+
9
+ const parent = new Array(n);
10
+ for (let i = 0; i < n; i++) parent[i] = i;
11
+
12
+ function find(x) {
13
+ while (parent[x] !== x) {
14
+ parent[x] = parent[parent[x]];
15
+ x = parent[x];
16
+ }
17
+ return x;
18
+ }
19
+
20
+ function union(a, b) {
21
+ const ra = find(a);
22
+ const rb = find(b);
23
+ if (ra !== rb) parent[ra] = rb;
24
+ }
25
+
26
+ const getEmbedding = db.prepare('SELECT embedding FROM vec_episodes WHERE id = ?');
27
+ const knnQuery = db.prepare(`
28
+ SELECT id, distance
29
+ FROM vec_episodes
30
+ WHERE embedding MATCH ? AND k = ? AND consolidated = 0
31
+ `);
32
+
33
+ for (let i = 0; i < n; i++) {
34
+ const ep = episodes[i];
35
+ const vecRow = getEmbedding.get(ep.id);
36
+ if (!vecRow) continue;
37
+
38
+ const neighbors = knnQuery.all(vecRow.embedding, k);
39
+ for (const neighbor of neighbors) {
40
+ if (neighbor.id === ep.id) continue;
41
+ const j = idToIndex.get(neighbor.id);
42
+ if (j === undefined) continue;
43
+ const similarity = 1.0 - neighbor.distance;
44
+ if (similarity >= similarityThreshold) {
45
+ union(i, j);
46
+ }
47
+ }
48
+ }
49
+
50
+ const groups = new Map();
51
+ for (let i = 0; i < n; i++) {
52
+ const root = find(i);
53
+ if (!groups.has(root)) groups.set(root, []);
54
+ groups.get(root).push(episodes[i]);
55
+ }
56
+
57
+ const clusters = [];
58
+ for (const group of groups.values()) {
59
+ if (group.length >= minClusterSize) {
60
+ clusters.push(group);
61
+ }
62
+ }
63
+ return clusters;
64
+ }
65
+
66
+ /**
67
+ * @param {import('better-sqlite3').Database} db
68
+ * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
69
+ * @param {{ similarityThreshold?: number, minClusterSize?: number }} [options]
70
+ * @returns {Array<Array<Object>>}
71
+ */
72
+ export function clusterEpisodes(db, embeddingProvider, options = {}) {
73
+ const {
74
+ similarityThreshold = 0.85,
75
+ minClusterSize = 3,
76
+ } = options;
77
+
78
+ const episodes = db.prepare(
79
+ 'SELECT * FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
80
+ ).all();
81
+
82
+ if (episodes.length === 0) return [];
83
+
84
+ return clusterViaKNN(db, episodes, similarityThreshold, minClusterSize);
85
+ }
86
+
87
+ function defaultExtractPrinciple(episodes) {
88
+ const uniqueContents = [...new Set(episodes.map(e => e.content))];
89
+ return {
90
+ content: `Recurring pattern: ${uniqueContents.join('; ')}`,
91
+ type: 'semantic',
92
+ };
93
+ }
94
+
95
+ async function llmExtractPrinciple(llmProvider, episodes) {
96
+ const messages = buildPrincipleExtractionPrompt(episodes);
97
+ return llmProvider.json(messages);
98
+ }
99
+
100
+ /**
101
+ * @param {import('better-sqlite3').Database} db
102
+ * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
103
+ * @param {{ similarityThreshold?: number, minClusterSize?: number, extractPrinciple?: function, llmProvider?: Object }} [options]
104
+ * @returns {Promise<{ runId: string, episodesEvaluated: number, clustersFound: number, principlesExtracted: number }>}
105
+ */
106
+ export async function runConsolidation(db, embeddingProvider, options = {}) {
107
+ const {
108
+ similarityThreshold = 0.85,
109
+ minClusterSize = 3,
110
+ extractPrinciple,
111
+ llmProvider,
112
+ } = options;
113
+
114
+ const runId = generateId();
115
+ const now = new Date().toISOString();
116
+
117
+ db.prepare(`
118
+ INSERT INTO consolidation_runs (
119
+ id, started_at, status, input_episode_ids, output_memory_ids, consolidation_model, checkpoint_cursor
120
+ )
121
+ VALUES (?, ?, 'running', '[]', '[]', ?, ?)
122
+ `).run(runId, now, llmProvider?.modelName || null, now);
123
+
124
+ try {
125
+ const clusters = clusterEpisodes(db, embeddingProvider, { similarityThreshold, minClusterSize });
126
+
127
+ const episodesEvaluated = db.prepare(
128
+ 'SELECT COUNT(*) as count FROM episodes WHERE consolidated = 0 AND superseded_by IS NULL AND embedding IS NOT NULL'
129
+ ).get().count;
130
+
131
+ const allInputIds = [];
132
+ const allOutputIds = [];
133
+ let principlesExtracted = 0;
134
+ let proceduresExtracted = 0;
135
+ const insertProcedure = db.prepare(`
136
+ INSERT INTO procedures (
137
+ id, content, embedding, state, trigger_conditions,
138
+ evidence_episode_ids, success_count, failure_count,
139
+ embedding_model, embedding_version, created_at, salience
140
+ ) VALUES (?, ?, ?, 'active', ?, ?, 0, 0, ?, ?, ?, ?)
141
+ `);
142
+ const insertVecProcedure = db.prepare('INSERT INTO vec_procedures(id, embedding, state) VALUES (?, ?, ?)');
143
+ const insertSemantic = db.prepare(`
144
+ INSERT INTO semantics (
145
+ id, content, embedding, state, evidence_episode_ids,
146
+ evidence_count, supporting_count, source_type_diversity,
147
+ consolidation_checkpoint, embedding_model, embedding_version,
148
+ consolidation_model, created_at, salience
149
+ ) VALUES (?, ?, ?, 'active', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
150
+ `);
151
+ const insertVecSemantic = db.prepare('INSERT INTO vec_semantics(id, embedding, state) VALUES (?, ?, ?)');
152
+ const markEpisode = db.prepare('UPDATE episodes SET consolidated = 1 WHERE id = ?');
153
+ const markVecEpisode = db.prepare('UPDATE vec_episodes SET consolidated = ? WHERE id = ?');
154
+ const updateRunCompleted = db.prepare(`
155
+ UPDATE consolidation_runs
156
+ SET status = 'completed',
157
+ completed_at = ?,
158
+ input_episode_ids = ?,
159
+ output_memory_ids = ?
160
+ WHERE id = ?
161
+ `);
162
+ const insertMetrics = db.prepare(`
163
+ INSERT INTO consolidation_metrics (id, run_id, min_cluster_size, similarity_threshold,
164
+ episodes_evaluated, clusters_found, principles_extracted, created_at)
165
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
166
+ `);
167
+
168
+ db.exec('BEGIN IMMEDIATE');
169
+ try {
170
+ for (const cluster of clusters) {
171
+ let principle;
172
+ if (extractPrinciple) {
173
+ principle = extractPrinciple(cluster);
174
+ } else if (llmProvider) {
175
+ principle = await llmExtractPrinciple(llmProvider, cluster);
176
+ } else {
177
+ principle = defaultExtractPrinciple(cluster);
178
+ }
179
+
180
+ if (!principle || !principle.content) continue;
181
+
182
+ const clusterIds = cluster.map(ep => ep.id);
183
+ const sourceTypeDiversity = new Set(cluster.map(ep => ep.source)).size;
184
+ const vector = await embeddingProvider.embed(principle.content);
185
+ const embeddingBuffer = embeddingProvider.vectorToBuffer(vector);
186
+ const memoryId = generateId();
187
+ const createdAt = new Date().toISOString();
188
+ const maxSalience = Math.max(...cluster.map(ep => ep.salience ?? 0.5));
189
+
190
+ allInputIds.push(...clusterIds);
191
+
192
+ if (principle.type === 'procedural') {
193
+ insertProcedure.run(
194
+ memoryId,
195
+ principle.content,
196
+ embeddingBuffer,
197
+ principle.conditions ? JSON.stringify(principle.conditions) : null,
198
+ JSON.stringify(clusterIds),
199
+ embeddingProvider.modelName,
200
+ embeddingProvider.modelVersion,
201
+ createdAt,
202
+ maxSalience,
203
+ );
204
+ insertVecProcedure.run(memoryId, embeddingBuffer, 'active');
205
+ proceduresExtracted++;
206
+ } else {
207
+ insertSemantic.run(
208
+ memoryId,
209
+ principle.content,
210
+ embeddingBuffer,
211
+ JSON.stringify(clusterIds),
212
+ cluster.length,
213
+ cluster.length,
214
+ sourceTypeDiversity,
215
+ runId,
216
+ embeddingProvider.modelName,
217
+ embeddingProvider.modelVersion,
218
+ llmProvider?.modelName || null,
219
+ createdAt,
220
+ maxSalience,
221
+ );
222
+ insertVecSemantic.run(memoryId, embeddingBuffer, 'active');
223
+ }
224
+
225
+ allOutputIds.push(memoryId);
226
+ principlesExtracted++;
227
+
228
+ for (const ep of cluster) {
229
+ markEpisode.run(ep.id);
230
+ markVecEpisode.run(BigInt(1), ep.id);
231
+ }
232
+ }
233
+
234
+ const completedAt = new Date().toISOString();
235
+ updateRunCompleted.run(completedAt, JSON.stringify(allInputIds), JSON.stringify(allOutputIds), runId);
236
+ insertMetrics.run(
237
+ generateId(), runId, minClusterSize, similarityThreshold,
238
+ episodesEvaluated, clusters.length, principlesExtracted, completedAt,
239
+ );
240
+ db.exec('COMMIT');
241
+ } catch (err) {
242
+ if (db.inTransaction) {
243
+ db.exec('ROLLBACK');
244
+ }
245
+ throw err;
246
+ }
247
+
248
+ return {
249
+ runId,
250
+ episodesEvaluated,
251
+ clustersFound: clusters.length,
252
+ principlesExtracted,
253
+ semanticsCreated: principlesExtracted - proceduresExtracted,
254
+ proceduresCreated: proceduresExtracted,
255
+ };
256
+ } catch (err) {
257
+ const failedAt = new Date().toISOString();
258
+ db.prepare(`
259
+ UPDATE consolidation_runs
260
+ SET status = 'failed', completed_at = ?
261
+ WHERE id = ?
262
+ `).run(failedAt, runId);
263
+ throw err;
264
+ }
265
+ }
package/src/context.js CHANGED
@@ -1,15 +1,15 @@
1
- export function contextMatchRatio(encodingContext, retrievalContext) {
2
- if (!encodingContext || !retrievalContext) return 0;
3
- const retrievalKeys = Object.keys(retrievalContext);
4
- if (retrievalKeys.length === 0) return 0;
5
- const sharedKeys = retrievalKeys.filter(k => k in encodingContext);
6
- if (sharedKeys.length === 0) return 0;
7
- const matches = sharedKeys.filter(k => encodingContext[k] === retrievalContext[k]).length;
8
- return matches / retrievalKeys.length;
9
- }
10
-
11
- export function contextModifier(encodingContext, retrievalContext, weight = 0.3) {
12
- if (!encodingContext || !retrievalContext) return 1.0;
13
- const ratio = contextMatchRatio(encodingContext, retrievalContext);
14
- return 1.0 + (weight * ratio);
15
- }
1
+ export function contextMatchRatio(encodingContext, retrievalContext) {
2
+ if (!encodingContext || !retrievalContext) return 0;
3
+ const retrievalKeys = Object.keys(retrievalContext);
4
+ if (retrievalKeys.length === 0) return 0;
5
+ const sharedKeys = retrievalKeys.filter(k => k in encodingContext);
6
+ if (sharedKeys.length === 0) return 0;
7
+ const matches = sharedKeys.filter(k => encodingContext[k] === retrievalContext[k]).length;
8
+ return matches / retrievalKeys.length;
9
+ }
10
+
11
+ export function contextModifier(encodingContext, retrievalContext, weight = 0.3) {
12
+ if (!encodingContext || !retrievalContext) return 1.0;
13
+ const ratio = contextMatchRatio(encodingContext, retrievalContext);
14
+ return 1.0 + (weight * ratio);
15
+ }