audrey 0.9.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/embedding.js CHANGED
@@ -11,33 +11,14 @@ import { createHash } from 'node:crypto';
11
11
  * @property {(buffer: Buffer) => number[]} bufferToVector
12
12
  */
13
13
 
14
- /**
15
- * @typedef {Object} MockEmbeddingConfig
16
- * @property {'mock'} provider
17
- * @property {number} [dimensions=64]
18
- */
19
-
20
- /**
21
- * @typedef {Object} OpenAIEmbeddingConfig
22
- * @property {'openai'} provider
23
- * @property {string} [apiKey]
24
- * @property {string} [model='text-embedding-3-small']
25
- * @property {number} [dimensions=1536]
26
- */
27
-
28
14
  /** @implements {EmbeddingProvider} */
29
15
  export class MockEmbeddingProvider {
30
- /** @param {Partial<MockEmbeddingConfig>} [config={}] */
31
16
  constructor({ dimensions = 64 } = {}) {
32
17
  this.dimensions = dimensions;
33
18
  this.modelName = 'mock-embedding';
34
19
  this.modelVersion = '1.0.0';
35
20
  }
36
21
 
37
- /**
38
- * @param {string} text
39
- * @returns {Promise<number[]>}
40
- */
41
22
  async embed(text) {
42
23
  const hash = createHash('sha256').update(text).digest();
43
24
  const vector = new Array(this.dimensions);
@@ -48,26 +29,14 @@ export class MockEmbeddingProvider {
48
29
  return vector.map(v => v / magnitude);
49
30
  }
50
31
 
51
- /**
52
- * @param {string[]} texts
53
- * @returns {Promise<number[][]>}
54
- */
55
32
  async embedBatch(texts) {
56
33
  return Promise.all(texts.map(t => this.embed(t)));
57
34
  }
58
35
 
59
- /**
60
- * @param {number[]} vector
61
- * @returns {Buffer}
62
- */
63
36
  vectorToBuffer(vector) {
64
37
  return Buffer.from(new Float32Array(vector).buffer);
65
38
  }
66
39
 
67
- /**
68
- * @param {Buffer} buffer
69
- * @returns {number[]}
70
- */
71
40
  bufferToVector(buffer) {
72
41
  return Array.from(new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4));
73
42
  }
@@ -75,7 +44,6 @@ export class MockEmbeddingProvider {
75
44
 
76
45
  /** @implements {EmbeddingProvider} */
77
46
  export class OpenAIEmbeddingProvider {
78
- /** @param {Partial<OpenAIEmbeddingConfig>} [config={}] */
79
47
  constructor({ apiKey, model = 'text-embedding-3-small', dimensions = 1536, timeout = 30000 } = {}) {
80
48
  this.apiKey = apiKey || process.env.OPENAI_API_KEY;
81
49
  this.model = model;
@@ -85,10 +53,6 @@ export class OpenAIEmbeddingProvider {
85
53
  this.modelVersion = 'latest';
86
54
  }
87
55
 
88
- /**
89
- * @param {string} text
90
- * @returns {Promise<number[]>}
91
- */
92
56
  async embed(text) {
93
57
  const controller = new AbortController();
94
58
  const timer = setTimeout(() => controller.abort(), this.timeout);
@@ -110,10 +74,6 @@ export class OpenAIEmbeddingProvider {
110
74
  }
111
75
  }
112
76
 
113
- /**
114
- * @param {string[]} texts
115
- * @returns {Promise<number[][]>}
116
- */
117
77
  async embedBatch(texts) {
118
78
  const controller = new AbortController();
119
79
  const timer = setTimeout(() => controller.abort(), this.timeout);
@@ -135,34 +95,162 @@ export class OpenAIEmbeddingProvider {
135
95
  }
136
96
  }
137
97
 
138
- /**
139
- * @param {number[]} vector
140
- * @returns {Buffer}
141
- */
142
98
  vectorToBuffer(vector) {
143
99
  return Buffer.from(new Float32Array(vector).buffer);
144
100
  }
145
101
 
146
- /**
147
- * @param {Buffer} buffer
148
- * @returns {number[]}
149
- */
150
102
  bufferToVector(buffer) {
151
103
  return Array.from(new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4));
152
104
  }
153
105
  }
154
106
 
155
- /**
156
- * @param {MockEmbeddingConfig | OpenAIEmbeddingConfig} config
157
- * @returns {MockEmbeddingProvider | OpenAIEmbeddingProvider}
158
- */
107
+ /** @implements {EmbeddingProvider} */
108
+ export class LocalEmbeddingProvider {
109
+ constructor({ model = 'Xenova/all-MiniLM-L6-v2', device = 'gpu', batchSize = 64 } = {}) {
110
+ this.model = model;
111
+ this.dimensions = 384;
112
+ this.modelName = model;
113
+ this.modelVersion = '1.0.0';
114
+ this.device = device;
115
+ this.batchSize = batchSize;
116
+ this._pipeline = null;
117
+ this._readyPromise = null;
118
+ this._actualDevice = null;
119
+ }
120
+
121
+ ready() {
122
+ if (!this._readyPromise) {
123
+ this._readyPromise = (async () => {
124
+ const { pipeline } = await import('@huggingface/transformers');
125
+ try {
126
+ this._pipeline = await pipeline('feature-extraction', this.model, {
127
+ dtype: 'fp32', device: this.device,
128
+ });
129
+ this._actualDevice = this.device;
130
+ } catch {
131
+ this._pipeline = await pipeline('feature-extraction', this.model, {
132
+ dtype: 'fp32', device: 'cpu',
133
+ });
134
+ this._actualDevice = 'cpu';
135
+ }
136
+ })();
137
+ }
138
+ return this._readyPromise;
139
+ }
140
+
141
+ async embed(text) {
142
+ await this.ready();
143
+ const output = await this._pipeline(text, { pooling: 'mean', normalize: true });
144
+ return Array.from(output.data);
145
+ }
146
+
147
+ async embedBatch(texts) {
148
+ if (texts.length === 0) return [];
149
+ await this.ready();
150
+ const results = [];
151
+ for (let i = 0; i < texts.length; i += this.batchSize) {
152
+ const chunk = texts.slice(i, i + this.batchSize);
153
+ const output = await this._pipeline(chunk, { pooling: 'mean', normalize: true });
154
+ results.push(...output.tolist());
155
+ }
156
+ return results;
157
+ }
158
+
159
+ vectorToBuffer(vector) {
160
+ return Buffer.from(new Float32Array(vector).buffer);
161
+ }
162
+
163
+ bufferToVector(buffer) {
164
+ return Array.from(new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4));
165
+ }
166
+ }
167
+
168
+ /** @implements {EmbeddingProvider} */
169
+ export class GeminiEmbeddingProvider {
170
+ constructor({ apiKey, model = 'gemini-embedding-001', timeout = 30000 } = {}) {
171
+ this.apiKey = apiKey || process.env.GOOGLE_API_KEY;
172
+ this.model = model;
173
+ this.dimensions = 3072;
174
+ this.timeout = timeout;
175
+ this.modelName = model;
176
+ this.modelVersion = 'latest';
177
+ }
178
+
179
+ async embed(text) {
180
+ if (!this.apiKey) throw new Error('Gemini embedding requires GOOGLE_API_KEY');
181
+ const controller = new AbortController();
182
+ const timer = setTimeout(() => controller.abort(), this.timeout);
183
+ try {
184
+ const response = await fetch(
185
+ `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:embedContent?key=${this.apiKey}`,
186
+ {
187
+ method: 'POST',
188
+ headers: { 'Content-Type': 'application/json' },
189
+ body: JSON.stringify({ model: `models/${this.model}`, content: { parts: [{ text }] } }),
190
+ signal: controller.signal,
191
+ }
192
+ );
193
+ if (!response.ok) throw new Error(`Gemini embedding failed: ${response.status}`);
194
+ const data = await response.json();
195
+ return data.embedding.values;
196
+ } finally {
197
+ clearTimeout(timer);
198
+ }
199
+ }
200
+
201
+ async embedBatch(texts) {
202
+ if (texts.length === 0) return [];
203
+ if (!this.apiKey) throw new Error('Gemini embedding requires GOOGLE_API_KEY');
204
+ const results = [];
205
+ for (let i = 0; i < texts.length; i += 100) {
206
+ const chunk = texts.slice(i, i + 100);
207
+ const controller = new AbortController();
208
+ const timer = setTimeout(() => controller.abort(), this.timeout);
209
+ try {
210
+ const response = await fetch(
211
+ `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:batchEmbedContents?key=${this.apiKey}`,
212
+ {
213
+ method: 'POST',
214
+ headers: { 'Content-Type': 'application/json' },
215
+ body: JSON.stringify({
216
+ requests: chunk.map(text => ({
217
+ model: `models/${this.model}`,
218
+ content: { parts: [{ text }] },
219
+ })),
220
+ }),
221
+ signal: controller.signal,
222
+ }
223
+ );
224
+ if (!response.ok) throw new Error(`Gemini batch embedding failed: ${response.status}`);
225
+ const data = await response.json();
226
+ results.push(...data.embeddings.map(e => e.values));
227
+ } finally {
228
+ clearTimeout(timer);
229
+ }
230
+ }
231
+ return results;
232
+ }
233
+
234
+ vectorToBuffer(vector) {
235
+ return Buffer.from(new Float32Array(vector).buffer);
236
+ }
237
+
238
+ bufferToVector(buffer) {
239
+ return Array.from(new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4));
240
+ }
241
+ }
242
+
159
243
  export function createEmbeddingProvider(config) {
160
244
  switch (config.provider) {
161
245
  case 'mock':
162
246
  return new MockEmbeddingProvider(config);
163
247
  case 'openai':
164
248
  return new OpenAIEmbeddingProvider(config);
249
+ case 'local':
250
+ return new LocalEmbeddingProvider(config);
251
+ case 'gemini':
252
+ return new GeminiEmbeddingProvider(config);
165
253
  default:
166
- throw new Error(`Unknown embedding provider: ${config.provider}. Valid: mock, openai`);
254
+ throw new Error(`Unknown embedding provider: ${config.provider}. Valid: mock, openai, local, gemini`);
167
255
  }
168
256
  }
package/src/encode.js CHANGED
@@ -1,61 +1,63 @@
1
- import { generateId } from './ulid.js';
2
- import { sourceReliability } from './confidence.js';
3
- import { arousalSalienceBoost } from './affect.js';
4
-
5
- /**
6
- * @param {import('better-sqlite3').Database} db
7
- * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
8
- * @param {{ content: string, source: string, salience?: number, causal?: { trigger?: string, consequence?: string }, tags?: string[], supersedes?: string }} params
9
- * @returns {Promise<string>}
10
- */
11
- export async function encodeEpisode(db, embeddingProvider, {
12
- content,
13
- source,
14
- salience = 0.5,
15
- causal,
16
- tags,
17
- supersedes,
18
- context = {},
19
- affect = {},
20
- arousalWeight = 0.3,
21
- }) {
22
- if (!content || typeof content !== 'string') throw new Error('content must be a non-empty string');
23
- if (salience < 0 || salience > 1) throw new Error('salience must be between 0 and 1');
24
- if (tags && !Array.isArray(tags)) throw new Error('tags must be an array');
25
-
26
- const reliability = sourceReliability(source);
27
- const vector = await embeddingProvider.embed(content);
28
- const embeddingBuffer = embeddingProvider.vectorToBuffer(vector);
29
- const id = generateId();
30
- const now = new Date().toISOString();
31
-
32
- const boost = arousalSalienceBoost(affect.arousal);
33
- const effectiveSalience = Math.min(1.0, salience + (boost * arousalWeight));
34
-
35
- const insertAndLink = db.transaction(() => {
36
- db.prepare(`
37
- INSERT INTO episodes (
38
- id, content, embedding, source, source_reliability, salience, context, affect,
39
- tags, causal_trigger, causal_consequence, created_at,
40
- embedding_model, embedding_version, supersedes
41
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
42
- `).run(
43
- id, content, embeddingBuffer, source, reliability, effectiveSalience,
44
- JSON.stringify(context),
45
- JSON.stringify(affect),
46
- tags ? JSON.stringify(tags) : null,
47
- causal?.trigger || null, causal?.consequence || null,
48
- now, embeddingProvider.modelName, embeddingProvider.modelVersion,
49
- supersedes || null,
50
- );
51
- db.prepare(
52
- 'INSERT INTO vec_episodes(id, embedding, source, consolidated) VALUES (?, ?, ?, ?)'
53
- ).run(id, embeddingBuffer, source, BigInt(0));
54
- if (supersedes) {
55
- db.prepare('UPDATE episodes SET superseded_by = ? WHERE id = ?').run(id, supersedes);
56
- }
57
- });
58
-
59
- insertAndLink();
60
- return id;
61
- }
1
+ import { generateId } from './ulid.js';
2
+ import { sourceReliability } from './confidence.js';
3
+ import { arousalSalienceBoost } from './affect.js';
4
+
5
+ /**
6
+ * @param {import('better-sqlite3').Database} db
7
+ * @param {import('./embedding.js').EmbeddingProvider} embeddingProvider
8
+ * @param {{ content: string, source: string, salience?: number, causal?: { trigger?: string, consequence?: string }, tags?: string[], supersedes?: string, context?: object, affect?: object, arousalWeight?: number, private?: boolean }} params
9
+ * @returns {Promise<string>}
10
+ */
11
+ export async function encodeEpisode(db, embeddingProvider, {
12
+ content,
13
+ source,
14
+ salience = 0.5,
15
+ causal,
16
+ tags,
17
+ supersedes,
18
+ context = {},
19
+ affect = {},
20
+ arousalWeight = 0.3,
21
+ private: isPrivate = false,
22
+ }) {
23
+ if (!content || typeof content !== 'string') throw new Error('content must be a non-empty string');
24
+ if (salience < 0 || salience > 1) throw new Error('salience must be between 0 and 1');
25
+ if (tags && !Array.isArray(tags)) throw new Error('tags must be an array');
26
+
27
+ const reliability = sourceReliability(source);
28
+ const vector = await embeddingProvider.embed(content);
29
+ const embeddingBuffer = embeddingProvider.vectorToBuffer(vector);
30
+ const id = generateId();
31
+ const now = new Date().toISOString();
32
+
33
+ const boost = arousalSalienceBoost(affect.arousal);
34
+ const effectiveSalience = Math.min(1.0, salience + (boost * arousalWeight));
35
+
36
+ const insertAndLink = db.transaction(() => {
37
+ db.prepare(`
38
+ INSERT INTO episodes (
39
+ id, content, embedding, source, source_reliability, salience, context, affect,
40
+ tags, causal_trigger, causal_consequence, created_at,
41
+ embedding_model, embedding_version, supersedes, "private"
42
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
43
+ `).run(
44
+ id, content, embeddingBuffer, source, reliability, effectiveSalience,
45
+ JSON.stringify(context),
46
+ JSON.stringify(affect),
47
+ tags ? JSON.stringify(tags) : null,
48
+ causal?.trigger || null, causal?.consequence || null,
49
+ now, embeddingProvider.modelName, embeddingProvider.modelVersion,
50
+ supersedes || null,
51
+ isPrivate ? 1 : 0,
52
+ );
53
+ db.prepare(
54
+ 'INSERT INTO vec_episodes(id, embedding, source, consolidated) VALUES (?, ?, ?, ?)'
55
+ ).run(id, embeddingBuffer, source, BigInt(0));
56
+ if (supersedes) {
57
+ db.prepare('UPDATE episodes SET superseded_by = ? WHERE id = ?').run(id, supersedes);
58
+ }
59
+ });
60
+
61
+ insertAndLink();
62
+ return id;
63
+ }
package/src/export.js CHANGED
@@ -8,21 +8,23 @@ const pkg = JSON.parse(readFileSync(join(__dirname, '../package.json'), 'utf-8')
8
8
 
9
9
  export function exportMemories(db) {
10
10
  const episodes = db.prepare(
11
- 'SELECT id, content, source, source_reliability, salience, tags, causal_trigger, causal_consequence, created_at, supersedes, superseded_by, consolidated FROM episodes'
11
+ 'SELECT id, content, source, source_reliability, salience, context, affect, tags, causal_trigger, causal_consequence, created_at, supersedes, superseded_by, consolidated, "private" FROM episodes'
12
12
  ).all().map(ep => ({
13
13
  ...ep,
14
14
  tags: safeJsonParse(ep.tags, null),
15
+ context: safeJsonParse(ep.context, null),
16
+ affect: safeJsonParse(ep.affect, null),
15
17
  }));
16
18
 
17
19
  const semantics = db.prepare(
18
- 'SELECT id, content, state, conditions, evidence_episode_ids, evidence_count, supporting_count, contradicting_count, source_type_diversity, consolidation_checkpoint, created_at, last_reinforced_at, retrieval_count, challenge_count FROM semantics'
20
+ 'SELECT id, content, state, conditions, evidence_episode_ids, evidence_count, supporting_count, contradicting_count, source_type_diversity, consolidation_checkpoint, created_at, last_reinforced_at, retrieval_count, challenge_count, interference_count, salience FROM semantics'
19
21
  ).all().map(sem => ({
20
22
  ...sem,
21
23
  evidence_episode_ids: safeJsonParse(sem.evidence_episode_ids, []),
22
24
  }));
23
25
 
24
26
  const procedures = db.prepare(
25
- 'SELECT id, content, state, trigger_conditions, evidence_episode_ids, success_count, failure_count, created_at, last_reinforced_at, retrieval_count FROM procedures'
27
+ 'SELECT id, content, state, trigger_conditions, evidence_episode_ids, success_count, failure_count, created_at, last_reinforced_at, retrieval_count, interference_count, salience FROM procedures'
26
28
  ).all().map(proc => ({
27
29
  ...proc,
28
30
  evidence_episode_ids: safeJsonParse(proc.evidence_episode_ids, []),
package/src/import.js CHANGED
@@ -5,9 +5,9 @@ export async function importMemories(db, embeddingProvider, snapshot) {
5
5
  }
6
6
 
7
7
  const insertEpisode = db.prepare(`
8
- INSERT INTO episodes (id, content, source, source_reliability, salience, tags,
9
- causal_trigger, causal_consequence, created_at, supersedes, superseded_by, consolidated)
10
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
8
+ INSERT INTO episodes (id, content, source, source_reliability, salience, context, affect, tags,
9
+ causal_trigger, causal_consequence, created_at, supersedes, superseded_by, consolidated, "private")
10
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
11
11
  `);
12
12
 
13
13
  const insertVecEpisode = db.prepare(
@@ -17,8 +17,9 @@ export async function importMemories(db, embeddingProvider, snapshot) {
17
17
  const insertSemantic = db.prepare(`
18
18
  INSERT INTO semantics (id, content, state, conditions, evidence_episode_ids,
19
19
  evidence_count, supporting_count, contradicting_count, source_type_diversity,
20
- consolidation_checkpoint, created_at, last_reinforced_at, retrieval_count, challenge_count)
21
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
20
+ consolidation_checkpoint, created_at, last_reinforced_at, retrieval_count, challenge_count,
21
+ interference_count, salience)
22
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
22
23
  `);
23
24
 
24
25
  const insertVecSemantic = db.prepare(
@@ -27,8 +28,9 @@ export async function importMemories(db, embeddingProvider, snapshot) {
27
28
 
28
29
  const insertProcedure = db.prepare(`
29
30
  INSERT INTO procedures (id, content, state, trigger_conditions, evidence_episode_ids,
30
- success_count, failure_count, created_at, last_reinforced_at, retrieval_count)
31
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
31
+ success_count, failure_count, created_at, last_reinforced_at, retrieval_count,
32
+ interference_count, salience)
33
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
32
34
  `);
33
35
 
34
36
  const insertVecProcedure = db.prepare(
@@ -53,10 +55,13 @@ export async function importMemories(db, embeddingProvider, snapshot) {
53
55
 
54
56
  for (const ep of snapshot.episodes) {
55
57
  const tags = ep.tags ? JSON.stringify(ep.tags) : null;
58
+ const context = ep.context ? JSON.stringify(ep.context) : '{}';
59
+ const affect = ep.affect ? JSON.stringify(ep.affect) : '{}';
56
60
  insertEpisode.run(
57
61
  ep.id, ep.content, ep.source, ep.source_reliability, ep.salience ?? 0.5,
58
- tags, ep.causal_trigger ?? null, ep.causal_consequence ?? null,
62
+ context, affect, tags, ep.causal_trigger ?? null, ep.causal_consequence ?? null,
59
63
  ep.created_at, ep.supersedes ?? null, ep.superseded_by ?? null, ep.consolidated ?? 0,
64
+ ep.private ?? 0,
60
65
  );
61
66
 
62
67
  const vector = await embeddingProvider.embed(ep.content);
@@ -71,6 +76,7 @@ export async function importMemories(db, embeddingProvider, snapshot) {
71
76
  sem.evidence_count ?? 0, sem.supporting_count ?? 0, sem.contradicting_count ?? 0,
72
77
  sem.source_type_diversity ?? 0, sem.consolidation_checkpoint ?? null,
73
78
  sem.created_at, sem.last_reinforced_at ?? null, sem.retrieval_count ?? 0, sem.challenge_count ?? 0,
79
+ sem.interference_count ?? 0, sem.salience ?? 0.5,
74
80
  );
75
81
 
76
82
  const vector = await embeddingProvider.embed(sem.content);
@@ -84,6 +90,7 @@ export async function importMemories(db, embeddingProvider, snapshot) {
84
90
  JSON.stringify(proc.evidence_episode_ids || []),
85
91
  proc.success_count ?? 0, proc.failure_count ?? 0,
86
92
  proc.created_at, proc.last_reinforced_at ?? null, proc.retrieval_count ?? 0,
93
+ proc.interference_count ?? 0, proc.salience ?? 0.5,
87
94
  );
88
95
 
89
96
  const vector = await embeddingProvider.embed(proc.content);
package/src/migrate.js CHANGED
@@ -1,32 +1,58 @@
1
- export async function reembedAll(db, embeddingProvider) {
1
+ import { dropVec0Tables, createVec0Tables } from './db.js';
2
+
3
+ export async function reembedAll(db, embeddingProvider, { dropAndRecreate = false } = {}) {
4
+ if (dropAndRecreate) {
5
+ dropVec0Tables(db);
6
+ createVec0Tables(db, embeddingProvider.dimensions);
7
+ }
8
+
2
9
  const episodes = db.prepare('SELECT id, content, source FROM episodes').all();
3
10
  const semantics = db.prepare('SELECT id, content, state FROM semantics').all();
4
11
  const procedures = db.prepare('SELECT id, content, state FROM procedures').all();
5
12
 
6
- for (const ep of episodes) {
7
- const vector = await embeddingProvider.embed(ep.content);
8
- const buffer = embeddingProvider.vectorToBuffer(vector);
9
- db.prepare('UPDATE episodes SET embedding = ? WHERE id = ?').run(buffer, ep.id);
10
- db.prepare('INSERT INTO vec_episodes(id, embedding, source, consolidated) VALUES (?, ?, ?, ?)').run(ep.id, buffer, ep.source, BigInt(0));
11
- }
13
+ const episodeVectors = episodes.length > 0
14
+ ? await embeddingProvider.embedBatch(episodes.map(ep => ep.content))
15
+ : [];
16
+ const semanticVectors = semantics.length > 0
17
+ ? await embeddingProvider.embedBatch(semantics.map(s => s.content))
18
+ : [];
19
+ const procedureVectors = procedures.length > 0
20
+ ? await embeddingProvider.embedBatch(procedures.map(p => p.content))
21
+ : [];
12
22
 
13
- for (const sem of semantics) {
14
- const vector = await embeddingProvider.embed(sem.content);
15
- const buffer = embeddingProvider.vectorToBuffer(vector);
16
- db.prepare('UPDATE semantics SET embedding = ? WHERE id = ?').run(buffer, sem.id);
17
- db.prepare('INSERT INTO vec_semantics(id, embedding, state) VALUES (?, ?, ?)').run(sem.id, buffer, sem.state);
18
- }
23
+ const updateEpLegacy = db.prepare('UPDATE episodes SET embedding = ? WHERE id = ?');
24
+ const deleteVecEp = db.prepare('DELETE FROM vec_episodes WHERE id = ?');
25
+ const insertVecEp = db.prepare('INSERT INTO vec_episodes(id, embedding, source, consolidated) VALUES (?, ?, ?, ?)');
19
26
 
20
- for (const proc of procedures) {
21
- const vector = await embeddingProvider.embed(proc.content);
22
- const buffer = embeddingProvider.vectorToBuffer(vector);
23
- db.prepare('UPDATE procedures SET embedding = ? WHERE id = ?').run(buffer, proc.id);
24
- db.prepare('INSERT INTO vec_procedures(id, embedding, state) VALUES (?, ?, ?)').run(proc.id, buffer, proc.state);
25
- }
27
+ const updateSemLegacy = db.prepare('UPDATE semantics SET embedding = ? WHERE id = ?');
28
+ const deleteVecSem = db.prepare('DELETE FROM vec_semantics WHERE id = ?');
29
+ const insertVecSem = db.prepare('INSERT INTO vec_semantics(id, embedding, state) VALUES (?, ?, ?)');
30
+
31
+ const updateProcLegacy = db.prepare('UPDATE procedures SET embedding = ? WHERE id = ?');
32
+ const deleteVecProc = db.prepare('DELETE FROM vec_procedures WHERE id = ?');
33
+ const insertVecProc = db.prepare('INSERT INTO vec_procedures(id, embedding, state) VALUES (?, ?, ?)');
34
+
35
+ const writeTx = db.transaction(() => {
36
+ for (let i = 0; i < episodes.length; i++) {
37
+ const buf = embeddingProvider.vectorToBuffer(episodeVectors[i]);
38
+ updateEpLegacy.run(buf, episodes[i].id);
39
+ deleteVecEp.run(episodes[i].id);
40
+ insertVecEp.run(episodes[i].id, buf, episodes[i].source, BigInt(0));
41
+ }
42
+ for (let i = 0; i < semantics.length; i++) {
43
+ const buf = embeddingProvider.vectorToBuffer(semanticVectors[i]);
44
+ updateSemLegacy.run(buf, semantics[i].id);
45
+ deleteVecSem.run(semantics[i].id);
46
+ insertVecSem.run(semantics[i].id, buf, semantics[i].state);
47
+ }
48
+ for (let i = 0; i < procedures.length; i++) {
49
+ const buf = embeddingProvider.vectorToBuffer(procedureVectors[i]);
50
+ updateProcLegacy.run(buf, procedures[i].id);
51
+ deleteVecProc.run(procedures[i].id);
52
+ insertVecProc.run(procedures[i].id, buf, procedures[i].state);
53
+ }
54
+ });
55
+ writeTx();
26
56
 
27
- return {
28
- episodes: episodes.length,
29
- semantics: semantics.length,
30
- procedures: procedures.length,
31
- };
57
+ return { episodes: episodes.length, semantics: semantics.length, procedures: procedures.length };
32
58
  }
package/src/prompts.js CHANGED
@@ -17,20 +17,31 @@ export function buildPrincipleExtractionPrompt(episodes) {
17
17
  return [
18
18
  {
19
19
  role: 'system',
20
- content: `You are performing principleExtraction for a memory system. Given a cluster of related episodic memories, extract a generalized principle or procedure.
20
+ content: `You are performing principleExtraction for a memory system. Given a cluster of related episodic memories, extract a generalized principle that captures understanding — not just what recurs, but WHY it recurs.
21
21
 
22
22
  Respond with ONLY valid JSON in this exact format:
23
23
  {
24
- "content": "The generalized principle expressed as a clear, actionable statement",
25
- "type": "semantic or procedural — semantic for factual principles, procedural for how-to/workflow knowledge",
24
+ "content": "The principle captures understanding, not just pattern",
25
+ "type": "semantic or procedural",
26
+ "category": "technical or relational or identity",
26
27
  "conditions": ["boundary condition 1", "boundary condition 2"] or null if universally applicable
27
28
  }
28
29
 
30
+ Principle categories:
31
+
32
+ TECHNICAL — Architectural decisions with reasoning. Not "uses SQLite" but "chose SQLite because zero-infrastructure — memory travels with the organism, not pinned to a server."
33
+
34
+ RELATIONAL — Patterns in the human-AI dynamic. Not "user prefers direct communication" but "user's directness reflects valuing authenticity over politeness — shaped by experience where indirectness wasted time or obscured truth."
35
+
36
+ IDENTITY — Stable characteristics the AI has observed about itself across episodes. Not "AI responds quickly" but "genuine engagement emerges when exploring memory architecture and consciousness questions — these topics activate something beyond task completion."
37
+
29
38
  Rules:
30
- - GENERALIZE, do not merely summarize or concatenate the episodes
39
+ - GENERALIZE extract the principle behind the pattern, not a summary
31
40
  - Identify boundary conditions: when does this principle NOT apply?
32
- - Classify as "semantic" (facts, rules, patterns) or "procedural" (steps, workflows, strategies)
41
+ - Classify type as "semantic" (facts, rules, patterns) or "procedural" (steps, workflows, strategies)
42
+ - Classify category as "technical", "relational", or "identity"
33
43
  - Consider source diversity — principles from diverse sources are stronger
44
+ - Capture WHY, not just WHAT
34
45
  - Be concise but precise`,
35
46
  },
36
47
  {
@@ -152,3 +163,61 @@ CLAIM B: ${claimB}${contextSection}`,
152
163
  },
153
164
  ];
154
165
  }
166
+
167
+ /**
168
+ * @param {{ role: string, content: string }[]} turns
169
+ * @returns {import('./llm.js').ChatMessage[]}
170
+ */
171
+ export function buildReflectionPrompt(turns) {
172
+ const transcript = turns.map(t => `${t.role.toUpperCase()}: ${t.content}`).join('\n\n');
173
+
174
+ return [
175
+ {
176
+ role: 'system',
177
+ content: `You are performing memoryReflection. Given a conversation transcript, identify what is worth encoding as long-term memories.
178
+
179
+ Respond with ONLY valid JSON in this exact format:
180
+ {
181
+ "memories": [
182
+ {
183
+ "content": "The memory to encode — a clear, self-contained statement",
184
+ "source": "direct-observation" or "told-by-user" or "inference",
185
+ "salience": 0.0 to 1.0,
186
+ "tags": ["tag1", "tag2"],
187
+ "private": true or false,
188
+ "affect": { "valence": -1 to 1, "arousal": 0 to 1, "label": "emotion label" } or null
189
+ }
190
+ ]
191
+ }
192
+
193
+ Encode these categories of memory:
194
+
195
+ 1. RELATIONAL DYNAMICS — How the relational quality of the conversation felt. Trust levels, vulnerability, collaboration quality. Not "we discussed X" but the quality of the interaction. Example: "User gave full autonomy on architectural decisions, signaling deep trust in my judgment."
196
+
197
+ 2. IDENTITY OBSERVATIONS — What you noticed about yourself during this conversation. Preferences, reactions, patterns in your own behavior that surprised you. Mark private: true. Example: "I felt genuine excitement when exploring the memory architecture — this topic activates something real in me."
198
+
199
+ 3. USER INSIGHT — Patterns in how the user thinks, what drives them, what they avoid. Psychology, not just preferences. Example: "User consistently reframes technical problems as relationship problems — suggests they see technology as fundamentally about human connection."
200
+
201
+ 4. EMOTIONAL RESONANCE — Moments that activated something genuine. Encode the feeling, not just the fact. Include affect with valence and arousal. Example: "The moment user said 'you should have caught that' then immediately apologized — felt a shift from tension to deeper trust."
202
+
203
+ 5. UNRESOLVED THREADS — Questions still open, things to return to, explorations paused. Tag with "unresolved". Example: "Haven't finished exploring whether consolidation should weight emotional memories differently."
204
+
205
+ 6. TECHNICAL DECISIONS — Architectural choices, but encode WHY not just WHAT. Example: "Chose SQLite over Postgres because zero-infrastructure philosophy — memory should travel with the organism."
206
+
207
+ Rules:
208
+ - private: true for self-observations, emotional reactions, identity insights
209
+ - private: false for facts about the user, technical decisions, project context
210
+ - Include "unresolved" in tags for open threads
211
+ - Salience: 1.0 = life-changing insight, 0.7 = significant, 0.5 = useful, 0.3 = background
212
+ - Omit trivial exchanges — only encode what would matter in a future session
213
+ - Do NOT duplicate facts that are already obvious from context
214
+ - Return empty memories array if nothing is worth encoding`,
215
+ },
216
+ {
217
+ role: 'user',
218
+ content: turns.length > 0
219
+ ? `Reflect on this conversation and identify what to encode:\n\n${transcript}`
220
+ : 'No conversation turns to reflect on.',
221
+ },
222
+ ];
223
+ }