2ndbrain 2026.1.30 → 2026.1.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +17 -0
- package/LICENSE +21 -0
- package/README.md +1 -1
- package/db/migrations/001_initial_schema.sql +91 -0
- package/doc/SPEC.md +896 -0
- package/hooks/auto-capture.sh +4 -0
- package/hooks/validate-command.sh +374 -0
- package/package.json +34 -20
- package/skills/journal/SKILL.md +112 -0
- package/skills/knowledge/SKILL.md +165 -0
- package/skills/project-manage/SKILL.md +216 -0
- package/skills/recall/SKILL.md +182 -0
- package/skills/system-ops/SKILL.md +161 -0
- package/src/attachments/store.js +167 -0
- package/src/claude/bridge.js +291 -0
- package/src/claude/conversation.js +219 -0
- package/src/config.js +90 -0
- package/src/db/migrate.js +94 -0
- package/src/db/pool.js +33 -0
- package/src/embeddings/engine.js +281 -0
- package/src/embeddings/worker.js +221 -0
- package/src/hooks/lifecycle.js +448 -0
- package/src/index.js +560 -0
- package/src/logging.js +91 -0
- package/src/mcp/config.js +75 -0
- package/src/mcp/embed-server.js +242 -0
- package/src/rate-limiter.js +114 -0
- package/src/telegram/bot.js +546 -0
- package/src/telegram/commands.js +440 -0
- package/src/web/server.js +1119 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings engine -- manages pgvector-backed semantic search infrastructure.
|
|
3
|
+
*
|
|
4
|
+
* Handles startup configuration resolution per spec section 11.4:
|
|
5
|
+
* 1. Resolve dimensions from env var or model defaults
|
|
6
|
+
* 2. First-time setup: create extension, tables, index
|
|
7
|
+
* 3. Model switch: drop/recreate vector column, queue re-embedding
|
|
8
|
+
* 4. No change: skip
|
|
9
|
+
*
|
|
10
|
+
* Only creates pgvector tables when EMBEDDING_PROVIDER is set.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Default vector dimensions for known OpenAI embedding models.
 */
const MODEL_DIMENSION_DEFAULTS = {
  'text-embedding-3-small': 1536,
  'text-embedding-3-large': 3072,
  'text-embedding-ada-002': 1536,
};

/**
 * Largest dimension count for which pgvector can build an HNSW index on a
 * plain `vector` column. Wider columns can still be stored and searched, but
 * CREATE INDEX ... USING hnsw fails for them.
 */
const HNSW_MAX_DIMENSIONS = 2000;

/**
 * Build the `VECTOR(n)` column type for a DDL statement.
 *
 * PostgreSQL does not accept bind parameters inside column type definitions,
 * so the dimension has to be interpolated. This guard guarantees that only a
 * positive integer ever reaches the SQL text.
 *
 * @param {number} dimensions - Vector dimension count.
 * @returns {string} Column type, e.g. `VECTOR(1536)`.
 * @throws {Error} When `dimensions` is not a positive integer.
 */
function vectorColumnType(dimensions) {
  if (!Number.isSafeInteger(dimensions) || dimensions <= 0) {
    throw new Error(`Invalid embedding dimensions: "${dimensions}"`);
  }
  return `VECTOR(${dimensions})`;
}

/**
 * Embeddings engine -- resolves the embedding configuration at startup and
 * keeps the pgvector tables in sync with it.
 */
class EmbeddingsEngine {
  /**
   * @param {object} deps
   * @param {object} deps.db - Database query interface ({ query(sql, params) }).
   * @param {object} deps.config - Application configuration.
   * @param {object} deps.logger - Logger instance.
   */
  constructor({ db, config, logger }) {
    this.db = db;
    this.config = config;
    this.logger = logger;
    this._dimensions = null;
  }

  /**
   * Returns true when the embedding provider is configured.
   *
   * @returns {boolean}
   */
  isEnabled() {
    return Boolean(this.config.EMBEDDING_PROVIDER);
  }

  /**
   * Run startup configuration resolution.
   *
   * 1. Resolve dimensions: from EMBEDDING_DIMENSIONS, or the model defaults.
   *    Fails startup if the model is unknown and no explicit dimension is
   *    provided.
   * 2. First-time setup (no embedding_config table, or no config row):
   *    create tables, index and config row.
   * 3. Model switch (stored provider/model/dimensions differ): recreate the
   *    vector column and index, update the config row.
   * 4. No change: skip.
   *
   * Does nothing except log when no embedding provider is configured.
   *
   * @returns {Promise<void>}
   * @throws {Error} When dimensions cannot be resolved or a query fails.
   */
  async initialize() {
    if (!this.isEnabled()) {
      this.logger.info('embeddings', 'Embedding provider not configured; embeddings disabled.');
      return;
    }

    const provider = this.config.EMBEDDING_PROVIDER;
    const model = this.config.EMBEDDING_MODEL || 'text-embedding-3-small';
    const dimensions = this._resolveDimensions(model);
    this._dimensions = dimensions;

    this.logger.info(
      'embeddings',
      `Initializing embeddings: provider=${provider} model=${model} dimensions=${dimensions}`,
    );

    // Ensure the pgvector extension is available
    await this.db.query('CREATE EXTENSION IF NOT EXISTS vector');

    // Check whether the embedding_config table already exists
    const tableCheck = await this.db.query(
      `SELECT EXISTS (
         SELECT FROM information_schema.tables
         WHERE table_schema = 'public'
           AND table_name = 'embedding_config'
       ) AS table_exists`,
    );

    if (!tableCheck.rows[0].table_exists) {
      await this._firstTimeSetup(provider, model, dimensions);
      return;
    }

    // Table exists -- check for an existing config row
    const configRow = await this.db.query(
      'SELECT provider, model, dimensions FROM embedding_config WHERE id = 1',
    );

    if (configRow.rows.length === 0) {
      // Table present but empty -- treat as first-time setup
      await this._firstTimeSetup(provider, model, dimensions);
      return;
    }

    const current = configRow.rows[0];

    // Normalise the stored dimension so a driver that returns it as a string
    // does not trigger a spurious (and destructive) model switch.
    const unchanged =
      current.provider === provider &&
      current.model === model &&
      Number(current.dimensions) === dimensions;

    if (unchanged) {
      this.logger.info('embeddings', 'Embedding configuration unchanged.');
      return;
    }

    // Configuration differs -- perform model switch
    await this._handleModelSwitch(current, { provider, model, dimensions });
  }

  /**
   * Queue an entity for background embedding generation.
   * Inserts a row with a NULL vector; an existing row for the same entity is
   * left untouched. No-op when embeddings are disabled.
   *
   * @param {string} entityType - Entity type (e.g. 'message', 'node', 'journal', 'issue').
   * @param {number} entityId - Primary key of the source entity.
   * @returns {Promise<void>}
   */
  async queueEmbedding(entityType, entityId) {
    if (!this.isEnabled()) {
      return;
    }

    await this.db.query(
      `INSERT INTO embeddings (entity_type, entity_id)
       VALUES ($1, $2)
       ON CONFLICT (entity_type, entity_id) DO NOTHING`,
      [entityType, entityId],
    );
  }

  // ---------------------------------------------------------------------------
  // Internal helpers
  // ---------------------------------------------------------------------------

  /**
   * Resolve the target vector dimensions from EMBEDDING_DIMENSIONS or the
   * known model defaults.
   *
   * The explicit value must consist of digits only: it ends up inside DDL, so
   * inputs such as "1536abc" or "15.9" are rejected instead of being silently
   * truncated.
   *
   * @param {string} model - Embedding model name.
   * @returns {number} Resolved dimension count.
   * @throws {Error} When dimensions cannot be determined.
   */
  _resolveDimensions(model) {
    const raw = this.config.EMBEDDING_DIMENSIONS;

    if (raw) {
      const text = String(raw).trim();
      const dim = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
      if (!Number.isSafeInteger(dim) || dim <= 0) {
        throw new Error(`Invalid EMBEDDING_DIMENSIONS value: "${raw}"`);
      }
      return dim;
    }

    // Own-property lookup only: a model called e.g. "constructor" must not
    // resolve to something inherited from Object.prototype.
    const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_DIMENSION_DEFAULTS, model);
    if (!isKnownModel) {
      throw new Error(
        `Unknown embedding model "${model}" and EMBEDDING_DIMENSIONS is not set. ` +
          `Set EMBEDDING_DIMENSIONS explicitly or use a known model: ` +
          `${Object.keys(MODEL_DIMENSION_DEFAULTS).join(', ')}`,
      );
    }

    return MODEL_DIMENSION_DEFAULTS[model];
  }

  /**
   * Create the HNSW cosine-distance index on embeddings.vector when the
   * dimension count allows it.
   *
   * pgvector refuses to build an HNSW index on `vector` columns wider than
   * HNSW_MAX_DIMENSIONS, which would otherwise abort startup (notably for
   * text-embedding-3-large with its 3072 default). In that case the index is
   * skipped and a warning is logged.
   *
   * @param {number} dimensions - Dimension count of the vector column.
   * @returns {Promise<boolean>} True when the index statement was issued.
   */
  async _createVectorIndex(dimensions) {
    if (dimensions > HNSW_MAX_DIMENSIONS) {
      this.logger.warn(
        'embeddings',
        `Skipping HNSW index: ${dimensions} dimensions exceeds the pgvector limit of ` +
          `${HNSW_MAX_DIMENSIONS} for indexed vector columns. Searches will run unindexed.`,
      );
      return false;
    }

    await this.db.query(`
      CREATE INDEX IF NOT EXISTS idx_embeddings_vector
        ON embeddings USING hnsw (vector vector_cosine_ops)
    `);
    return true;
  }

  /**
   * First-time setup: create the embedding_config and embeddings tables,
   * the HNSW index (when supported), and the initial config row.
   *
   * @param {string} provider - Embedding provider name.
   * @param {string} model - Embedding model name.
   * @param {number} dimensions - Vector dimension count.
   * @returns {Promise<void>}
   */
  async _firstTimeSetup(provider, model, dimensions) {
    const vectorType = vectorColumnType(dimensions);

    this.logger.info('embeddings', 'First-time embedding setup: creating tables and index.');

    // Create the single-row configuration table
    await this.db.query(`
      CREATE TABLE IF NOT EXISTS embedding_config (
        id INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),
        provider TEXT NOT NULL,
        model TEXT NOT NULL,
        dimensions INTEGER NOT NULL,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
      )
    `);

    // Create the embeddings table with the resolved vector dimension.
    // The column type comes from vectorColumnType(), which only ever emits a
    // positive integer, so interpolating it into the DDL is safe.
    await this.db.query(`
      CREATE TABLE IF NOT EXISTS embeddings (
        id SERIAL PRIMARY KEY,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        entity_type TEXT NOT NULL,
        entity_id INTEGER NOT NULL,
        vector ${vectorType},
        UNIQUE(entity_type, entity_id)
      )
    `);

    // HNSW index for fast approximate nearest-neighbor search (cosine distance)
    const indexed = await this._createVectorIndex(dimensions);

    // Insert (or update) the config row
    await this.db.query(
      `INSERT INTO embedding_config (provider, model, dimensions)
       VALUES ($1, $2, $3)
       ON CONFLICT (id) DO UPDATE
       SET provider = $1, model = $2, dimensions = $3, updated_at = NOW()`,
      [provider, model, dimensions],
    );

    this.logger.info(
      'embeddings',
      indexed
        ? 'Embedding tables and index created successfully.'
        : 'Embedding tables created successfully (vector index skipped).',
    );
  }

  /**
   * Handle a model configuration change.
   *
   * Drops the existing vector column and HNSW index, recreates them with the
   * new dimension, and updates the config row. All existing embedding rows
   * are left with a NULL vector.
   *
   * The statements are issued one by one through db.query, so they are not
   * atomic. Every step is therefore written to be re-runnable: the config row
   * is updated last, and a switch interrupted half-way is simply repeated on
   * the next startup without failing on an already-dropped column or an
   * already-created index.
   *
   * @param {object} oldConfig - Previous { provider, model, dimensions }.
   * @param {object} newConfig - New { provider, model, dimensions }.
   * @returns {Promise<void>}
   */
  async _handleModelSwitch(oldConfig, newConfig) {
    const vectorType = vectorColumnType(newConfig.dimensions);

    this.logger.warn(
      'embeddings',
      `Embedding model changed from ${oldConfig.provider}/${oldConfig.model} ` +
        `(${oldConfig.dimensions}d) to ${newConfig.provider}/${newConfig.model} ` +
        `(${newConfig.dimensions}d). All existing embeddings will be dropped and re-generated.`,
    );

    // Drop the HNSW index
    await this.db.query('DROP INDEX IF EXISTS idx_embeddings_vector');

    // Drop and recreate the vector column with the new dimension
    await this.db.query('ALTER TABLE embeddings DROP COLUMN IF EXISTS vector');
    await this.db.query(`ALTER TABLE embeddings ADD COLUMN vector ${vectorType}`);

    // Recreate the HNSW index (skipped when the new dimension is too wide)
    await this._createVectorIndex(newConfig.dimensions);

    // Update the config row last, so an interrupted switch is retried
    await this.db.query(
      `UPDATE embedding_config
       SET provider = $1, model = $2, dimensions = $3, updated_at = NOW()
       WHERE id = 1`,
      [newConfig.provider, newConfig.model, newConfig.dimensions],
    );

    this.logger.info(
      'embeddings',
      'Model switch complete. All embeddings queued for re-generation.',
    );
  }
}

export { EmbeddingsEngine };
export default EmbeddingsEngine;
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import { generateEmbedding } from '../mcp/embed-server.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Mapping of entity types to the SQL query that retrieves the text content
 * to be embedded for a given entity_id.
 */
const ENTITY_TEXT_SOURCES = {
  message: {
    query: 'SELECT content AS text FROM conversation_messages WHERE id = $1',
  },
  node: {
    query: `SELECT name || COALESCE(' ' || note, '') AS text FROM knowledge_nodes WHERE id = $1`,
  },
  journal: {
    query: 'SELECT note AS text FROM journal WHERE id = $1',
  },
  issue: {
    query: 'SELECT note AS text FROM issues WHERE id = $1',
  },
  spec: {
    query: 'SELECT note AS text FROM specifications WHERE id = $1',
  },
};

/** Maximum rows to process in a single iteration. */
const BATCH_SIZE = 10;

/** Milliseconds between processing iterations. */
const POLL_INTERVAL_MS = 5_000;

/**
 * Milliseconds a row is left alone after it turned out to be unprocessable
 * (unknown entity type or empty source text).
 */
const UNPROCESSABLE_COOLDOWN_MS = 10 * 60_000;

/** Milliseconds a row is left alone after its embedding attempt threw. */
const FAILURE_COOLDOWN_MS = 60_000;

/**
 * Background embedding worker -- periodically processes rows in the
 * embeddings table that have a NULL vector, generates the embedding via
 * the configured API, and stores the result (spec section 11.4).
 *
 * Rows that cannot be embedded keep their NULL vector. To stop them from
 * being re-selected on every poll and crowding newer rows out of the batch,
 * the worker remembers them in memory for a cool-down period and excludes
 * them from the batch query. The memory is per instance, so it is lost when
 * the process restarts.
 */
class EmbeddingWorker {
  /**
   * @param {object} deps
   * @param {object} deps.db - Database query interface ({ query(sql, params) }).
   * @param {object} deps.config - Application configuration.
   * @param {object} deps.logger - Logger instance.
   */
  constructor({ db, config, logger }) {
    this.db = db;
    this.config = config;
    this.logger = logger;

    /** @type {ReturnType<typeof setTimeout>|null} */
    this._timer = null;

    /** Whether the worker loop is active. */
    this._running = false;

    /** Guard to prevent overlapping iterations. */
    this._processing = false;

    /**
     * Embedding row id -> epoch milliseconds until which the row is skipped.
     * @type {Map<number, number>}
     */
    this._deferredUntil = new Map();
  }

  /**
   * Start the periodic embedding worker loop.
   * Processes up to {@link BATCH_SIZE} NULL-vector rows every
   * {@link POLL_INTERVAL_MS} milliseconds. Calling it while already running
   * is a no-op.
   */
  start() {
    if (this._running) {
      return;
    }

    this._running = true;
    this.logger.info('embedding-worker', 'Starting background embedding worker.');
    this._scheduleNext();
  }

  /**
   * Stop the worker loop gracefully. Any in-flight iteration will finish
   * before the loop fully halts.
   */
  stop() {
    this._running = false;

    if (this._timer !== null) {
      clearTimeout(this._timer);
      this._timer = null;
    }

    this.logger.info('embedding-worker', 'Embedding worker stopped.');
  }

  // ---------------------------------------------------------------------------
  // Internal
  // ---------------------------------------------------------------------------

  /**
   * Schedule the next processing iteration after POLL_INTERVAL_MS.
   *
   * Does nothing when the worker is stopped or a timer is already armed. The
   * second condition matters when stop() and start() are called while an
   * iteration is in flight: start() arms a timer, and the finishing iteration
   * must not arm another one on top of it.
   */
  _scheduleNext() {
    if (!this._running || this._timer !== null) {
      return;
    }

    this._timer = setTimeout(async () => {
      this._timer = null;

      // Skip if the previous iteration is still running
      if (this._processing) {
        this._scheduleNext();
        return;
      }

      try {
        this._processing = true;
        await this._processQueue();
      } catch (err) {
        this.logger.error(
          'embedding-worker',
          `Unexpected error in worker loop: ${err.message}`,
        );
      } finally {
        this._processing = false;
        this._scheduleNext();
      }
    }, POLL_INTERVAL_MS);
  }

  /**
   * Remember that a row should not be picked up again before the cool-down
   * has elapsed.
   *
   * @param {number} id - Primary key of the embeddings row.
   * @param {number} cooldownMs - Cool-down in milliseconds.
   */
  _defer(id, cooldownMs) {
    this._deferredUntil.set(id, Date.now() + cooldownMs);
  }

  /**
   * Drop expired cool-downs and return the ids that are still deferred.
   *
   * @returns {number[]} Ids to exclude from the next batch.
   */
  _deferredIds() {
    const now = Date.now();
    const ids = [];

    for (const [id, until] of this._deferredUntil) {
      if (until <= now) {
        this._deferredUntil.delete(id);
      } else {
        ids.push(id);
      }
    }

    return ids;
  }

  /**
   * Fetch and process a batch of rows with NULL vectors, oldest first,
   * leaving out rows that are currently in their cool-down period.
   *
   * A failure on one row is logged and does not stop the rest of the batch.
   *
   * @returns {Promise<void>}
   */
  async _processQueue() {
    // Passed as an array literal and cast in SQL, the same way vectors are
    // passed as text and cast with ::vector.
    const excluded = `{${this._deferredIds().join(',')}}`;

    const result = await this.db.query(
      `SELECT id, entity_type, entity_id
       FROM embeddings
       WHERE vector IS NULL
         AND id <> ALL ($2::int[])
       ORDER BY created_at ASC
       LIMIT $1`,
      [BATCH_SIZE, excluded],
    );

    if (result.rows.length === 0) {
      return;
    }

    this.logger.debug(
      'embedding-worker',
      `Processing ${result.rows.length} pending embedding(s).`,
    );

    for (const row of result.rows) {
      try {
        await this._processRow(row);
      } catch (err) {
        // Log the failure, back off from this row, and continue with the next
        this.logger.error(
          'embedding-worker',
          `Failed to generate embedding for ${row.entity_type}:${row.entity_id}: ${err.message}`,
        );
        this._defer(row.id, FAILURE_COOLDOWN_MS);
      }
    }
  }

  /**
   * Process a single embedding row: look up the source text, call the
   * embedding API, and store the resulting vector.
   *
   * Outcomes:
   * - unknown entity type or empty text: the row is kept and deferred;
   * - source entity missing: the embedding row is deleted;
   * - otherwise the vector is written to the row.
   *
   * @param {{ id: number, entity_type: string, entity_id: number }} row
   * @returns {Promise<void>}
   * @throws {Error} When a query or the embedding call fails, or when the
   *   provider returns no vector.
   */
  async _processRow(row) {
    const { id, entity_type: entityType, entity_id: entityId } = row;

    // Resolve the query for this entity type
    const source = ENTITY_TEXT_SOURCES[entityType];
    if (!source) {
      this.logger.warn(
        'embedding-worker',
        `Unknown entity type "${entityType}" for embedding ${id}; skipping.`,
      );
      this._defer(id, UNPROCESSABLE_COOLDOWN_MS);
      return;
    }

    // Fetch the text content from the source table
    const textResult = await this.db.query(source.query, [entityId]);

    if (textResult.rows.length === 0) {
      this.logger.warn(
        'embedding-worker',
        `Source entity ${entityType}:${entityId} not found; removing orphaned embedding row ${id}.`,
      );
      await this.db.query('DELETE FROM embeddings WHERE id = $1', [id]);
      return;
    }

    const text = textResult.rows[0].text;
    if (!text || text.trim().length === 0) {
      this.logger.debug(
        'embedding-worker',
        `Empty text for ${entityType}:${entityId}; skipping embedding generation.`,
      );
      this._defer(id, UNPROCESSABLE_COOLDOWN_MS);
      return;
    }

    // Generate the embedding vector via the configured API
    const { vector } = await generateEmbedding(text, this.config);
    if (!vector || !(vector.length > 0)) {
      throw new Error('Embedding provider returned an empty vector.');
    }

    // Format as a pgvector literal: [0.123,0.456,...]
    const vectorLiteral = `[${vector.join(',')}]`;

    // Update the row with the computed vector
    await this.db.query(
      `UPDATE embeddings
       SET vector = $1::vector, updated_at = NOW()
       WHERE id = $2`,
      [vectorLiteral, id],
    );

    this._deferredUntil.delete(id);

    this.logger.debug(
      'embedding-worker',
      `Generated embedding for ${entityType}:${entityId} (${vector.length} dimensions).`,
    );
  }
}

export { EmbeddingWorker };
export default EmbeddingWorker;
|