@karmaniverous/jeeves-watcher 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +537 -310
- package/dist/cli/jeeves-watcher/index.js +659 -404
- package/dist/index.d.ts +104 -56
- package/dist/index.iife.js +536 -309
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +538 -311
- package/package.json +1 -1
package/dist/cjs/index.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var Fastify = require('fastify');
|
|
4
|
-
var radash = require('radash');
|
|
5
|
-
var node_crypto = require('node:crypto');
|
|
6
4
|
var promises = require('node:fs/promises');
|
|
7
5
|
var node_path = require('node:path');
|
|
8
6
|
var picomatch = require('picomatch');
|
|
9
|
-
var
|
|
7
|
+
var radash = require('radash');
|
|
8
|
+
var node_crypto = require('node:crypto');
|
|
10
9
|
var cosmiconfig = require('cosmiconfig');
|
|
11
10
|
var zod = require('zod');
|
|
12
11
|
var jsonmap = require('@karmaniverous/jsonmap');
|
|
@@ -20,6 +19,7 @@ var Ajv = require('ajv');
|
|
|
20
19
|
var addFormats = require('ajv-formats');
|
|
21
20
|
var textsplitters = require('@langchain/textsplitters');
|
|
22
21
|
var jsClientRest = require('@qdrant/js-client-rest');
|
|
22
|
+
var chokidar = require('chokidar');
|
|
23
23
|
|
|
24
24
|
function _interopNamespaceDefault(e) {
|
|
25
25
|
var n = Object.create(null);
|
|
@@ -41,74 +41,28 @@ function _interopNamespaceDefault(e) {
|
|
|
41
41
|
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
42
42
|
|
|
43
43
|
/**
|
|
44
|
-
* @module
|
|
45
|
-
* Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
|
|
46
|
-
*/
|
|
47
|
-
/**
|
|
48
|
-
* Normalise a file path for deterministic mapping: lowercase, forward slashes, strip leading drive letter colon.
|
|
49
|
-
*
|
|
50
|
-
* @param filePath - The original file path.
|
|
51
|
-
* @returns The normalised path string.
|
|
52
|
-
*/
|
|
53
|
-
function normalisePath$1(filePath) {
|
|
54
|
-
return filePath
|
|
55
|
-
.replace(/\\/g, '/')
|
|
56
|
-
.replace(/^([A-Za-z]):/, (_m, letter) => letter.toLowerCase())
|
|
57
|
-
.toLowerCase();
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* Derive a deterministic `.meta.json` path for a given file.
|
|
61
|
-
*
|
|
62
|
-
* @param filePath - The watched file path.
|
|
63
|
-
* @param metadataDir - The root metadata directory.
|
|
64
|
-
* @returns The full path to the metadata file.
|
|
65
|
-
*/
|
|
66
|
-
function metadataPath(filePath, metadataDir) {
|
|
67
|
-
const normalised = normalisePath$1(filePath);
|
|
68
|
-
const hash = node_crypto.createHash('sha256').update(normalised, 'utf8').digest('hex');
|
|
69
|
-
return node_path.join(metadataDir, `${hash}.meta.json`);
|
|
70
|
-
}
|
|
71
|
-
/**
|
|
72
|
-
* Read persisted metadata for a file.
|
|
73
|
-
*
|
|
74
|
-
* @param filePath - The watched file path.
|
|
75
|
-
* @param metadataDir - The root metadata directory.
|
|
76
|
-
* @returns The parsed metadata object, or `null` if not found.
|
|
77
|
-
*/
|
|
78
|
-
async function readMetadata(filePath, metadataDir) {
|
|
79
|
-
try {
|
|
80
|
-
const raw = await promises.readFile(metadataPath(filePath, metadataDir), 'utf8');
|
|
81
|
-
return JSON.parse(raw);
|
|
82
|
-
}
|
|
83
|
-
catch {
|
|
84
|
-
return null;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Write metadata for a file.
|
|
44
|
+
* @module util/normalizeError
|
|
89
45
|
*
|
|
90
|
-
*
|
|
91
|
-
* @param metadataDir - The root metadata directory.
|
|
92
|
-
* @param metadata - The metadata to persist.
|
|
46
|
+
* Normalizes unknown thrown values into proper Error objects for pino serialization.
|
|
93
47
|
*/
|
|
94
|
-
async function writeMetadata(filePath, metadataDir, metadata) {
|
|
95
|
-
const dest = metadataPath(filePath, metadataDir);
|
|
96
|
-
await promises.mkdir(node_path.dirname(dest), { recursive: true });
|
|
97
|
-
await promises.writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
|
|
98
|
-
}
|
|
99
48
|
/**
|
|
100
|
-
*
|
|
49
|
+
* Convert an unknown thrown value into a proper Error with message, stack, and cause.
|
|
50
|
+
* Pino's built-in `err` serializer requires an Error instance to extract message/stack.
|
|
101
51
|
*
|
|
102
|
-
* @param
|
|
103
|
-
* @
|
|
52
|
+
* @param error - The caught value (may not be an Error).
|
|
53
|
+
* @returns A proper Error instance.
|
|
104
54
|
*/
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
55
|
+
function normalizeError(error) {
|
|
56
|
+
if (error instanceof Error)
|
|
57
|
+
return error;
|
|
58
|
+
if (typeof error === 'string')
|
|
59
|
+
return new Error(error);
|
|
60
|
+
const message = typeof error === 'object' && error !== null && 'message' in error
|
|
61
|
+
? String(error.message)
|
|
62
|
+
: String(error);
|
|
63
|
+
const normalized = new Error(message);
|
|
64
|
+
normalized.cause = error;
|
|
65
|
+
return normalized;
|
|
112
66
|
}
|
|
113
67
|
|
|
114
68
|
/**
|
|
@@ -208,107 +162,266 @@ async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
|
|
|
208
162
|
}
|
|
209
163
|
|
|
210
164
|
/**
|
|
211
|
-
*
|
|
212
|
-
*
|
|
213
|
-
|
|
165
|
+
* @module api/handlers/configReindex
|
|
166
|
+
* Fastify route handler for POST /config-reindex. Triggers an async reindex job scoped to rules or full processing.
|
|
167
|
+
*/
|
|
168
|
+
/**
|
|
169
|
+
* Create handler for POST /config-reindex.
|
|
214
170
|
*
|
|
215
|
-
* @param
|
|
216
|
-
* @returns A configured Fastify instance.
|
|
171
|
+
* @param deps - Route dependencies.
|
|
217
172
|
*/
|
|
218
|
-
function
|
|
219
|
-
|
|
220
|
-
const app = Fastify({ logger: false });
|
|
221
|
-
app.get('/status', () => ({
|
|
222
|
-
status: 'ok',
|
|
223
|
-
uptime: process.uptime(),
|
|
224
|
-
}));
|
|
225
|
-
app.post('/metadata', async (request, reply) => {
|
|
173
|
+
function createConfigReindexHandler(deps) {
|
|
174
|
+
return async (request, reply) => {
|
|
226
175
|
try {
|
|
227
|
-
const
|
|
228
|
-
|
|
229
|
-
|
|
176
|
+
const scope = request.body.scope ?? 'rules';
|
|
177
|
+
// Return immediately and run async
|
|
178
|
+
void (async () => {
|
|
179
|
+
try {
|
|
180
|
+
if (scope === 'rules') {
|
|
181
|
+
const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processRulesUpdate');
|
|
182
|
+
deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
|
|
183
|
+
}
|
|
184
|
+
else {
|
|
185
|
+
const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
|
|
186
|
+
deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
catch (error) {
|
|
190
|
+
deps.logger.error({ err: normalizeError(error), scope }, 'Config reindex failed');
|
|
191
|
+
}
|
|
192
|
+
})();
|
|
193
|
+
return await reply.status(200).send({ status: 'started', scope });
|
|
230
194
|
}
|
|
231
195
|
catch (error) {
|
|
232
|
-
logger.error({ error }, '
|
|
233
|
-
return reply.status(500).send({ error: 'Internal server error' });
|
|
196
|
+
deps.logger.error({ err: normalizeError(error) }, 'Config reindex request failed');
|
|
197
|
+
return await reply.status(500).send({ error: 'Internal server error' });
|
|
234
198
|
}
|
|
235
|
-
}
|
|
236
|
-
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* @module api/handlers/metadata
|
|
204
|
+
* Fastify route handler for POST /metadata. Performs enrichment metadata updates via the document processor.
|
|
205
|
+
*/
|
|
206
|
+
/**
|
|
207
|
+
* Create handler for POST /metadata.
|
|
208
|
+
*
|
|
209
|
+
* @param deps - Route dependencies.
|
|
210
|
+
*/
|
|
211
|
+
function createMetadataHandler(deps) {
|
|
212
|
+
return async (request, reply) => {
|
|
237
213
|
try {
|
|
238
|
-
const {
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
return results;
|
|
214
|
+
const { path, metadata } = request.body;
|
|
215
|
+
await deps.processor.processMetadataUpdate(path, metadata);
|
|
216
|
+
return { ok: true };
|
|
242
217
|
}
|
|
243
218
|
catch (error) {
|
|
244
|
-
logger.error({ error }, '
|
|
219
|
+
deps.logger.error({ err: normalizeError(error) }, 'Metadata update failed');
|
|
245
220
|
return reply.status(500).send({ error: 'Internal server error' });
|
|
246
221
|
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* @module util/normalizePath
|
|
227
|
+
* Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
|
|
228
|
+
*/
|
|
229
|
+
/**
|
|
230
|
+
* Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
|
|
231
|
+
*
|
|
232
|
+
* @param filePath - The original file path.
|
|
233
|
+
* @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
|
|
234
|
+
* @returns The normalized path string.
|
|
235
|
+
*/
|
|
236
|
+
function normalizePath(filePath, stripDriveLetter = false) {
|
|
237
|
+
let result = filePath.replace(/\\/g, '/').toLowerCase();
|
|
238
|
+
if (stripDriveLetter) {
|
|
239
|
+
result = result.replace(/^([a-z]):/, (_m, letter) => letter);
|
|
240
|
+
}
|
|
241
|
+
return result;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* @module metadata/metadata
|
|
246
|
+
* Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
|
|
247
|
+
*/
|
|
248
|
+
/**
|
|
249
|
+
* Derive a deterministic `.meta.json` path for a given file.
|
|
250
|
+
*
|
|
251
|
+
* @param filePath - The watched file path.
|
|
252
|
+
* @param metadataDir - The root metadata directory.
|
|
253
|
+
* @returns The full path to the metadata file.
|
|
254
|
+
*/
|
|
255
|
+
function metadataPath(filePath, metadataDir) {
|
|
256
|
+
const normalised = normalizePath(filePath, true);
|
|
257
|
+
const hash = node_crypto.createHash('sha256').update(normalised, 'utf8').digest('hex');
|
|
258
|
+
return node_path.join(metadataDir, `${hash}.meta.json`);
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* Read persisted metadata for a file.
|
|
262
|
+
*
|
|
263
|
+
* @param filePath - The watched file path.
|
|
264
|
+
* @param metadataDir - The root metadata directory.
|
|
265
|
+
* @returns The parsed metadata object, or `null` if not found.
|
|
266
|
+
*/
|
|
267
|
+
async function readMetadata(filePath, metadataDir) {
|
|
268
|
+
try {
|
|
269
|
+
const raw = await promises.readFile(metadataPath(filePath, metadataDir), 'utf8');
|
|
270
|
+
return JSON.parse(raw);
|
|
271
|
+
}
|
|
272
|
+
catch {
|
|
273
|
+
return null;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
/**
|
|
277
|
+
* Write metadata for a file.
|
|
278
|
+
*
|
|
279
|
+
* @param filePath - The watched file path.
|
|
280
|
+
* @param metadataDir - The root metadata directory.
|
|
281
|
+
* @param metadata - The metadata to persist.
|
|
282
|
+
*/
|
|
283
|
+
async function writeMetadata(filePath, metadataDir, metadata) {
|
|
284
|
+
const dest = metadataPath(filePath, metadataDir);
|
|
285
|
+
await promises.mkdir(node_path.dirname(dest), { recursive: true });
|
|
286
|
+
await promises.writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
|
|
287
|
+
}
|
|
288
|
+
/**
|
|
289
|
+
* Delete metadata for a file.
|
|
290
|
+
*
|
|
291
|
+
* @param filePath - The watched file path.
|
|
292
|
+
* @param metadataDir - The root metadata directory.
|
|
293
|
+
*/
|
|
294
|
+
async function deleteMetadata(filePath, metadataDir) {
|
|
295
|
+
try {
|
|
296
|
+
await promises.rm(metadataPath(filePath, metadataDir));
|
|
297
|
+
}
|
|
298
|
+
catch {
|
|
299
|
+
// Ignore if file doesn't exist.
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* @module metadata/constants
|
|
305
|
+
* Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
|
|
306
|
+
*/
|
|
307
|
+
/** Keys managed by the indexing pipeline (not user enrichment). */
|
|
308
|
+
const SYSTEM_METADATA_KEYS = [
|
|
309
|
+
'file_path',
|
|
310
|
+
'chunk_index',
|
|
311
|
+
'total_chunks',
|
|
312
|
+
'content_hash',
|
|
313
|
+
'chunk_text',
|
|
314
|
+
];
|
|
315
|
+
|
|
316
|
+
/**
|
|
317
|
+
* @module api/handlers/rebuildMetadata
|
|
318
|
+
* Fastify route handler for POST /rebuild-metadata. Recreates enrichment metadata files from vector store payloads.
|
|
319
|
+
*/
|
|
320
|
+
/**
|
|
321
|
+
* Create handler for POST /rebuild-metadata.
|
|
322
|
+
*
|
|
323
|
+
* @param deps - Route dependencies.
|
|
324
|
+
*/
|
|
325
|
+
function createRebuildMetadataHandler(deps) {
|
|
326
|
+
return async (_request, reply) => {
|
|
259
327
|
try {
|
|
260
|
-
const metadataDir =
|
|
261
|
-
const
|
|
262
|
-
|
|
263
|
-
'chunk_index',
|
|
264
|
-
'total_chunks',
|
|
265
|
-
'content_hash',
|
|
266
|
-
'chunk_text',
|
|
267
|
-
];
|
|
268
|
-
for await (const point of vectorStore.scroll()) {
|
|
328
|
+
const metadataDir = deps.config.metadataDir ?? '.jeeves-metadata';
|
|
329
|
+
const systemKeys = [...SYSTEM_METADATA_KEYS];
|
|
330
|
+
for await (const point of deps.vectorStore.scroll()) {
|
|
269
331
|
const payload = point.payload;
|
|
270
332
|
const filePath = payload['file_path'];
|
|
271
333
|
if (typeof filePath !== 'string' || filePath.length === 0)
|
|
272
334
|
continue;
|
|
273
335
|
// Persist only enrichment-ish fields, not chunking/index fields.
|
|
274
|
-
const enrichment = radash.omit(payload,
|
|
336
|
+
const enrichment = radash.omit(payload, systemKeys);
|
|
275
337
|
await writeMetadata(filePath, metadataDir, enrichment);
|
|
276
338
|
}
|
|
277
339
|
return await reply.status(200).send({ ok: true });
|
|
278
340
|
}
|
|
279
341
|
catch (error) {
|
|
280
|
-
logger.error({ error }, 'Rebuild metadata failed');
|
|
342
|
+
deps.logger.error({ err: normalizeError(error) }, 'Rebuild metadata failed');
|
|
281
343
|
return await reply.status(500).send({ error: 'Internal server error' });
|
|
282
344
|
}
|
|
283
|
-
}
|
|
284
|
-
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* @module api/handlers/reindex
|
|
350
|
+
* Fastify route handler for POST /reindex. Reprocesses all watched files through the processor.
|
|
351
|
+
*/
|
|
352
|
+
/**
|
|
353
|
+
* Create handler for POST /reindex.
|
|
354
|
+
*
|
|
355
|
+
* @param deps - Route dependencies.
|
|
356
|
+
*/
|
|
357
|
+
function createReindexHandler(deps) {
|
|
358
|
+
return async (_request, reply) => {
|
|
285
359
|
try {
|
|
286
|
-
const
|
|
287
|
-
|
|
288
|
-
void (async () => {
|
|
289
|
-
try {
|
|
290
|
-
if (scope === 'rules') {
|
|
291
|
-
// Re-apply inference rules to all files, update Qdrant payloads (no re-embedding)
|
|
292
|
-
const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processRulesUpdate');
|
|
293
|
-
logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
|
|
294
|
-
}
|
|
295
|
-
else {
|
|
296
|
-
// Full reindex: re-extract, re-embed, re-upsert
|
|
297
|
-
const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
|
|
298
|
-
logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
|
|
299
|
-
}
|
|
300
|
-
}
|
|
301
|
-
catch (error) {
|
|
302
|
-
logger.error({ error, scope }, 'Config reindex failed');
|
|
303
|
-
}
|
|
304
|
-
})();
|
|
305
|
-
return await reply.status(200).send({ status: 'started', scope });
|
|
360
|
+
const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
|
|
361
|
+
return await reply.status(200).send({ ok: true, filesIndexed: count });
|
|
306
362
|
}
|
|
307
363
|
catch (error) {
|
|
308
|
-
logger.error({ error }, '
|
|
364
|
+
deps.logger.error({ err: normalizeError(error) }, 'Reindex failed');
|
|
309
365
|
return await reply.status(500).send({ error: 'Internal server error' });
|
|
310
366
|
}
|
|
367
|
+
};
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
/**
|
|
371
|
+
* @module api/handlers/search
|
|
372
|
+
* Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
|
|
373
|
+
*/
|
|
374
|
+
/**
|
|
375
|
+
* Create handler for POST /search.
|
|
376
|
+
*
|
|
377
|
+
* @param deps - Route dependencies.
|
|
378
|
+
*/
|
|
379
|
+
function createSearchHandler(deps) {
|
|
380
|
+
return async (request, reply) => {
|
|
381
|
+
try {
|
|
382
|
+
const { query, limit = 10 } = request.body;
|
|
383
|
+
const vectors = await deps.embeddingProvider.embed([query]);
|
|
384
|
+
const results = await deps.vectorStore.search(vectors[0], limit);
|
|
385
|
+
return results;
|
|
386
|
+
}
|
|
387
|
+
catch (error) {
|
|
388
|
+
deps.logger.error({ err: normalizeError(error) }, 'Search failed');
|
|
389
|
+
return reply.status(500).send({ error: 'Internal server error' });
|
|
390
|
+
}
|
|
391
|
+
};
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
/**
|
|
395
|
+
* @module api/handlers/status
|
|
396
|
+
* Fastify route handler for GET /status. Pure handler: returns process uptime and health.
|
|
397
|
+
*/
|
|
398
|
+
/**
|
|
399
|
+
* Create handler for GET /status.
|
|
400
|
+
*/
|
|
401
|
+
function createStatusHandler() {
|
|
402
|
+
return () => ({
|
|
403
|
+
status: 'ok',
|
|
404
|
+
uptime: process.uptime(),
|
|
311
405
|
});
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Create the Fastify API server with all routes registered.
|
|
410
|
+
*
|
|
411
|
+
* The returned instance is not yet listening — call `server.listen()` to start.
|
|
412
|
+
*
|
|
413
|
+
* @param options - The server options.
|
|
414
|
+
* @returns A configured Fastify instance.
|
|
415
|
+
*/
|
|
416
|
+
function createApiServer(options) {
|
|
417
|
+
const { processor, vectorStore, embeddingProvider, logger, config } = options;
|
|
418
|
+
const app = Fastify({ logger: false });
|
|
419
|
+
app.get('/status', createStatusHandler());
|
|
420
|
+
app.post('/metadata', createMetadataHandler({ processor, logger }));
|
|
421
|
+
app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
|
|
422
|
+
app.post('/reindex', createReindexHandler({ config, processor, logger }));
|
|
423
|
+
app.post('/rebuild-metadata', createRebuildMetadataHandler({ config, vectorStore, logger }));
|
|
424
|
+
app.post('/config-reindex', createConfigReindexHandler({ config, processor, logger }));
|
|
312
425
|
return app;
|
|
313
426
|
}
|
|
314
427
|
|
|
@@ -554,6 +667,51 @@ const jeevesWatcherConfigSchema = zod.z.object({
|
|
|
554
667
|
.describe('Timeout in milliseconds for graceful shutdown.'),
|
|
555
668
|
});
|
|
556
669
|
|
|
670
|
+
/**
|
|
671
|
+
* @module config/substituteEnvVars
|
|
672
|
+
*
|
|
673
|
+
* Deep-walks config objects and replaces `${VAR_NAME}` patterns with environment variable values.
|
|
674
|
+
*/
|
|
675
|
+
const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
676
|
+
/**
|
|
677
|
+
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
678
|
+
*
|
|
679
|
+
* @param value - The string to process.
|
|
680
|
+
* @returns The string with env vars substituted.
|
|
681
|
+
* @throws If a referenced env var is not set.
|
|
682
|
+
*/
|
|
683
|
+
function substituteString(value) {
|
|
684
|
+
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
685
|
+
const envValue = process.env[varName];
|
|
686
|
+
if (envValue === undefined) {
|
|
687
|
+
throw new Error(`Environment variable \${${varName}} referenced in config is not set.`);
|
|
688
|
+
}
|
|
689
|
+
return envValue;
|
|
690
|
+
});
|
|
691
|
+
}
|
|
692
|
+
/**
|
|
693
|
+
* Deep-walk a value and substitute `${VAR_NAME}` patterns in all string values.
|
|
694
|
+
*
|
|
695
|
+
* @param value - The value to walk (object, array, or primitive).
|
|
696
|
+
* @returns A new value with all env var references resolved.
|
|
697
|
+
*/
|
|
698
|
+
function substituteEnvVars(value) {
|
|
699
|
+
if (typeof value === 'string') {
|
|
700
|
+
return substituteString(value);
|
|
701
|
+
}
|
|
702
|
+
if (Array.isArray(value)) {
|
|
703
|
+
return value.map((item) => substituteEnvVars(item));
|
|
704
|
+
}
|
|
705
|
+
if (value !== null && typeof value === 'object') {
|
|
706
|
+
const result = {};
|
|
707
|
+
for (const [key, val] of Object.entries(value)) {
|
|
708
|
+
result[key] = substituteEnvVars(val);
|
|
709
|
+
}
|
|
710
|
+
return result;
|
|
711
|
+
}
|
|
712
|
+
return value;
|
|
713
|
+
}
|
|
714
|
+
|
|
557
715
|
const MODULE_NAME = 'jeeves-watcher';
|
|
558
716
|
/**
|
|
559
717
|
* Merge sensible defaults into a loaded configuration.
|
|
@@ -589,7 +747,8 @@ async function loadConfig(configPath) {
|
|
|
589
747
|
}
|
|
590
748
|
try {
|
|
591
749
|
const validated = jeevesWatcherConfigSchema.parse(result.config);
|
|
592
|
-
|
|
750
|
+
const withDefaults = applyDefaults(validated);
|
|
751
|
+
return substituteEnvVars(withDefaults);
|
|
593
752
|
}
|
|
594
753
|
catch (error) {
|
|
595
754
|
if (error instanceof zod.ZodError) {
|
|
@@ -602,6 +761,31 @@ async function loadConfig(configPath) {
|
|
|
602
761
|
}
|
|
603
762
|
}
|
|
604
763
|
|
|
764
|
+
/**
|
|
765
|
+
* @module util/logger
|
|
766
|
+
* Logger fallback helper. Provides a unified warn interface that delegates to pino or console.
|
|
767
|
+
*/
|
|
768
|
+
/**
|
|
769
|
+
* Return a minimal logger that delegates to pino if available, otherwise console.
|
|
770
|
+
*
|
|
771
|
+
* @param logger - Optional pino logger instance.
|
|
772
|
+
* @returns A minimal logger.
|
|
773
|
+
*/
|
|
774
|
+
function getLogger(logger) {
|
|
775
|
+
if (logger)
|
|
776
|
+
return logger;
|
|
777
|
+
return {
|
|
778
|
+
warn(obj, msg) {
|
|
779
|
+
if (msg) {
|
|
780
|
+
console.warn(obj, msg);
|
|
781
|
+
}
|
|
782
|
+
else {
|
|
783
|
+
console.warn(obj);
|
|
784
|
+
}
|
|
785
|
+
},
|
|
786
|
+
};
|
|
787
|
+
}
|
|
788
|
+
|
|
605
789
|
/**
|
|
606
790
|
* @module util/retry
|
|
607
791
|
* Small async retry helper with exponential backoff. Side effects: sleeps between attempts; can invoke onRetry callback for logging.
|
|
@@ -706,6 +890,7 @@ function createGeminiProvider(config, logger) {
|
|
|
706
890
|
throw new Error('Gemini embedding provider requires config.embedding.apiKey');
|
|
707
891
|
}
|
|
708
892
|
const dimensions = config.dimensions ?? 3072;
|
|
893
|
+
const log = getLogger(logger);
|
|
709
894
|
const embedder = new googleGenai.GoogleGenerativeAIEmbeddings({
|
|
710
895
|
apiKey: config.apiKey,
|
|
711
896
|
model: config.model,
|
|
@@ -715,17 +900,7 @@ function createGeminiProvider(config, logger) {
|
|
|
715
900
|
async embed(texts) {
|
|
716
901
|
const vectors = await retry(async (attempt) => {
|
|
717
902
|
if (attempt > 1) {
|
|
718
|
-
|
|
719
|
-
attempt,
|
|
720
|
-
provider: 'gemini',
|
|
721
|
-
model: config.model,
|
|
722
|
-
};
|
|
723
|
-
if (logger) {
|
|
724
|
-
logger.warn(msg, 'Retrying embedding request');
|
|
725
|
-
}
|
|
726
|
-
else {
|
|
727
|
-
console.warn(msg, 'Retrying embedding request');
|
|
728
|
-
}
|
|
903
|
+
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
729
904
|
}
|
|
730
905
|
// embedDocuments returns vectors for multiple texts
|
|
731
906
|
return embedder.embedDocuments(texts);
|
|
@@ -735,19 +910,13 @@ function createGeminiProvider(config, logger) {
|
|
|
735
910
|
maxDelayMs: 10_000,
|
|
736
911
|
jitter: 0.2,
|
|
737
912
|
onRetry: ({ attempt, delayMs, error }) => {
|
|
738
|
-
|
|
913
|
+
log.warn({
|
|
739
914
|
attempt,
|
|
740
915
|
delayMs,
|
|
741
916
|
provider: 'gemini',
|
|
742
917
|
model: config.model,
|
|
743
|
-
error,
|
|
744
|
-
};
|
|
745
|
-
if (logger) {
|
|
746
|
-
logger.warn(msg, 'Embedding call failed; will retry');
|
|
747
|
-
}
|
|
748
|
-
else {
|
|
749
|
-
console.warn(msg, 'Embedding call failed; will retry');
|
|
750
|
-
}
|
|
918
|
+
err: normalizeError(error),
|
|
919
|
+
}, 'Embedding call failed; will retry');
|
|
751
920
|
},
|
|
752
921
|
});
|
|
753
922
|
// Validate dimensions
|
|
@@ -828,15 +997,6 @@ function contentHash(text) {
|
|
|
828
997
|
*/
|
|
829
998
|
/** Namespace UUID for jeeves-watcher point IDs. */
|
|
830
999
|
const NAMESPACE = '6a6f686e-6761-4c74-ad6a-656576657321';
|
|
831
|
-
/**
|
|
832
|
-
* Normalise a file path for deterministic point ID generation.
|
|
833
|
-
*
|
|
834
|
-
* @param filePath - The original file path.
|
|
835
|
-
* @returns The normalised path string.
|
|
836
|
-
*/
|
|
837
|
-
function normalisePath(filePath) {
|
|
838
|
-
return filePath.replace(/\\/g, '/').toLowerCase();
|
|
839
|
-
}
|
|
840
1000
|
/**
|
|
841
1001
|
* Generate a deterministic UUID v5 point ID for a file (and optional chunk index).
|
|
842
1002
|
*
|
|
@@ -846,8 +1006,8 @@ function normalisePath(filePath) {
|
|
|
846
1006
|
*/
|
|
847
1007
|
function pointId(filePath, chunkIndex) {
|
|
848
1008
|
const key = chunkIndex !== undefined
|
|
849
|
-
? `${
|
|
850
|
-
:
|
|
1009
|
+
? `${normalizePath(filePath)}#${String(chunkIndex)}`
|
|
1010
|
+
: normalizePath(filePath);
|
|
851
1011
|
return uuid.v5(key, NAMESPACE);
|
|
852
1012
|
}
|
|
853
1013
|
|
|
@@ -864,6 +1024,9 @@ function pointId(filePath, chunkIndex) {
|
|
|
864
1024
|
*/
|
|
865
1025
|
function extractMarkdownFrontmatter(markdown) {
|
|
866
1026
|
const trimmed = markdown.replace(/^\uFEFF/, '');
|
|
1027
|
+
// Only attempt frontmatter parsing if the file starts with ---
|
|
1028
|
+
if (!/^\s*---/.test(trimmed))
|
|
1029
|
+
return { body: markdown };
|
|
867
1030
|
const match = /^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(trimmed);
|
|
868
1031
|
if (!match)
|
|
869
1032
|
return { body: markdown };
|
|
@@ -966,66 +1129,11 @@ async function extractText(filePath, extension) {
|
|
|
966
1129
|
}
|
|
967
1130
|
|
|
968
1131
|
/**
|
|
969
|
-
*
|
|
970
|
-
*
|
|
971
|
-
* @param filePath - The file path.
|
|
972
|
-
* @param stats - The file stats.
|
|
973
|
-
* @param extractedFrontmatter - Optional extracted frontmatter.
|
|
974
|
-
* @param extractedJson - Optional parsed JSON content.
|
|
975
|
-
* @returns The constructed file attributes.
|
|
1132
|
+
* @module rules/templates
|
|
1133
|
+
* Resolves template variables (`${path.to.value}`) in rule `set` objects against file attributes.
|
|
976
1134
|
*/
|
|
977
|
-
function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
|
|
978
|
-
const normalised = filePath.replace(/\\/g, '/');
|
|
979
|
-
const attrs = {
|
|
980
|
-
file: {
|
|
981
|
-
path: normalised,
|
|
982
|
-
directory: node_path.dirname(normalised).replace(/\\/g, '/'),
|
|
983
|
-
filename: node_path.basename(normalised),
|
|
984
|
-
extension: node_path.extname(normalised),
|
|
985
|
-
sizeBytes: stats.size,
|
|
986
|
-
modified: stats.mtime.toISOString(),
|
|
987
|
-
},
|
|
988
|
-
};
|
|
989
|
-
if (extractedFrontmatter)
|
|
990
|
-
attrs.frontmatter = extractedFrontmatter;
|
|
991
|
-
if (extractedJson)
|
|
992
|
-
attrs.json = extractedJson;
|
|
993
|
-
return attrs;
|
|
994
|
-
}
|
|
995
1135
|
/**
|
|
996
|
-
*
|
|
997
|
-
*
|
|
998
|
-
* @returns The configured ajv instance.
|
|
999
|
-
*/
|
|
1000
|
-
function createRuleAjv() {
|
|
1001
|
-
const ajv = new Ajv({ allErrors: true });
|
|
1002
|
-
addFormats(ajv);
|
|
1003
|
-
ajv.addKeyword({
|
|
1004
|
-
keyword: 'glob',
|
|
1005
|
-
type: 'string',
|
|
1006
|
-
schemaType: 'string',
|
|
1007
|
-
validate: (pattern, data) => picomatch.isMatch(data, pattern),
|
|
1008
|
-
});
|
|
1009
|
-
return ajv;
|
|
1010
|
-
}
|
|
1011
|
-
/**
|
|
1012
|
-
* Compile an array of inference rules into executable validators.
|
|
1013
|
-
*
|
|
1014
|
-
* @param rules - The inference rule definitions.
|
|
1015
|
-
* @returns An array of compiled rules.
|
|
1016
|
-
*/
|
|
1017
|
-
function compileRules(rules) {
|
|
1018
|
-
const ajv = createRuleAjv();
|
|
1019
|
-
return rules.map((rule, idx) => ({
|
|
1020
|
-
rule,
|
|
1021
|
-
validate: ajv.compile({
|
|
1022
|
-
$id: `rule-${String(idx)}`,
|
|
1023
|
-
...rule.match,
|
|
1024
|
-
}),
|
|
1025
|
-
}));
|
|
1026
|
-
}
|
|
1027
|
-
/**
|
|
1028
|
-
* Resolve `$\{template.vars\}` in a value against the given attributes.
|
|
1136
|
+
* Resolve `${template.vars}` in a value against the given attributes.
|
|
1029
1137
|
*
|
|
1030
1138
|
* @param value - The value to resolve.
|
|
1031
1139
|
* @param attributes - The file attributes for variable lookup.
|
|
@@ -1055,9 +1163,13 @@ function resolveSet(setObj, attributes) {
|
|
|
1055
1163
|
}
|
|
1056
1164
|
return result;
|
|
1057
1165
|
}
|
|
1166
|
+
|
|
1167
|
+
/**
|
|
1168
|
+
* @module rules/apply
|
|
1169
|
+
* Applies compiled inference rules to file attributes, producing merged metadata via template resolution and JsonMap transforms.
|
|
1170
|
+
*/
|
|
1058
1171
|
/**
|
|
1059
1172
|
* Create the lib object for JsonMap transformations.
|
|
1060
|
-
* Provides utility functions for path manipulation.
|
|
1061
1173
|
*
|
|
1062
1174
|
* @returns The lib object.
|
|
1063
1175
|
*/
|
|
@@ -1081,7 +1193,7 @@ function createJsonMapLib() {
|
|
|
1081
1193
|
* @param compiledRules - The compiled rules to evaluate.
|
|
1082
1194
|
* @param attributes - The file attributes to match against.
|
|
1083
1195
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1084
|
-
* @param logger - Optional
|
|
1196
|
+
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1085
1197
|
* @returns The merged metadata from all matching rules.
|
|
1086
1198
|
*/
|
|
1087
1199
|
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
@@ -1131,6 +1243,80 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1131
1243
|
return merged;
|
|
1132
1244
|
}
|
|
1133
1245
|
|
|
1246
|
+
/**
|
|
1247
|
+
* @module rules/attributes
|
|
1248
|
+
* Builds file attribute objects for rule matching. Pure function: derives attributes from path, stats, and extracted data.
|
|
1249
|
+
*/
|
|
1250
|
+
/**
|
|
1251
|
+
* Build {@link FileAttributes} from a file path and stat info.
|
|
1252
|
+
*
|
|
1253
|
+
* @param filePath - The file path.
|
|
1254
|
+
* @param stats - The file stats.
|
|
1255
|
+
* @param extractedFrontmatter - Optional extracted frontmatter.
|
|
1256
|
+
* @param extractedJson - Optional parsed JSON content.
|
|
1257
|
+
* @returns The constructed file attributes.
|
|
1258
|
+
*/
|
|
1259
|
+
function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
|
|
1260
|
+
const normalised = filePath.replace(/\\/g, '/');
|
|
1261
|
+
const attrs = {
|
|
1262
|
+
file: {
|
|
1263
|
+
path: normalised,
|
|
1264
|
+
directory: node_path.dirname(normalised).replace(/\\/g, '/'),
|
|
1265
|
+
filename: node_path.basename(normalised),
|
|
1266
|
+
extension: node_path.extname(normalised),
|
|
1267
|
+
sizeBytes: stats.size,
|
|
1268
|
+
modified: stats.mtime.toISOString(),
|
|
1269
|
+
},
|
|
1270
|
+
};
|
|
1271
|
+
if (extractedFrontmatter)
|
|
1272
|
+
attrs.frontmatter = extractedFrontmatter;
|
|
1273
|
+
if (extractedJson)
|
|
1274
|
+
attrs.json = extractedJson;
|
|
1275
|
+
return attrs;
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
/**
|
|
1279
|
+
* @module rules/ajvSetup
|
|
1280
|
+
* AJV instance factory with custom glob keyword for picomatch-based pattern matching in rule schemas.
|
|
1281
|
+
*/
|
|
1282
|
+
/**
|
|
1283
|
+
* Create an AJV instance with a custom `glob` format for picomatch glob matching.
|
|
1284
|
+
*
|
|
1285
|
+
* @returns The configured AJV instance.
|
|
1286
|
+
*/
|
|
1287
|
+
function createRuleAjv() {
|
|
1288
|
+
const ajv = new Ajv({ allErrors: true });
|
|
1289
|
+
addFormats(ajv);
|
|
1290
|
+
ajv.addKeyword({
|
|
1291
|
+
keyword: 'glob',
|
|
1292
|
+
type: 'string',
|
|
1293
|
+
schemaType: 'string',
|
|
1294
|
+
validate: (pattern, data) => picomatch.isMatch(data, pattern),
|
|
1295
|
+
});
|
|
1296
|
+
return ajv;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
/**
|
|
1300
|
+
* @module rules/compile
|
|
1301
|
+
* Compiles inference rule definitions into executable AJV validators for efficient rule evaluation.
|
|
1302
|
+
*/
|
|
1303
|
+
/**
|
|
1304
|
+
* Compile an array of inference rules into executable validators.
|
|
1305
|
+
*
|
|
1306
|
+
* @param rules - The inference rule definitions.
|
|
1307
|
+
* @returns An array of compiled rules.
|
|
1308
|
+
*/
|
|
1309
|
+
function compileRules(rules) {
|
|
1310
|
+
const ajv = createRuleAjv();
|
|
1311
|
+
return rules.map((rule, idx) => ({
|
|
1312
|
+
rule,
|
|
1313
|
+
validate: ajv.compile({
|
|
1314
|
+
$id: `rule-${String(idx)}`,
|
|
1315
|
+
...rule.match,
|
|
1316
|
+
}),
|
|
1317
|
+
}));
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1134
1320
|
/**
|
|
1135
1321
|
* @module processor/buildMetadata
|
|
1136
1322
|
* Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
|
|
@@ -1298,7 +1484,7 @@ class DocumentProcessor {
|
|
|
1298
1484
|
this.logger.info({ filePath, chunks: chunks.length }, 'File processed successfully');
|
|
1299
1485
|
}
|
|
1300
1486
|
catch (error) {
|
|
1301
|
-
this.logger.error({ filePath, error }, 'Failed to process file');
|
|
1487
|
+
this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to process file');
|
|
1302
1488
|
}
|
|
1303
1489
|
}
|
|
1304
1490
|
/**
|
|
@@ -1318,7 +1504,7 @@ class DocumentProcessor {
|
|
|
1318
1504
|
this.logger.info({ filePath }, 'File deleted from index');
|
|
1319
1505
|
}
|
|
1320
1506
|
catch (error) {
|
|
1321
|
-
this.logger.error({ filePath, error }, 'Failed to delete file');
|
|
1507
|
+
this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to delete file');
|
|
1322
1508
|
}
|
|
1323
1509
|
}
|
|
1324
1510
|
/**
|
|
@@ -1346,7 +1532,7 @@ class DocumentProcessor {
|
|
|
1346
1532
|
return merged;
|
|
1347
1533
|
}
|
|
1348
1534
|
catch (error) {
|
|
1349
|
-
this.logger.error({ filePath, error }, 'Failed to update metadata');
|
|
1535
|
+
this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to update metadata');
|
|
1350
1536
|
return null;
|
|
1351
1537
|
}
|
|
1352
1538
|
}
|
|
@@ -1376,7 +1562,7 @@ class DocumentProcessor {
|
|
|
1376
1562
|
return metadata;
|
|
1377
1563
|
}
|
|
1378
1564
|
catch (error) {
|
|
1379
|
-
this.logger.error({ filePath, error }, 'Failed to re-apply rules');
|
|
1565
|
+
this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to re-apply rules');
|
|
1380
1566
|
return null;
|
|
1381
1567
|
}
|
|
1382
1568
|
}
|
|
@@ -1543,7 +1729,7 @@ class VectorStoreClient {
|
|
|
1543
1729
|
client;
|
|
1544
1730
|
collectionName;
|
|
1545
1731
|
dims;
|
|
1546
|
-
|
|
1732
|
+
log;
|
|
1547
1733
|
/**
|
|
1548
1734
|
* Create a new VectorStoreClient.
|
|
1549
1735
|
*
|
|
@@ -1559,7 +1745,7 @@ class VectorStoreClient {
|
|
|
1559
1745
|
});
|
|
1560
1746
|
this.collectionName = config.collectionName;
|
|
1561
1747
|
this.dims = dimensions;
|
|
1562
|
-
this.
|
|
1748
|
+
this.log = getLogger(logger);
|
|
1563
1749
|
}
|
|
1564
1750
|
/**
|
|
1565
1751
|
* Ensure the collection exists with correct dimensions and Cosine distance.
|
|
@@ -1588,17 +1774,7 @@ class VectorStoreClient {
|
|
|
1588
1774
|
return;
|
|
1589
1775
|
await retry(async (attempt) => {
|
|
1590
1776
|
if (attempt > 1) {
|
|
1591
|
-
|
|
1592
|
-
attempt,
|
|
1593
|
-
operation: 'qdrant.upsert',
|
|
1594
|
-
points: points.length,
|
|
1595
|
-
};
|
|
1596
|
-
if (this.logger) {
|
|
1597
|
-
this.logger.warn(msg, 'Retrying Qdrant upsert');
|
|
1598
|
-
}
|
|
1599
|
-
else {
|
|
1600
|
-
console.warn(msg, 'Retrying Qdrant upsert');
|
|
1601
|
-
}
|
|
1777
|
+
this.log.warn({ attempt, operation: 'qdrant.upsert', points: points.length }, 'Retrying Qdrant upsert');
|
|
1602
1778
|
}
|
|
1603
1779
|
await this.client.upsert(this.collectionName, {
|
|
1604
1780
|
wait: true,
|
|
@@ -1614,13 +1790,12 @@ class VectorStoreClient {
|
|
|
1614
1790
|
maxDelayMs: 10_000,
|
|
1615
1791
|
jitter: 0.2,
|
|
1616
1792
|
onRetry: ({ attempt, delayMs, error }) => {
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
}
|
|
1793
|
+
this.log.warn({
|
|
1794
|
+
attempt,
|
|
1795
|
+
delayMs,
|
|
1796
|
+
operation: 'qdrant.upsert',
|
|
1797
|
+
err: normalizeError(error),
|
|
1798
|
+
}, 'Qdrant upsert failed; will retry');
|
|
1624
1799
|
},
|
|
1625
1800
|
});
|
|
1626
1801
|
}
|
|
@@ -1634,17 +1809,7 @@ class VectorStoreClient {
|
|
|
1634
1809
|
return;
|
|
1635
1810
|
await retry(async (attempt) => {
|
|
1636
1811
|
if (attempt > 1) {
|
|
1637
|
-
|
|
1638
|
-
attempt,
|
|
1639
|
-
operation: 'qdrant.delete',
|
|
1640
|
-
ids: ids.length,
|
|
1641
|
-
};
|
|
1642
|
-
if (this.logger) {
|
|
1643
|
-
this.logger.warn(msg, 'Retrying Qdrant delete');
|
|
1644
|
-
}
|
|
1645
|
-
else {
|
|
1646
|
-
console.warn(msg, 'Retrying Qdrant delete');
|
|
1647
|
-
}
|
|
1812
|
+
this.log.warn({ attempt, operation: 'qdrant.delete', ids: ids.length }, 'Retrying Qdrant delete');
|
|
1648
1813
|
}
|
|
1649
1814
|
await this.client.delete(this.collectionName, {
|
|
1650
1815
|
wait: true,
|
|
@@ -1656,13 +1821,12 @@ class VectorStoreClient {
|
|
|
1656
1821
|
maxDelayMs: 10_000,
|
|
1657
1822
|
jitter: 0.2,
|
|
1658
1823
|
onRetry: ({ attempt, delayMs, error }) => {
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
}
|
|
1824
|
+
this.log.warn({
|
|
1825
|
+
attempt,
|
|
1826
|
+
delayMs,
|
|
1827
|
+
operation: 'qdrant.delete',
|
|
1828
|
+
err: normalizeError(error),
|
|
1829
|
+
}, 'Qdrant delete failed; will retry');
|
|
1666
1830
|
},
|
|
1667
1831
|
});
|
|
1668
1832
|
}
|
|
@@ -1816,7 +1980,7 @@ class FileSystemWatcher {
|
|
|
1816
1980
|
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
|
|
1817
1981
|
});
|
|
1818
1982
|
this.watcher.on('error', (error) => {
|
|
1819
|
-
this.logger.error({ error }, 'Watcher error');
|
|
1983
|
+
this.logger.error({ err: normalizeError(error) }, 'Watcher error');
|
|
1820
1984
|
});
|
|
1821
1985
|
this.queue.process();
|
|
1822
1986
|
this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
|
|
@@ -1833,63 +1997,141 @@ class FileSystemWatcher {
|
|
|
1833
1997
|
}
|
|
1834
1998
|
}
|
|
1835
1999
|
|
|
2000
|
+
/**
|
|
2001
|
+
* @module app/configWatcher
|
|
2002
|
+
* Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
|
|
2003
|
+
*/
|
|
2004
|
+
/**
|
|
2005
|
+
* Debounced config file watcher.
|
|
2006
|
+
*/
|
|
2007
|
+
class ConfigWatcher {
|
|
2008
|
+
options;
|
|
2009
|
+
watcher;
|
|
2010
|
+
debounce;
|
|
2011
|
+
constructor(options) {
|
|
2012
|
+
this.options = options;
|
|
2013
|
+
}
|
|
2014
|
+
start() {
|
|
2015
|
+
if (!this.options.enabled)
|
|
2016
|
+
return;
|
|
2017
|
+
this.watcher = chokidar.watch(this.options.configPath, {
|
|
2018
|
+
ignoreInitial: true,
|
|
2019
|
+
});
|
|
2020
|
+
this.watcher.on('change', () => {
|
|
2021
|
+
if (this.debounce)
|
|
2022
|
+
clearTimeout(this.debounce);
|
|
2023
|
+
this.debounce = setTimeout(() => {
|
|
2024
|
+
void this.options.onChange();
|
|
2025
|
+
}, this.options.debounceMs);
|
|
2026
|
+
});
|
|
2027
|
+
this.watcher.on('error', (error) => {
|
|
2028
|
+
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2029
|
+
});
|
|
2030
|
+
this.options.logger.info({
|
|
2031
|
+
configPath: this.options.configPath,
|
|
2032
|
+
debounceMs: this.options.debounceMs,
|
|
2033
|
+
}, 'Config watcher started');
|
|
2034
|
+
}
|
|
2035
|
+
async stop() {
|
|
2036
|
+
if (this.debounce) {
|
|
2037
|
+
clearTimeout(this.debounce);
|
|
2038
|
+
this.debounce = undefined;
|
|
2039
|
+
}
|
|
2040
|
+
if (this.watcher) {
|
|
2041
|
+
await this.watcher.close();
|
|
2042
|
+
this.watcher = undefined;
|
|
2043
|
+
}
|
|
2044
|
+
}
|
|
2045
|
+
}
|
|
2046
|
+
|
|
2047
|
+
/**
|
|
2048
|
+
* @module app/shutdown
|
|
2049
|
+
* Process signal shutdown orchestration. Installs SIGINT/SIGTERM handlers that invoke a provided async stop function.
|
|
2050
|
+
*/
|
|
2051
|
+
/**
|
|
2052
|
+
* Install process signal handlers.
|
|
2053
|
+
*
|
|
2054
|
+
* @param stop - Async stop function to invoke on shutdown signals.
|
|
2055
|
+
*/
|
|
2056
|
+
function installShutdownHandlers(stop) {
|
|
2057
|
+
const shutdown = async () => {
|
|
2058
|
+
await stop();
|
|
2059
|
+
process.exit(0);
|
|
2060
|
+
};
|
|
2061
|
+
process.on('SIGTERM', () => void shutdown());
|
|
2062
|
+
process.on('SIGINT', () => void shutdown());
|
|
2063
|
+
}
|
|
2064
|
+
|
|
2065
|
+
const defaultFactories = {
|
|
2066
|
+
loadConfig,
|
|
2067
|
+
createLogger,
|
|
2068
|
+
createEmbeddingProvider,
|
|
2069
|
+
createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
|
|
2070
|
+
compileRules,
|
|
2071
|
+
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
|
|
2072
|
+
createEventQueue: (options) => new EventQueue(options),
|
|
2073
|
+
createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
|
|
2074
|
+
createApiServer,
|
|
2075
|
+
};
|
|
1836
2076
|
/**
|
|
1837
2077
|
* Main application class that wires together all components.
|
|
1838
2078
|
*/
|
|
1839
2079
|
class JeevesWatcher {
|
|
1840
2080
|
config;
|
|
1841
2081
|
configPath;
|
|
2082
|
+
factories;
|
|
1842
2083
|
logger;
|
|
1843
2084
|
watcher;
|
|
1844
2085
|
queue;
|
|
1845
2086
|
server;
|
|
1846
2087
|
processor;
|
|
1847
2088
|
configWatcher;
|
|
1848
|
-
configDebounce;
|
|
1849
2089
|
/**
|
|
1850
2090
|
* Create a new JeevesWatcher instance.
|
|
1851
2091
|
*
|
|
1852
2092
|
* @param config - The application configuration.
|
|
1853
2093
|
* @param configPath - Optional config file path to watch for changes.
|
|
2094
|
+
* @param factories - Optional component factories (for dependency injection).
|
|
1854
2095
|
*/
|
|
1855
|
-
constructor(config, configPath) {
|
|
2096
|
+
constructor(config, configPath, factories = {}) {
|
|
1856
2097
|
this.config = config;
|
|
1857
2098
|
this.configPath = configPath;
|
|
2099
|
+
this.factories = { ...defaultFactories, ...factories };
|
|
1858
2100
|
}
|
|
1859
2101
|
/**
|
|
1860
2102
|
* Start the watcher, API server, and all components.
|
|
1861
2103
|
*/
|
|
1862
2104
|
async start() {
|
|
1863
|
-
const logger = createLogger(this.config.logging);
|
|
2105
|
+
const logger = this.factories.createLogger(this.config.logging);
|
|
1864
2106
|
this.logger = logger;
|
|
1865
2107
|
let embeddingProvider;
|
|
1866
2108
|
try {
|
|
1867
|
-
embeddingProvider = createEmbeddingProvider(this.config.embedding, logger);
|
|
2109
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
1868
2110
|
}
|
|
1869
2111
|
catch (error) {
|
|
1870
|
-
logger.fatal({ error }, 'Failed to create embedding provider');
|
|
2112
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
1871
2113
|
throw error;
|
|
1872
2114
|
}
|
|
1873
|
-
const vectorStore =
|
|
2115
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
1874
2116
|
await vectorStore.ensureCollection();
|
|
1875
|
-
const compiledRules = compileRules(this.config.inferenceRules ?? []);
|
|
2117
|
+
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
1876
2118
|
const processorConfig = {
|
|
1877
2119
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
1878
2120
|
chunkSize: this.config.embedding.chunkSize,
|
|
1879
2121
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
1880
2122
|
maps: this.config.maps,
|
|
1881
2123
|
};
|
|
1882
|
-
const processor =
|
|
2124
|
+
const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
|
|
1883
2125
|
this.processor = processor;
|
|
1884
|
-
const queue =
|
|
2126
|
+
const queue = this.factories.createEventQueue({
|
|
1885
2127
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
1886
2128
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
1887
2129
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
1888
2130
|
});
|
|
1889
2131
|
this.queue = queue;
|
|
1890
|
-
const watcher =
|
|
2132
|
+
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger);
|
|
1891
2133
|
this.watcher = watcher;
|
|
1892
|
-
const server = createApiServer({
|
|
2134
|
+
const server = this.factories.createApiServer({
|
|
1893
2135
|
processor,
|
|
1894
2136
|
vectorStore,
|
|
1895
2137
|
embeddingProvider,
|
|
@@ -1945,28 +2187,18 @@ class JeevesWatcher {
|
|
|
1945
2187
|
return;
|
|
1946
2188
|
}
|
|
1947
2189
|
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
1948
|
-
this.configWatcher =
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
this.configDebounce = setTimeout(() => {
|
|
1955
|
-
void this.reloadConfig();
|
|
1956
|
-
}, debounceMs);
|
|
1957
|
-
});
|
|
1958
|
-
this.configWatcher.on('error', (error) => {
|
|
1959
|
-
logger.error({ error }, 'Config watcher error');
|
|
2190
|
+
this.configWatcher = new ConfigWatcher({
|
|
2191
|
+
configPath: this.configPath,
|
|
2192
|
+
enabled,
|
|
2193
|
+
debounceMs,
|
|
2194
|
+
logger,
|
|
2195
|
+
onChange: async () => this.reloadConfig(),
|
|
1960
2196
|
});
|
|
1961
|
-
|
|
2197
|
+
this.configWatcher.start();
|
|
1962
2198
|
}
|
|
1963
2199
|
async stopConfigWatch() {
|
|
1964
|
-
if (this.configDebounce) {
|
|
1965
|
-
clearTimeout(this.configDebounce);
|
|
1966
|
-
this.configDebounce = undefined;
|
|
1967
|
-
}
|
|
1968
2200
|
if (this.configWatcher) {
|
|
1969
|
-
await this.configWatcher.
|
|
2201
|
+
await this.configWatcher.stop();
|
|
1970
2202
|
this.configWatcher = undefined;
|
|
1971
2203
|
}
|
|
1972
2204
|
}
|
|
@@ -1977,14 +2209,14 @@ class JeevesWatcher {
|
|
|
1977
2209
|
return;
|
|
1978
2210
|
logger.info({ configPath: this.configPath }, 'Config change detected, reloading...');
|
|
1979
2211
|
try {
|
|
1980
|
-
const newConfig = await loadConfig(this.configPath);
|
|
2212
|
+
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
1981
2213
|
this.config = newConfig;
|
|
1982
|
-
const compiledRules = compileRules(newConfig.inferenceRules ?? []);
|
|
2214
|
+
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
1983
2215
|
processor.updateRules(compiledRules);
|
|
1984
2216
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
1985
2217
|
}
|
|
1986
2218
|
catch (error) {
|
|
1987
|
-
logger.error({ error }, 'Failed to reload config');
|
|
2219
|
+
logger.error({ err: normalizeError(error) }, 'Failed to reload config');
|
|
1988
2220
|
}
|
|
1989
2221
|
}
|
|
1990
2222
|
}
|
|
@@ -1997,12 +2229,7 @@ class JeevesWatcher {
|
|
|
1997
2229
|
async function startFromConfig(configPath) {
|
|
1998
2230
|
const config = await loadConfig(configPath);
|
|
1999
2231
|
const app = new JeevesWatcher(config, configPath);
|
|
2000
|
-
|
|
2001
|
-
await app.stop();
|
|
2002
|
-
process.exit(0);
|
|
2003
|
-
};
|
|
2004
|
-
process.on('SIGTERM', () => void shutdown());
|
|
2005
|
-
process.on('SIGINT', () => void shutdown());
|
|
2232
|
+
installShutdownHandlers(() => app.stop());
|
|
2006
2233
|
await app.start();
|
|
2007
2234
|
return app;
|
|
2008
2235
|
}
|