@karmaniverous/jeeves-watcher 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -1,12 +1,11 @@
1
1
  'use strict';
2
2
 
3
3
  var Fastify = require('fastify');
4
- var radash = require('radash');
5
- var node_crypto = require('node:crypto');
6
4
  var promises = require('node:fs/promises');
7
5
  var node_path = require('node:path');
8
6
  var picomatch = require('picomatch');
9
- var chokidar = require('chokidar');
7
+ var radash = require('radash');
8
+ var node_crypto = require('node:crypto');
10
9
  var cosmiconfig = require('cosmiconfig');
11
10
  var zod = require('zod');
12
11
  var jsonmap = require('@karmaniverous/jsonmap');
@@ -20,6 +19,7 @@ var Ajv = require('ajv');
20
19
  var addFormats = require('ajv-formats');
21
20
  var textsplitters = require('@langchain/textsplitters');
22
21
  var jsClientRest = require('@qdrant/js-client-rest');
22
+ var chokidar = require('chokidar');
23
23
 
24
24
  function _interopNamespaceDefault(e) {
25
25
  var n = Object.create(null);
@@ -41,74 +41,28 @@ function _interopNamespaceDefault(e) {
41
41
  var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
42
42
 
43
43
  /**
44
- * @module metadata/metadata
45
- * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
46
- */
47
- /**
48
- * Normalise a file path for deterministic mapping: lowercase, forward slashes, strip leading drive letter colon.
49
- *
50
- * @param filePath - The original file path.
51
- * @returns The normalised path string.
52
- */
53
- function normalisePath$1(filePath) {
54
- return filePath
55
- .replace(/\\/g, '/')
56
- .replace(/^([A-Za-z]):/, (_m, letter) => letter.toLowerCase())
57
- .toLowerCase();
58
- }
59
- /**
60
- * Derive a deterministic `.meta.json` path for a given file.
61
- *
62
- * @param filePath - The watched file path.
63
- * @param metadataDir - The root metadata directory.
64
- * @returns The full path to the metadata file.
65
- */
66
- function metadataPath(filePath, metadataDir) {
67
- const normalised = normalisePath$1(filePath);
68
- const hash = node_crypto.createHash('sha256').update(normalised, 'utf8').digest('hex');
69
- return node_path.join(metadataDir, `${hash}.meta.json`);
70
- }
71
- /**
72
- * Read persisted metadata for a file.
73
- *
74
- * @param filePath - The watched file path.
75
- * @param metadataDir - The root metadata directory.
76
- * @returns The parsed metadata object, or `null` if not found.
77
- */
78
- async function readMetadata(filePath, metadataDir) {
79
- try {
80
- const raw = await promises.readFile(metadataPath(filePath, metadataDir), 'utf8');
81
- return JSON.parse(raw);
82
- }
83
- catch {
84
- return null;
85
- }
86
- }
87
- /**
88
- * Write metadata for a file.
44
+ * @module util/normalizeError
89
45
  *
90
- * @param filePath - The watched file path.
91
- * @param metadataDir - The root metadata directory.
92
- * @param metadata - The metadata to persist.
46
+ * Normalizes unknown thrown values into proper Error objects for pino serialization.
93
47
  */
94
- async function writeMetadata(filePath, metadataDir, metadata) {
95
- const dest = metadataPath(filePath, metadataDir);
96
- await promises.mkdir(node_path.dirname(dest), { recursive: true });
97
- await promises.writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
98
- }
99
48
  /**
100
- * Delete metadata for a file.
49
+ * Convert an unknown thrown value into a proper Error with message, stack, and cause.
50
+ * Pino's built-in `err` serializer requires an Error instance to extract message/stack.
101
51
  *
102
- * @param filePath - The watched file path.
103
- * @param metadataDir - The root metadata directory.
52
+ * @param error - The caught value (may not be an Error).
53
+ * @returns A proper Error instance.
104
54
  */
105
- async function deleteMetadata(filePath, metadataDir) {
106
- try {
107
- await promises.rm(metadataPath(filePath, metadataDir));
108
- }
109
- catch {
110
- // Ignore if file doesn't exist.
111
- }
55
+ function normalizeError(error) {
56
+ if (error instanceof Error)
57
+ return error;
58
+ if (typeof error === 'string')
59
+ return new Error(error);
60
+ const message = typeof error === 'object' && error !== null && 'message' in error
61
+ ? String(error.message)
62
+ : String(error);
63
+ const normalized = new Error(message);
64
+ normalized.cause = error;
65
+ return normalized;
112
66
  }
113
67
 
114
68
  /**
@@ -208,107 +162,266 @@ async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
208
162
  }
209
163
 
210
164
  /**
211
- * Create the Fastify API server with all routes registered.
212
- *
213
- * The returned instance is not yet listening — call `server.listen()` to start.
165
+ * @module api/handlers/configReindex
166
+ * Fastify route handler for POST /config-reindex. Triggers an async reindex job scoped to rules or full processing.
167
+ */
168
+ /**
169
+ * Create handler for POST /config-reindex.
214
170
  *
215
- * @param options - The server options.
216
- * @returns A configured Fastify instance.
171
+ * @param deps - Route dependencies.
217
172
  */
218
- function createApiServer(options) {
219
- const { processor, vectorStore, embeddingProvider, logger } = options;
220
- const app = Fastify({ logger: false });
221
- app.get('/status', () => ({
222
- status: 'ok',
223
- uptime: process.uptime(),
224
- }));
225
- app.post('/metadata', async (request, reply) => {
173
+ function createConfigReindexHandler(deps) {
174
+ return async (request, reply) => {
226
175
  try {
227
- const { path, metadata } = request.body;
228
- await processor.processMetadataUpdate(path, metadata);
229
- return { ok: true };
176
+ const scope = request.body.scope ?? 'rules';
177
+ // Return immediately and run async
178
+ void (async () => {
179
+ try {
180
+ if (scope === 'rules') {
181
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processRulesUpdate');
182
+ deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
183
+ }
184
+ else {
185
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
186
+ deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
187
+ }
188
+ }
189
+ catch (error) {
190
+ deps.logger.error({ err: normalizeError(error), scope }, 'Config reindex failed');
191
+ }
192
+ })();
193
+ return await reply.status(200).send({ status: 'started', scope });
230
194
  }
231
195
  catch (error) {
232
- logger.error({ error }, 'Metadata update failed');
233
- return reply.status(500).send({ error: 'Internal server error' });
196
+ deps.logger.error({ err: normalizeError(error) }, 'Config reindex request failed');
197
+ return await reply.status(500).send({ error: 'Internal server error' });
234
198
  }
235
- });
236
- app.post('/search', async (request, reply) => {
199
+ };
200
+ }
201
+
202
+ /**
203
+ * @module api/handlers/metadata
204
+ * Fastify route handler for POST /metadata. Performs enrichment metadata updates via the document processor.
205
+ */
206
+ /**
207
+ * Create handler for POST /metadata.
208
+ *
209
+ * @param deps - Route dependencies.
210
+ */
211
+ function createMetadataHandler(deps) {
212
+ return async (request, reply) => {
237
213
  try {
238
- const { query, limit = 10 } = request.body;
239
- const vectors = await embeddingProvider.embed([query]);
240
- const results = await vectorStore.search(vectors[0], limit);
241
- return results;
214
+ const { path, metadata } = request.body;
215
+ await deps.processor.processMetadataUpdate(path, metadata);
216
+ return { ok: true };
242
217
  }
243
218
  catch (error) {
244
- logger.error({ error }, 'Search failed');
219
+ deps.logger.error({ err: normalizeError(error) }, 'Metadata update failed');
245
220
  return reply.status(500).send({ error: 'Internal server error' });
246
221
  }
247
- });
248
- app.post('/reindex', async (_request, reply) => {
249
- try {
250
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
251
- return await reply.status(200).send({ ok: true, filesIndexed: count });
252
- }
253
- catch (error) {
254
- logger.error({ error }, 'Reindex failed');
255
- return await reply.status(500).send({ error: 'Internal server error' });
256
- }
257
- });
258
- app.post('/rebuild-metadata', async (_request, reply) => {
222
+ };
223
+ }
224
+
225
+ /**
226
+ * @module util/normalizePath
227
+ * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
228
+ */
229
+ /**
230
+ * Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
231
+ *
232
+ * @param filePath - The original file path.
233
+ * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
234
+ * @returns The normalized path string.
235
+ */
236
+ function normalizePath(filePath, stripDriveLetter = false) {
237
+ let result = filePath.replace(/\\/g, '/').toLowerCase();
238
+ if (stripDriveLetter) {
239
+ result = result.replace(/^([a-z]):/, (_m, letter) => letter);
240
+ }
241
+ return result;
242
+ }
243
+
244
+ /**
245
+ * @module metadata/metadata
246
+ * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
247
+ */
248
+ /**
249
+ * Derive a deterministic `.meta.json` path for a given file.
250
+ *
251
+ * @param filePath - The watched file path.
252
+ * @param metadataDir - The root metadata directory.
253
+ * @returns The full path to the metadata file.
254
+ */
255
+ function metadataPath(filePath, metadataDir) {
256
+ const normalised = normalizePath(filePath, true);
257
+ const hash = node_crypto.createHash('sha256').update(normalised, 'utf8').digest('hex');
258
+ return node_path.join(metadataDir, `${hash}.meta.json`);
259
+ }
260
+ /**
261
+ * Read persisted metadata for a file.
262
+ *
263
+ * @param filePath - The watched file path.
264
+ * @param metadataDir - The root metadata directory.
265
+ * @returns The parsed metadata object, or `null` if not found.
266
+ */
267
+ async function readMetadata(filePath, metadataDir) {
268
+ try {
269
+ const raw = await promises.readFile(metadataPath(filePath, metadataDir), 'utf8');
270
+ return JSON.parse(raw);
271
+ }
272
+ catch {
273
+ return null;
274
+ }
275
+ }
276
+ /**
277
+ * Write metadata for a file.
278
+ *
279
+ * @param filePath - The watched file path.
280
+ * @param metadataDir - The root metadata directory.
281
+ * @param metadata - The metadata to persist.
282
+ */
283
+ async function writeMetadata(filePath, metadataDir, metadata) {
284
+ const dest = metadataPath(filePath, metadataDir);
285
+ await promises.mkdir(node_path.dirname(dest), { recursive: true });
286
+ await promises.writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
287
+ }
288
+ /**
289
+ * Delete metadata for a file.
290
+ *
291
+ * @param filePath - The watched file path.
292
+ * @param metadataDir - The root metadata directory.
293
+ */
294
+ async function deleteMetadata(filePath, metadataDir) {
295
+ try {
296
+ await promises.rm(metadataPath(filePath, metadataDir));
297
+ }
298
+ catch {
299
+ // Ignore if file doesn't exist.
300
+ }
301
+ }
302
+
303
+ /**
304
+ * @module metadata/constants
305
+ * Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
306
+ */
307
+ /** Keys managed by the indexing pipeline (not user enrichment). */
308
+ const SYSTEM_METADATA_KEYS = [
309
+ 'file_path',
310
+ 'chunk_index',
311
+ 'total_chunks',
312
+ 'content_hash',
313
+ 'chunk_text',
314
+ ];
315
+
316
+ /**
317
+ * @module api/handlers/rebuildMetadata
318
+ * Fastify route handler for POST /rebuild-metadata. Recreates enrichment metadata files from vector store payloads.
319
+ */
320
+ /**
321
+ * Create handler for POST /rebuild-metadata.
322
+ *
323
+ * @param deps - Route dependencies.
324
+ */
325
+ function createRebuildMetadataHandler(deps) {
326
+ return async (_request, reply) => {
259
327
  try {
260
- const metadataDir = options.config.metadataDir ?? '.jeeves-metadata';
261
- const SYSTEM_KEYS = [
262
- 'file_path',
263
- 'chunk_index',
264
- 'total_chunks',
265
- 'content_hash',
266
- 'chunk_text',
267
- ];
268
- for await (const point of vectorStore.scroll()) {
328
+ const metadataDir = deps.config.metadataDir ?? '.jeeves-metadata';
329
+ const systemKeys = [...SYSTEM_METADATA_KEYS];
330
+ for await (const point of deps.vectorStore.scroll()) {
269
331
  const payload = point.payload;
270
332
  const filePath = payload['file_path'];
271
333
  if (typeof filePath !== 'string' || filePath.length === 0)
272
334
  continue;
273
335
  // Persist only enrichment-ish fields, not chunking/index fields.
274
- const enrichment = radash.omit(payload, SYSTEM_KEYS);
336
+ const enrichment = radash.omit(payload, systemKeys);
275
337
  await writeMetadata(filePath, metadataDir, enrichment);
276
338
  }
277
339
  return await reply.status(200).send({ ok: true });
278
340
  }
279
341
  catch (error) {
280
- logger.error({ error }, 'Rebuild metadata failed');
342
+ deps.logger.error({ err: normalizeError(error) }, 'Rebuild metadata failed');
281
343
  return await reply.status(500).send({ error: 'Internal server error' });
282
344
  }
283
- });
284
- app.post('/config-reindex', async (request, reply) => {
345
+ };
346
+ }
347
+
348
+ /**
349
+ * @module api/handlers/reindex
350
+ * Fastify route handler for POST /reindex. Reprocesses all watched files through the processor.
351
+ */
352
+ /**
353
+ * Create handler for POST /reindex.
354
+ *
355
+ * @param deps - Route dependencies.
356
+ */
357
+ function createReindexHandler(deps) {
358
+ return async (_request, reply) => {
285
359
  try {
286
- const scope = request.body.scope ?? 'rules';
287
- // Return immediately and run async
288
- void (async () => {
289
- try {
290
- if (scope === 'rules') {
291
- // Re-apply inference rules to all files, update Qdrant payloads (no re-embedding)
292
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processRulesUpdate');
293
- logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
294
- }
295
- else {
296
- // Full reindex: re-extract, re-embed, re-upsert
297
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
298
- logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
299
- }
300
- }
301
- catch (error) {
302
- logger.error({ error, scope }, 'Config reindex failed');
303
- }
304
- })();
305
- return await reply.status(200).send({ status: 'started', scope });
360
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
361
+ return await reply.status(200).send({ ok: true, filesIndexed: count });
306
362
  }
307
363
  catch (error) {
308
- logger.error({ error }, 'Config reindex request failed');
364
+ deps.logger.error({ err: normalizeError(error) }, 'Reindex failed');
309
365
  return await reply.status(500).send({ error: 'Internal server error' });
310
366
  }
367
+ };
368
+ }
369
+
370
+ /**
371
+ * @module api/handlers/search
372
+ * Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
373
+ */
374
+ /**
375
+ * Create handler for POST /search.
376
+ *
377
+ * @param deps - Route dependencies.
378
+ */
379
+ function createSearchHandler(deps) {
380
+ return async (request, reply) => {
381
+ try {
382
+ const { query, limit = 10 } = request.body;
383
+ const vectors = await deps.embeddingProvider.embed([query]);
384
+ const results = await deps.vectorStore.search(vectors[0], limit);
385
+ return results;
386
+ }
387
+ catch (error) {
388
+ deps.logger.error({ err: normalizeError(error) }, 'Search failed');
389
+ return reply.status(500).send({ error: 'Internal server error' });
390
+ }
391
+ };
392
+ }
393
+
394
+ /**
395
+ * @module api/handlers/status
396
+ * Fastify route handler for GET /status. Pure handler: returns process uptime and health.
397
+ */
398
+ /**
399
+ * Create handler for GET /status.
400
+ */
401
+ function createStatusHandler() {
402
+ return () => ({
403
+ status: 'ok',
404
+ uptime: process.uptime(),
311
405
  });
406
+ }
407
+
408
+ /**
409
+ * Create the Fastify API server with all routes registered.
410
+ *
411
+ * The returned instance is not yet listening — call `server.listen()` to start.
412
+ *
413
+ * @param options - The server options.
414
+ * @returns A configured Fastify instance.
415
+ */
416
+ function createApiServer(options) {
417
+ const { processor, vectorStore, embeddingProvider, logger, config } = options;
418
+ const app = Fastify({ logger: false });
419
+ app.get('/status', createStatusHandler());
420
+ app.post('/metadata', createMetadataHandler({ processor, logger }));
421
+ app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
422
+ app.post('/reindex', createReindexHandler({ config, processor, logger }));
423
+ app.post('/rebuild-metadata', createRebuildMetadataHandler({ config, vectorStore, logger }));
424
+ app.post('/config-reindex', createConfigReindexHandler({ config, processor, logger }));
312
425
  return app;
313
426
  }
314
427
 
@@ -554,6 +667,51 @@ const jeevesWatcherConfigSchema = zod.z.object({
554
667
  .describe('Timeout in milliseconds for graceful shutdown.'),
555
668
  });
556
669
 
670
+ /**
671
+ * @module config/substituteEnvVars
672
+ *
673
+ * Deep-walks config objects and replaces `${VAR_NAME}` patterns with environment variable values.
674
+ */
675
+ const ENV_PATTERN = /\$\{([^}]+)\}/g;
676
+ /**
677
+ * Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
678
+ *
679
+ * @param value - The string to process.
680
+ * @returns The string with env vars substituted.
681
+ * @throws If a referenced env var is not set.
682
+ */
683
+ function substituteString(value) {
684
+ return value.replace(ENV_PATTERN, (match, varName) => {
685
+ const envValue = process.env[varName];
686
+ if (envValue === undefined) {
687
+ throw new Error(`Environment variable \${${varName}} referenced in config is not set.`);
688
+ }
689
+ return envValue;
690
+ });
691
+ }
692
+ /**
693
+ * Deep-walk a value and substitute `${VAR_NAME}` patterns in all string values.
694
+ *
695
+ * @param value - The value to walk (object, array, or primitive).
696
+ * @returns A new value with all env var references resolved.
697
+ */
698
+ function substituteEnvVars(value) {
699
+ if (typeof value === 'string') {
700
+ return substituteString(value);
701
+ }
702
+ if (Array.isArray(value)) {
703
+ return value.map((item) => substituteEnvVars(item));
704
+ }
705
+ if (value !== null && typeof value === 'object') {
706
+ const result = {};
707
+ for (const [key, val] of Object.entries(value)) {
708
+ result[key] = substituteEnvVars(val);
709
+ }
710
+ return result;
711
+ }
712
+ return value;
713
+ }
714
+
557
715
  const MODULE_NAME = 'jeeves-watcher';
558
716
  /**
559
717
  * Merge sensible defaults into a loaded configuration.
@@ -589,7 +747,8 @@ async function loadConfig(configPath) {
589
747
  }
590
748
  try {
591
749
  const validated = jeevesWatcherConfigSchema.parse(result.config);
592
- return applyDefaults(validated);
750
+ const withDefaults = applyDefaults(validated);
751
+ return substituteEnvVars(withDefaults);
593
752
  }
594
753
  catch (error) {
595
754
  if (error instanceof zod.ZodError) {
@@ -602,6 +761,31 @@ async function loadConfig(configPath) {
602
761
  }
603
762
  }
604
763
 
764
+ /**
765
+ * @module util/logger
766
+ * Logger fallback helper. Provides a unified warn interface that delegates to pino or console.
767
+ */
768
+ /**
769
+ * Return a minimal logger that delegates to pino if available, otherwise console.
770
+ *
771
+ * @param logger - Optional pino logger instance.
772
+ * @returns A minimal logger.
773
+ */
774
+ function getLogger(logger) {
775
+ if (logger)
776
+ return logger;
777
+ return {
778
+ warn(obj, msg) {
779
+ if (msg) {
780
+ console.warn(obj, msg);
781
+ }
782
+ else {
783
+ console.warn(obj);
784
+ }
785
+ },
786
+ };
787
+ }
788
+
605
789
  /**
606
790
  * @module util/retry
607
791
  * Small async retry helper with exponential backoff. Side effects: sleeps between attempts; can invoke onRetry callback for logging.
@@ -706,6 +890,7 @@ function createGeminiProvider(config, logger) {
706
890
  throw new Error('Gemini embedding provider requires config.embedding.apiKey');
707
891
  }
708
892
  const dimensions = config.dimensions ?? 3072;
893
+ const log = getLogger(logger);
709
894
  const embedder = new googleGenai.GoogleGenerativeAIEmbeddings({
710
895
  apiKey: config.apiKey,
711
896
  model: config.model,
@@ -715,17 +900,7 @@ function createGeminiProvider(config, logger) {
715
900
  async embed(texts) {
716
901
  const vectors = await retry(async (attempt) => {
717
902
  if (attempt > 1) {
718
- const msg = {
719
- attempt,
720
- provider: 'gemini',
721
- model: config.model,
722
- };
723
- if (logger) {
724
- logger.warn(msg, 'Retrying embedding request');
725
- }
726
- else {
727
- console.warn(msg, 'Retrying embedding request');
728
- }
903
+ log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
729
904
  }
730
905
  // embedDocuments returns vectors for multiple texts
731
906
  return embedder.embedDocuments(texts);
@@ -735,19 +910,13 @@ function createGeminiProvider(config, logger) {
735
910
  maxDelayMs: 10_000,
736
911
  jitter: 0.2,
737
912
  onRetry: ({ attempt, delayMs, error }) => {
738
- const msg = {
913
+ log.warn({
739
914
  attempt,
740
915
  delayMs,
741
916
  provider: 'gemini',
742
917
  model: config.model,
743
- error,
744
- };
745
- if (logger) {
746
- logger.warn(msg, 'Embedding call failed; will retry');
747
- }
748
- else {
749
- console.warn(msg, 'Embedding call failed; will retry');
750
- }
918
+ err: normalizeError(error),
919
+ }, 'Embedding call failed; will retry');
751
920
  },
752
921
  });
753
922
  // Validate dimensions
@@ -828,15 +997,6 @@ function contentHash(text) {
828
997
  */
829
998
  /** Namespace UUID for jeeves-watcher point IDs. */
830
999
  const NAMESPACE = '6a6f686e-6761-4c74-ad6a-656576657321';
831
- /**
832
- * Normalise a file path for deterministic point ID generation.
833
- *
834
- * @param filePath - The original file path.
835
- * @returns The normalised path string.
836
- */
837
- function normalisePath(filePath) {
838
- return filePath.replace(/\\/g, '/').toLowerCase();
839
- }
840
1000
  /**
841
1001
  * Generate a deterministic UUID v5 point ID for a file (and optional chunk index).
842
1002
  *
@@ -846,8 +1006,8 @@ function normalisePath(filePath) {
846
1006
  */
847
1007
  function pointId(filePath, chunkIndex) {
848
1008
  const key = chunkIndex !== undefined
849
- ? `${normalisePath(filePath)}#${String(chunkIndex)}`
850
- : normalisePath(filePath);
1009
+ ? `${normalizePath(filePath)}#${String(chunkIndex)}`
1010
+ : normalizePath(filePath);
851
1011
  return uuid.v5(key, NAMESPACE);
852
1012
  }
853
1013
 
@@ -864,6 +1024,9 @@ function pointId(filePath, chunkIndex) {
864
1024
  */
865
1025
  function extractMarkdownFrontmatter(markdown) {
866
1026
  const trimmed = markdown.replace(/^\uFEFF/, '');
1027
+ // Only attempt frontmatter parsing if the file starts with ---
1028
+ if (!/^\s*---/.test(trimmed))
1029
+ return { body: markdown };
867
1030
  const match = /^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(trimmed);
868
1031
  if (!match)
869
1032
  return { body: markdown };
@@ -966,66 +1129,11 @@ async function extractText(filePath, extension) {
966
1129
  }
967
1130
 
968
1131
  /**
969
- * Build {@link FileAttributes} from a file path and stat info.
970
- *
971
- * @param filePath - The file path.
972
- * @param stats - The file stats.
973
- * @param extractedFrontmatter - Optional extracted frontmatter.
974
- * @param extractedJson - Optional parsed JSON content.
975
- * @returns The constructed file attributes.
1132
+ * @module rules/templates
1133
+ * Resolves template variables (`${path.to.value}`) in rule `set` objects against file attributes.
976
1134
  */
977
- function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
978
- const normalised = filePath.replace(/\\/g, '/');
979
- const attrs = {
980
- file: {
981
- path: normalised,
982
- directory: node_path.dirname(normalised).replace(/\\/g, '/'),
983
- filename: node_path.basename(normalised),
984
- extension: node_path.extname(normalised),
985
- sizeBytes: stats.size,
986
- modified: stats.mtime.toISOString(),
987
- },
988
- };
989
- if (extractedFrontmatter)
990
- attrs.frontmatter = extractedFrontmatter;
991
- if (extractedJson)
992
- attrs.json = extractedJson;
993
- return attrs;
994
- }
995
1135
  /**
996
- * Create an ajv instance with a custom `glob` format for picomatch glob matching.
997
- *
998
- * @returns The configured ajv instance.
999
- */
1000
- function createRuleAjv() {
1001
- const ajv = new Ajv({ allErrors: true });
1002
- addFormats(ajv);
1003
- ajv.addKeyword({
1004
- keyword: 'glob',
1005
- type: 'string',
1006
- schemaType: 'string',
1007
- validate: (pattern, data) => picomatch.isMatch(data, pattern),
1008
- });
1009
- return ajv;
1010
- }
1011
- /**
1012
- * Compile an array of inference rules into executable validators.
1013
- *
1014
- * @param rules - The inference rule definitions.
1015
- * @returns An array of compiled rules.
1016
- */
1017
- function compileRules(rules) {
1018
- const ajv = createRuleAjv();
1019
- return rules.map((rule, idx) => ({
1020
- rule,
1021
- validate: ajv.compile({
1022
- $id: `rule-${String(idx)}`,
1023
- ...rule.match,
1024
- }),
1025
- }));
1026
- }
1027
- /**
1028
- * Resolve `$\{template.vars\}` in a value against the given attributes.
1136
+ * Resolve `${template.vars}` in a value against the given attributes.
1029
1137
  *
1030
1138
  * @param value - The value to resolve.
1031
1139
  * @param attributes - The file attributes for variable lookup.
@@ -1055,9 +1163,13 @@ function resolveSet(setObj, attributes) {
1055
1163
  }
1056
1164
  return result;
1057
1165
  }
1166
+
1167
+ /**
1168
+ * @module rules/apply
1169
+ * Applies compiled inference rules to file attributes, producing merged metadata via template resolution and JsonMap transforms.
1170
+ */
1058
1171
  /**
1059
1172
  * Create the lib object for JsonMap transformations.
1060
- * Provides utility functions for path manipulation.
1061
1173
  *
1062
1174
  * @returns The lib object.
1063
1175
  */
@@ -1081,7 +1193,7 @@ function createJsonMapLib() {
1081
1193
  * @param compiledRules - The compiled rules to evaluate.
1082
1194
  * @param attributes - The file attributes to match against.
1083
1195
  * @param namedMaps - Optional record of named JsonMap definitions.
1084
- * @param logger - Optional pino logger for warnings (falls back to console.warn).
1196
+ * @param logger - Optional logger for warnings (falls back to console.warn).
1085
1197
  * @returns The merged metadata from all matching rules.
1086
1198
  */
1087
1199
  async function applyRules(compiledRules, attributes, namedMaps, logger) {
@@ -1131,6 +1243,80 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1131
1243
  return merged;
1132
1244
  }
1133
1245
 
1246
+ /**
1247
+ * @module rules/attributes
1248
+ * Builds file attribute objects for rule matching. Pure function: derives attributes from path, stats, and extracted data.
1249
+ */
1250
+ /**
1251
+ * Build {@link FileAttributes} from a file path and stat info.
1252
+ *
1253
+ * @param filePath - The file path.
1254
+ * @param stats - The file stats.
1255
+ * @param extractedFrontmatter - Optional extracted frontmatter.
1256
+ * @param extractedJson - Optional parsed JSON content.
1257
+ * @returns The constructed file attributes.
1258
+ */
1259
+ function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
1260
+ const normalised = filePath.replace(/\\/g, '/');
1261
+ const attrs = {
1262
+ file: {
1263
+ path: normalised,
1264
+ directory: node_path.dirname(normalised).replace(/\\/g, '/'),
1265
+ filename: node_path.basename(normalised),
1266
+ extension: node_path.extname(normalised),
1267
+ sizeBytes: stats.size,
1268
+ modified: stats.mtime.toISOString(),
1269
+ },
1270
+ };
1271
+ if (extractedFrontmatter)
1272
+ attrs.frontmatter = extractedFrontmatter;
1273
+ if (extractedJson)
1274
+ attrs.json = extractedJson;
1275
+ return attrs;
1276
+ }
1277
+
1278
+ /**
1279
+ * @module rules/ajvSetup
1280
+ * AJV instance factory with custom glob keyword for picomatch-based pattern matching in rule schemas.
1281
+ */
1282
+ /**
1283
+ * Create an AJV instance with a custom `glob` format for picomatch glob matching.
1284
+ *
1285
+ * @returns The configured AJV instance.
1286
+ */
1287
+ function createRuleAjv() {
1288
+ const ajv = new Ajv({ allErrors: true });
1289
+ addFormats(ajv);
1290
+ ajv.addKeyword({
1291
+ keyword: 'glob',
1292
+ type: 'string',
1293
+ schemaType: 'string',
1294
+ validate: (pattern, data) => picomatch.isMatch(data, pattern),
1295
+ });
1296
+ return ajv;
1297
+ }
1298
+
1299
+ /**
1300
+ * @module rules/compile
1301
+ * Compiles inference rule definitions into executable AJV validators for efficient rule evaluation.
1302
+ */
1303
+ /**
1304
+ * Compile an array of inference rules into executable validators.
1305
+ *
1306
+ * @param rules - The inference rule definitions.
1307
+ * @returns An array of compiled rules.
1308
+ */
1309
+ function compileRules(rules) {
1310
+ const ajv = createRuleAjv();
1311
+ return rules.map((rule, idx) => ({
1312
+ rule,
1313
+ validate: ajv.compile({
1314
+ $id: `rule-${String(idx)}`,
1315
+ ...rule.match,
1316
+ }),
1317
+ }));
1318
+ }
1319
+
1134
1320
  /**
1135
1321
  * @module processor/buildMetadata
1136
1322
  * Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
@@ -1298,7 +1484,7 @@ class DocumentProcessor {
1298
1484
  this.logger.info({ filePath, chunks: chunks.length }, 'File processed successfully');
1299
1485
  }
1300
1486
  catch (error) {
1301
- this.logger.error({ filePath, error }, 'Failed to process file');
1487
+ this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to process file');
1302
1488
  }
1303
1489
  }
1304
1490
  /**
@@ -1318,7 +1504,7 @@ class DocumentProcessor {
1318
1504
  this.logger.info({ filePath }, 'File deleted from index');
1319
1505
  }
1320
1506
  catch (error) {
1321
- this.logger.error({ filePath, error }, 'Failed to delete file');
1507
+ this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to delete file');
1322
1508
  }
1323
1509
  }
1324
1510
  /**
@@ -1346,7 +1532,7 @@ class DocumentProcessor {
1346
1532
  return merged;
1347
1533
  }
1348
1534
  catch (error) {
1349
- this.logger.error({ filePath, error }, 'Failed to update metadata');
1535
+ this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to update metadata');
1350
1536
  return null;
1351
1537
  }
1352
1538
  }
@@ -1376,7 +1562,7 @@ class DocumentProcessor {
1376
1562
  return metadata;
1377
1563
  }
1378
1564
  catch (error) {
1379
- this.logger.error({ filePath, error }, 'Failed to re-apply rules');
1565
+ this.logger.error({ filePath, err: normalizeError(error) }, 'Failed to re-apply rules');
1380
1566
  return null;
1381
1567
  }
1382
1568
  }
@@ -1543,7 +1729,7 @@ class VectorStoreClient {
1543
1729
  client;
1544
1730
  collectionName;
1545
1731
  dims;
1546
- logger;
1732
+ log;
1547
1733
  /**
1548
1734
  * Create a new VectorStoreClient.
1549
1735
  *
@@ -1559,7 +1745,7 @@ class VectorStoreClient {
1559
1745
  });
1560
1746
  this.collectionName = config.collectionName;
1561
1747
  this.dims = dimensions;
1562
- this.logger = logger;
1748
+ this.log = getLogger(logger);
1563
1749
  }
1564
1750
  /**
1565
1751
  * Ensure the collection exists with correct dimensions and Cosine distance.
@@ -1588,17 +1774,7 @@ class VectorStoreClient {
1588
1774
  return;
1589
1775
  await retry(async (attempt) => {
1590
1776
  if (attempt > 1) {
1591
- const msg = {
1592
- attempt,
1593
- operation: 'qdrant.upsert',
1594
- points: points.length,
1595
- };
1596
- if (this.logger) {
1597
- this.logger.warn(msg, 'Retrying Qdrant upsert');
1598
- }
1599
- else {
1600
- console.warn(msg, 'Retrying Qdrant upsert');
1601
- }
1777
+ this.log.warn({ attempt, operation: 'qdrant.upsert', points: points.length }, 'Retrying Qdrant upsert');
1602
1778
  }
1603
1779
  await this.client.upsert(this.collectionName, {
1604
1780
  wait: true,
@@ -1614,13 +1790,12 @@ class VectorStoreClient {
1614
1790
  maxDelayMs: 10_000,
1615
1791
  jitter: 0.2,
1616
1792
  onRetry: ({ attempt, delayMs, error }) => {
1617
- const msg = { attempt, delayMs, operation: 'qdrant.upsert', error };
1618
- if (this.logger) {
1619
- this.logger.warn(msg, 'Qdrant upsert failed; will retry');
1620
- }
1621
- else {
1622
- console.warn(msg, 'Qdrant upsert failed; will retry');
1623
- }
1793
+ this.log.warn({
1794
+ attempt,
1795
+ delayMs,
1796
+ operation: 'qdrant.upsert',
1797
+ err: normalizeError(error),
1798
+ }, 'Qdrant upsert failed; will retry');
1624
1799
  },
1625
1800
  });
1626
1801
  }
@@ -1634,17 +1809,7 @@ class VectorStoreClient {
1634
1809
  return;
1635
1810
  await retry(async (attempt) => {
1636
1811
  if (attempt > 1) {
1637
- const msg = {
1638
- attempt,
1639
- operation: 'qdrant.delete',
1640
- ids: ids.length,
1641
- };
1642
- if (this.logger) {
1643
- this.logger.warn(msg, 'Retrying Qdrant delete');
1644
- }
1645
- else {
1646
- console.warn(msg, 'Retrying Qdrant delete');
1647
- }
1812
+ this.log.warn({ attempt, operation: 'qdrant.delete', ids: ids.length }, 'Retrying Qdrant delete');
1648
1813
  }
1649
1814
  await this.client.delete(this.collectionName, {
1650
1815
  wait: true,
@@ -1656,13 +1821,12 @@ class VectorStoreClient {
1656
1821
  maxDelayMs: 10_000,
1657
1822
  jitter: 0.2,
1658
1823
  onRetry: ({ attempt, delayMs, error }) => {
1659
- const msg = { attempt, delayMs, operation: 'qdrant.delete', error };
1660
- if (this.logger) {
1661
- this.logger.warn(msg, 'Qdrant delete failed; will retry');
1662
- }
1663
- else {
1664
- console.warn(msg, 'Qdrant delete failed; will retry');
1665
- }
1824
+ this.log.warn({
1825
+ attempt,
1826
+ delayMs,
1827
+ operation: 'qdrant.delete',
1828
+ err: normalizeError(error),
1829
+ }, 'Qdrant delete failed; will retry');
1666
1830
  },
1667
1831
  });
1668
1832
  }
@@ -1816,7 +1980,7 @@ class FileSystemWatcher {
1816
1980
  this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
1817
1981
  });
1818
1982
  this.watcher.on('error', (error) => {
1819
- this.logger.error({ error }, 'Watcher error');
1983
+ this.logger.error({ err: normalizeError(error) }, 'Watcher error');
1820
1984
  });
1821
1985
  this.queue.process();
1822
1986
  this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
@@ -1833,63 +1997,141 @@ class FileSystemWatcher {
1833
1997
  }
1834
1998
  }
1835
1999
 
2000
+ /**
2001
+ * @module app/configWatcher
2002
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
2003
+ */
2004
/**
 * Debounced config file watcher.
 *
 * Watches `options.configPath` with chokidar and invokes `options.onChange`
 * once `options.debounceMs` has elapsed without a further change event.
 */
class ConfigWatcher {
    options;
    watcher;
    debounce;
    /**
     * @param options - Watcher options: `configPath`, `enabled`, `debounceMs`, `logger` and `onChange`.
     */
    constructor(options) {
        this.options = options;
    }
    /**
     * Start watching the config file. Does nothing when `options.enabled` is
     * falsy or when a watcher is already running.
     */
    start() {
        if (!this.options.enabled)
            return;
        // A second start() would overwrite this.watcher and leak the first watcher.
        if (this.watcher)
            return;
        const watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        this.watcher = watcher;
        watcher.on('change', () => {
            // Restart the debounce window on every change event.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                this.debounce = undefined;
                void this.runOnChange();
            }, this.options.debounceMs);
        });
        watcher.on('error', (error) => {
            this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
        });
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /**
     * Invoke the change callback and log (rather than leak) any failure, so a
     * throwing or rejecting `onChange` never becomes an unhandled rejection.
     */
    async runOnChange() {
        try {
            await this.options.onChange();
        }
        catch (error) {
            this.options.logger.error({ err: normalizeError(error) }, 'Config change handler failed');
        }
    }
    /**
     * Cancel any pending debounced callback and close the underlying watcher.
     * Safe to call when not started and safe to call more than once.
     */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        const watcher = this.watcher;
        if (watcher) {
            // Clear the reference before awaiting so overlapping stop() calls
            // do not close the same watcher twice.
            this.watcher = undefined;
            await watcher.close();
        }
    }
}
2046
+
2047
+ /**
2048
+ * @module app/shutdown
2049
+ * Process signal shutdown orchestration. Installs SIGINT/SIGTERM handlers that invoke a provided async stop function.
2050
+ */
2051
/**
 * Install process signal handlers.
 *
 * Registers SIGTERM and SIGINT listeners that await `stop()` and then exit the
 * process: exit code 0 when `stop()` succeeds, 1 when it throws or rejects.
 * Signals received while a shutdown is already in progress are ignored.
 *
 * @param stop - Async stop function to invoke on shutdown signals.
 */
function installShutdownHandlers(stop) {
    let shuttingDown = false;
    const shutdown = async (signal) => {
        // Repeated or overlapping signals must not run stop() concurrently.
        if (shuttingDown)
            return;
        shuttingDown = true;
        let exitCode = 0;
        try {
            await stop();
        }
        catch (error) {
            // Without this catch the rejection would be unhandled and the
            // explicit exit below would never run.
            exitCode = 1;
            console.error(`Shutdown on ${signal} failed:`, error);
        }
        process.exit(exitCode);
    };
    process.on('SIGTERM', () => void shutdown('SIGTERM'));
    process.on('SIGINT', () => void shutdown('SIGINT'));
}
2064
+
2065
/**
 * Default component factories. Each entry either references a module-level
 * function directly or wraps a class constructor so it can be called without `new`.
 */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient(config, dimensions, logger) {
        return new VectorStoreClient(config, dimensions, logger);
    },
    compileRules,
    createDocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger) {
        return new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger);
    },
    createEventQueue(options) {
        return new EventQueue(options);
    },
    createFileSystemWatcher(config, queue, processor, logger) {
        return new FileSystemWatcher(config, queue, processor, logger);
    },
    createApiServer,
};
1836
2076
  /**
1837
2077
  * Main application class that wires together all components.
1838
2078
  */
1839
2079
  class JeevesWatcher {
1840
2080
  config;
1841
2081
  configPath;
2082
+ factories;
1842
2083
  logger;
1843
2084
  watcher;
1844
2085
  queue;
1845
2086
  server;
1846
2087
  processor;
1847
2088
  configWatcher;
1848
- configDebounce;
1849
2089
  /**
1850
2090
  * Create a new JeevesWatcher instance.
1851
2091
  *
1852
2092
  * @param config - The application configuration.
1853
2093
  * @param configPath - Optional config file path to watch for changes.
2094
+ * @param factories - Optional component factories (for dependency injection).
1854
2095
  */
1855
- constructor(config, configPath) {
2096
+ constructor(config, configPath, factories = {}) {
1856
2097
  this.config = config;
1857
2098
  this.configPath = configPath;
2099
+ this.factories = { ...defaultFactories, ...factories };
1858
2100
  }
1859
2101
  /**
1860
2102
  * Start the watcher, API server, and all components.
1861
2103
  */
1862
2104
  async start() {
1863
- const logger = createLogger(this.config.logging);
2105
+ const logger = this.factories.createLogger(this.config.logging);
1864
2106
  this.logger = logger;
1865
2107
  let embeddingProvider;
1866
2108
  try {
1867
- embeddingProvider = createEmbeddingProvider(this.config.embedding, logger);
2109
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
1868
2110
  }
1869
2111
  catch (error) {
1870
- logger.fatal({ error }, 'Failed to create embedding provider');
2112
+ logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
1871
2113
  throw error;
1872
2114
  }
1873
- const vectorStore = new VectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2115
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
1874
2116
  await vectorStore.ensureCollection();
1875
- const compiledRules = compileRules(this.config.inferenceRules ?? []);
2117
+ const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
1876
2118
  const processorConfig = {
1877
2119
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
1878
2120
  chunkSize: this.config.embedding.chunkSize,
1879
2121
  chunkOverlap: this.config.embedding.chunkOverlap,
1880
2122
  maps: this.config.maps,
1881
2123
  };
1882
- const processor = new DocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
2124
+ const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
1883
2125
  this.processor = processor;
1884
- const queue = new EventQueue({
2126
+ const queue = this.factories.createEventQueue({
1885
2127
  debounceMs: this.config.watch.debounceMs ?? 2000,
1886
2128
  concurrency: this.config.embedding.concurrency ?? 5,
1887
2129
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
1888
2130
  });
1889
2131
  this.queue = queue;
1890
- const watcher = new FileSystemWatcher(this.config.watch, queue, processor, logger);
2132
+ const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger);
1891
2133
  this.watcher = watcher;
1892
- const server = createApiServer({
2134
+ const server = this.factories.createApiServer({
1893
2135
  processor,
1894
2136
  vectorStore,
1895
2137
  embeddingProvider,
@@ -1945,28 +2187,18 @@ class JeevesWatcher {
1945
2187
  return;
1946
2188
  }
1947
2189
  const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
1948
- this.configWatcher = chokidar.watch(this.configPath, {
1949
- ignoreInitial: true,
1950
- });
1951
- this.configWatcher.on('change', () => {
1952
- if (this.configDebounce)
1953
- clearTimeout(this.configDebounce);
1954
- this.configDebounce = setTimeout(() => {
1955
- void this.reloadConfig();
1956
- }, debounceMs);
1957
- });
1958
- this.configWatcher.on('error', (error) => {
1959
- logger.error({ error }, 'Config watcher error');
2190
+ this.configWatcher = new ConfigWatcher({
2191
+ configPath: this.configPath,
2192
+ enabled,
2193
+ debounceMs,
2194
+ logger,
2195
+ onChange: async () => this.reloadConfig(),
1960
2196
  });
1961
- logger.info({ configPath: this.configPath, debounceMs }, 'Config watcher started');
2197
+ this.configWatcher.start();
1962
2198
  }
1963
2199
  async stopConfigWatch() {
1964
- if (this.configDebounce) {
1965
- clearTimeout(this.configDebounce);
1966
- this.configDebounce = undefined;
1967
- }
1968
2200
  if (this.configWatcher) {
1969
- await this.configWatcher.close();
2201
+ await this.configWatcher.stop();
1970
2202
  this.configWatcher = undefined;
1971
2203
  }
1972
2204
  }
@@ -1977,14 +2209,14 @@ class JeevesWatcher {
1977
2209
  return;
1978
2210
  logger.info({ configPath: this.configPath }, 'Config change detected, reloading...');
1979
2211
  try {
1980
- const newConfig = await loadConfig(this.configPath);
2212
+ const newConfig = await this.factories.loadConfig(this.configPath);
1981
2213
  this.config = newConfig;
1982
- const compiledRules = compileRules(newConfig.inferenceRules ?? []);
2214
+ const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
1983
2215
  processor.updateRules(compiledRules);
1984
2216
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
1985
2217
  }
1986
2218
  catch (error) {
1987
- logger.error({ error }, 'Failed to reload config');
2219
+ logger.error({ err: normalizeError(error) }, 'Failed to reload config');
1988
2220
  }
1989
2221
  }
1990
2222
  }
@@ -1997,12 +2229,7 @@ class JeevesWatcher {
1997
2229
  async function startFromConfig(configPath) {
1998
2230
  const config = await loadConfig(configPath);
1999
2231
  const app = new JeevesWatcher(config, configPath);
2000
- const shutdown = async () => {
2001
- await app.stop();
2002
- process.exit(0);
2003
- };
2004
- process.on('SIGTERM', () => void shutdown());
2005
- process.on('SIGINT', () => void shutdown());
2232
+ installShutdownHandlers(() => app.stop());
2006
2233
  await app.start();
2007
2234
  return app;
2008
2235
  }