@karmaniverous/jeeves-watcher 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -1,12 +1,11 @@
1
1
  'use strict';
2
2
 
3
3
  var Fastify = require('fastify');
4
- var radash = require('radash');
5
- var node_crypto = require('node:crypto');
6
4
  var promises = require('node:fs/promises');
7
5
  var node_path = require('node:path');
8
6
  var picomatch = require('picomatch');
9
- var chokidar = require('chokidar');
7
+ var radash = require('radash');
8
+ var node_crypto = require('node:crypto');
10
9
  var cosmiconfig = require('cosmiconfig');
11
10
  var zod = require('zod');
12
11
  var jsonmap = require('@karmaniverous/jsonmap');
@@ -20,6 +19,7 @@ var Ajv = require('ajv');
20
19
  var addFormats = require('ajv-formats');
21
20
  var textsplitters = require('@langchain/textsplitters');
22
21
  var jsClientRest = require('@qdrant/js-client-rest');
22
+ var chokidar = require('chokidar');
23
23
 
24
24
  function _interopNamespaceDefault(e) {
25
25
  var n = Object.create(null);
@@ -40,77 +40,6 @@ function _interopNamespaceDefault(e) {
40
40
 
41
41
  var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
42
42
 
43
- /**
44
- * @module metadata/metadata
45
- * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
46
- */
47
- /**
48
- * Normalise a file path for deterministic mapping: lowercase, forward slashes, strip leading drive letter colon.
49
- *
50
- * @param filePath - The original file path.
51
- * @returns The normalised path string.
52
- */
53
- function normalisePath$1(filePath) {
54
- return filePath
55
- .replace(/\\/g, '/')
56
- .replace(/^([A-Za-z]):/, (_m, letter) => letter.toLowerCase())
57
- .toLowerCase();
58
- }
59
- /**
60
- * Derive a deterministic `.meta.json` path for a given file.
61
- *
62
- * @param filePath - The watched file path.
63
- * @param metadataDir - The root metadata directory.
64
- * @returns The full path to the metadata file.
65
- */
66
- function metadataPath(filePath, metadataDir) {
67
- const normalised = normalisePath$1(filePath);
68
- const hash = node_crypto.createHash('sha256').update(normalised, 'utf8').digest('hex');
69
- return node_path.join(metadataDir, `${hash}.meta.json`);
70
- }
71
- /**
72
- * Read persisted metadata for a file.
73
- *
74
- * @param filePath - The watched file path.
75
- * @param metadataDir - The root metadata directory.
76
- * @returns The parsed metadata object, or `null` if not found.
77
- */
78
- async function readMetadata(filePath, metadataDir) {
79
- try {
80
- const raw = await promises.readFile(metadataPath(filePath, metadataDir), 'utf8');
81
- return JSON.parse(raw);
82
- }
83
- catch {
84
- return null;
85
- }
86
- }
87
- /**
88
- * Write metadata for a file.
89
- *
90
- * @param filePath - The watched file path.
91
- * @param metadataDir - The root metadata directory.
92
- * @param metadata - The metadata to persist.
93
- */
94
- async function writeMetadata(filePath, metadataDir, metadata) {
95
- const dest = metadataPath(filePath, metadataDir);
96
- await promises.mkdir(node_path.dirname(dest), { recursive: true });
97
- await promises.writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
98
- }
99
- /**
100
- * Delete metadata for a file.
101
- *
102
- * @param filePath - The watched file path.
103
- * @param metadataDir - The root metadata directory.
104
- */
105
- async function deleteMetadata(filePath, metadataDir) {
106
- try {
107
- await promises.rm(metadataPath(filePath, metadataDir));
108
- }
109
- catch {
110
- // Ignore if file doesn't exist.
111
- }
112
- }
113
-
114
43
  /**
115
44
  * Best-effort base directory inference for a glob pattern.
116
45
  *
@@ -208,107 +137,266 @@ async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
208
137
  }
209
138
 
210
139
  /**
211
- * Create the Fastify API server with all routes registered.
212
- *
213
- * The returned instance is not yet listening — call `server.listen()` to start.
140
+ * @module api/handlers/configReindex
141
+ * Fastify route handler for POST /config-reindex. Triggers an async reindex job scoped to rules or full processing.
142
+ */
143
+ /**
144
+ * Create handler for POST /config-reindex.
214
145
  *
215
- * @param options - The server options.
216
- * @returns A configured Fastify instance.
146
+ * @param deps - Route dependencies.
217
147
  */
218
- function createApiServer(options) {
219
- const { processor, vectorStore, embeddingProvider, logger } = options;
220
- const app = Fastify({ logger: false });
221
- app.get('/status', () => ({
222
- status: 'ok',
223
- uptime: process.uptime(),
224
- }));
225
- app.post('/metadata', async (request, reply) => {
148
+ function createConfigReindexHandler(deps) {
149
+ return async (request, reply) => {
226
150
  try {
227
- const { path, metadata } = request.body;
228
- await processor.processMetadataUpdate(path, metadata);
229
- return { ok: true };
151
+ const scope = request.body.scope ?? 'rules';
152
+ // Return immediately and run async
153
+ void (async () => {
154
+ try {
155
+ if (scope === 'rules') {
156
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processRulesUpdate');
157
+ deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
158
+ }
159
+ else {
160
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
161
+ deps.logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
162
+ }
163
+ }
164
+ catch (error) {
165
+ deps.logger.error({ error, scope }, 'Config reindex failed');
166
+ }
167
+ })();
168
+ return await reply.status(200).send({ status: 'started', scope });
230
169
  }
231
170
  catch (error) {
232
- logger.error({ error }, 'Metadata update failed');
233
- return reply.status(500).send({ error: 'Internal server error' });
171
+ deps.logger.error({ error }, 'Config reindex request failed');
172
+ return await reply.status(500).send({ error: 'Internal server error' });
234
173
  }
235
- });
236
- app.post('/search', async (request, reply) => {
174
+ };
175
+ }
176
+
177
+ /**
178
+ * @module api/handlers/metadata
179
+ * Fastify route handler for POST /metadata. Performs enrichment metadata updates via the document processor.
180
+ */
181
+ /**
182
+ * Create handler for POST /metadata.
183
+ *
184
+ * @param deps - Route dependencies.
185
+ */
186
+ function createMetadataHandler(deps) {
187
+ return async (request, reply) => {
237
188
  try {
238
- const { query, limit = 10 } = request.body;
239
- const vectors = await embeddingProvider.embed([query]);
240
- const results = await vectorStore.search(vectors[0], limit);
241
- return results;
189
+ const { path, metadata } = request.body;
190
+ await deps.processor.processMetadataUpdate(path, metadata);
191
+ return { ok: true };
242
192
  }
243
193
  catch (error) {
244
- logger.error({ error }, 'Search failed');
194
+ deps.logger.error({ error }, 'Metadata update failed');
245
195
  return reply.status(500).send({ error: 'Internal server error' });
246
196
  }
247
- });
248
- app.post('/reindex', async (_request, reply) => {
249
- try {
250
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
251
- return await reply.status(200).send({ ok: true, filesIndexed: count });
252
- }
253
- catch (error) {
254
- logger.error({ error }, 'Reindex failed');
255
- return await reply.status(500).send({ error: 'Internal server error' });
256
- }
257
- });
258
- app.post('/rebuild-metadata', async (_request, reply) => {
197
+ };
198
+ }
199
+
200
+ /**
201
+ * @module util/normalizePath
202
+ * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
203
+ */
204
/**
 * Normalize a file path for deterministic mapping: backslashes become forward
 * slashes, the whole path is lowercased, and (optionally) the colon after a
 * leading drive letter is dropped.
 *
 * @param filePath - The original file path.
 * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:/x` becomes `c/x`). Defaults to `false`.
 * @returns The normalized path string.
 */
function normalizePath(filePath, stripDriveLetter = false) {
    const normalized = filePath.replace(/\\/g, '/').toLowerCase();
    if (!stripDriveLetter) {
        return normalized;
    }
    // The path is already lowercase here, so matching [a-z] is sufficient.
    // Only a drive letter at the very start is affected; later colons are kept.
    return normalized.replace(/^([a-z]):/, '$1');
}
218
+
219
+ /**
220
+ * @module metadata/metadata
221
+ * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
222
+ */
223
/**
 * Compute the location of the `.meta.json` file that belongs to a watched file.
 *
 * The file name is the SHA-256 hex digest of the normalized path (with the
 * drive-letter colon stripped), so the same input path always maps to the
 * same metadata file.
 *
 * @param filePath - The watched file path.
 * @param metadataDir - The root metadata directory.
 * @returns The full path to the metadata file.
 */
function metadataPath(filePath, metadataDir) {
    const key = normalizePath(filePath, true);
    const hasher = node_crypto.createHash('sha256');
    hasher.update(key, 'utf8');
    const fileName = `${hasher.digest('hex')}.meta.json`;
    return node_path.join(metadataDir, fileName);
}
235
/**
 * Load the persisted metadata for a watched file.
 *
 * Any failure (path derivation, reading the file, parsing the JSON) yields
 * `null` instead of an error.
 *
 * @param filePath - The watched file path.
 * @param metadataDir - The root metadata directory.
 * @returns The parsed metadata object, or `null` if it could not be read.
 */
async function readMetadata(filePath, metadataDir) {
    const load = async () => {
        const location = metadataPath(filePath, metadataDir);
        const text = await promises.readFile(location, 'utf8');
        return JSON.parse(text);
    };
    return load().catch(() => null);
}
251
/**
 * Persist metadata for a watched file as pretty-printed JSON (2-space indent).
 *
 * The containing directory is created first (recursively) if it is missing.
 *
 * @param filePath - The watched file path.
 * @param metadataDir - The root metadata directory.
 * @param metadata - The metadata to persist.
 */
async function writeMetadata(filePath, metadataDir, metadata) {
    const target = metadataPath(filePath, metadataDir);
    const folder = node_path.dirname(target);
    await promises.mkdir(folder, { recursive: true });
    const serialized = JSON.stringify(metadata, null, 2);
    await promises.writeFile(target, serialized, 'utf8');
}
263
/**
 * Remove the persisted metadata file for a watched file, best-effort.
 *
 * @param filePath - The watched file path.
 * @param metadataDir - The root metadata directory.
 */
async function deleteMetadata(filePath, metadataDir) {
    const remove = async () => promises.rm(metadataPath(filePath, metadataDir));
    await remove().catch(() => {
        // Deliberately ignored: typically the metadata file simply does not exist.
    });
}
277
+
278
+ /**
279
+ * @module metadata/constants
280
+ * Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
281
+ */
282
/**
 * Keys managed by the indexing pipeline (not user enrichment).
 *
 * Frozen so that no consumer can accidentally mutate the shared list; copy it
 * (e.g. `[...SYSTEM_METADATA_KEYS]`) when a mutable array is required.
 */
const SYSTEM_METADATA_KEYS = Object.freeze([
    'file_path',
    'chunk_index',
    'total_chunks',
    'content_hash',
    'chunk_text',
]);
290
+
291
+ /**
292
+ * @module api/handlers/rebuildMetadata
293
+ * Fastify route handler for POST /rebuild-metadata. Recreates enrichment metadata files from vector store payloads.
294
+ */
295
+ /**
296
+ * Create handler for POST /rebuild-metadata.
297
+ *
298
+ * @param deps - Route dependencies.
299
+ */
300
+ function createRebuildMetadataHandler(deps) {
301
+ return async (_request, reply) => {
259
302
  try {
260
- const metadataDir = options.config.metadataDir ?? '.jeeves-metadata';
261
- const SYSTEM_KEYS = [
262
- 'file_path',
263
- 'chunk_index',
264
- 'total_chunks',
265
- 'content_hash',
266
- 'chunk_text',
267
- ];
268
- for await (const point of vectorStore.scroll()) {
303
+ const metadataDir = deps.config.metadataDir ?? '.jeeves-metadata';
304
+ const systemKeys = [...SYSTEM_METADATA_KEYS];
305
+ for await (const point of deps.vectorStore.scroll()) {
269
306
  const payload = point.payload;
270
307
  const filePath = payload['file_path'];
271
308
  if (typeof filePath !== 'string' || filePath.length === 0)
272
309
  continue;
273
310
  // Persist only enrichment-ish fields, not chunking/index fields.
274
- const enrichment = radash.omit(payload, SYSTEM_KEYS);
311
+ const enrichment = radash.omit(payload, systemKeys);
275
312
  await writeMetadata(filePath, metadataDir, enrichment);
276
313
  }
277
314
  return await reply.status(200).send({ ok: true });
278
315
  }
279
316
  catch (error) {
280
- logger.error({ error }, 'Rebuild metadata failed');
317
+ deps.logger.error({ error }, 'Rebuild metadata failed');
281
318
  return await reply.status(500).send({ error: 'Internal server error' });
282
319
  }
283
- });
284
- app.post('/config-reindex', async (request, reply) => {
320
+ };
321
+ }
322
+
323
+ /**
324
+ * @module api/handlers/reindex
325
+ * Fastify route handler for POST /reindex. Reprocesses all watched files through the processor.
326
+ */
327
+ /**
328
+ * Create handler for POST /reindex.
329
+ *
330
+ * @param deps - Route dependencies.
331
+ */
332
+ function createReindexHandler(deps) {
333
+ return async (_request, reply) => {
285
334
  try {
286
- const scope = request.body.scope ?? 'rules';
287
- // Return immediately and run async
288
- void (async () => {
289
- try {
290
- if (scope === 'rules') {
291
- // Re-apply inference rules to all files, update Qdrant payloads (no re-embedding)
292
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processRulesUpdate');
293
- logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
294
- }
295
- else {
296
- // Full reindex: re-extract, re-embed, re-upsert
297
- const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
298
- logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
299
- }
300
- }
301
- catch (error) {
302
- logger.error({ error, scope }, 'Config reindex failed');
303
- }
304
- })();
305
- return await reply.status(200).send({ status: 'started', scope });
335
+ const count = await processAllFiles(deps.config.watch.paths, deps.config.watch.ignored, deps.processor, 'processFile');
336
+ return await reply.status(200).send({ ok: true, filesIndexed: count });
306
337
  }
307
338
  catch (error) {
308
- logger.error({ error }, 'Config reindex request failed');
339
+ deps.logger.error({ error }, 'Reindex failed');
309
340
  return await reply.status(500).send({ error: 'Internal server error' });
310
341
  }
342
+ };
343
+ }
344
+
345
+ /**
346
+ * @module api/handlers/search
347
+ * Fastify route handler for POST /search. Embeds a query and performs vector store similarity search.
348
+ */
349
/**
 * Create handler for POST /search.
 *
 * The handler embeds `body.query` with the embedding provider and runs a
 * vector store search with the first resulting vector and `body.limit`
 * (default 10).
 *
 * A missing body or a non-string `query` is answered with HTTP 400 before any
 * backend is called; failures while embedding or searching are logged and
 * answered with HTTP 500.
 *
 * @param deps - Route dependencies (`embeddingProvider`, `vectorStore`, `logger`).
 * @returns An async Fastify route handler.
 */
function createSearchHandler(deps) {
    return async (request, reply) => {
        const body = request.body;
        // Reject malformed requests up front: without this, destructuring an
        // absent body throws and is misreported as an internal server error.
        if (body === null || typeof body !== 'object' || typeof body.query !== 'string') {
            return reply
                .status(400)
                .send({ error: 'Request body must include a string "query"' });
        }
        try {
            const { query, limit = 10 } = body;
            const vectors = await deps.embeddingProvider.embed([query]);
            const results = await deps.vectorStore.search(vectors[0], limit);
            return results;
        }
        catch (error) {
            deps.logger.error({ error }, 'Search failed');
            return reply.status(500).send({ error: 'Internal server error' });
        }
    };
}
368
+
369
+ /**
370
+ * @module api/handlers/status
371
+ * Fastify route handler for GET /status. Pure handler: returns process uptime and health.
372
+ */
373
/**
 * Create handler for GET /status.
 *
 * @returns A synchronous handler reporting `status: 'ok'` and the current
 * `process.uptime()` value.
 */
function createStatusHandler() {
    const reportStatus = () => {
        const snapshot = { status: 'ok' };
        snapshot.uptime = process.uptime();
        return snapshot;
    };
    return reportStatus;
}
382
+
383
/**
 * Build the Fastify API server and register every route on it.
 *
 * The instance is returned without listening; the caller starts it with
 * `server.listen()`.
 *
 * @param options - The server options (`processor`, `vectorStore`, `embeddingProvider`, `logger`, `config`).
 * @returns A configured Fastify instance.
 */
function createApiServer(options) {
    const { processor, vectorStore, embeddingProvider, logger, config } = options;
    const server = Fastify({ logger: false });
    server.get('/status', createStatusHandler());
    // POST routes, listed in registration order.
    const postRoutes = [
        ['/metadata', createMetadataHandler({ processor, logger })],
        ['/search', createSearchHandler({ embeddingProvider, vectorStore, logger })],
        ['/reindex', createReindexHandler({ config, processor, logger })],
        ['/rebuild-metadata', createRebuildMetadataHandler({ config, vectorStore, logger })],
        ['/config-reindex', createConfigReindexHandler({ config, processor, logger })],
    ];
    for (const [url, handler] of postRoutes) {
        server.post(url, handler);
    }
    return server;
}
314
402
 
@@ -602,6 +690,31 @@ async function loadConfig(configPath) {
602
690
  }
603
691
  }
604
692
 
693
+ /**
694
+ * @module util/logger
695
+ * Logger fallback helper. Provides a unified warn interface that delegates to pino or console.
696
+ */
697
/**
 * Resolve the logger to use for warnings.
 *
 * A supplied logger is returned untouched. Otherwise a minimal stand-in is
 * returned whose only method, `warn`, forwards to `console.warn` (omitting
 * the message argument when it is falsy).
 *
 * @param logger - Optional pino logger instance.
 * @returns A minimal logger.
 */
function getLogger(logger) {
    const consoleFallback = {
        warn(obj, msg) {
            const args = msg ? [obj, msg] : [obj];
            console.warn(...args);
        },
    };
    return logger ? logger : consoleFallback;
}
717
+
605
718
  /**
606
719
  * @module util/retry
607
720
  * Small async retry helper with exponential backoff. Side effects: sleeps between attempts; can invoke onRetry callback for logging.
@@ -706,6 +819,7 @@ function createGeminiProvider(config, logger) {
706
819
  throw new Error('Gemini embedding provider requires config.embedding.apiKey');
707
820
  }
708
821
  const dimensions = config.dimensions ?? 3072;
822
+ const log = getLogger(logger);
709
823
  const embedder = new googleGenai.GoogleGenerativeAIEmbeddings({
710
824
  apiKey: config.apiKey,
711
825
  model: config.model,
@@ -715,17 +829,7 @@ function createGeminiProvider(config, logger) {
715
829
  async embed(texts) {
716
830
  const vectors = await retry(async (attempt) => {
717
831
  if (attempt > 1) {
718
- const msg = {
719
- attempt,
720
- provider: 'gemini',
721
- model: config.model,
722
- };
723
- if (logger) {
724
- logger.warn(msg, 'Retrying embedding request');
725
- }
726
- else {
727
- console.warn(msg, 'Retrying embedding request');
728
- }
832
+ log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
729
833
  }
730
834
  // embedDocuments returns vectors for multiple texts
731
835
  return embedder.embedDocuments(texts);
@@ -735,19 +839,13 @@ function createGeminiProvider(config, logger) {
735
839
  maxDelayMs: 10_000,
736
840
  jitter: 0.2,
737
841
  onRetry: ({ attempt, delayMs, error }) => {
738
- const msg = {
842
+ log.warn({
739
843
  attempt,
740
844
  delayMs,
741
845
  provider: 'gemini',
742
846
  model: config.model,
743
847
  error,
744
- };
745
- if (logger) {
746
- logger.warn(msg, 'Embedding call failed; will retry');
747
- }
748
- else {
749
- console.warn(msg, 'Embedding call failed; will retry');
750
- }
848
+ }, 'Embedding call failed; will retry');
751
849
  },
752
850
  });
753
851
  // Validate dimensions
@@ -828,15 +926,6 @@ function contentHash(text) {
828
926
  */
829
927
  /** Namespace UUID for jeeves-watcher point IDs. */
830
928
  const NAMESPACE = '6a6f686e-6761-4c74-ad6a-656576657321';
831
- /**
832
- * Normalise a file path for deterministic point ID generation.
833
- *
834
- * @param filePath - The original file path.
835
- * @returns The normalised path string.
836
- */
837
- function normalisePath(filePath) {
838
- return filePath.replace(/\\/g, '/').toLowerCase();
839
- }
840
929
  /**
841
930
  * Generate a deterministic UUID v5 point ID for a file (and optional chunk index).
842
931
  *
@@ -846,8 +935,8 @@ function normalisePath(filePath) {
846
935
  */
847
936
  function pointId(filePath, chunkIndex) {
848
937
  const key = chunkIndex !== undefined
849
- ? `${normalisePath(filePath)}#${String(chunkIndex)}`
850
- : normalisePath(filePath);
938
+ ? `${normalizePath(filePath)}#${String(chunkIndex)}`
939
+ : normalizePath(filePath);
851
940
  return uuid.v5(key, NAMESPACE);
852
941
  }
853
942
 
@@ -966,66 +1055,11 @@ async function extractText(filePath, extension) {
966
1055
  }
967
1056
 
968
1057
  /**
969
- * Build {@link FileAttributes} from a file path and stat info.
970
- *
971
- * @param filePath - The file path.
972
- * @param stats - The file stats.
973
- * @param extractedFrontmatter - Optional extracted frontmatter.
974
- * @param extractedJson - Optional parsed JSON content.
975
- * @returns The constructed file attributes.
976
- */
977
- function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
978
- const normalised = filePath.replace(/\\/g, '/');
979
- const attrs = {
980
- file: {
981
- path: normalised,
982
- directory: node_path.dirname(normalised).replace(/\\/g, '/'),
983
- filename: node_path.basename(normalised),
984
- extension: node_path.extname(normalised),
985
- sizeBytes: stats.size,
986
- modified: stats.mtime.toISOString(),
987
- },
988
- };
989
- if (extractedFrontmatter)
990
- attrs.frontmatter = extractedFrontmatter;
991
- if (extractedJson)
992
- attrs.json = extractedJson;
993
- return attrs;
994
- }
995
- /**
996
- * Create an ajv instance with a custom `glob` format for picomatch glob matching.
997
- *
998
- * @returns The configured ajv instance.
1058
+ * @module rules/templates
1059
+ * Resolves template variables (`${path.to.value}`) in rule `set` objects against file attributes.
999
1060
  */
1000
- function createRuleAjv() {
1001
- const ajv = new Ajv({ allErrors: true });
1002
- addFormats(ajv);
1003
- ajv.addKeyword({
1004
- keyword: 'glob',
1005
- type: 'string',
1006
- schemaType: 'string',
1007
- validate: (pattern, data) => picomatch.isMatch(data, pattern),
1008
- });
1009
- return ajv;
1010
- }
1011
1061
  /**
1012
- * Compile an array of inference rules into executable validators.
1013
- *
1014
- * @param rules - The inference rule definitions.
1015
- * @returns An array of compiled rules.
1016
- */
1017
- function compileRules(rules) {
1018
- const ajv = createRuleAjv();
1019
- return rules.map((rule, idx) => ({
1020
- rule,
1021
- validate: ajv.compile({
1022
- $id: `rule-${String(idx)}`,
1023
- ...rule.match,
1024
- }),
1025
- }));
1026
- }
1027
- /**
1028
- * Resolve `$\{template.vars\}` in a value against the given attributes.
1062
+ * Resolve `${template.vars}` in a value against the given attributes.
1029
1063
  *
1030
1064
  * @param value - The value to resolve.
1031
1065
  * @param attributes - The file attributes for variable lookup.
@@ -1055,9 +1089,13 @@ function resolveSet(setObj, attributes) {
1055
1089
  }
1056
1090
  return result;
1057
1091
  }
1092
+
1093
+ /**
1094
+ * @module rules/apply
1095
+ * Applies compiled inference rules to file attributes, producing merged metadata via template resolution and JsonMap transforms.
1096
+ */
1058
1097
  /**
1059
1098
  * Create the lib object for JsonMap transformations.
1060
- * Provides utility functions for path manipulation.
1061
1099
  *
1062
1100
  * @returns The lib object.
1063
1101
  */
@@ -1081,7 +1119,7 @@ function createJsonMapLib() {
1081
1119
  * @param compiledRules - The compiled rules to evaluate.
1082
1120
  * @param attributes - The file attributes to match against.
1083
1121
  * @param namedMaps - Optional record of named JsonMap definitions.
1084
- * @param logger - Optional pino logger for warnings (falls back to console.warn).
1122
+ * @param logger - Optional logger for warnings (falls back to console.warn).
1085
1123
  * @returns The merged metadata from all matching rules.
1086
1124
  */
1087
1125
  async function applyRules(compiledRules, attributes, namedMaps, logger) {
@@ -1131,6 +1169,80 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1131
1169
  return merged;
1132
1170
  }
1133
1171
 
1172
+ /**
1173
+ * @module rules/attributes
1174
+ * Builds file attribute objects for rule matching. Pure function: derives attributes from path, stats, and extracted data.
1175
+ */
1176
/**
 * Assemble the {@link FileAttributes} object used for rule matching.
 *
 * Backslashes in the path are converted to forward slashes before the
 * directory, filename and extension are derived from it.
 *
 * @param filePath - The file path.
 * @param stats - The file stats (`size` and `mtime` are read).
 * @param extractedFrontmatter - Optional extracted frontmatter; attached only when truthy.
 * @param extractedJson - Optional parsed JSON content; attached only when truthy.
 * @returns The constructed file attributes.
 */
function buildAttributes(filePath, stats, extractedFrontmatter, extractedJson) {
    const posixPath = filePath.replace(/\\/g, '/');
    const file = {
        path: posixPath,
        directory: node_path.dirname(posixPath).replace(/\\/g, '/'),
        filename: node_path.basename(posixPath),
        extension: node_path.extname(posixPath),
        sizeBytes: stats.size,
        modified: stats.mtime.toISOString(),
    };
    const attributes = { file };
    if (extractedFrontmatter) {
        attributes.frontmatter = extractedFrontmatter;
    }
    if (extractedJson) {
        attributes.json = extractedJson;
    }
    return attributes;
}
1203
+
1204
+ /**
1205
+ * @module rules/ajvSetup
1206
+ * AJV instance factory with custom glob keyword for picomatch-based pattern matching in rule schemas.
1207
+ */
1208
/**
 * Build the AJV instance used for rule schemas.
 *
 * The instance collects all errors, has the standard formats installed, and
 * gains a custom string keyword `glob` whose value is a pattern matched
 * against the data with picomatch.
 *
 * @returns The configured AJV instance.
 */
function createRuleAjv() {
    const matchesGlob = (pattern, data) => picomatch.isMatch(data, pattern);
    const globKeyword = {
        keyword: 'glob',
        type: 'string',
        schemaType: 'string',
        validate: matchesGlob,
    };
    const instance = new Ajv({ allErrors: true });
    addFormats(instance);
    instance.addKeyword(globKeyword);
    return instance;
}
1224
+
1225
+ /**
1226
+ * @module rules/compile
1227
+ * Compiles inference rule definitions into executable AJV validators for efficient rule evaluation.
1228
+ */
1229
/**
 * Turn inference rule definitions into executable validators.
 *
 * Each rule's `match` schema is compiled on one shared AJV instance under the
 * id `rule-<index>`.
 *
 * @param rules - The inference rule definitions.
 * @returns An array of `{ rule, validate }` pairs, in input order.
 */
function compileRules(rules) {
    const ruleAjv = createRuleAjv();
    return rules.map((rule, position) => {
        const schema = {
            $id: `rule-${String(position)}`,
            ...rule.match,
        };
        const validate = ruleAjv.compile(schema);
        return { rule, validate };
    });
}
1245
+
1134
1246
  /**
1135
1247
  * @module processor/buildMetadata
1136
1248
  * Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
@@ -1543,7 +1655,7 @@ class VectorStoreClient {
1543
1655
  client;
1544
1656
  collectionName;
1545
1657
  dims;
1546
- logger;
1658
+ log;
1547
1659
  /**
1548
1660
  * Create a new VectorStoreClient.
1549
1661
  *
@@ -1559,7 +1671,7 @@ class VectorStoreClient {
1559
1671
  });
1560
1672
  this.collectionName = config.collectionName;
1561
1673
  this.dims = dimensions;
1562
- this.logger = logger;
1674
+ this.log = getLogger(logger);
1563
1675
  }
1564
1676
  /**
1565
1677
  * Ensure the collection exists with correct dimensions and Cosine distance.
@@ -1588,17 +1700,7 @@ class VectorStoreClient {
1588
1700
  return;
1589
1701
  await retry(async (attempt) => {
1590
1702
  if (attempt > 1) {
1591
- const msg = {
1592
- attempt,
1593
- operation: 'qdrant.upsert',
1594
- points: points.length,
1595
- };
1596
- if (this.logger) {
1597
- this.logger.warn(msg, 'Retrying Qdrant upsert');
1598
- }
1599
- else {
1600
- console.warn(msg, 'Retrying Qdrant upsert');
1601
- }
1703
+ this.log.warn({ attempt, operation: 'qdrant.upsert', points: points.length }, 'Retrying Qdrant upsert');
1602
1704
  }
1603
1705
  await this.client.upsert(this.collectionName, {
1604
1706
  wait: true,
@@ -1614,13 +1716,7 @@ class VectorStoreClient {
1614
1716
  maxDelayMs: 10_000,
1615
1717
  jitter: 0.2,
1616
1718
  onRetry: ({ attempt, delayMs, error }) => {
1617
- const msg = { attempt, delayMs, operation: 'qdrant.upsert', error };
1618
- if (this.logger) {
1619
- this.logger.warn(msg, 'Qdrant upsert failed; will retry');
1620
- }
1621
- else {
1622
- console.warn(msg, 'Qdrant upsert failed; will retry');
1623
- }
1719
+ this.log.warn({ attempt, delayMs, operation: 'qdrant.upsert', error }, 'Qdrant upsert failed; will retry');
1624
1720
  },
1625
1721
  });
1626
1722
  }
@@ -1634,17 +1730,7 @@ class VectorStoreClient {
1634
1730
  return;
1635
1731
  await retry(async (attempt) => {
1636
1732
  if (attempt > 1) {
1637
- const msg = {
1638
- attempt,
1639
- operation: 'qdrant.delete',
1640
- ids: ids.length,
1641
- };
1642
- if (this.logger) {
1643
- this.logger.warn(msg, 'Retrying Qdrant delete');
1644
- }
1645
- else {
1646
- console.warn(msg, 'Retrying Qdrant delete');
1647
- }
1733
+ this.log.warn({ attempt, operation: 'qdrant.delete', ids: ids.length }, 'Retrying Qdrant delete');
1648
1734
  }
1649
1735
  await this.client.delete(this.collectionName, {
1650
1736
  wait: true,
@@ -1656,13 +1742,7 @@ class VectorStoreClient {
1656
1742
  maxDelayMs: 10_000,
1657
1743
  jitter: 0.2,
1658
1744
  onRetry: ({ attempt, delayMs, error }) => {
1659
- const msg = { attempt, delayMs, operation: 'qdrant.delete', error };
1660
- if (this.logger) {
1661
- this.logger.warn(msg, 'Qdrant delete failed; will retry');
1662
- }
1663
- else {
1664
- console.warn(msg, 'Qdrant delete failed; will retry');
1665
- }
1745
+ this.log.warn({ attempt, delayMs, operation: 'qdrant.delete', error }, 'Qdrant delete failed; will retry');
1666
1746
  },
1667
1747
  });
1668
1748
  }
@@ -1833,63 +1913,141 @@ class FileSystemWatcher {
1833
1913
  }
1834
1914
  }
1835
1915
 
1916
+ /**
1917
+ * @module app/configWatcher
1918
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
1919
+ */
1920
+ /**
1921
+ * Debounced config file watcher.
1922
+ */
1923
+ class ConfigWatcher {
1924
+ options;
1925
+ watcher;
1926
+ debounce;
1927
+ constructor(options) {
1928
+ this.options = options;
1929
+ }
1930
+ start() {
1931
+ if (!this.options.enabled)
1932
+ return;
1933
+ this.watcher = chokidar.watch(this.options.configPath, {
1934
+ ignoreInitial: true,
1935
+ });
1936
+ this.watcher.on('change', () => {
1937
+ if (this.debounce)
1938
+ clearTimeout(this.debounce);
1939
+ this.debounce = setTimeout(() => {
1940
+ void this.options.onChange();
1941
+ }, this.options.debounceMs);
1942
+ });
1943
+ this.watcher.on('error', (error) => {
1944
+ this.options.logger.error({ error }, 'Config watcher error');
1945
+ });
1946
+ this.options.logger.info({
1947
+ configPath: this.options.configPath,
1948
+ debounceMs: this.options.debounceMs,
1949
+ }, 'Config watcher started');
1950
+ }
1951
+ async stop() {
1952
+ if (this.debounce) {
1953
+ clearTimeout(this.debounce);
1954
+ this.debounce = undefined;
1955
+ }
1956
+ if (this.watcher) {
1957
+ await this.watcher.close();
1958
+ this.watcher = undefined;
1959
+ }
1960
+ }
1961
+ }
1962
+
1963
+ /**
1964
+ * @module app/shutdown
1965
+ * Process signal shutdown orchestration. Installs SIGINT/SIGTERM handlers that invoke a provided async stop function.
1966
+ */
1967
+ /**
1968
+ * Install process signal handlers.
1969
+ *
1970
+ * @param stop - Async stop function to invoke on shutdown signals.
1971
+ */
1972
+ function installShutdownHandlers(stop) {
1973
+ const shutdown = async () => {
1974
+ await stop();
1975
+ process.exit(0);
1976
+ };
1977
+ process.on('SIGTERM', () => void shutdown());
1978
+ process.on('SIGINT', () => void shutdown());
1979
+ }
1980
+
1981
+ const defaultFactories = {
1982
+ loadConfig,
1983
+ createLogger,
1984
+ createEmbeddingProvider,
1985
+ createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
1986
+ compileRules,
1987
+ createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
1988
+ createEventQueue: (options) => new EventQueue(options),
1989
+ createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
1990
+ createApiServer,
1991
+ };
1836
1992
  /**
1837
1993
  * Main application class that wires together all components.
1838
1994
  */
1839
1995
  class JeevesWatcher {
1840
1996
  config;
1841
1997
  configPath;
1998
+ factories;
1842
1999
  logger;
1843
2000
  watcher;
1844
2001
  queue;
1845
2002
  server;
1846
2003
  processor;
1847
2004
  configWatcher;
1848
- configDebounce;
1849
2005
  /**
1850
2006
  * Create a new JeevesWatcher instance.
1851
2007
  *
1852
2008
  * @param config - The application configuration.
1853
2009
  * @param configPath - Optional config file path to watch for changes.
2010
+ * @param factories - Optional component factories (for dependency injection).
1854
2011
  */
1855
- constructor(config, configPath) {
2012
+ constructor(config, configPath, factories = {}) {
1856
2013
  this.config = config;
1857
2014
  this.configPath = configPath;
2015
+ this.factories = { ...defaultFactories, ...factories };
1858
2016
  }
1859
2017
  /**
1860
2018
  * Start the watcher, API server, and all components.
1861
2019
  */
1862
2020
  async start() {
1863
- const logger = createLogger(this.config.logging);
2021
+ const logger = this.factories.createLogger(this.config.logging);
1864
2022
  this.logger = logger;
1865
2023
  let embeddingProvider;
1866
2024
  try {
1867
- embeddingProvider = createEmbeddingProvider(this.config.embedding, logger);
2025
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
1868
2026
  }
1869
2027
  catch (error) {
1870
2028
  logger.fatal({ error }, 'Failed to create embedding provider');
1871
2029
  throw error;
1872
2030
  }
1873
- const vectorStore = new VectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2031
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
1874
2032
  await vectorStore.ensureCollection();
1875
- const compiledRules = compileRules(this.config.inferenceRules ?? []);
2033
+ const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
1876
2034
  const processorConfig = {
1877
2035
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
1878
2036
  chunkSize: this.config.embedding.chunkSize,
1879
2037
  chunkOverlap: this.config.embedding.chunkOverlap,
1880
2038
  maps: this.config.maps,
1881
2039
  };
1882
- const processor = new DocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
2040
+ const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
1883
2041
  this.processor = processor;
1884
- const queue = new EventQueue({
2042
+ const queue = this.factories.createEventQueue({
1885
2043
  debounceMs: this.config.watch.debounceMs ?? 2000,
1886
2044
  concurrency: this.config.embedding.concurrency ?? 5,
1887
2045
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
1888
2046
  });
1889
2047
  this.queue = queue;
1890
- const watcher = new FileSystemWatcher(this.config.watch, queue, processor, logger);
2048
+ const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger);
1891
2049
  this.watcher = watcher;
1892
- const server = createApiServer({
2050
+ const server = this.factories.createApiServer({
1893
2051
  processor,
1894
2052
  vectorStore,
1895
2053
  embeddingProvider,
@@ -1945,28 +2103,18 @@ class JeevesWatcher {
1945
2103
  return;
1946
2104
  }
1947
2105
  const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
1948
- this.configWatcher = chokidar.watch(this.configPath, {
1949
- ignoreInitial: true,
1950
- });
1951
- this.configWatcher.on('change', () => {
1952
- if (this.configDebounce)
1953
- clearTimeout(this.configDebounce);
1954
- this.configDebounce = setTimeout(() => {
1955
- void this.reloadConfig();
1956
- }, debounceMs);
1957
- });
1958
- this.configWatcher.on('error', (error) => {
1959
- logger.error({ error }, 'Config watcher error');
2106
+ this.configWatcher = new ConfigWatcher({
2107
+ configPath: this.configPath,
2108
+ enabled,
2109
+ debounceMs,
2110
+ logger,
2111
+ onChange: async () => this.reloadConfig(),
1960
2112
  });
1961
- logger.info({ configPath: this.configPath, debounceMs }, 'Config watcher started');
2113
+ this.configWatcher.start();
1962
2114
  }
1963
2115
  async stopConfigWatch() {
1964
- if (this.configDebounce) {
1965
- clearTimeout(this.configDebounce);
1966
- this.configDebounce = undefined;
1967
- }
1968
2116
  if (this.configWatcher) {
1969
- await this.configWatcher.close();
2117
+ await this.configWatcher.stop();
1970
2118
  this.configWatcher = undefined;
1971
2119
  }
1972
2120
  }
@@ -1977,9 +2125,9 @@ class JeevesWatcher {
1977
2125
  return;
1978
2126
  logger.info({ configPath: this.configPath }, 'Config change detected, reloading...');
1979
2127
  try {
1980
- const newConfig = await loadConfig(this.configPath);
2128
+ const newConfig = await this.factories.loadConfig(this.configPath);
1981
2129
  this.config = newConfig;
1982
- const compiledRules = compileRules(newConfig.inferenceRules ?? []);
2130
+ const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
1983
2131
  processor.updateRules(compiledRules);
1984
2132
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
1985
2133
  }
@@ -1997,12 +2145,7 @@ class JeevesWatcher {
1997
2145
  async function startFromConfig(configPath) {
1998
2146
  const config = await loadConfig(configPath);
1999
2147
  const app = new JeevesWatcher(config, configPath);
2000
- const shutdown = async () => {
2001
- await app.stop();
2002
- process.exit(0);
2003
- };
2004
- process.on('SIGTERM', () => void shutdown());
2005
- process.on('SIGINT', () => void shutdown());
2148
+ installShutdownHandlers(() => app.stop());
2006
2149
  await app.start();
2007
2150
  return app;
2008
2151
  }