@karmaniverous/jeeves-watcher 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,25 @@
1
+ #!/usr/bin/env node
1
2
  import { Command } from '@commander-js/extra-typings';
3
+ import { dirname, resolve, relative, join, extname, basename } from 'node:path';
4
+ import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
5
+ import ignore from 'ignore';
6
+ import Handlebars from 'handlebars';
7
+ import dayjs from 'dayjs';
8
+ import { toMdast } from 'hast-util-to-mdast';
9
+ import { fromADF } from 'mdast-util-from-adf';
10
+ import { toMarkdown } from 'mdast-util-to-markdown';
11
+ import { capitalize, title, camel, snake, dash, isEqual, omit, get } from 'radash';
12
+ import rehypeParse from 'rehype-parse';
13
+ import { unified } from 'unified';
14
+ import chokidar from 'chokidar';
2
15
  import Fastify from 'fastify';
3
16
  import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
4
- import { resolve, dirname, join, relative, extname, basename } from 'node:path';
5
17
  import picomatch from 'picomatch';
6
- import { omit, get } from 'radash';
7
18
  import { createHash } from 'node:crypto';
8
19
  import { cosmiconfig } from 'cosmiconfig';
9
20
  import { z, ZodError } from 'zod';
10
21
  import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
11
22
  import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
12
- import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
13
- import ignore from 'ignore';
14
23
  import pino from 'pino';
15
24
  import { v5 } from 'uuid';
16
25
  import * as cheerio from 'cheerio';
@@ -20,7 +29,439 @@ import Ajv from 'ajv';
20
29
  import addFormats from 'ajv-formats';
21
30
  import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
22
31
  import { QdrantClient } from '@qdrant/js-client-rest';
23
- import chokidar from 'chokidar';
32
+
33
+ /**
34
+ * @module gitignore
35
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
36
+ */
37
+ /**
38
+ * Find the git repo root by walking up from `startDir` looking for `.git/`.
39
+ * Returns `undefined` if no repo is found.
40
+ */
41
+ function findRepoRoot(startDir) {
42
+ let dir = resolve(startDir);
43
+ const root = resolve('/');
44
+ while (dir !== root) {
45
+ if (existsSync(join(dir, '.git')) &&
46
+ statSync(join(dir, '.git')).isDirectory()) {
47
+ return dir;
48
+ }
49
+ const parent = dirname(dir);
50
+ if (parent === dir)
51
+ break;
52
+ dir = parent;
53
+ }
54
+ return undefined;
55
+ }
56
+ /**
57
+ * Convert a watch path (directory, file path, or glob) to a concrete directory
58
+ * that can be scanned for a repo root.
59
+ */
60
+ function watchPathToScanDir(watchPath) {
61
+ const absPath = resolve(watchPath);
62
+ try {
63
+ return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
64
+ }
65
+ catch {
66
+ // ignore
67
+ }
68
+ // If this is a glob, fall back to the non-glob prefix.
69
+ const globMatch = /[*?[{]/.exec(watchPath);
70
+ if (!globMatch)
71
+ return undefined;
72
+ const prefix = watchPath.slice(0, globMatch.index);
73
+ const trimmed = prefix.trim();
74
+ const baseDir = trimmed.length === 0
75
+ ? '.'
76
+ : trimmed.endsWith('/') || trimmed.endsWith('\\')
77
+ ? trimmed
78
+ : dirname(trimmed);
79
+ const resolved = resolve(baseDir);
80
+ if (!existsSync(resolved))
81
+ return undefined;
82
+ return resolved;
83
+ }
84
+ /**
85
+ * Recursively find all `.gitignore` files under `dir`.
86
+ * Skips `.git` and `node_modules` directories for performance.
87
+ */
88
+ function findGitignoreFiles(dir) {
89
+ const results = [];
90
+ const gitignorePath = join(dir, '.gitignore');
91
+ if (existsSync(gitignorePath)) {
92
+ results.push(gitignorePath);
93
+ }
94
+ let entries;
95
+ try {
96
+ entries = readdirSync(dir);
97
+ }
98
+ catch {
99
+ return results;
100
+ }
101
+ for (const entry of entries) {
102
+ if (entry === '.git' || entry === 'node_modules')
103
+ continue;
104
+ const fullPath = join(dir, entry);
105
+ try {
106
+ if (statSync(fullPath).isDirectory()) {
107
+ results.push(...findGitignoreFiles(fullPath));
108
+ }
109
+ }
110
+ catch {
111
+ // Skip inaccessible entries
112
+ }
113
+ }
114
+ return results;
115
+ }
116
+ /**
117
+ * Parse a `.gitignore` file into an `ignore` instance.
118
+ */
119
+ function parseGitignore(gitignorePath) {
120
+ const content = readFileSync(gitignorePath, 'utf8');
121
+ return ignore().add(content);
122
+ }
123
+ /**
124
+ * Normalize a path to use forward slashes (required by `ignore` package).
125
+ */
126
+ function toForwardSlash(p) {
127
+ return p.replace(/\\/g, '/');
128
+ }
129
+ /**
130
+ * Processor-level gitignore filter. Checks file paths against the nearest
131
+ * `.gitignore` chain in git repositories.
132
+ */
133
+ class GitignoreFilter {
134
+ repos = new Map();
135
+ /**
136
+ * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
137
+ *
138
+ * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
139
+ */
140
+ constructor(watchPaths) {
141
+ this.scan(watchPaths);
142
+ }
143
+ /**
144
+ * Scan paths for git repos and their `.gitignore` files.
145
+ */
146
+ scan(watchPaths) {
147
+ this.repos.clear();
148
+ const scannedDirs = new Set();
149
+ for (const watchPath of watchPaths) {
150
+ const scanDir = watchPathToScanDir(watchPath);
151
+ if (!scanDir)
152
+ continue;
153
+ if (scannedDirs.has(scanDir))
154
+ continue;
155
+ scannedDirs.add(scanDir);
156
+ const repoRoot = findRepoRoot(scanDir);
157
+ if (!repoRoot)
158
+ continue;
159
+ if (this.repos.has(repoRoot))
160
+ continue;
161
+ const gitignoreFiles = findGitignoreFiles(repoRoot);
162
+ const entries = gitignoreFiles.map((gf) => ({
163
+ dir: dirname(gf),
164
+ ig: parseGitignore(gf),
165
+ }));
166
+ // Sort deepest-first so nested `.gitignore` files are checked first
167
+ entries.sort((a, b) => b.dir.length - a.dir.length);
168
+ this.repos.set(repoRoot, { root: repoRoot, entries });
169
+ }
170
+ }
171
+ /**
172
+ * Check whether a file path is ignored by any applicable `.gitignore`.
173
+ *
174
+ * @param filePath - Absolute file path to check.
175
+ * @returns `true` if the file should be ignored.
176
+ */
177
+ isIgnored(filePath) {
178
+ const absPath = resolve(filePath);
179
+ for (const [, repo] of this.repos) {
180
+ // Check if file is within this repo
181
+ const relToRepo = relative(repo.root, absPath);
182
+ // On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
183
+ // an absolute path with a drive letter instead of a relative one. The
184
+ // `ignore` library rejects these with a RangeError. Skip repos on
185
+ // different drives to avoid cross-drive gitignore mismatches.
186
+ if (relToRepo.startsWith('..') ||
187
+ relToRepo.startsWith(resolve('/')) ||
188
+ /^[a-zA-Z]:/.test(relToRepo)) {
189
+ continue;
190
+ }
191
+ // Check each `.gitignore` entry (deepest-first)
192
+ for (const entry of repo.entries) {
193
+ const relToEntry = relative(entry.dir, absPath);
194
+ if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
195
+ continue;
196
+ const normalized = toForwardSlash(relToEntry);
197
+ if (entry.ig.ignores(normalized)) {
198
+ return true;
199
+ }
200
+ }
201
+ }
202
+ return false;
203
+ }
204
+ /**
205
+ * Invalidate and re-parse a specific `.gitignore` file.
206
+ * Call when a `.gitignore` file is added, changed, or removed.
207
+ *
208
+ * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
209
+ */
210
+ invalidate(gitignorePath) {
211
+ const absPath = resolve(gitignorePath);
212
+ const gitignoreDir = dirname(absPath);
213
+ for (const [, repo] of this.repos) {
214
+ const relToRepo = relative(repo.root, gitignoreDir);
215
+ if (relToRepo.startsWith('..'))
216
+ continue;
217
+ // Remove old entry for this directory
218
+ repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
219
+ // Re-parse if file still exists
220
+ if (existsSync(absPath)) {
221
+ repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
222
+ // Re-sort deepest-first
223
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
224
+ }
225
+ return;
226
+ }
227
+ // If not in any known repo, check if it's in a repo we haven't scanned
228
+ const repoRoot = findRepoRoot(gitignoreDir);
229
+ if (repoRoot && existsSync(absPath)) {
230
+ const entries = [
231
+ { dir: gitignoreDir, ig: parseGitignore(absPath) },
232
+ ];
233
+ if (this.repos.has(repoRoot)) {
234
+ const repo = this.repos.get(repoRoot);
235
+ repo.entries.push(entries[0]);
236
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
237
+ }
238
+ else {
239
+ this.repos.set(repoRoot, { root: repoRoot, entries });
240
+ }
241
+ }
242
+ }
243
+ }
244
+
245
+ /**
246
+ * @module templates/helpers
247
+ * Registers built-in Handlebars helpers for content templates.
248
+ */
249
+ /** Pre-built rehype parser for HTML → hast conversion. */
250
+ const htmlParser = unified().use(rehypeParse, { fragment: true });
251
+ /**
252
+ * Register all built-in helpers on a Handlebars instance.
253
+ *
254
+ * @param hbs - The Handlebars instance.
255
+ */
256
+ function registerBuiltinHelpers(hbs) {
257
+ // Structural: ADF → Markdown
258
+ hbs.registerHelper('adfToMarkdown', function (adf) {
259
+ if (!adf || typeof adf !== 'object')
260
+ return '';
261
+ try {
262
+ const mdast = fromADF(adf);
263
+ return new hbs.SafeString(toMarkdown(mdast).trim());
264
+ }
265
+ catch {
266
+ return '<!-- ADF conversion failed -->';
267
+ }
268
+ });
269
+ // Structural: HTML → Markdown
270
+ hbs.registerHelper('markdownify', function (html) {
271
+ if (typeof html !== 'string' || !html.trim())
272
+ return '';
273
+ try {
274
+ const hast = htmlParser.parse(html);
275
+ const mdast = toMdast(hast);
276
+ return new hbs.SafeString(toMarkdown(mdast).trim());
277
+ }
278
+ catch {
279
+ return '<!-- HTML conversion failed -->';
280
+ }
281
+ });
282
+ // Formatting: dateFormat
283
+ hbs.registerHelper('dateFormat', function (value, format) {
284
+ if (value === undefined || value === null)
285
+ return '';
286
+ const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
287
+ return dayjs(value).format(fmt);
288
+ });
289
+ // Formatting: join
290
+ hbs.registerHelper('join', function (arr, separator) {
291
+ if (!Array.isArray(arr))
292
+ return '';
293
+ const sep = typeof separator === 'string' ? separator : ', ';
294
+ return arr.join(sep);
295
+ });
296
+ // Formatting: pluck
297
+ hbs.registerHelper('pluck', function (arr, key) {
298
+ if (!Array.isArray(arr) || typeof key !== 'string')
299
+ return [];
300
+ return arr.map((item) => item && typeof item === 'object'
301
+ ? item[key]
302
+ : undefined);
303
+ });
304
+ // String transforms
305
+ hbs.registerHelper('lowercase', (text) => typeof text === 'string' ? text.toLowerCase() : '');
306
+ hbs.registerHelper('uppercase', (text) => typeof text === 'string' ? text.toUpperCase() : '');
307
+ hbs.registerHelper('capitalize', (text) => typeof text === 'string' ? capitalize(text) : '');
308
+ hbs.registerHelper('title', (text) => typeof text === 'string' ? title(text) : '');
309
+ hbs.registerHelper('camel', (text) => typeof text === 'string' ? camel(text) : '');
310
+ hbs.registerHelper('snake', (text) => typeof text === 'string' ? snake(text) : '');
311
+ hbs.registerHelper('dash', (text) => typeof text === 'string' ? dash(text) : '');
312
+ // default helper
313
+ hbs.registerHelper('default', function (value, fallback) {
314
+ return value ?? fallback ?? '';
315
+ });
316
+ // eq helper (deep equality)
317
+ hbs.registerHelper('eq', function (a, b) {
318
+ return isEqual(a, b);
319
+ });
320
+ // json helper
321
+ hbs.registerHelper('json', function (value) {
322
+ return new hbs.SafeString(JSON.stringify(value, null, 2));
323
+ });
324
+ }
325
+
326
+ /**
327
+ * @module templates/engine
328
+ * Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
329
+ */
330
+ /**
331
+ * Resolve a template value to its source string.
332
+ *
333
+ * Resolution order:
334
+ * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
335
+ * 2. Matches a key in namedTemplates → named ref (recursively resolve)
336
+ * 3. Otherwise → inline Handlebars template string
337
+ *
338
+ * @param value - The template reference (inline, file path, or named ref).
339
+ * @param namedTemplates - Named template definitions from config.
340
+ * @param configDir - Directory to resolve relative file paths against.
341
+ * @param visited - Set of visited named refs for cycle detection.
342
+ * @returns The resolved template source string.
343
+ */
344
+ function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
345
+ // File path detection
346
+ if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
347
+ return readFileSync(resolve(configDir, value), 'utf-8');
348
+ }
349
+ // Named ref
350
+ if (namedTemplates?.[value] !== undefined) {
351
+ if (visited.has(value)) {
352
+ throw new Error(`Circular template reference detected: ${value}`);
353
+ }
354
+ visited.add(value);
355
+ return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
356
+ }
357
+ // Inline
358
+ return value;
359
+ }
360
+ /**
361
+ * Create a configured Handlebars instance with built-in helpers registered.
362
+ *
363
+ * @returns A Handlebars instance with helpers.
364
+ */
365
+ function createHandlebarsInstance() {
366
+ const hbs = Handlebars.create();
367
+ registerBuiltinHelpers(hbs);
368
+ return hbs;
369
+ }
370
+ /**
371
+ * Load custom helpers from file paths.
372
+ *
373
+ * Each file should export a default function that receives the Handlebars instance.
374
+ *
375
+ * @param hbs - The Handlebars instance.
376
+ * @param paths - File paths to custom helper modules.
377
+ * @param configDir - Directory to resolve relative paths against.
378
+ */
379
+ async function loadCustomHelpers(hbs, paths, configDir) {
380
+ for (const p of paths) {
381
+ const resolved = resolve(configDir, p);
382
+ const mod = (await import(resolved));
383
+ if (typeof mod.default === 'function') {
384
+ mod.default(hbs);
385
+ }
386
+ }
387
+ }
388
+ /**
389
+ * The template engine: holds compiled templates and renders them against context.
390
+ */
391
+ class TemplateEngine {
392
+ hbs;
393
+ compiled = new Map();
394
+ constructor(hbs) {
395
+ this.hbs = hbs;
396
+ }
397
+ /**
398
+ * Compile and cache a template from its source string.
399
+ *
400
+ * @param key - Cache key (rule index or named template).
401
+ * @param source - Handlebars template source.
402
+ * @returns The compiled template.
403
+ */
404
+ compile(key, source) {
405
+ const fn = this.hbs.compile(source);
406
+ this.compiled.set(key, fn);
407
+ return fn;
408
+ }
409
+ /**
410
+ * Get a previously compiled template by key.
411
+ *
412
+ * @param key - The cache key.
413
+ * @returns The compiled template, or undefined.
414
+ */
415
+ get(key) {
416
+ return this.compiled.get(key);
417
+ }
418
+ /**
419
+ * Render a compiled template against a context.
420
+ *
421
+ * @param key - The cache key of the compiled template.
422
+ * @param context - The data context for rendering.
423
+ * @returns The rendered string, or null if the template was not found.
424
+ */
425
+ render(key, context) {
426
+ const fn = this.compiled.get(key);
427
+ if (!fn)
428
+ return null;
429
+ return fn(context);
430
+ }
431
+ }
432
+
433
+ /**
434
+ * @module templates/buildTemplateEngine
435
+ * Factory to build a TemplateEngine from config, compiling all rule templates at load time.
436
+ */
437
+ /**
438
+ * Build a TemplateEngine from configuration, pre-compiling all rule templates.
439
+ *
440
+ * @param rules - The inference rules (may contain template fields).
441
+ * @param namedTemplates - Named template definitions from config.
442
+ * @param templateHelperPaths - Paths to custom helper modules.
443
+ * @param configDir - Directory to resolve relative paths against.
444
+ * @returns The configured TemplateEngine, or undefined if no templates are used.
445
+ */
446
+ async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
447
+ const rulesWithTemplates = rules.filter((r) => r.template);
448
+ if (rulesWithTemplates.length === 0)
449
+ return undefined;
450
+ const hbs = createHandlebarsInstance();
451
+ // Load custom helpers
452
+ if (templateHelperPaths?.length && configDir) {
453
+ await loadCustomHelpers(hbs, templateHelperPaths, configDir);
454
+ }
455
+ const engine = new TemplateEngine(hbs);
456
+ // Compile all rule templates
457
+ for (const [index, rule] of rules.entries()) {
458
+ if (!rule.template)
459
+ continue;
460
+ const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
461
+ engine.compile(`rule-${String(index)}`, source);
462
+ }
463
+ return engine;
464
+ }
24
465
 
25
466
  /**
26
467
  * @module util/normalizeError
@@ -47,6 +488,53 @@ function normalizeError(error) {
47
488
  return normalized;
48
489
  }
49
490
 
491
+ /**
492
+ * @module app/configWatcher
493
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
494
+ */
495
+ /**
496
+ * Debounced config file watcher.
497
+ */
498
+ class ConfigWatcher {
499
+ options;
500
+ watcher;
501
+ debounce;
502
+ constructor(options) {
503
+ this.options = options;
504
+ }
505
+ start() {
506
+ if (!this.options.enabled)
507
+ return;
508
+ this.watcher = chokidar.watch(this.options.configPath, {
509
+ ignoreInitial: true,
510
+ });
511
+ this.watcher.on('change', () => {
512
+ if (this.debounce)
513
+ clearTimeout(this.debounce);
514
+ this.debounce = setTimeout(() => {
515
+ void this.options.onChange();
516
+ }, this.options.debounceMs);
517
+ });
518
+ this.watcher.on('error', (error) => {
519
+ this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
520
+ });
521
+ this.options.logger.info({
522
+ configPath: this.options.configPath,
523
+ debounceMs: this.options.debounceMs,
524
+ }, 'Config watcher started');
525
+ }
526
+ async stop() {
527
+ if (this.debounce) {
528
+ clearTimeout(this.debounce);
529
+ this.debounce = undefined;
530
+ }
531
+ if (this.watcher) {
532
+ await this.watcher.close();
533
+ this.watcher = undefined;
534
+ }
535
+ }
536
+ }
537
+
50
538
  /**
51
539
  * Best-effort base directory inference for a glob pattern.
52
540
  *
@@ -640,7 +1128,12 @@ const inferenceRuleSchema = z.object({
640
1128
  map: z
641
1129
  .union([jsonMapMapSchema, z.string()])
642
1130
  .optional()
643
- .describe('JsonMap transformation (inline definition or named map reference).'),
1131
+ .describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
1132
+ /** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
1133
+ template: z
1134
+ .string()
1135
+ .optional()
1136
+ .describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
644
1137
  });
645
1138
  /**
646
1139
  * Top-level configuration for jeeves-watcher.
@@ -678,6 +1171,22 @@ const jeevesWatcherConfigSchema = z.object({
678
1171
  .record(z.string(), jsonMapMapSchema)
679
1172
  .optional()
680
1173
  .describe('Reusable named JsonMap transformations.'),
1174
+ /** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
1175
+ templates: z
1176
+ .record(z.string(), z.string())
1177
+ .optional()
1178
+ .describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
1179
+ /** Custom Handlebars helper registration. */
1180
+ templateHelpers: z
1181
+ .object({
1182
+ /** File paths to custom helper modules. */
1183
+ paths: z
1184
+ .array(z.string())
1185
+ .optional()
1186
+ .describe('File paths to custom helper modules.'),
1187
+ })
1188
+ .optional()
1189
+ .describe('Custom Handlebars helper registration.'),
681
1190
  /** Logging configuration. */
682
1191
  logging: loggingConfigSchema.optional().describe('Logging configuration.'),
683
1192
  /** Timeout in milliseconds for graceful shutdown. */
@@ -930,263 +1439,57 @@ function createGeminiProvider(config, logger) {
930
1439
  if (attempt > 1) {
931
1440
  log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
932
1441
  }
933
- // embedDocuments returns vectors for multiple texts
934
- return embedder.embedDocuments(texts);
935
- }, {
936
- attempts: 5,
937
- baseDelayMs: 500,
938
- maxDelayMs: 10_000,
939
- jitter: 0.2,
940
- onRetry: ({ attempt, delayMs, error }) => {
941
- log.warn({
942
- attempt,
943
- delayMs,
944
- provider: 'gemini',
945
- model: config.model,
946
- err: normalizeError(error),
947
- }, 'Embedding call failed; will retry');
948
- },
949
- });
950
- // Validate dimensions
951
- for (const vector of vectors) {
952
- if (vector.length !== dimensions) {
953
- throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
954
- }
955
- }
956
- return vectors;
957
- },
958
- };
959
- }
960
- function createMockFromConfig(config) {
961
- const dimensions = config.dimensions ?? 768;
962
- return createMockProvider(dimensions);
963
- }
964
- const embeddingProviderRegistry = new Map([
965
- ['mock', createMockFromConfig],
966
- ['gemini', createGeminiProvider],
967
- ]);
968
- /**
969
- * Create an embedding provider based on the given configuration.
970
- *
971
- * Each provider is responsible for its own default dimensions.
972
- *
973
- * @param config - The embedding configuration.
974
- * @param logger - Optional pino logger for retry warnings.
975
- * @returns An {@link EmbeddingProvider} instance.
976
- * @throws If the configured provider is not supported.
977
- */
978
- function createEmbeddingProvider(config, logger) {
979
- const factory = embeddingProviderRegistry.get(config.provider);
980
- if (!factory) {
981
- throw new Error(`Unsupported embedding provider: ${config.provider}`);
982
- }
983
- return factory(config, logger);
984
- }
985
-
986
- /**
987
- * @module gitignore
988
- * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
989
- */
990
- /**
991
- * Find the git repo root by walking up from `startDir` looking for `.git/`.
992
- * Returns `undefined` if no repo is found.
993
- */
994
- function findRepoRoot(startDir) {
995
- let dir = resolve(startDir);
996
- const root = resolve('/');
997
- while (dir !== root) {
998
- if (existsSync(join(dir, '.git')) &&
999
- statSync(join(dir, '.git')).isDirectory()) {
1000
- return dir;
1001
- }
1002
- const parent = dirname(dir);
1003
- if (parent === dir)
1004
- break;
1005
- dir = parent;
1006
- }
1007
- return undefined;
1008
- }
1009
- /**
1010
- * Convert a watch path (directory, file path, or glob) to a concrete directory
1011
- * that can be scanned for a repo root.
1012
- */
1013
- function watchPathToScanDir(watchPath) {
1014
- const absPath = resolve(watchPath);
1015
- try {
1016
- return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
1017
- }
1018
- catch {
1019
- // ignore
1020
- }
1021
- // If this is a glob, fall back to the non-glob prefix.
1022
- const globMatch = /[*?[{]/.exec(watchPath);
1023
- if (!globMatch)
1024
- return undefined;
1025
- const prefix = watchPath.slice(0, globMatch.index);
1026
- const trimmed = prefix.trim();
1027
- const baseDir = trimmed.length === 0
1028
- ? '.'
1029
- : trimmed.endsWith('/') || trimmed.endsWith('\\')
1030
- ? trimmed
1031
- : dirname(trimmed);
1032
- const resolved = resolve(baseDir);
1033
- if (!existsSync(resolved))
1034
- return undefined;
1035
- return resolved;
1036
- }
1037
- /**
1038
- * Recursively find all `.gitignore` files under `dir`.
1039
- * Skips `.git` and `node_modules` directories for performance.
1040
- */
1041
- function findGitignoreFiles(dir) {
1042
- const results = [];
1043
- const gitignorePath = join(dir, '.gitignore');
1044
- if (existsSync(gitignorePath)) {
1045
- results.push(gitignorePath);
1046
- }
1047
- let entries;
1048
- try {
1049
- entries = readdirSync(dir);
1050
- }
1051
- catch {
1052
- return results;
1053
- }
1054
- for (const entry of entries) {
1055
- if (entry === '.git' || entry === 'node_modules')
1056
- continue;
1057
- const fullPath = join(dir, entry);
1058
- try {
1059
- if (statSync(fullPath).isDirectory()) {
1060
- results.push(...findGitignoreFiles(fullPath));
1061
- }
1062
- }
1063
- catch {
1064
- // Skip inaccessible entries
1065
- }
1066
- }
1067
- return results;
1068
- }
1069
- /**
1070
- * Parse a `.gitignore` file into an `ignore` instance.
1071
- */
1072
- function parseGitignore(gitignorePath) {
1073
- const content = readFileSync(gitignorePath, 'utf8');
1074
- return ignore().add(content);
1075
- }
1076
- /**
1077
- * Normalize a path to use forward slashes (required by `ignore` package).
1078
- */
1079
- function toForwardSlash(p) {
1080
- return p.replace(/\\/g, '/');
1081
- }
1082
- /**
1083
- * Processor-level gitignore filter. Checks file paths against the nearest
1084
- * `.gitignore` chain in git repositories.
1085
- */
1086
- class GitignoreFilter {
1087
- repos = new Map();
1088
- /**
1089
- * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
1090
- *
1091
- * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
1092
- */
1093
- constructor(watchPaths) {
1094
- this.scan(watchPaths);
1095
- }
1096
- /**
1097
- * Scan paths for git repos and their `.gitignore` files.
1098
- */
1099
- scan(watchPaths) {
1100
- this.repos.clear();
1101
- const scannedDirs = new Set();
1102
- for (const watchPath of watchPaths) {
1103
- const scanDir = watchPathToScanDir(watchPath);
1104
- if (!scanDir)
1105
- continue;
1106
- if (scannedDirs.has(scanDir))
1107
- continue;
1108
- scannedDirs.add(scanDir);
1109
- const repoRoot = findRepoRoot(scanDir);
1110
- if (!repoRoot)
1111
- continue;
1112
- if (this.repos.has(repoRoot))
1113
- continue;
1114
- const gitignoreFiles = findGitignoreFiles(repoRoot);
1115
- const entries = gitignoreFiles.map((gf) => ({
1116
- dir: dirname(gf),
1117
- ig: parseGitignore(gf),
1118
- }));
1119
- // Sort deepest-first so nested `.gitignore` files are checked first
1120
- entries.sort((a, b) => b.dir.length - a.dir.length);
1121
- this.repos.set(repoRoot, { root: repoRoot, entries });
1122
- }
1123
- }
1124
- /**
1125
- * Check whether a file path is ignored by any applicable `.gitignore`.
1126
- *
1127
- * @param filePath - Absolute file path to check.
1128
- * @returns `true` if the file should be ignored.
1129
- */
1130
- isIgnored(filePath) {
1131
- const absPath = resolve(filePath);
1132
- for (const [, repo] of this.repos) {
1133
- // Check if file is within this repo
1134
- const relToRepo = relative(repo.root, absPath);
1135
- if (relToRepo.startsWith('..') || relToRepo.startsWith(resolve('/'))) {
1136
- continue;
1137
- }
1138
- // Check each `.gitignore` entry (deepest-first)
1139
- for (const entry of repo.entries) {
1140
- const relToEntry = relative(entry.dir, absPath);
1141
- if (relToEntry.startsWith('..'))
1142
- continue;
1143
- const normalized = toForwardSlash(relToEntry);
1144
- if (entry.ig.ignores(normalized)) {
1145
- return true;
1146
- }
1147
- }
1148
- }
1149
- return false;
1150
- }
1151
- /**
1152
- * Invalidate and re-parse a specific `.gitignore` file.
1153
- * Call when a `.gitignore` file is added, changed, or removed.
1154
- *
1155
- * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
1156
- */
1157
- invalidate(gitignorePath) {
1158
- const absPath = resolve(gitignorePath);
1159
- const gitignoreDir = dirname(absPath);
1160
- for (const [, repo] of this.repos) {
1161
- const relToRepo = relative(repo.root, gitignoreDir);
1162
- if (relToRepo.startsWith('..'))
1163
- continue;
1164
- // Remove old entry for this directory
1165
- repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
1166
- // Re-parse if file still exists
1167
- if (existsSync(absPath)) {
1168
- repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
1169
- // Re-sort deepest-first
1170
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1171
- }
1172
- return;
1173
- }
1174
- // If not in any known repo, check if it's in a repo we haven't scanned
1175
- const repoRoot = findRepoRoot(gitignoreDir);
1176
- if (repoRoot && existsSync(absPath)) {
1177
- const entries = [
1178
- { dir: gitignoreDir, ig: parseGitignore(absPath) },
1179
- ];
1180
- if (this.repos.has(repoRoot)) {
1181
- const repo = this.repos.get(repoRoot);
1182
- repo.entries.push(entries[0]);
1183
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1184
- }
1185
- else {
1186
- this.repos.set(repoRoot, { root: repoRoot, entries });
1442
+ // embedDocuments returns vectors for multiple texts
1443
+ return embedder.embedDocuments(texts);
1444
+ }, {
1445
+ attempts: 5,
1446
+ baseDelayMs: 500,
1447
+ maxDelayMs: 10_000,
1448
+ jitter: 0.2,
1449
+ onRetry: ({ attempt, delayMs, error }) => {
1450
+ log.warn({
1451
+ attempt,
1452
+ delayMs,
1453
+ provider: 'gemini',
1454
+ model: config.model,
1455
+ err: normalizeError(error),
1456
+ }, 'Embedding call failed; will retry');
1457
+ },
1458
+ });
1459
+ // Validate dimensions
1460
+ for (const vector of vectors) {
1461
+ if (vector.length !== dimensions) {
1462
+ throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
1463
+ }
1187
1464
  }
1188
- }
1465
+ return vectors;
1466
+ },
1467
+ };
1468
+ }
1469
+ function createMockFromConfig(config) {
1470
+ const dimensions = config.dimensions ?? 768;
1471
+ return createMockProvider(dimensions);
1472
+ }
1473
+ const embeddingProviderRegistry = new Map([
1474
+ ['mock', createMockFromConfig],
1475
+ ['gemini', createGeminiProvider],
1476
+ ]);
1477
+ /**
1478
+ * Create an embedding provider based on the given configuration.
1479
+ *
1480
+ * Each provider is responsible for its own default dimensions.
1481
+ *
1482
+ * @param config - The embedding configuration.
1483
+ * @param logger - Optional pino logger for retry warnings.
1484
+ * @returns An {@link EmbeddingProvider} instance.
1485
+ * @throws If the configured provider is not supported.
1486
+ */
1487
+ function createEmbeddingProvider(config, logger) {
1488
+ const factory = embeddingProviderRegistry.get(config.provider);
1489
+ if (!factory) {
1490
+ throw new Error(`Unsupported embedding provider: ${config.provider}`);
1189
1491
  }
1492
+ return factory(config, logger);
1190
1493
  }
1191
1494
 
1192
1495
  /**
@@ -1418,7 +1721,7 @@ function createJsonMapLib() {
1418
1721
  };
1419
1722
  }
1420
1723
  /**
1421
- * Apply compiled inference rules to file attributes, returning merged metadata.
1724
+ * Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
1422
1725
  *
1423
1726
  * Rules are evaluated in order; later rules override earlier ones.
1424
1727
  * If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
@@ -1428,15 +1731,18 @@ function createJsonMapLib() {
1428
1731
  * @param attributes - The file attributes to match against.
1429
1732
  * @param namedMaps - Optional record of named JsonMap definitions.
1430
1733
  * @param logger - Optional logger for warnings (falls back to console.warn).
1431
- * @returns The merged metadata from all matching rules.
1734
+ * @param templateEngine - Optional template engine for rendering content templates.
1735
+ * @param configDir - Optional config directory for resolving .json map file paths.
1736
+ * @returns The merged metadata and optional rendered content.
1432
1737
  */
1433
- async function applyRules(compiledRules, attributes, namedMaps, logger) {
1738
+ async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
1434
1739
  // JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
1435
1740
  // Our helper functions accept multiple args, which JsonMap supports at runtime.
1436
1741
  const lib = createJsonMapLib();
1437
1742
  let merged = {};
1743
+ let renderedContent = null;
1438
1744
  const log = logger ?? console;
1439
- for (const { rule, validate } of compiledRules) {
1745
+ for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
1440
1746
  if (validate(attributes)) {
1441
1747
  // Apply set resolution
1442
1748
  const setOutput = resolveSet(rule.set, attributes);
@@ -1446,10 +1752,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1446
1752
  let mapDef;
1447
1753
  // Resolve map reference
1448
1754
  if (typeof rule.map === 'string') {
1449
- mapDef = namedMaps?.[rule.map];
1450
- if (!mapDef) {
1451
- log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1452
- continue;
1755
+ if (rule.map.endsWith('.json') && configDir) {
1756
+ // File path: load from .json file
1757
+ try {
1758
+ const mapPath = resolve(configDir, rule.map);
1759
+ const raw = readFileSync(mapPath, 'utf-8');
1760
+ mapDef = JSON.parse(raw);
1761
+ }
1762
+ catch (error) {
1763
+ log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
1764
+ continue;
1765
+ }
1766
+ }
1767
+ else {
1768
+ mapDef = namedMaps?.[rule.map];
1769
+ if (!mapDef) {
1770
+ log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1771
+ continue;
1772
+ }
1453
1773
  }
1454
1774
  }
1455
1775
  else {
@@ -1472,9 +1792,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1472
1792
  log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
1473
1793
  }
1474
1794
  }
1795
+ // Render template if present
1796
+ if (rule.template && templateEngine) {
1797
+ const templateKey = `rule-${String(ruleIndex)}`;
1798
+ // Build template context: attributes (with json spread at top) + map output
1799
+ const context = {
1800
+ ...(attributes.json ?? {}),
1801
+ ...attributes,
1802
+ ...merged,
1803
+ };
1804
+ try {
1805
+ const result = templateEngine.render(templateKey, context);
1806
+ if (result && result.trim()) {
1807
+ renderedContent = result;
1808
+ }
1809
+ else {
1810
+ log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
1811
+ }
1812
+ }
1813
+ catch (error) {
1814
+ log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
1815
+ }
1816
+ }
1475
1817
  }
1476
1818
  }
1477
- return merged;
1819
+ return { metadata: merged, renderedContent };
1478
1820
  }
1479
1821
 
1480
1822
  /**
@@ -1563,23 +1905,32 @@ function compileRules(rules) {
1563
1905
  * @param metadataDir - The metadata directory for enrichment files.
1564
1906
  * @param maps - Optional named JsonMap definitions.
1565
1907
  * @param logger - Optional logger for rule warnings.
1908
+ * @param templateEngine - Optional template engine for content templates.
1909
+ * @param configDir - Optional config directory for resolving file paths.
1566
1910
  * @returns The merged metadata and intermediate data.
1567
1911
  */
1568
- async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
1912
+ async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
1569
1913
  const ext = extname(filePath);
1570
1914
  const stats = await stat(filePath);
1571
1915
  // 1. Extract text and structured data
1572
1916
  const extracted = await extractText(filePath, ext);
1573
1917
  // 2. Build attributes + apply rules
1574
1918
  const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
1575
- const inferred = await applyRules(compiledRules, attributes, maps, logger);
1919
+ const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
1576
1920
  // 3. Read enrichment metadata (merge, enrichment wins)
1577
1921
  const enrichment = await readMetadata(filePath, metadataDir);
1578
1922
  const metadata = {
1579
1923
  ...inferred,
1580
1924
  ...(enrichment ?? {}),
1581
1925
  };
1582
- return { inferred, enrichment, metadata, attributes, extracted };
1926
+ return {
1927
+ inferred,
1928
+ enrichment,
1929
+ metadata,
1930
+ attributes,
1931
+ extracted,
1932
+ renderedContent,
1933
+ };
1583
1934
  }
1584
1935
 
1585
1936
  /**
@@ -1650,6 +2001,7 @@ class DocumentProcessor {
1650
2001
  vectorStore;
1651
2002
  compiledRules;
1652
2003
  logger;
2004
+ templateEngine;
1653
2005
  /**
1654
2006
  * Create a new DocumentProcessor.
1655
2007
  *
@@ -1658,13 +2010,15 @@ class DocumentProcessor {
1658
2010
  * @param vectorStore - The vector store client.
1659
2011
  * @param compiledRules - The compiled inference rules.
1660
2012
  * @param logger - The logger instance.
2013
+ * @param templateEngine - Optional template engine for content templates.
1661
2014
  */
1662
- constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
2015
+ constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
1663
2016
  this.config = config;
1664
2017
  this.embeddingProvider = embeddingProvider;
1665
2018
  this.vectorStore = vectorStore;
1666
2019
  this.compiledRules = compiledRules;
1667
2020
  this.logger = logger;
2021
+ this.templateEngine = templateEngine;
1668
2022
  }
1669
2023
  /**
1670
2024
  * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
@@ -1675,13 +2029,15 @@ class DocumentProcessor {
1675
2029
  try {
1676
2030
  const ext = extname(filePath);
1677
2031
  // 1. Build merged metadata + extract text
1678
- const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
1679
- if (!extracted.text.trim()) {
2032
+ const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
2033
+ // Use rendered template content if available, otherwise raw extracted text
2034
+ const textToEmbed = renderedContent ?? extracted.text;
2035
+ if (!textToEmbed.trim()) {
1680
2036
  this.logger.debug({ filePath }, 'Skipping empty file');
1681
2037
  return;
1682
2038
  }
1683
2039
  // 2. Content hash check — skip if unchanged
1684
- const hash = contentHash(extracted.text);
2040
+ const hash = contentHash(textToEmbed);
1685
2041
  const baseId = pointId(filePath, 0);
1686
2042
  const existingPayload = await this.vectorStore.getPayload(baseId);
1687
2043
  if (existingPayload && existingPayload['content_hash'] === hash) {
@@ -1693,7 +2049,7 @@ class DocumentProcessor {
1693
2049
  const chunkSize = this.config.chunkSize ?? 1000;
1694
2050
  const chunkOverlap = this.config.chunkOverlap ?? 200;
1695
2051
  const splitter = createSplitter(ext, chunkSize, chunkOverlap);
1696
- const chunks = await splitter.splitText(extracted.text);
2052
+ const chunks = await splitter.splitText(textToEmbed);
1697
2053
  // 4. Embed all chunks
1698
2054
  const vectors = await this.embeddingProvider.embed(chunks);
1699
2055
  // 5. Upsert all chunk points
@@ -1787,7 +2143,7 @@ class DocumentProcessor {
1787
2143
  return null;
1788
2144
  }
1789
2145
  // Build merged metadata (lightweight — no embedding)
1790
- const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
2146
+ const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
1791
2147
  // Update all chunk payloads
1792
2148
  const totalChunks = getChunkCount(existingPayload);
1793
2149
  const ids = chunkIds(filePath, totalChunks);
@@ -1805,8 +2161,17 @@ class DocumentProcessor {
1805
2161
  *
1806
2162
  * @param compiledRules - The newly compiled rules.
1807
2163
  */
1808
- updateRules(compiledRules) {
2164
+ /**
2165
+ * Update compiled inference rules and optionally the template engine.
2166
+ *
2167
+ * @param compiledRules - The newly compiled rules.
2168
+ * @param templateEngine - Optional updated template engine.
2169
+ */
2170
+ updateRules(compiledRules, templateEngine) {
1809
2171
  this.compiledRules = compiledRules;
2172
+ if (templateEngine) {
2173
+ this.templateEngine = templateEngine;
2174
+ }
1810
2175
  this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
1811
2176
  }
1812
2177
  }
@@ -2335,6 +2700,76 @@ class SystemHealth {
2335
2700
  }
2336
2701
  }
2337
2702
 
2703
+ /**
2704
+ * @module watcher/globToDir
2705
+ * Adapts glob-based watch config to chokidar v4+, which removed glob support
2706
+ * (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
2707
+ * strings, silently producing zero events. This module extracts static directory
2708
+ * roots from glob patterns for chokidar to watch, then filters emitted events
2709
+ * against the original globs via picomatch.
2710
+ */
2711
/**
 * Extract the static directory root from a glob pattern.
 * Stops at the first segment containing glob characters (`*`, `?`, `{`, `[`, `]`).
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`), or `.` when the
 * pattern has no static prefix.
 */
function globRoot(glob) {
    const segments = glob.replace(/\\/g, '/').split('/');
    // Index of the first segment carrying glob syntax; -1 means the whole
    // pattern is a literal path.
    const firstWild = segments.findIndex((seg) => /[*?{[\]]/.test(seg));
    const staticPart = firstWild === -1 ? segments : segments.slice(0, firstWild);
    return staticPart.join('/') || '.';
}
2729
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Comparison is slash-normalized and case-insensitive (matching the `nocase`
 * glob matching used downstream), but the returned paths preserve their
 * original casing: these roots are handed to `chokidar.watch`, and lowercasing
 * them would point at nonexistent directories on case-sensitive filesystems.
 *
 * @param roots - Array of directory paths.
 * @returns Deduplicated array (sorted by case-insensitive key) with
 * subdirectories removed.
 */
function deduplicateRoots(roots) {
    // Map case-insensitive key -> first-seen, slash-normalized original path.
    const byKey = new Map();
    for (const root of roots) {
        const normalized = root.replace(/\\/g, '/');
        const key = normalized.toLowerCase();
        if (!byKey.has(key)) {
            byKey.set(key, normalized);
        }
    }
    const keys = [...byKey.keys()].sort();
    // Drop any key that sits underneath another surviving key.
    const kept = keys.filter((key) => {
        const withSlash = key.endsWith('/') ? key : key + '/';
        return !keys.some((other) => other !== key && withSlash.startsWith(other + '/'));
    });
    return kept.map((key) => byKey.get(key));
}
2743
/**
 * Build a picomatch matcher from an array of glob patterns.
 * Normalizes Windows paths (backslash → forward slash) before matching;
 * matching is case-insensitive and includes dotfiles.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function that tests whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const matcher = picomatch(globs.map((g) => g.replace(/\\/g, '/')), {
        dot: true,
        nocase: true,
    });
    // Normalize each incoming path the same way the globs were normalized.
    return (filePath) => matcher(filePath.replace(/\\/g, '/'));
}
2759
/**
 * Convert an array of glob patterns into chokidar-compatible directory roots
 * and a filter function for post-hoc event filtering.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
2772
+
2338
2773
  /**
2339
2774
  * @module watcher
2340
2775
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -2349,6 +2784,7 @@ class FileSystemWatcher {
2349
2784
  logger;
2350
2785
  health;
2351
2786
  gitignoreFilter;
2787
+ globMatches;
2352
2788
  watcher;
2353
2789
  /**
2354
2790
  * Create a new FileSystemWatcher.
@@ -2365,6 +2801,7 @@ class FileSystemWatcher {
2365
2801
  this.processor = processor;
2366
2802
  this.logger = logger;
2367
2803
  this.gitignoreFilter = options.gitignoreFilter;
2804
+ this.globMatches = () => true;
2368
2805
  const healthOptions = {
2369
2806
  maxRetries: options.maxRetries,
2370
2807
  maxBackoffMs: options.maxBackoffMs,
@@ -2377,7 +2814,13 @@ class FileSystemWatcher {
2377
2814
  * Start watching the filesystem and processing events.
2378
2815
  */
2379
2816
  start() {
2380
- this.watcher = chokidar.watch(this.config.paths, {
2817
+ // Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
2818
+ // Glob patterns are silently treated as literal strings, producing zero
2819
+ // events. We extract static directory roots for chokidar to watch, then
2820
+ // filter emitted events against the original globs via picomatch.
2821
+ const { roots, matches } = resolveWatchPaths(this.config.paths);
2822
+ this.globMatches = matches;
2823
+ this.watcher = chokidar.watch(roots, {
2381
2824
  ignored: this.config.ignored,
2382
2825
  usePolling: this.config.usePolling,
2383
2826
  interval: this.config.pollIntervalMs,
@@ -2388,6 +2831,8 @@ class FileSystemWatcher {
2388
2831
  });
2389
2832
  this.watcher.on('add', (path) => {
2390
2833
  this.handleGitignoreChange(path);
2834
+ if (!this.globMatches(path))
2835
+ return;
2391
2836
  if (this.isGitignored(path))
2392
2837
  return;
2393
2838
  this.logger.debug({ path }, 'File added');
@@ -2395,6 +2840,8 @@ class FileSystemWatcher {
2395
2840
  });
2396
2841
  this.watcher.on('change', (path) => {
2397
2842
  this.handleGitignoreChange(path);
2843
+ if (!this.globMatches(path))
2844
+ return;
2398
2845
  if (this.isGitignored(path))
2399
2846
  return;
2400
2847
  this.logger.debug({ path }, 'File changed');
@@ -2402,6 +2849,8 @@ class FileSystemWatcher {
2402
2849
  });
2403
2850
  this.watcher.on('unlink', (path) => {
2404
2851
  this.handleGitignoreChange(path);
2852
+ if (!this.globMatches(path))
2853
+ return;
2405
2854
  if (this.isGitignored(path))
2406
2855
  return;
2407
2856
  this.logger.debug({ path }, 'File removed');
@@ -2474,51 +2923,21 @@ class FileSystemWatcher {
2474
2923
  }
2475
2924
 
2476
2925
  /**
2477
- * @module app/configWatcher
2478
- * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
2479
- */
2480
- /**
2481
- * Debounced config file watcher.
2926
+ * @module app/factories
2927
+ * Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
2482
2928
  */
2483
- class ConfigWatcher {
2484
- options;
2485
- watcher;
2486
- debounce;
2487
- constructor(options) {
2488
- this.options = options;
2489
- }
2490
- start() {
2491
- if (!this.options.enabled)
2492
- return;
2493
- this.watcher = chokidar.watch(this.options.configPath, {
2494
- ignoreInitial: true,
2495
- });
2496
- this.watcher.on('change', () => {
2497
- if (this.debounce)
2498
- clearTimeout(this.debounce);
2499
- this.debounce = setTimeout(() => {
2500
- void this.options.onChange();
2501
- }, this.options.debounceMs);
2502
- });
2503
- this.watcher.on('error', (error) => {
2504
- this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
2505
- });
2506
- this.options.logger.info({
2507
- configPath: this.options.configPath,
2508
- debounceMs: this.options.debounceMs,
2509
- }, 'Config watcher started');
2510
- }
2511
- async stop() {
2512
- if (this.debounce) {
2513
- clearTimeout(this.debounce);
2514
- this.debounce = undefined;
2515
- }
2516
- if (this.watcher) {
2517
- await this.watcher.close();
2518
- this.watcher = undefined;
2519
- }
2520
- }
2521
- }
2929
/**
 * Default component factories wiring real implementations.
 * Each entry can be overridden in tests to inject mocks; the constructor-based
 * components are wrapped in arrows so substitution does not require subclassing.
 */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
    compileRules,
    createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine),
    createEventQueue: (options) => new EventQueue(options),
    createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
    createApiServer,
};
2522
2941
 
2523
2942
  /**
2524
2943
  * @module app/shutdown
@@ -2538,17 +2957,28 @@ function installShutdownHandlers(stop) {
2538
2957
  process.on('SIGINT', () => void shutdown());
2539
2958
  }
2540
2959
 
2541
- const defaultFactories = {
2542
- loadConfig,
2543
- createLogger,
2544
- createEmbeddingProvider,
2545
- createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
2546
- compileRules,
2547
- createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
2548
- createEventQueue: (options) => new EventQueue(options),
2549
- createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
2550
- createApiServer,
2551
- };
2960
+ /**
2961
+ * @module app/startFromConfig
2962
+ * Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
2963
+ */
2964
+ /**
2965
+ * Create and start a JeevesWatcher from a config file path.
2966
+ *
2967
+ * @param configPath - Optional path to the configuration file.
2968
+ * @returns The running JeevesWatcher instance.
2969
+ */
2970
+ async function startFromConfig(configPath) {
2971
+ const config = await loadConfig(configPath);
2972
+ const app = new JeevesWatcher(config, configPath);
2973
+ installShutdownHandlers(() => app.stop());
2974
+ await app.start();
2975
+ return app;
2976
+ }
2977
+
2978
+ /**
2979
+ * @module app
2980
+ * Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
2981
+ */
2552
2982
  /**
2553
2983
  * Main application class that wires together all components.
2554
2984
  */
@@ -2583,56 +3013,26 @@ class JeevesWatcher {
2583
3013
  async start() {
2584
3014
  const logger = this.factories.createLogger(this.config.logging);
2585
3015
  this.logger = logger;
2586
- let embeddingProvider;
2587
- try {
2588
- embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
2589
- }
2590
- catch (error) {
2591
- logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
2592
- throw error;
2593
- }
2594
- const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2595
- await vectorStore.ensureCollection();
3016
+ const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
2596
3017
  const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
2597
- const processorConfig = {
3018
+ const configDir = this.configPath ? dirname(this.configPath) : '.';
3019
+ const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
3020
+ const processor = this.factories.createDocumentProcessor({
2598
3021
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
2599
3022
  chunkSize: this.config.embedding.chunkSize,
2600
3023
  chunkOverlap: this.config.embedding.chunkOverlap,
2601
3024
  maps: this.config.maps,
2602
- };
2603
- const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
3025
+ configDir,
3026
+ }, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
2604
3027
  this.processor = processor;
2605
- const queue = this.factories.createEventQueue({
3028
+ this.queue = this.factories.createEventQueue({
2606
3029
  debounceMs: this.config.watch.debounceMs ?? 2000,
2607
3030
  concurrency: this.config.embedding.concurrency ?? 5,
2608
3031
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2609
3032
  });
2610
- this.queue = queue;
2611
- const respectGitignore = this.config.watch.respectGitignore ?? true;
2612
- const gitignoreFilter = respectGitignore
2613
- ? new GitignoreFilter(this.config.watch.paths)
2614
- : undefined;
2615
- const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2616
- maxRetries: this.config.maxRetries,
2617
- maxBackoffMs: this.config.maxBackoffMs,
2618
- onFatalError: this.runtimeOptions.onFatalError,
2619
- gitignoreFilter,
2620
- });
2621
- this.watcher = watcher;
2622
- const server = this.factories.createApiServer({
2623
- processor,
2624
- vectorStore,
2625
- embeddingProvider,
2626
- queue,
2627
- config: this.config,
2628
- logger,
2629
- });
2630
- this.server = server;
2631
- await server.listen({
2632
- host: this.config.api?.host ?? '127.0.0.1',
2633
- port: this.config.api?.port ?? 3456,
2634
- });
2635
- watcher.start();
3033
+ this.watcher = this.createWatcher(this.queue, processor, logger);
3034
+ this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
3035
+ this.watcher.start();
2636
3036
  this.startConfigWatch();
2637
3037
  logger.info('jeeves-watcher started');
2638
3038
  }
@@ -2663,22 +3063,61 @@ class JeevesWatcher {
2663
3063
  }
2664
3064
  this.logger?.info('jeeves-watcher stopped');
2665
3065
  }
3066
+ async initEmbeddingAndStore(logger) {
3067
+ let embeddingProvider;
3068
+ try {
3069
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
3070
+ }
3071
+ catch (error) {
3072
+ logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
3073
+ throw error;
3074
+ }
3075
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
3076
+ await vectorStore.ensureCollection();
3077
+ return { embeddingProvider, vectorStore };
3078
+ }
3079
+ createWatcher(queue, processor, logger) {
3080
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
3081
+ const gitignoreFilter = respectGitignore
3082
+ ? new GitignoreFilter(this.config.watch.paths)
3083
+ : undefined;
3084
+ return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
3085
+ maxRetries: this.config.maxRetries,
3086
+ maxBackoffMs: this.config.maxBackoffMs,
3087
+ onFatalError: this.runtimeOptions.onFatalError,
3088
+ gitignoreFilter,
3089
+ });
3090
+ }
3091
+ async startApiServer(processor, vectorStore, embeddingProvider, logger) {
3092
+ const server = this.factories.createApiServer({
3093
+ processor,
3094
+ vectorStore,
3095
+ embeddingProvider,
3096
+ queue: this.queue,
3097
+ config: this.config,
3098
+ logger,
3099
+ });
3100
+ await server.listen({
3101
+ host: this.config.api?.host ?? '127.0.0.1',
3102
+ port: this.config.api?.port ?? 3456,
3103
+ });
3104
+ return server;
3105
+ }
2666
3106
  startConfigWatch() {
2667
3107
  const logger = this.logger;
2668
3108
  if (!logger)
2669
3109
  return;
2670
3110
  const enabled = this.config.configWatch?.enabled ?? true;
2671
- if (!enabled)
2672
- return;
2673
- if (!this.configPath) {
2674
- logger.debug('Config watch enabled, but no config path was provided');
3111
+ if (!enabled || !this.configPath) {
3112
+ if (!this.configPath) {
3113
+ logger.debug('Config watch enabled, but no config path was provided');
3114
+ }
2675
3115
  return;
2676
3116
  }
2677
- const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
2678
3117
  this.configWatcher = new ConfigWatcher({
2679
3118
  configPath: this.configPath,
2680
3119
  enabled,
2681
- debounceMs,
3120
+ debounceMs: this.config.configWatch?.debounceMs ?? 10000,
2682
3121
  logger,
2683
3122
  onChange: async () => this.reloadConfig(),
2684
3123
  });
@@ -2700,7 +3139,9 @@ class JeevesWatcher {
2700
3139
  const newConfig = await this.factories.loadConfig(this.configPath);
2701
3140
  this.config = newConfig;
2702
3141
  const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
2703
- processor.updateRules(compiledRules);
3142
+ const reloadConfigDir = dirname(this.configPath);
3143
+ const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
3144
+ processor.updateRules(compiledRules, newTemplateEngine);
2704
3145
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
2705
3146
  }
2706
3147
  catch (error) {
@@ -2708,19 +3149,7 @@ class JeevesWatcher {
2708
3149
  }
2709
3150
  }
2710
3151
  }
2711
- /**
2712
- * Create and start a JeevesWatcher from a config file path.
2713
- *
2714
- * @param configPath - Optional path to the configuration file.
2715
- * @returns The running JeevesWatcher instance.
2716
- */
2717
- async function startFromConfig(configPath) {
2718
- const config = await loadConfig(configPath);
2719
- const app = new JeevesWatcher(config, configPath);
2720
- installShutdownHandlers(() => app.stop());
2721
- await app.start();
2722
- return app;
2723
- }
3152
+ // startFromConfig re-exported from ./startFromConfig
2724
3153
 
2725
3154
  /**
2726
3155
  * @module cli/jeeves-watcher/defaults