@karmaniverous/jeeves-watcher 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- (function (exports, Fastify, promises, node_path, picomatch, radash, node_crypto, cosmiconfig, zod, jsonmap, googleGenai, node_fs, ignore, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest, chokidar) {
1
+ (function (exports, Fastify, promises, node_path, picomatch, radash, node_crypto, node_fs, ignore, Handlebars, dayjs, hastUtilToMdast, mdastUtilFromAdf, mdastUtilToMarkdown, rehypeParse, unified, chokidar, cosmiconfig, zod, jsonmap, googleGenai, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest) {
2
2
  'use strict';
3
3
 
4
4
  function _interopNamespaceDefault(e) {
@@ -416,6 +416,486 @@
416
416
  return app;
417
417
  }
418
418
 
419
+ /**
420
+ * @module gitignore
421
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
422
+ */
423
+ /**
424
+ * Find the git repo root by walking up from `startDir` looking for `.git/`.
425
+ * Returns `undefined` if no repo is found.
426
+ */
427
+ function findRepoRoot(startDir) {
428
+ let dir = node_path.resolve(startDir);
429
+ const root = node_path.resolve('/');
430
+ while (dir !== root) {
431
+ if (node_fs.existsSync(node_path.join(dir, '.git')) &&
432
+ node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
433
+ return dir;
434
+ }
435
+ const parent = node_path.dirname(dir);
436
+ if (parent === dir)
437
+ break;
438
+ dir = parent;
439
+ }
440
+ return undefined;
441
+ }
442
+ /**
443
+ * Convert a watch path (directory, file path, or glob) to a concrete directory
444
+ * that can be scanned for a repo root.
445
+ */
446
+ function watchPathToScanDir(watchPath) {
447
+ const absPath = node_path.resolve(watchPath);
448
+ try {
449
+ return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
450
+ }
451
+ catch {
452
+ // ignore
453
+ }
454
+ // If this is a glob, fall back to the non-glob prefix.
455
+ const globMatch = /[*?[{]/.exec(watchPath);
456
+ if (!globMatch)
457
+ return undefined;
458
+ const prefix = watchPath.slice(0, globMatch.index);
459
+ const trimmed = prefix.trim();
460
+ const baseDir = trimmed.length === 0
461
+ ? '.'
462
+ : trimmed.endsWith('/') || trimmed.endsWith('\\')
463
+ ? trimmed
464
+ : node_path.dirname(trimmed);
465
+ const resolved = node_path.resolve(baseDir);
466
+ if (!node_fs.existsSync(resolved))
467
+ return undefined;
468
+ return resolved;
469
+ }
470
+ /**
471
+ * Recursively find all `.gitignore` files under `dir`.
472
+ * Skips `.git` and `node_modules` directories for performance.
473
+ */
474
+ function findGitignoreFiles(dir) {
475
+ const results = [];
476
+ const gitignorePath = node_path.join(dir, '.gitignore');
477
+ if (node_fs.existsSync(gitignorePath)) {
478
+ results.push(gitignorePath);
479
+ }
480
+ let entries;
481
+ try {
482
+ entries = node_fs.readdirSync(dir);
483
+ }
484
+ catch {
485
+ return results;
486
+ }
487
+ for (const entry of entries) {
488
+ if (entry === '.git' || entry === 'node_modules')
489
+ continue;
490
+ const fullPath = node_path.join(dir, entry);
491
+ try {
492
+ if (node_fs.statSync(fullPath).isDirectory()) {
493
+ results.push(...findGitignoreFiles(fullPath));
494
+ }
495
+ }
496
+ catch {
497
+ // Skip inaccessible entries
498
+ }
499
+ }
500
+ return results;
501
+ }
502
+ /**
503
+ * Parse a `.gitignore` file into an `ignore` instance.
504
+ */
505
+ function parseGitignore(gitignorePath) {
506
+ const content = node_fs.readFileSync(gitignorePath, 'utf8');
507
+ return ignore().add(content);
508
+ }
509
+ /**
510
+ * Normalize a path to use forward slashes (required by `ignore` package).
511
+ */
512
+ function toForwardSlash(p) {
513
+ return p.replace(/\\/g, '/');
514
+ }
515
+ /**
516
+ * Processor-level gitignore filter. Checks file paths against the nearest
517
+ * `.gitignore` chain in git repositories.
518
+ */
519
+ class GitignoreFilter {
520
+ repos = new Map();
521
+ /**
522
+ * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
523
+ *
524
+ * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
525
+ */
526
+ constructor(watchPaths) {
527
+ this.scan(watchPaths);
528
+ }
529
+ /**
530
+ * Scan paths for git repos and their `.gitignore` files.
531
+ */
532
+ scan(watchPaths) {
533
+ this.repos.clear();
534
+ const scannedDirs = new Set();
535
+ for (const watchPath of watchPaths) {
536
+ const scanDir = watchPathToScanDir(watchPath);
537
+ if (!scanDir)
538
+ continue;
539
+ if (scannedDirs.has(scanDir))
540
+ continue;
541
+ scannedDirs.add(scanDir);
542
+ const repoRoot = findRepoRoot(scanDir);
543
+ if (!repoRoot)
544
+ continue;
545
+ if (this.repos.has(repoRoot))
546
+ continue;
547
+ const gitignoreFiles = findGitignoreFiles(repoRoot);
548
+ const entries = gitignoreFiles.map((gf) => ({
549
+ dir: node_path.dirname(gf),
550
+ ig: parseGitignore(gf),
551
+ }));
552
+ // Sort deepest-first so nested `.gitignore` files are checked first
553
+ entries.sort((a, b) => b.dir.length - a.dir.length);
554
+ this.repos.set(repoRoot, { root: repoRoot, entries });
555
+ }
556
+ }
557
+ /**
558
+ * Check whether a file path is ignored by any applicable `.gitignore`.
559
+ *
560
+ * @param filePath - Absolute file path to check.
561
+ * @returns `true` if the file should be ignored.
562
+ */
563
+ isIgnored(filePath) {
564
+ const absPath = node_path.resolve(filePath);
565
+ for (const [, repo] of this.repos) {
566
+ // Check if file is within this repo
567
+ const relToRepo = node_path.relative(repo.root, absPath);
568
+ // On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
569
+ // an absolute path with a drive letter instead of a relative one. The
570
+ // `ignore` library rejects these with a RangeError. Skip repos on
571
+ // different drives to avoid cross-drive gitignore mismatches.
572
+ if (relToRepo.startsWith('..') ||
573
+ relToRepo.startsWith(node_path.resolve('/')) ||
574
+ /^[a-zA-Z]:/.test(relToRepo)) {
575
+ continue;
576
+ }
577
+ // Check each `.gitignore` entry (deepest-first)
578
+ for (const entry of repo.entries) {
579
+ const relToEntry = node_path.relative(entry.dir, absPath);
580
+ if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
581
+ continue;
582
+ const normalized = toForwardSlash(relToEntry);
583
+ if (entry.ig.ignores(normalized)) {
584
+ return true;
585
+ }
586
+ }
587
+ }
588
+ return false;
589
+ }
590
+ /**
591
+ * Invalidate and re-parse a specific `.gitignore` file.
592
+ * Call when a `.gitignore` file is added, changed, or removed.
593
+ *
594
+ * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
595
+ */
596
+ invalidate(gitignorePath) {
597
+ const absPath = node_path.resolve(gitignorePath);
598
+ const gitignoreDir = node_path.dirname(absPath);
599
+ for (const [, repo] of this.repos) {
600
+ const relToRepo = node_path.relative(repo.root, gitignoreDir);
601
+ if (relToRepo.startsWith('..'))
602
+ continue;
603
+ // Remove old entry for this directory
604
+ repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
605
+ // Re-parse if file still exists
606
+ if (node_fs.existsSync(absPath)) {
607
+ repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
608
+ // Re-sort deepest-first
609
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
610
+ }
611
+ return;
612
+ }
613
+ // If not in any known repo, check if it's in a repo we haven't scanned
614
+ const repoRoot = findRepoRoot(gitignoreDir);
615
+ if (repoRoot && node_fs.existsSync(absPath)) {
616
+ const entries = [
617
+ { dir: gitignoreDir, ig: parseGitignore(absPath) },
618
+ ];
619
+ if (this.repos.has(repoRoot)) {
620
+ const repo = this.repos.get(repoRoot);
621
+ repo.entries.push(entries[0]);
622
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
623
+ }
624
+ else {
625
+ this.repos.set(repoRoot, { root: repoRoot, entries });
626
+ }
627
+ }
628
+ }
629
+ }
630
+
631
+ /**
632
+ * @module templates/helpers
633
+ * Registers built-in Handlebars helpers for content templates.
634
+ */
635
+ /** Pre-built rehype parser for HTML → hast conversion. */
636
+ const htmlParser = unified.unified().use(rehypeParse, { fragment: true });
637
+ /**
638
+ * Register all built-in helpers on a Handlebars instance.
639
+ *
640
+ * @param hbs - The Handlebars instance.
641
+ */
642
+ function registerBuiltinHelpers(hbs) {
643
+ // Structural: ADF → Markdown
644
+ hbs.registerHelper('adfToMarkdown', function (adf) {
645
+ if (!adf || typeof adf !== 'object')
646
+ return '';
647
+ try {
648
+ const mdast = mdastUtilFromAdf.fromADF(adf);
649
+ return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
650
+ }
651
+ catch {
652
+ return '<!-- ADF conversion failed -->';
653
+ }
654
+ });
655
+ // Structural: HTML → Markdown
656
+ hbs.registerHelper('markdownify', function (html) {
657
+ if (typeof html !== 'string' || !html.trim())
658
+ return '';
659
+ try {
660
+ const hast = htmlParser.parse(html);
661
+ const mdast = hastUtilToMdast.toMdast(hast);
662
+ return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
663
+ }
664
+ catch {
665
+ return '<!-- HTML conversion failed -->';
666
+ }
667
+ });
668
+ // Formatting: dateFormat
669
+ hbs.registerHelper('dateFormat', function (value, format) {
670
+ if (value === undefined || value === null)
671
+ return '';
672
+ const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
673
+ return dayjs(value).format(fmt);
674
+ });
675
+ // Formatting: join
676
+ hbs.registerHelper('join', function (arr, separator) {
677
+ if (!Array.isArray(arr))
678
+ return '';
679
+ const sep = typeof separator === 'string' ? separator : ', ';
680
+ return arr.join(sep);
681
+ });
682
+ // Formatting: pluck
683
+ hbs.registerHelper('pluck', function (arr, key) {
684
+ if (!Array.isArray(arr) || typeof key !== 'string')
685
+ return [];
686
+ return arr.map((item) => item && typeof item === 'object'
687
+ ? item[key]
688
+ : undefined);
689
+ });
690
+ // String transforms
691
+ hbs.registerHelper('lowercase', (text) => typeof text === 'string' ? text.toLowerCase() : '');
692
+ hbs.registerHelper('uppercase', (text) => typeof text === 'string' ? text.toUpperCase() : '');
693
+ hbs.registerHelper('capitalize', (text) => typeof text === 'string' ? radash.capitalize(text) : '');
694
+ hbs.registerHelper('title', (text) => typeof text === 'string' ? radash.title(text) : '');
695
+ hbs.registerHelper('camel', (text) => typeof text === 'string' ? radash.camel(text) : '');
696
+ hbs.registerHelper('snake', (text) => typeof text === 'string' ? radash.snake(text) : '');
697
+ hbs.registerHelper('dash', (text) => typeof text === 'string' ? radash.dash(text) : '');
698
+ // default helper
699
+ hbs.registerHelper('default', function (value, fallback) {
700
+ return value ?? fallback ?? '';
701
+ });
702
+ // eq helper (deep equality)
703
+ hbs.registerHelper('eq', function (a, b) {
704
+ return radash.isEqual(a, b);
705
+ });
706
+ // json helper
707
+ hbs.registerHelper('json', function (value) {
708
+ return new hbs.SafeString(JSON.stringify(value, null, 2));
709
+ });
710
+ }
711
+
712
+ /**
713
+ * @module templates/engine
714
+ * Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
715
+ */
716
+ /**
717
+ * Resolve a template value to its source string.
718
+ *
719
+ * Resolution order:
720
+ * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
721
+ * 2. Matches a key in namedTemplates → named ref (recursively resolve)
722
+ * 3. Otherwise → inline Handlebars template string
723
+ *
724
+ * @param value - The template reference (inline, file path, or named ref).
725
+ * @param namedTemplates - Named template definitions from config.
726
+ * @param configDir - Directory to resolve relative file paths against.
727
+ * @param visited - Set of visited named refs for cycle detection.
728
+ * @returns The resolved template source string.
729
+ */
730
+ function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
731
+ // File path detection
732
+ if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
733
+ return node_fs.readFileSync(node_path.resolve(configDir, value), 'utf-8');
734
+ }
735
+ // Named ref
736
+ if (namedTemplates?.[value] !== undefined) {
737
+ if (visited.has(value)) {
738
+ throw new Error(`Circular template reference detected: ${value}`);
739
+ }
740
+ visited.add(value);
741
+ return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
742
+ }
743
+ // Inline
744
+ return value;
745
+ }
746
+ /**
747
+ * Create a configured Handlebars instance with built-in helpers registered.
748
+ *
749
+ * @returns A Handlebars instance with helpers.
750
+ */
751
+ function createHandlebarsInstance() {
752
+ const hbs = Handlebars.create();
753
+ registerBuiltinHelpers(hbs);
754
+ return hbs;
755
+ }
756
+ /**
757
+ * Load custom helpers from file paths.
758
+ *
759
+ * Each file should export a default function that receives the Handlebars instance.
760
+ *
761
+ * @param hbs - The Handlebars instance.
762
+ * @param paths - File paths to custom helper modules.
763
+ * @param configDir - Directory to resolve relative paths against.
764
+ */
765
+ async function loadCustomHelpers(hbs, paths, configDir) {
766
+ for (const p of paths) {
767
+ const resolved = node_path.resolve(configDir, p);
768
+ const mod = (await import(resolved));
769
+ if (typeof mod.default === 'function') {
770
+ mod.default(hbs);
771
+ }
772
+ }
773
+ }
774
+ /**
775
+ * The template engine: holds compiled templates and renders them against context.
776
+ */
777
+ class TemplateEngine {
778
+ hbs;
779
+ compiled = new Map();
780
+ constructor(hbs) {
781
+ this.hbs = hbs;
782
+ }
783
+ /**
784
+ * Compile and cache a template from its source string.
785
+ *
786
+ * @param key - Cache key (rule index or named template).
787
+ * @param source - Handlebars template source.
788
+ * @returns The compiled template.
789
+ */
790
+ compile(key, source) {
791
+ const fn = this.hbs.compile(source);
792
+ this.compiled.set(key, fn);
793
+ return fn;
794
+ }
795
+ /**
796
+ * Get a previously compiled template by key.
797
+ *
798
+ * @param key - The cache key.
799
+ * @returns The compiled template, or undefined.
800
+ */
801
+ get(key) {
802
+ return this.compiled.get(key);
803
+ }
804
+ /**
805
+ * Render a compiled template against a context.
806
+ *
807
+ * @param key - The cache key of the compiled template.
808
+ * @param context - The data context for rendering.
809
+ * @returns The rendered string, or null if the template was not found.
810
+ */
811
+ render(key, context) {
812
+ const fn = this.compiled.get(key);
813
+ if (!fn)
814
+ return null;
815
+ return fn(context);
816
+ }
817
+ }
818
+
819
+ /**
820
+ * @module templates/buildTemplateEngine
821
+ * Factory to build a TemplateEngine from config, compiling all rule templates at load time.
822
+ */
823
+ /**
824
+ * Build a TemplateEngine from configuration, pre-compiling all rule templates.
825
+ *
826
+ * @param rules - The inference rules (may contain template fields).
827
+ * @param namedTemplates - Named template definitions from config.
828
+ * @param templateHelperPaths - Paths to custom helper modules.
829
+ * @param configDir - Directory to resolve relative paths against.
830
+ * @returns The configured TemplateEngine, or undefined if no templates are used.
831
+ */
832
+ async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
833
+ const rulesWithTemplates = rules.filter((r) => r.template);
834
+ if (rulesWithTemplates.length === 0)
835
+ return undefined;
836
+ const hbs = createHandlebarsInstance();
837
+ // Load custom helpers
838
+ if (templateHelperPaths?.length && configDir) {
839
+ await loadCustomHelpers(hbs, templateHelperPaths, configDir);
840
+ }
841
+ const engine = new TemplateEngine(hbs);
842
+ // Compile all rule templates
843
+ for (const [index, rule] of rules.entries()) {
844
+ if (!rule.template)
845
+ continue;
846
+ const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
847
+ engine.compile(`rule-${String(index)}`, source);
848
+ }
849
+ return engine;
850
+ }
851
+
852
+ /**
853
+ * @module app/configWatcher
854
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
855
+ */
856
+ /**
857
+ * Debounced config file watcher.
858
+ */
859
+ class ConfigWatcher {
860
+ options;
861
+ watcher;
862
+ debounce;
863
+ constructor(options) {
864
+ this.options = options;
865
+ }
866
+ start() {
867
+ if (!this.options.enabled)
868
+ return;
869
+ this.watcher = chokidar.watch(this.options.configPath, {
870
+ ignoreInitial: true,
871
+ });
872
+ this.watcher.on('change', () => {
873
+ if (this.debounce)
874
+ clearTimeout(this.debounce);
875
+ this.debounce = setTimeout(() => {
876
+ void this.options.onChange();
877
+ }, this.options.debounceMs);
878
+ });
879
+ this.watcher.on('error', (error) => {
880
+ this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
881
+ });
882
+ this.options.logger.info({
883
+ configPath: this.options.configPath,
884
+ debounceMs: this.options.debounceMs,
885
+ }, 'Config watcher started');
886
+ }
887
+ async stop() {
888
+ if (this.debounce) {
889
+ clearTimeout(this.debounce);
890
+ this.debounce = undefined;
891
+ }
892
+ if (this.watcher) {
893
+ await this.watcher.close();
894
+ this.watcher = undefined;
895
+ }
896
+ }
897
+ }
898
+
419
899
  /**
420
900
  * @module config/defaults
421
901
  * Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
@@ -617,7 +1097,12 @@
617
1097
  map: zod.z
618
1098
  .union([jsonmap.jsonMapMapSchema, zod.z.string()])
619
1099
  .optional()
620
- .describe('JsonMap transformation (inline definition or named map reference).'),
1100
+ .describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
1101
+ /** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
1102
+ template: zod.z
1103
+ .string()
1104
+ .optional()
1105
+ .describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
621
1106
  });
622
1107
  /**
623
1108
  * Top-level configuration for jeeves-watcher.
@@ -654,7 +1139,23 @@
654
1139
  maps: zod.z
655
1140
  .record(zod.z.string(), jsonmap.jsonMapMapSchema)
656
1141
  .optional()
657
- .describe('Reusable named JsonMap transformations.'),
1142
+ .describe('Reusable named JsonMap transformations.'),
1143
+ /** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
1144
+ templates: zod.z
1145
+ .record(zod.z.string(), zod.z.string())
1146
+ .optional()
1147
+ .describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
1148
+ /** Custom Handlebars helper registration. */
1149
+ templateHelpers: zod.z
1150
+ .object({
1151
+ /** File paths to custom helper modules. */
1152
+ paths: zod.z
1153
+ .array(zod.z.string())
1154
+ .optional()
1155
+ .describe('File paths to custom helper modules.'),
1156
+ })
1157
+ .optional()
1158
+ .describe('Custom Handlebars helper registration.'),
658
1159
  /** Logging configuration. */
659
1160
  logging: loggingConfigSchema.optional().describe('Logging configuration.'),
660
1161
  /** Timeout in milliseconds for graceful shutdown. */
@@ -904,266 +1405,60 @@
904
1405
  dimensions,
905
1406
  async embed(texts) {
906
1407
  const vectors = await retry(async (attempt) => {
907
- if (attempt > 1) {
908
- log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
909
- }
910
- // embedDocuments returns vectors for multiple texts
911
- return embedder.embedDocuments(texts);
912
- }, {
913
- attempts: 5,
914
- baseDelayMs: 500,
915
- maxDelayMs: 10_000,
916
- jitter: 0.2,
917
- onRetry: ({ attempt, delayMs, error }) => {
918
- log.warn({
919
- attempt,
920
- delayMs,
921
- provider: 'gemini',
922
- model: config.model,
923
- err: normalizeError(error),
924
- }, 'Embedding call failed; will retry');
925
- },
926
- });
927
- // Validate dimensions
928
- for (const vector of vectors) {
929
- if (vector.length !== dimensions) {
930
- throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
931
- }
932
- }
933
- return vectors;
934
- },
935
- };
936
- }
937
- function createMockFromConfig(config) {
938
- const dimensions = config.dimensions ?? 768;
939
- return createMockProvider(dimensions);
940
- }
941
- const embeddingProviderRegistry = new Map([
942
- ['mock', createMockFromConfig],
943
- ['gemini', createGeminiProvider],
944
- ]);
945
- /**
946
- * Create an embedding provider based on the given configuration.
947
- *
948
- * Each provider is responsible for its own default dimensions.
949
- *
950
- * @param config - The embedding configuration.
951
- * @param logger - Optional pino logger for retry warnings.
952
- * @returns An {@link EmbeddingProvider} instance.
953
- * @throws If the configured provider is not supported.
954
- */
955
- function createEmbeddingProvider(config, logger) {
956
- const factory = embeddingProviderRegistry.get(config.provider);
957
- if (!factory) {
958
- throw new Error(`Unsupported embedding provider: ${config.provider}`);
959
- }
960
- return factory(config, logger);
961
- }
962
-
963
- /**
964
- * @module gitignore
965
- * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
966
- */
967
- /**
968
- * Find the git repo root by walking up from `startDir` looking for `.git/`.
969
- * Returns `undefined` if no repo is found.
970
- */
971
- function findRepoRoot(startDir) {
972
- let dir = node_path.resolve(startDir);
973
- const root = node_path.resolve('/');
974
- while (dir !== root) {
975
- if (node_fs.existsSync(node_path.join(dir, '.git')) &&
976
- node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
977
- return dir;
978
- }
979
- const parent = node_path.dirname(dir);
980
- if (parent === dir)
981
- break;
982
- dir = parent;
983
- }
984
- return undefined;
985
- }
986
- /**
987
- * Convert a watch path (directory, file path, or glob) to a concrete directory
988
- * that can be scanned for a repo root.
989
- */
990
- function watchPathToScanDir(watchPath) {
991
- const absPath = node_path.resolve(watchPath);
992
- try {
993
- return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
994
- }
995
- catch {
996
- // ignore
997
- }
998
- // If this is a glob, fall back to the non-glob prefix.
999
- const globMatch = /[*?[{]/.exec(watchPath);
1000
- if (!globMatch)
1001
- return undefined;
1002
- const prefix = watchPath.slice(0, globMatch.index);
1003
- const trimmed = prefix.trim();
1004
- const baseDir = trimmed.length === 0
1005
- ? '.'
1006
- : trimmed.endsWith('/') || trimmed.endsWith('\\')
1007
- ? trimmed
1008
- : node_path.dirname(trimmed);
1009
- const resolved = node_path.resolve(baseDir);
1010
- if (!node_fs.existsSync(resolved))
1011
- return undefined;
1012
- return resolved;
1013
- }
1014
- /**
1015
- * Recursively find all `.gitignore` files under `dir`.
1016
- * Skips `.git` and `node_modules` directories for performance.
1017
- */
1018
- function findGitignoreFiles(dir) {
1019
- const results = [];
1020
- const gitignorePath = node_path.join(dir, '.gitignore');
1021
- if (node_fs.existsSync(gitignorePath)) {
1022
- results.push(gitignorePath);
1023
- }
1024
- let entries;
1025
- try {
1026
- entries = node_fs.readdirSync(dir);
1027
- }
1028
- catch {
1029
- return results;
1030
- }
1031
- for (const entry of entries) {
1032
- if (entry === '.git' || entry === 'node_modules')
1033
- continue;
1034
- const fullPath = node_path.join(dir, entry);
1035
- try {
1036
- if (node_fs.statSync(fullPath).isDirectory()) {
1037
- results.push(...findGitignoreFiles(fullPath));
1038
- }
1039
- }
1040
- catch {
1041
- // Skip inaccessible entries
1042
- }
1043
- }
1044
- return results;
1045
- }
1046
- /**
1047
- * Parse a `.gitignore` file into an `ignore` instance.
1048
- */
1049
- function parseGitignore(gitignorePath) {
1050
- const content = node_fs.readFileSync(gitignorePath, 'utf8');
1051
- return ignore().add(content);
1052
- }
1053
- /**
1054
- * Normalize a path to use forward slashes (required by `ignore` package).
1055
- */
1056
- function toForwardSlash(p) {
1057
- return p.replace(/\\/g, '/');
1058
- }
1059
- /**
1060
- * Processor-level gitignore filter. Checks file paths against the nearest
1061
- * `.gitignore` chain in git repositories.
1062
- */
1063
- class GitignoreFilter {
1064
- repos = new Map();
1065
- /**
1066
- * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
1067
- *
1068
- * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
1069
- */
1070
- constructor(watchPaths) {
1071
- this.scan(watchPaths);
1072
- }
1073
- /**
1074
- * Scan paths for git repos and their `.gitignore` files.
1075
- */
1076
- scan(watchPaths) {
1077
- this.repos.clear();
1078
- const scannedDirs = new Set();
1079
- for (const watchPath of watchPaths) {
1080
- const scanDir = watchPathToScanDir(watchPath);
1081
- if (!scanDir)
1082
- continue;
1083
- if (scannedDirs.has(scanDir))
1084
- continue;
1085
- scannedDirs.add(scanDir);
1086
- const repoRoot = findRepoRoot(scanDir);
1087
- if (!repoRoot)
1088
- continue;
1089
- if (this.repos.has(repoRoot))
1090
- continue;
1091
- const gitignoreFiles = findGitignoreFiles(repoRoot);
1092
- const entries = gitignoreFiles.map((gf) => ({
1093
- dir: node_path.dirname(gf),
1094
- ig: parseGitignore(gf),
1095
- }));
1096
- // Sort deepest-first so nested `.gitignore` files are checked first
1097
- entries.sort((a, b) => b.dir.length - a.dir.length);
1098
- this.repos.set(repoRoot, { root: repoRoot, entries });
1099
- }
1100
- }
1101
- /**
1102
- * Check whether a file path is ignored by any applicable `.gitignore`.
1103
- *
1104
- * @param filePath - Absolute file path to check.
1105
- * @returns `true` if the file should be ignored.
1106
- */
1107
- isIgnored(filePath) {
1108
- const absPath = node_path.resolve(filePath);
1109
- for (const [, repo] of this.repos) {
1110
- // Check if file is within this repo
1111
- const relToRepo = node_path.relative(repo.root, absPath);
1112
- if (relToRepo.startsWith('..') || relToRepo.startsWith(node_path.resolve('/'))) {
1113
- continue;
1114
- }
1115
- // Check each `.gitignore` entry (deepest-first)
1116
- for (const entry of repo.entries) {
1117
- const relToEntry = node_path.relative(entry.dir, absPath);
1118
- if (relToEntry.startsWith('..'))
1119
- continue;
1120
- const normalized = toForwardSlash(relToEntry);
1121
- if (entry.ig.ignores(normalized)) {
1122
- return true;
1123
- }
1124
- }
1125
- }
1126
- return false;
1127
- }
1128
- /**
1129
- * Invalidate and re-parse a specific `.gitignore` file.
1130
- * Call when a `.gitignore` file is added, changed, or removed.
1131
- *
1132
- * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
1133
- */
1134
- invalidate(gitignorePath) {
1135
- const absPath = node_path.resolve(gitignorePath);
1136
- const gitignoreDir = node_path.dirname(absPath);
1137
- for (const [, repo] of this.repos) {
1138
- const relToRepo = node_path.relative(repo.root, gitignoreDir);
1139
- if (relToRepo.startsWith('..'))
1140
- continue;
1141
- // Remove old entry for this directory
1142
- repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
1143
- // Re-parse if file still exists
1144
- if (node_fs.existsSync(absPath)) {
1145
- repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
1146
- // Re-sort deepest-first
1147
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1148
- }
1149
- return;
1150
- }
1151
- // If not in any known repo, check if it's in a repo we haven't scanned
1152
- const repoRoot = findRepoRoot(gitignoreDir);
1153
- if (repoRoot && node_fs.existsSync(absPath)) {
1154
- const entries = [
1155
- { dir: gitignoreDir, ig: parseGitignore(absPath) },
1156
- ];
1157
- if (this.repos.has(repoRoot)) {
1158
- const repo = this.repos.get(repoRoot);
1159
- repo.entries.push(entries[0]);
1160
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1161
- }
1162
- else {
1163
- this.repos.set(repoRoot, { root: repoRoot, entries });
1408
+ if (attempt > 1) {
1409
+ log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
1410
+ }
1411
+ // embedDocuments returns vectors for multiple texts
1412
+ return embedder.embedDocuments(texts);
1413
+ }, {
1414
+ attempts: 5,
1415
+ baseDelayMs: 500,
1416
+ maxDelayMs: 10_000,
1417
+ jitter: 0.2,
1418
+ onRetry: ({ attempt, delayMs, error }) => {
1419
+ log.warn({
1420
+ attempt,
1421
+ delayMs,
1422
+ provider: 'gemini',
1423
+ model: config.model,
1424
+ err: normalizeError(error),
1425
+ }, 'Embedding call failed; will retry');
1426
+ },
1427
+ });
1428
+ // Validate dimensions
1429
+ for (const vector of vectors) {
1430
+ if (vector.length !== dimensions) {
1431
+ throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
1432
+ }
1164
1433
  }
1165
- }
1434
+ return vectors;
1435
+ },
1436
+ };
1437
+ }
1438
+ function createMockFromConfig(config) {
1439
+ const dimensions = config.dimensions ?? 768;
1440
+ return createMockProvider(dimensions);
1441
+ }
1442
+ const embeddingProviderRegistry = new Map([
1443
+ ['mock', createMockFromConfig],
1444
+ ['gemini', createGeminiProvider],
1445
+ ]);
1446
+ /**
1447
+ * Create an embedding provider based on the given configuration.
1448
+ *
1449
+ * Each provider is responsible for its own default dimensions.
1450
+ *
1451
+ * @param config - The embedding configuration.
1452
+ * @param logger - Optional pino logger for retry warnings.
1453
+ * @returns An {@link EmbeddingProvider} instance.
1454
+ * @throws If the configured provider is not supported.
1455
+ */
1456
+ function createEmbeddingProvider(config, logger) {
1457
+ const factory = embeddingProviderRegistry.get(config.provider);
1458
+ if (!factory) {
1459
+ throw new Error(`Unsupported embedding provider: ${config.provider}`);
1166
1460
  }
1461
+ return factory(config, logger);
1167
1462
  }
1168
1463
 
1169
1464
  /**
@@ -1395,7 +1690,7 @@
1395
1690
  };
1396
1691
  }
1397
1692
  /**
1398
- * Apply compiled inference rules to file attributes, returning merged metadata.
1693
+ * Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
1399
1694
  *
1400
1695
  * Rules are evaluated in order; later rules override earlier ones.
1401
1696
  * If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
@@ -1405,15 +1700,18 @@
1405
1700
  * @param attributes - The file attributes to match against.
1406
1701
  * @param namedMaps - Optional record of named JsonMap definitions.
1407
1702
  * @param logger - Optional logger for warnings (falls back to console.warn).
1408
- * @returns The merged metadata from all matching rules.
1703
+ * @param templateEngine - Optional template engine for rendering content templates.
1704
+ * @param configDir - Optional config directory for resolving .json map file paths.
1705
+ * @returns The merged metadata and optional rendered content.
1409
1706
  */
1410
- async function applyRules(compiledRules, attributes, namedMaps, logger) {
1707
+ async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
1411
1708
  // JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
1412
1709
  // Our helper functions accept multiple args, which JsonMap supports at runtime.
1413
1710
  const lib = createJsonMapLib();
1414
1711
  let merged = {};
1712
+ let renderedContent = null;
1415
1713
  const log = logger ?? console;
1416
- for (const { rule, validate } of compiledRules) {
1714
+ for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
1417
1715
  if (validate(attributes)) {
1418
1716
  // Apply set resolution
1419
1717
  const setOutput = resolveSet(rule.set, attributes);
@@ -1423,10 +1721,24 @@
1423
1721
  let mapDef;
1424
1722
  // Resolve map reference
1425
1723
  if (typeof rule.map === 'string') {
1426
- mapDef = namedMaps?.[rule.map];
1427
- if (!mapDef) {
1428
- log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1429
- continue;
1724
+ if (rule.map.endsWith('.json') && configDir) {
1725
+ // File path: load from .json file
1726
+ try {
1727
+ const mapPath = node_path.resolve(configDir, rule.map);
1728
+ const raw = node_fs.readFileSync(mapPath, 'utf-8');
1729
+ mapDef = JSON.parse(raw);
1730
+ }
1731
+ catch (error) {
1732
+ log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
1733
+ continue;
1734
+ }
1735
+ }
1736
+ else {
1737
+ mapDef = namedMaps?.[rule.map];
1738
+ if (!mapDef) {
1739
+ log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1740
+ continue;
1741
+ }
1430
1742
  }
1431
1743
  }
1432
1744
  else {
@@ -1449,9 +1761,31 @@
1449
1761
  log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
1450
1762
  }
1451
1763
  }
1764
+ // Render template if present
1765
+ if (rule.template && templateEngine) {
1766
+ const templateKey = `rule-${String(ruleIndex)}`;
1767
+ // Build template context: attributes (with json spread at top) + map output
1768
+ const context = {
1769
+ ...(attributes.json ?? {}),
1770
+ ...attributes,
1771
+ ...merged,
1772
+ };
1773
+ try {
1774
+ const result = templateEngine.render(templateKey, context);
1775
+ if (result && result.trim()) {
1776
+ renderedContent = result;
1777
+ }
1778
+ else {
1779
+ log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
1780
+ }
1781
+ }
1782
+ catch (error) {
1783
+ log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
1784
+ }
1785
+ }
1452
1786
  }
1453
1787
  }
1454
- return merged;
1788
+ return { metadata: merged, renderedContent };
1455
1789
  }
1456
1790
 
1457
1791
  /**
@@ -1540,23 +1874,32 @@
1540
1874
  * @param metadataDir - The metadata directory for enrichment files.
1541
1875
  * @param maps - Optional named JsonMap definitions.
1542
1876
  * @param logger - Optional logger for rule warnings.
1877
+ * @param templateEngine - Optional template engine for content templates.
1878
+ * @param configDir - Optional config directory for resolving file paths.
1543
1879
  * @returns The merged metadata and intermediate data.
1544
1880
  */
1545
/**
 * Build the merged metadata for a file: extract text, run inference rules,
 * then overlay enrichment metadata (enrichment keys win over inferred ones).
 *
 * @param filePath - Path of the file to process.
 * @param compiledRules - The compiled inference rules.
 * @param metadataDir - The metadata directory for enrichment files.
 * @param maps - Optional named JsonMap definitions.
 * @param logger - Optional logger for rule warnings.
 * @param templateEngine - Optional template engine for content templates.
 * @param configDir - Optional config directory for resolving file paths.
 * @returns The merged metadata plus intermediates (attributes, extracted text, rendered content).
 */
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
    const ext = node_path.extname(filePath);
    const stats = await promises.stat(filePath);
    // 1. Extract text and structured data from the file.
    const extracted = await extractText(filePath, ext);
    // 2. Derive file attributes and apply the inference rules.
    const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
    const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
    // 3. Merge enrichment metadata on top; enrichment takes precedence.
    const enrichment = await readMetadata(filePath, metadataDir);
    const metadata = { ...inferred, ...(enrichment ?? {}) };
    return { inferred, enrichment, metadata, attributes, extracted, renderedContent };
}
1561
1904
 
1562
1905
  /**
@@ -1627,6 +1970,7 @@
1627
1970
  vectorStore;
1628
1971
  compiledRules;
1629
1972
  logger;
1973
+ templateEngine;
1630
1974
  /**
1631
1975
  * Create a new DocumentProcessor.
1632
1976
  *
@@ -1635,13 +1979,15 @@
1635
1979
  * @param vectorStore - The vector store client.
1636
1980
  * @param compiledRules - The compiled inference rules.
1637
1981
  * @param logger - The logger instance.
1982
+ * @param templateEngine - Optional template engine for content templates.
1638
1983
  */
1639
- constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
1984
+ constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
1640
1985
  this.config = config;
1641
1986
  this.embeddingProvider = embeddingProvider;
1642
1987
  this.vectorStore = vectorStore;
1643
1988
  this.compiledRules = compiledRules;
1644
1989
  this.logger = logger;
1990
+ this.templateEngine = templateEngine;
1645
1991
  }
1646
1992
  /**
1647
1993
  * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
@@ -1652,13 +1998,15 @@
1652
1998
  try {
1653
1999
  const ext = node_path.extname(filePath);
1654
2000
  // 1. Build merged metadata + extract text
1655
- const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
1656
- if (!extracted.text.trim()) {
2001
+ const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
2002
+ // Use rendered template content if available, otherwise raw extracted text
2003
+ const textToEmbed = renderedContent ?? extracted.text;
2004
+ if (!textToEmbed.trim()) {
1657
2005
  this.logger.debug({ filePath }, 'Skipping empty file');
1658
2006
  return;
1659
2007
  }
1660
2008
  // 2. Content hash check — skip if unchanged
1661
- const hash = contentHash(extracted.text);
2009
+ const hash = contentHash(textToEmbed);
1662
2010
  const baseId = pointId(filePath, 0);
1663
2011
  const existingPayload = await this.vectorStore.getPayload(baseId);
1664
2012
  if (existingPayload && existingPayload['content_hash'] === hash) {
@@ -1670,7 +2018,7 @@
1670
2018
  const chunkSize = this.config.chunkSize ?? 1000;
1671
2019
  const chunkOverlap = this.config.chunkOverlap ?? 200;
1672
2020
  const splitter = createSplitter(ext, chunkSize, chunkOverlap);
1673
- const chunks = await splitter.splitText(extracted.text);
2021
+ const chunks = await splitter.splitText(textToEmbed);
1674
2022
  // 4. Embed all chunks
1675
2023
  const vectors = await this.embeddingProvider.embed(chunks);
1676
2024
  // 5. Upsert all chunk points
@@ -1764,7 +2112,7 @@
1764
2112
  return null;
1765
2113
  }
1766
2114
  // Build merged metadata (lightweight — no embedding)
1767
- const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
2115
+ const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
1768
2116
  // Update all chunk payloads
1769
2117
  const totalChunks = getChunkCount(existingPayload);
1770
2118
  const ids = chunkIds(filePath, totalChunks);
@@ -1782,8 +2130,17 @@
1782
2130
  *
1783
2131
  * @param compiledRules - The newly compiled rules.
1784
2132
  */
1785
- updateRules(compiledRules) {
2133
+ /**
2134
+ * Update compiled inference rules and optionally the template engine.
2135
+ *
2136
+ * @param compiledRules - The newly compiled rules.
2137
+ * @param templateEngine - Optional updated template engine.
2138
+ */
2139
+ updateRules(compiledRules, templateEngine) {
1786
2140
  this.compiledRules = compiledRules;
2141
+ if (templateEngine) {
2142
+ this.templateEngine = templateEngine;
2143
+ }
1787
2144
  this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
1788
2145
  }
1789
2146
  }
@@ -2312,6 +2669,76 @@
2312
2669
  }
2313
2670
  }
2314
2671
 
2672
+ /**
2673
+ * @module watcher/globToDir
2674
+ * Adapts glob-based watch config to chokidar v4+, which removed glob support
2675
+ * (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
2676
+ * strings, silently producing zero events. This module extracts static directory
2677
+ * roots from glob patterns for chokidar to watch, then filters emitted events
2678
+ * against the original globs via picomatch.
2679
+ */
2680
/**
 * Extract the static directory root from a glob pattern.
 * Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`); `/` for an
 * absolute glob with no static prefix, `.` for a relative one.
 */
function globRoot(glob) {
    // Normalize Windows separators so segment splitting is uniform.
    const normalized = glob.replace(/\\/g, '/');
    const segments = normalized.split('/');
    const staticSegments = [];
    for (const seg of segments) {
        // Stop at the first segment containing glob syntax; everything after is dynamic.
        if (/[*?{[\]]/.test(seg))
            break;
        staticSegments.push(seg);
    }
    const root = staticSegments.join('/');
    if (root)
        return root;
    // Fix: an absolute glob such as `/**` has an empty leading segment, so the
    // join above yields ''. Its static root is the filesystem root, not the
    // current directory.
    return normalized.startsWith('/') ? '/' : '.';
}
2698
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Comparison is slash-normalized and case-insensitive (Windows-style), but the
 * returned paths preserve their original casing: the previous implementation
 * returned lowercased paths, which may not exist on case-sensitive filesystems
 * and would leave chokidar watching nothing.
 *
 * @param roots - Array of directory paths.
 * @returns Deduplicated array (sorted by folded key) with subdirectories removed.
 */
function deduplicateRoots(roots) {
    // Map each case-folded key to the first original (slash-normalized) spelling.
    const originals = new Map();
    for (const root of roots) {
        const slashed = root.replace(/\\/g, '/');
        const key = slashed.toLowerCase();
        if (!originals.has(key))
            originals.set(key, slashed);
    }
    const keys = [...originals.keys()].sort();
    return keys
        .filter((key) => {
        const withSlash = key.endsWith('/') ? key : key + '/';
        // Drop a root when another distinct root is a proper ancestor of it.
        return !keys.some((other) => other !== key && withSlash.startsWith(other + '/'));
    })
        .map((key) => originals.get(key));
}
2712
/**
 * Build a picomatch matcher from an array of glob patterns.
 * Windows paths are normalized (backslash → forward slash) before matching;
 * matching is case-insensitive and includes dotfiles.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function that tests whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const slashedGlobs = globs.map((pattern) => pattern.replace(/\\/g, '/'));
    const isMatch = picomatch(slashedGlobs, { dot: true, nocase: true });
    return (filePath) => isMatch(filePath.replace(/\\/g, '/'));
}
2728
/**
 * Convert an array of glob patterns into chokidar-compatible directory roots
 * and a filter function for post-hoc event filtering.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
2741
+
2315
2742
  /**
2316
2743
  * @module watcher
2317
2744
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -2326,6 +2753,7 @@
2326
2753
  logger;
2327
2754
  health;
2328
2755
  gitignoreFilter;
2756
+ globMatches;
2329
2757
  watcher;
2330
2758
  /**
2331
2759
  * Create a new FileSystemWatcher.
@@ -2342,6 +2770,7 @@
2342
2770
  this.processor = processor;
2343
2771
  this.logger = logger;
2344
2772
  this.gitignoreFilter = options.gitignoreFilter;
2773
+ this.globMatches = () => true;
2345
2774
  const healthOptions = {
2346
2775
  maxRetries: options.maxRetries,
2347
2776
  maxBackoffMs: options.maxBackoffMs,
@@ -2354,7 +2783,13 @@
2354
2783
  * Start watching the filesystem and processing events.
2355
2784
  */
2356
2785
  start() {
2357
- this.watcher = chokidar.watch(this.config.paths, {
2786
+ // Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
2787
+ // Glob patterns are silently treated as literal strings, producing zero
2788
+ // events. We extract static directory roots for chokidar to watch, then
2789
+ // filter emitted events against the original globs via picomatch.
2790
+ const { roots, matches } = resolveWatchPaths(this.config.paths);
2791
+ this.globMatches = matches;
2792
+ this.watcher = chokidar.watch(roots, {
2358
2793
  ignored: this.config.ignored,
2359
2794
  usePolling: this.config.usePolling,
2360
2795
  interval: this.config.pollIntervalMs,
@@ -2365,6 +2800,8 @@
2365
2800
  });
2366
2801
  this.watcher.on('add', (path) => {
2367
2802
  this.handleGitignoreChange(path);
2803
+ if (!this.globMatches(path))
2804
+ return;
2368
2805
  if (this.isGitignored(path))
2369
2806
  return;
2370
2807
  this.logger.debug({ path }, 'File added');
@@ -2372,6 +2809,8 @@
2372
2809
  });
2373
2810
  this.watcher.on('change', (path) => {
2374
2811
  this.handleGitignoreChange(path);
2812
+ if (!this.globMatches(path))
2813
+ return;
2375
2814
  if (this.isGitignored(path))
2376
2815
  return;
2377
2816
  this.logger.debug({ path }, 'File changed');
@@ -2379,6 +2818,8 @@
2379
2818
  });
2380
2819
  this.watcher.on('unlink', (path) => {
2381
2820
  this.handleGitignoreChange(path);
2821
+ if (!this.globMatches(path))
2822
+ return;
2382
2823
  if (this.isGitignored(path))
2383
2824
  return;
2384
2825
  this.logger.debug({ path }, 'File removed');
@@ -2451,51 +2892,21 @@
2451
2892
  }
2452
2893
 
2453
2894
  /**
2454
- * @module app/configWatcher
2455
- * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
2456
- */
2457
- /**
2458
- * Debounced config file watcher.
2895
+ * @module app/factories
2896
+ * Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
2459
2897
  */
2460
- class ConfigWatcher {
2461
- options;
2462
- watcher;
2463
- debounce;
2464
- constructor(options) {
2465
- this.options = options;
2466
- }
2467
- start() {
2468
- if (!this.options.enabled)
2469
- return;
2470
- this.watcher = chokidar.watch(this.options.configPath, {
2471
- ignoreInitial: true,
2472
- });
2473
- this.watcher.on('change', () => {
2474
- if (this.debounce)
2475
- clearTimeout(this.debounce);
2476
- this.debounce = setTimeout(() => {
2477
- void this.options.onChange();
2478
- }, this.options.debounceMs);
2479
- });
2480
- this.watcher.on('error', (error) => {
2481
- this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
2482
- });
2483
- this.options.logger.info({
2484
- configPath: this.options.configPath,
2485
- debounceMs: this.options.debounceMs,
2486
- }, 'Config watcher started');
2487
- }
2488
- async stop() {
2489
- if (this.debounce) {
2490
- clearTimeout(this.debounce);
2491
- this.debounce = undefined;
2492
- }
2493
- if (this.watcher) {
2494
- await this.watcher.close();
2495
- this.watcher = undefined;
2496
- }
2497
- }
2498
- }
2898
/**
 * Default component factories wiring real implementations.
 * Override individual entries in tests to inject mocks.
 */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient: (config, dimensions, logger) =>
        new VectorStoreClient(config, dimensions, logger),
    compileRules,
    createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) =>
        new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine),
    createEventQueue: (options) => new EventQueue(options),
    createFileSystemWatcher: (config, queue, processor, logger, options) =>
        new FileSystemWatcher(config, queue, processor, logger, options),
    createApiServer,
};
2499
2910
 
2500
2911
  /**
2501
2912
  * @module app/shutdown
@@ -2515,17 +2926,28 @@
2515
2926
  process.on('SIGINT', () => void shutdown());
2516
2927
  }
2517
2928
 
2518
- const defaultFactories = {
2519
- loadConfig,
2520
- createLogger,
2521
- createEmbeddingProvider,
2522
- createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
2523
- compileRules,
2524
- createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
2525
- createEventQueue: (options) => new EventQueue(options),
2526
- createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
2527
- createApiServer,
2528
- };
2929
+ /**
2930
+ * @module app/startFromConfig
2931
+ * Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
2932
+ */
2933
+ /**
2934
+ * Create and start a JeevesWatcher from a config file path.
2935
+ *
2936
+ * @param configPath - Optional path to the configuration file.
2937
+ * @returns The running JeevesWatcher instance.
2938
+ */
2939
async function startFromConfig(configPath) {
    const config = await loadConfig(configPath);
    const watcherApp = new JeevesWatcher(config, configPath);
    // Register SIGTERM/SIGINT handlers before start() so a signal received
    // during startup still triggers a clean stop.
    installShutdownHandlers(() => watcherApp.stop());
    await watcherApp.start();
    return watcherApp;
}
2946
+
2947
+ /**
2948
+ * @module app
2949
+ * Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
2950
+ */
2529
2951
  /**
2530
2952
  * Main application class that wires together all components.
2531
2953
  */
@@ -2560,56 +2982,26 @@
2560
2982
  async start() {
2561
2983
  const logger = this.factories.createLogger(this.config.logging);
2562
2984
  this.logger = logger;
2563
- let embeddingProvider;
2564
- try {
2565
- embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
2566
- }
2567
- catch (error) {
2568
- logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
2569
- throw error;
2570
- }
2571
- const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2572
- await vectorStore.ensureCollection();
2985
+ const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
2573
2986
  const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
2574
- const processorConfig = {
2987
+ const configDir = this.configPath ? node_path.dirname(this.configPath) : '.';
2988
+ const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
2989
+ const processor = this.factories.createDocumentProcessor({
2575
2990
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
2576
2991
  chunkSize: this.config.embedding.chunkSize,
2577
2992
  chunkOverlap: this.config.embedding.chunkOverlap,
2578
2993
  maps: this.config.maps,
2579
- };
2580
- const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
2994
+ configDir,
2995
+ }, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
2581
2996
  this.processor = processor;
2582
- const queue = this.factories.createEventQueue({
2997
+ this.queue = this.factories.createEventQueue({
2583
2998
  debounceMs: this.config.watch.debounceMs ?? 2000,
2584
2999
  concurrency: this.config.embedding.concurrency ?? 5,
2585
3000
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2586
3001
  });
2587
- this.queue = queue;
2588
- const respectGitignore = this.config.watch.respectGitignore ?? true;
2589
- const gitignoreFilter = respectGitignore
2590
- ? new GitignoreFilter(this.config.watch.paths)
2591
- : undefined;
2592
- const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2593
- maxRetries: this.config.maxRetries,
2594
- maxBackoffMs: this.config.maxBackoffMs,
2595
- onFatalError: this.runtimeOptions.onFatalError,
2596
- gitignoreFilter,
2597
- });
2598
- this.watcher = watcher;
2599
- const server = this.factories.createApiServer({
2600
- processor,
2601
- vectorStore,
2602
- embeddingProvider,
2603
- queue,
2604
- config: this.config,
2605
- logger,
2606
- });
2607
- this.server = server;
2608
- await server.listen({
2609
- host: this.config.api?.host ?? '127.0.0.1',
2610
- port: this.config.api?.port ?? 3456,
2611
- });
2612
- watcher.start();
3002
+ this.watcher = this.createWatcher(this.queue, processor, logger);
3003
+ this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
3004
+ this.watcher.start();
2613
3005
  this.startConfigWatch();
2614
3006
  logger.info('jeeves-watcher started');
2615
3007
  }
@@ -2640,22 +3032,61 @@
2640
3032
  }
2641
3033
  this.logger?.info('jeeves-watcher stopped');
2642
3034
  }
3035
+ async initEmbeddingAndStore(logger) {
3036
+ let embeddingProvider;
3037
+ try {
3038
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
3039
+ }
3040
+ catch (error) {
3041
+ logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
3042
+ throw error;
3043
+ }
3044
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
3045
+ await vectorStore.ensureCollection();
3046
+ return { embeddingProvider, vectorStore };
3047
+ }
3048
+ createWatcher(queue, processor, logger) {
3049
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
3050
+ const gitignoreFilter = respectGitignore
3051
+ ? new GitignoreFilter(this.config.watch.paths)
3052
+ : undefined;
3053
+ return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
3054
+ maxRetries: this.config.maxRetries,
3055
+ maxBackoffMs: this.config.maxBackoffMs,
3056
+ onFatalError: this.runtimeOptions.onFatalError,
3057
+ gitignoreFilter,
3058
+ });
3059
+ }
3060
+ async startApiServer(processor, vectorStore, embeddingProvider, logger) {
3061
+ const server = this.factories.createApiServer({
3062
+ processor,
3063
+ vectorStore,
3064
+ embeddingProvider,
3065
+ queue: this.queue,
3066
+ config: this.config,
3067
+ logger,
3068
+ });
3069
+ await server.listen({
3070
+ host: this.config.api?.host ?? '127.0.0.1',
3071
+ port: this.config.api?.port ?? 3456,
3072
+ });
3073
+ return server;
3074
+ }
2643
3075
  startConfigWatch() {
2644
3076
  const logger = this.logger;
2645
3077
  if (!logger)
2646
3078
  return;
2647
3079
  const enabled = this.config.configWatch?.enabled ?? true;
2648
- if (!enabled)
2649
- return;
2650
- if (!this.configPath) {
2651
- logger.debug('Config watch enabled, but no config path was provided');
3080
+ if (!enabled || !this.configPath) {
3081
+ if (!this.configPath) {
3082
+ logger.debug('Config watch enabled, but no config path was provided');
3083
+ }
2652
3084
  return;
2653
3085
  }
2654
- const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
2655
3086
  this.configWatcher = new ConfigWatcher({
2656
3087
  configPath: this.configPath,
2657
3088
  enabled,
2658
- debounceMs,
3089
+ debounceMs: this.config.configWatch?.debounceMs ?? 10000,
2659
3090
  logger,
2660
3091
  onChange: async () => this.reloadConfig(),
2661
3092
  });
@@ -2677,7 +3108,9 @@
2677
3108
  const newConfig = await this.factories.loadConfig(this.configPath);
2678
3109
  this.config = newConfig;
2679
3110
  const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
2680
- processor.updateRules(compiledRules);
3111
+ const reloadConfigDir = node_path.dirname(this.configPath);
3112
+ const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
3113
+ processor.updateRules(compiledRules, newTemplateEngine);
2681
3114
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
2682
3115
  }
2683
3116
  catch (error) {
@@ -2685,19 +3118,7 @@
2685
3118
  }
2686
3119
  }
2687
3120
  }
2688
- /**
2689
- * Create and start a JeevesWatcher from a config file path.
2690
- *
2691
- * @param configPath - Optional path to the configuration file.
2692
- * @returns The running JeevesWatcher instance.
2693
- */
2694
- async function startFromConfig(configPath) {
2695
- const config = await loadConfig(configPath);
2696
- const app = new JeevesWatcher(config, configPath);
2697
- installShutdownHandlers(() => app.stop());
2698
- await app.start();
2699
- return app;
2700
- }
3121
+ // startFromConfig re-exported from ./startFromConfig
2701
3122
 
2702
3123
  exports.DocumentProcessor = DocumentProcessor;
2703
3124
  exports.EventQueue = EventQueue;
@@ -2705,15 +3126,18 @@
2705
3126
  exports.GitignoreFilter = GitignoreFilter;
2706
3127
  exports.JeevesWatcher = JeevesWatcher;
2707
3128
  exports.SystemHealth = SystemHealth;
3129
+ exports.TemplateEngine = TemplateEngine;
2708
3130
  exports.VectorStoreClient = VectorStoreClient;
2709
3131
  exports.apiConfigSchema = apiConfigSchema;
2710
3132
  exports.applyRules = applyRules;
2711
3133
  exports.buildAttributes = buildAttributes;
3134
+ exports.buildTemplateEngine = buildTemplateEngine;
2712
3135
  exports.compileRules = compileRules;
2713
3136
  exports.configWatchConfigSchema = configWatchConfigSchema;
2714
3137
  exports.contentHash = contentHash;
2715
3138
  exports.createApiServer = createApiServer;
2716
3139
  exports.createEmbeddingProvider = createEmbeddingProvider;
3140
+ exports.createHandlebarsInstance = createHandlebarsInstance;
2717
3141
  exports.createLogger = createLogger;
2718
3142
  exports.deleteMetadata = deleteMetadata;
2719
3143
  exports.embeddingConfigSchema = embeddingConfigSchema;
@@ -2721,13 +3145,16 @@
2721
3145
  exports.inferenceRuleSchema = inferenceRuleSchema;
2722
3146
  exports.jeevesWatcherConfigSchema = jeevesWatcherConfigSchema;
2723
3147
  exports.loadConfig = loadConfig;
3148
+ exports.loadCustomHelpers = loadCustomHelpers;
2724
3149
  exports.loggingConfigSchema = loggingConfigSchema;
2725
3150
  exports.metadataPath = metadataPath;
2726
3151
  exports.pointId = pointId;
2727
3152
  exports.readMetadata = readMetadata;
3153
+ exports.registerBuiltinHelpers = registerBuiltinHelpers;
3154
+ exports.resolveTemplateSource = resolveTemplateSource;
2728
3155
  exports.startFromConfig = startFromConfig;
2729
3156
  exports.vectorStoreConfigSchema = vectorStoreConfigSchema;
2730
3157
  exports.watchConfigSchema = watchConfigSchema;
2731
3158
  exports.writeMetadata = writeMetadata;
2732
3159
 
2733
- })(this["jeeves-watcher"] = this["jeeves-watcher"] || {}, Fastify, promises, node_path, picomatch, radash, node_crypto, cosmiconfig, zod, jsonmap, googleGenai, node_fs, ignore, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest, chokidar);
3160
+ })(this["jeeves-watcher"] = this["jeeves-watcher"] || {}, Fastify, promises, node_path, picomatch, radash, node_crypto, node_fs, ignore, Handlebars, dayjs, hastUtilToMdast, mdastUtilFromAdf, mdastUtilToMarkdown, rehypeParse, unified, chokidar, cosmiconfig, zod, jsonmap, googleGenai, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest);