@karmaniverous/jeeves-watcher 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6,12 +6,20 @@ var node_path = require('node:path');
6
6
  var picomatch = require('picomatch');
7
7
  var radash = require('radash');
8
8
  var node_crypto = require('node:crypto');
9
+ var node_fs = require('node:fs');
10
+ var ignore = require('ignore');
11
+ var Handlebars = require('handlebars');
12
+ var dayjs = require('dayjs');
13
+ var hastUtilToMdast = require('hast-util-to-mdast');
14
+ var mdastUtilFromAdf = require('mdast-util-from-adf');
15
+ var mdastUtilToMarkdown = require('mdast-util-to-markdown');
16
+ var rehypeParse = require('rehype-parse');
17
+ var unified = require('unified');
18
+ var chokidar = require('chokidar');
9
19
  var cosmiconfig = require('cosmiconfig');
10
20
  var zod = require('zod');
11
21
  var jsonmap = require('@karmaniverous/jsonmap');
12
22
  var googleGenai = require('@langchain/google-genai');
13
- var node_fs = require('node:fs');
14
- var ignore = require('ignore');
15
23
  var pino = require('pino');
16
24
  var uuid = require('uuid');
17
25
  var cheerio = require('cheerio');
@@ -21,7 +29,6 @@ var Ajv = require('ajv');
21
29
  var addFormats = require('ajv-formats');
22
30
  var textsplitters = require('@langchain/textsplitters');
23
31
  var jsClientRest = require('@qdrant/js-client-rest');
24
- var chokidar = require('chokidar');
25
32
 
26
33
  function _interopNamespaceDefault(e) {
27
34
  var n = Object.create(null);
@@ -438,6 +445,486 @@ function createApiServer(options) {
438
445
  return app;
439
446
  }
440
447
 
448
+ /**
449
+ * @module gitignore
450
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
451
+ */
452
/**
 * Locate the enclosing git repository of a directory.
 *
 * Walks from `startDir` towards the filesystem root and returns the first
 * ancestor (or `startDir` itself) that contains a `.git` directory. A `.git`
 * entry that is not a directory does not count, and the filesystem root
 * itself is never examined.
 *
 * @param startDir - Directory to start the upward search from.
 * @returns The absolute repo root, or `undefined` when none is found.
 */
function findRepoRoot(startDir) {
    const fsRoot = node_path.resolve('/');
    let current = node_path.resolve(startDir);
    while (current !== fsRoot) {
        const gitDir = node_path.join(current, '.git');
        if (node_fs.existsSync(gitDir) && node_fs.statSync(gitDir).isDirectory()) {
            return current;
        }
        const up = node_path.dirname(current);
        // dirname() is a fixed point at the top of the tree (e.g. a drive root).
        if (up === current) {
            return undefined;
        }
        current = up;
    }
    return undefined;
}
471
/**
 * Map a watch path to an existing directory that can seed a repo-root search.
 *
 * - An existing directory maps to itself.
 * - Any other existing path maps to its parent directory.
 * - A path that cannot be stat'ed is treated as a glob: the text before the
 *   first glob character (`*`, `?`, `[`, `{`) is reduced to a directory.
 *
 * @param watchPath - Directory, file path, or glob pattern.
 * @returns An absolute directory, or `undefined` when the path is neither an
 * existing path nor a glob with an existing base directory.
 */
function watchPathToScanDir(watchPath) {
    const target = node_path.resolve(watchPath);
    try {
        if (node_fs.statSync(target).isDirectory()) {
            return target;
        }
        return node_path.dirname(target);
    }
    catch {
        // Not an existing path; fall through and try to interpret it as a glob.
    }
    const firstGlobChar = watchPath.search(/[*?[{]/);
    if (firstGlobChar === -1) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlobChar).trim();
    let baseDir;
    if (literalPrefix.length === 0) {
        baseDir = '.';
    }
    else if (literalPrefix.endsWith('/') || literalPrefix.endsWith('\\')) {
        // Prefix already names a directory, e.g. "docs/" from "docs/**".
        baseDir = literalPrefix;
    }
    else {
        // Prefix ends mid-segment, e.g. "docs/ab" from "docs/ab*"; use its parent.
        baseDir = node_path.dirname(literalPrefix);
    }
    const candidate = node_path.resolve(baseDir);
    return node_fs.existsSync(candidate) ? candidate : undefined;
}
499
/**
 * Recursively collect the paths of all `.gitignore` files under `dir`.
 *
 * `.git` and `node_modules` directories are skipped for performance.
 * Symbolic links are not followed, so a link that points back at an ancestor
 * cannot cause runaway recursion or duplicate results.
 *
 * @param dir - Directory to search.
 * @returns Paths of the `.gitignore` files found; each directory's own file
 * precedes those of its subdirectories. Unreadable directories contribute
 * nothing beyond their own `.gitignore`.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const gitignorePath = node_path.join(dir, '.gitignore');
    if (node_fs.existsSync(gitignorePath)) {
        results.push(gitignorePath);
    }
    let entries;
    try {
        // Dirent objects carry the entry type, avoiding one stat call per entry.
        entries = node_fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        return results;
    }
    for (const entry of entries) {
        if (entry.name === '.git' || entry.name === 'node_modules') {
            continue;
        }
        // Dirent#isDirectory() is false for symlinks, so links are never traversed.
        if (!entry.isDirectory()) {
            continue;
        }
        for (const nested of findGitignoreFiles(node_path.join(dir, entry.name))) {
            results.push(nested);
        }
    }
    return results;
}
531
/**
 * Read a `.gitignore` file and load its patterns into a fresh `ignore` matcher.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read (as UTF-8).
 * @returns The matcher produced by adding the file content to `ignore()`.
 */
function parseGitignore(gitignorePath) {
    const patterns = node_fs.readFileSync(gitignorePath, 'utf8');
    const matcher = ignore();
    return matcher.add(patterns);
}
538
/**
 * Replace every backslash in a path with a forward slash, the separator the
 * `ignore` package requires.
 *
 * @param p - Path string to normalize.
 * @returns The path with `/` in place of every `\`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
544
/**
 * Report whether a `path.relative(base, target)` result lies outside `base`.
 *
 * A result is outside when it climbs out via a leading `..` path segment, or
 * when it is absolute (on Windows, `path.relative()` across drives returns the
 * absolute target path, which the `ignore` library rejects).
 * Names that merely begin with two dots (e.g. `..cache`) are inside.
 */
function escapesBaseDir(relPath) {
    return (relPath === '..' ||
        relPath.startsWith(`..${node_path.sep}`) ||
        node_path.isAbsolute(relPath));
}
/** Sort comparator placing entries with longer (deeper) `dir` paths first. */
function byDeepestDir(a, b) {
    return b.dir.length - a.dir.length;
}
/**
 * Processor-level gitignore filter. Checks file paths against the
 * `.gitignore` files of the git repositories that contain the watched paths.
 *
 * `repos` maps each repo root to `{ root, entries }`, where every entry is
 * `{ dir, ig }`: the directory holding a `.gitignore` and its parsed matcher.
 * Entries are kept sorted deepest-first.
 */
class GitignoreFilter {
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Discard all cached state and rescan: resolve each watch path to a
     * directory, find its repo root, and parse every `.gitignore` in that repo.
     * Each scan directory and each repo root is processed once.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir)) {
                continue;
            }
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot)) {
                continue;
            }
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: node_path.dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Deepest-first so nested `.gitignore` files are checked first.
            entries.sort(byDeepestDir);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check (resolved to absolute).
     * @returns `true` if any `.gitignore` at or above the path, within a known
     * repo, matches it; `false` otherwise (including paths outside every repo).
     */
    isIgnored(filePath) {
        const absPath = node_path.resolve(filePath);
        for (const repo of this.repos.values()) {
            // Skip repos that do not contain the path (including other drives).
            if (escapesBaseDir(node_path.relative(repo.root, absPath))) {
                continue;
            }
            for (const entry of repo.entries) {
                const relToEntry = node_path.relative(entry.dir, absPath);
                // An empty result means the path IS the `.gitignore` directory;
                // `ignore` throws on empty paths, and a directory's own
                // patterns cannot match the directory itself.
                if (relToEntry === '' || escapesBaseDir(relToEntry)) {
                    continue;
                }
                if (entry.ig.ignores(toForwardSlash(relToEntry))) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * The first known repo containing the file has its entry for that
     * directory dropped and, if the file still exists, re-parsed. A file in a
     * repo that was never scanned registers that repo with this single entry.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = node_path.resolve(gitignorePath);
        const gitignoreDir = node_path.dirname(absPath);
        for (const repo of this.repos.values()) {
            // Same containment test as isIgnored(), so a repo on another
            // drive is never mistaken for the owner of this file.
            if (escapesBaseDir(node_path.relative(repo.root, gitignoreDir))) {
                continue;
            }
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (node_fs.existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                repo.entries.sort(byDeepestDir);
            }
            return;
        }
        // Not inside any known repo: it may belong to one we have not scanned.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (!repoRoot || !node_fs.existsSync(absPath)) {
            return;
        }
        const entry = { dir: gitignoreDir, ig: parseGitignore(absPath) };
        const known = this.repos.get(repoRoot);
        if (known) {
            known.entries.push(entry);
            known.entries.sort(byDeepestDir);
        }
        else {
            this.repos.set(repoRoot, { root: repoRoot, entries: [entry] });
        }
    }
}
659
+
660
+ /**
661
+ * @module templates/helpers
662
+ * Registers built-in Handlebars helpers for content templates.
663
+ */
664
+ /** Pre-built rehype parser in fragment mode, created once at module load; the `markdownify` helper uses it for HTML → hast conversion. */
665
+ const htmlParser = unified.unified().use(rehypeParse, { fragment: true });
666
/**
 * Register all built-in helpers on a Handlebars instance.
 *
 * Helpers registered:
 * - `adfToMarkdown`, `markdownify`: convert ADF objects / HTML strings to
 *   Markdown. Both return a `SafeString`, on success and on failure, so the
 *   output is never HTML-escaped by Handlebars.
 * - `dateFormat`, `join`, `pluck`: formatting utilities.
 * - `lowercase`, `uppercase`, `capitalize`, `title`, `camel`, `snake`, `dash`:
 *   string transforms that yield `''` for non-string input.
 * - `default`, `eq`, `json`: fallback value, deep equality, pretty JSON.
 *
 * @param hbs - The Handlebars instance.
 */
function registerBuiltinHelpers(hbs) {
    // Conversion failures render as an HTML comment. It must be a SafeString
    // like the success path; a plain string would be escaped to `&lt;!-- …`.
    const failureMarker = (label) => new hbs.SafeString(`<!-- ${label} conversion failed -->`);
    // Structural: ADF → Markdown
    hbs.registerHelper('adfToMarkdown', function (adf) {
        if (!adf || typeof adf !== 'object')
            return '';
        try {
            const mdast = mdastUtilFromAdf.fromADF(adf);
            return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
        }
        catch {
            return failureMarker('ADF');
        }
    });
    // Structural: HTML → Markdown
    hbs.registerHelper('markdownify', function (html) {
        if (typeof html !== 'string' || !html.trim())
            return '';
        try {
            const hast = htmlParser.parse(html);
            const mdast = hastUtilToMdast.toMdast(hast);
            return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
        }
        catch {
            return failureMarker('HTML');
        }
    });
    // Formatting: dateFormat (a non-string format falls back to the default)
    hbs.registerHelper('dateFormat', function (value, format) {
        if (value === undefined || value === null)
            return '';
        const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
        return dayjs(value).format(fmt);
    });
    // Formatting: join (a non-string separator falls back to ', ')
    hbs.registerHelper('join', function (arr, separator) {
        if (!Array.isArray(arr))
            return '';
        const sep = typeof separator === 'string' ? separator : ', ';
        return arr.join(sep);
    });
    // Formatting: pluck (non-object items yield undefined)
    hbs.registerHelper('pluck', function (arr, key) {
        if (!Array.isArray(arr) || typeof key !== 'string')
            return [];
        return arr.map((item) => item && typeof item === 'object'
            ? item[key]
            : undefined);
    });
    // String transforms
    hbs.registerHelper('lowercase', (text) => typeof text === 'string' ? text.toLowerCase() : '');
    hbs.registerHelper('uppercase', (text) => typeof text === 'string' ? text.toUpperCase() : '');
    hbs.registerHelper('capitalize', (text) => typeof text === 'string' ? radash.capitalize(text) : '');
    hbs.registerHelper('title', (text) => typeof text === 'string' ? radash.title(text) : '');
    hbs.registerHelper('camel', (text) => typeof text === 'string' ? radash.camel(text) : '');
    hbs.registerHelper('snake', (text) => typeof text === 'string' ? radash.snake(text) : '');
    hbs.registerHelper('dash', (text) => typeof text === 'string' ? radash.dash(text) : '');
    // default helper: first non-nullish of value, fallback, ''
    hbs.registerHelper('default', function (value, fallback) {
        return value ?? fallback ?? '';
    });
    // eq helper (deep equality)
    hbs.registerHelper('eq', function (a, b) {
        return radash.isEqual(a, b);
    });
    // json helper: pretty-printed, emitted unescaped
    hbs.registerHelper('json', function (value) {
        return new hbs.SafeString(JSON.stringify(value, null, 2));
    });
}
740
+
741
+ /**
742
+ * @module templates/engine
743
+ * Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
744
+ */
745
/**
 * Resolve a template value to its source string.
 *
 * Resolution order:
 * 1. Ends in `.hbs` or `.handlebars` → file path (resolved relative to configDir)
 * 2. Is an own key of namedTemplates → named ref (resolved recursively)
 * 3. Otherwise → inline Handlebars template string
 *
 * Only own properties of `namedTemplates` count as named refs, so an inline
 * template such as `toString` or `constructor` is not mistaken for a
 * reference to an inherited `Object.prototype` member.
 *
 * @param value - The template reference (inline, file path, or named ref).
 * @param namedTemplates - Named template definitions from config.
 * @param configDir - Directory to resolve relative file paths against.
 * @param visited - Set of visited named refs for cycle detection.
 * @returns The resolved template source string.
 * @throws Error if named refs form a cycle, or if a template file cannot be read.
 */
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
    // File path detection
    if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
        return node_fs.readFileSync(node_path.resolve(configDir, value), 'utf-8');
    }
    const isNamedRef = namedTemplates != null &&
        Object.prototype.hasOwnProperty.call(namedTemplates, value) &&
        namedTemplates[value] !== undefined;
    if (!isNamedRef) {
        // Inline
        return value;
    }
    if (visited.has(value)) {
        throw new Error(`Circular template reference detected: ${value}`);
    }
    visited.add(value);
    return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
}
775
/**
 * Build an isolated Handlebars environment and install the built-in helpers on it.
 *
 * @returns The new Handlebars instance, with built-in helpers registered.
 */
function createHandlebarsInstance() {
    const instance = Handlebars.create();
    registerBuiltinHelpers(instance);
    return instance;
}
785
/**
 * Load custom helpers from file paths.
 *
 * Each file should export a default function that receives the Handlebars
 * instance. Modules are loaded sequentially, in the order given; a module
 * whose default export is not a function is loaded but otherwise skipped.
 *
 * @param hbs - The Handlebars instance.
 * @param paths - File paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @throws If a module cannot be loaded or its default export throws.
 */
async function loadCustomHelpers(hbs, paths, configDir) {
    const { pathToFileURL } = await import('node:url');
    for (const p of paths) {
        const resolved = node_path.resolve(configDir, p);
        // import() takes a URL-like specifier, not a filesystem path: a raw
        // Windows drive path is rejected and characters such as `#` or `?`
        // are parsed as URL syntax. A file:// URL is unambiguous.
        const mod = await import(pathToFileURL(resolved).href);
        if (typeof mod.default === 'function') {
            mod.default(hbs);
        }
    }
}
803
/**
 * Keyed store of compiled Handlebars templates.
 *
 * Templates are compiled with the Handlebars instance supplied at
 * construction, kept in the `compiled` map, and rendered on demand.
 */
class TemplateEngine {
    hbs;
    compiled = new Map();
    /**
     * @param hbs - The Handlebars instance used to compile templates.
     */
    constructor(hbs) {
        this.hbs = hbs;
    }
    /**
     * Compile `source` and store the result under `key`, replacing any
     * template previously stored under that key.
     *
     * @param key - Cache key (rule index or named template).
     * @param source - Handlebars template source.
     * @returns The compiled template function.
     */
    compile(key, source) {
        const template = this.hbs.compile(source);
        this.compiled.set(key, template);
        return template;
    }
    /**
     * Look up a compiled template.
     *
     * @param key - The cache key.
     * @returns The compiled template, or `undefined` if none is stored.
     */
    get(key) {
        return this.compiled.get(key);
    }
    /**
     * Render the template stored under `key` with the given context.
     *
     * @param key - The cache key of the compiled template.
     * @param context - The data context for rendering.
     * @returns The rendered string, or `null` if no template is stored under `key`.
     */
    render(key, context) {
        const template = this.compiled.get(key);
        return template ? template(context) : null;
    }
}
847
+
848
+ /**
849
+ * @module templates/buildTemplateEngine
850
+ * Factory to build a TemplateEngine from config, compiling all rule templates at load time.
851
+ */
852
/**
 * Create a TemplateEngine with every rule template compiled up front.
 *
 * Each rule with a truthy `template` is resolved to source text and compiled
 * under the key `rule-<index>`, where `<index>` is the rule's position in
 * `rules`. Custom helpers are loaded only when both helper paths and
 * `configDir` are provided; template resolution falls back to `'.'` when
 * `configDir` is absent.
 *
 * @param rules - The inference rules (may contain template fields).
 * @param namedTemplates - Named template definitions from config.
 * @param templateHelperPaths - Paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @returns The configured TemplateEngine, or undefined if no rule has a template.
 */
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
    if (!rules.some((candidate) => candidate.template)) {
        return undefined;
    }
    const hbs = createHandlebarsInstance();
    const wantsCustomHelpers = Boolean(templateHelperPaths?.length && configDir);
    if (wantsCustomHelpers) {
        await loadCustomHelpers(hbs, templateHelperPaths, configDir);
    }
    const engine = new TemplateEngine(hbs);
    const baseDir = configDir ?? '.';
    for (let index = 0; index < rules.length; index += 1) {
        const { template } = rules[index];
        if (template) {
            engine.compile(`rule-${String(index)}`, resolveTemplateSource(template, namedTemplates, baseDir));
        }
    }
    return engine;
}
880
+
881
+ /**
882
+ * @module app/configWatcher
883
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
884
+ */
885
/**
 * Debounced config file watcher.
 *
 * Watches `options.configPath` with chokidar and invokes `options.onChange`
 * once per burst of change events, `options.debounceMs` after the last one.
 * A failure of `onChange` (thrown or rejected) is logged through
 * `options.logger` rather than escaping as an unhandled rejection.
 */
class ConfigWatcher {
    options;
    watcher;
    debounce;
    /**
     * @param options - `{ enabled, configPath, debounceMs, onChange, logger }`.
     */
    constructor(options) {
        this.options = options;
    }
    /**
     * Begin watching the config file. Does nothing when `options.enabled` is falsy.
     */
    start() {
        if (!this.options.enabled)
            return;
        this.watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        this.watcher.on('change', () => {
            // Restart the timer so rapid successive writes trigger one reload.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                this.debounce = undefined;
                // Fire-and-forget by design; the inner try/catch owns failures.
                void (async () => {
                    try {
                        await this.options.onChange();
                    }
                    catch (error) {
                        this.options.logger.error({ err: normalizeError(error) }, 'Config reload failed');
                    }
                })();
            }, this.options.debounceMs);
        });
        this.watcher.on('error', (error) => {
            this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
        });
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /**
     * Cancel any pending debounced reload and close the underlying watcher.
     * Safe to call when the watcher was never started.
     */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        if (this.watcher) {
            await this.watcher.close();
            this.watcher = undefined;
        }
    }
}
927
+
441
928
  /**
442
929
  * @module config/defaults
443
930
  * Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
@@ -639,7 +1126,12 @@ const inferenceRuleSchema = zod.z.object({
639
1126
  map: zod.z
640
1127
  .union([jsonmap.jsonMapMapSchema, zod.z.string()])
641
1128
  .optional()
642
- .describe('JsonMap transformation (inline definition or named map reference).'),
1129
+ .describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
1130
+ /** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
1131
+ template: zod.z
1132
+ .string()
1133
+ .optional()
1134
+ .describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
643
1135
  });
644
1136
  /**
645
1137
  * Top-level configuration for jeeves-watcher.
@@ -677,6 +1169,22 @@ const jeevesWatcherConfigSchema = zod.z.object({
677
1169
  .record(zod.z.string(), jsonmap.jsonMapMapSchema)
678
1170
  .optional()
679
1171
  .describe('Reusable named JsonMap transformations.'),
1172
+ /** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
1173
+ templates: zod.z
1174
+ .record(zod.z.string(), zod.z.string())
1175
+ .optional()
1176
+ .describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
1177
+ /** Custom Handlebars helper registration. */
1178
+ templateHelpers: zod.z
1179
+ .object({
1180
+ /** File paths to custom helper modules. */
1181
+ paths: zod.z
1182
+ .array(zod.z.string())
1183
+ .optional()
1184
+ .describe('File paths to custom helper modules.'),
1185
+ })
1186
+ .optional()
1187
+ .describe('Custom Handlebars helper registration.'),
680
1188
  /** Logging configuration. */
681
1189
  logging: loggingConfigSchema.optional().describe('Logging configuration.'),
682
1190
  /** Timeout in milliseconds for graceful shutdown. */
@@ -934,258 +1442,52 @@ function createGeminiProvider(config, logger) {
934
1442
  }, {
935
1443
  attempts: 5,
936
1444
  baseDelayMs: 500,
937
- maxDelayMs: 10_000,
938
- jitter: 0.2,
939
- onRetry: ({ attempt, delayMs, error }) => {
940
- log.warn({
941
- attempt,
942
- delayMs,
943
- provider: 'gemini',
944
- model: config.model,
945
- err: normalizeError(error),
946
- }, 'Embedding call failed; will retry');
947
- },
948
- });
949
- // Validate dimensions
950
- for (const vector of vectors) {
951
- if (vector.length !== dimensions) {
952
- throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
953
- }
954
- }
955
- return vectors;
956
- },
957
- };
958
- }
959
- function createMockFromConfig(config) {
960
- const dimensions = config.dimensions ?? 768;
961
- return createMockProvider(dimensions);
962
- }
963
- const embeddingProviderRegistry = new Map([
964
- ['mock', createMockFromConfig],
965
- ['gemini', createGeminiProvider],
966
- ]);
967
- /**
968
- * Create an embedding provider based on the given configuration.
969
- *
970
- * Each provider is responsible for its own default dimensions.
971
- *
972
- * @param config - The embedding configuration.
973
- * @param logger - Optional pino logger for retry warnings.
974
- * @returns An {@link EmbeddingProvider} instance.
975
- * @throws If the configured provider is not supported.
976
- */
977
- function createEmbeddingProvider(config, logger) {
978
- const factory = embeddingProviderRegistry.get(config.provider);
979
- if (!factory) {
980
- throw new Error(`Unsupported embedding provider: ${config.provider}`);
981
- }
982
- return factory(config, logger);
983
- }
984
-
985
- /**
986
- * @module gitignore
987
- * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
988
- */
989
- /**
990
- * Find the git repo root by walking up from `startDir` looking for `.git/`.
991
- * Returns `undefined` if no repo is found.
992
- */
993
- function findRepoRoot(startDir) {
994
- let dir = node_path.resolve(startDir);
995
- const root = node_path.resolve('/');
996
- while (dir !== root) {
997
- if (node_fs.existsSync(node_path.join(dir, '.git')) &&
998
- node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
999
- return dir;
1000
- }
1001
- const parent = node_path.dirname(dir);
1002
- if (parent === dir)
1003
- break;
1004
- dir = parent;
1005
- }
1006
- return undefined;
1007
- }
1008
- /**
1009
- * Convert a watch path (directory, file path, or glob) to a concrete directory
1010
- * that can be scanned for a repo root.
1011
- */
1012
- function watchPathToScanDir(watchPath) {
1013
- const absPath = node_path.resolve(watchPath);
1014
- try {
1015
- return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
1016
- }
1017
- catch {
1018
- // ignore
1019
- }
1020
- // If this is a glob, fall back to the non-glob prefix.
1021
- const globMatch = /[*?[{]/.exec(watchPath);
1022
- if (!globMatch)
1023
- return undefined;
1024
- const prefix = watchPath.slice(0, globMatch.index);
1025
- const trimmed = prefix.trim();
1026
- const baseDir = trimmed.length === 0
1027
- ? '.'
1028
- : trimmed.endsWith('/') || trimmed.endsWith('\\')
1029
- ? trimmed
1030
- : node_path.dirname(trimmed);
1031
- const resolved = node_path.resolve(baseDir);
1032
- if (!node_fs.existsSync(resolved))
1033
- return undefined;
1034
- return resolved;
1035
- }
1036
- /**
1037
- * Recursively find all `.gitignore` files under `dir`.
1038
- * Skips `.git` and `node_modules` directories for performance.
1039
- */
1040
- function findGitignoreFiles(dir) {
1041
- const results = [];
1042
- const gitignorePath = node_path.join(dir, '.gitignore');
1043
- if (node_fs.existsSync(gitignorePath)) {
1044
- results.push(gitignorePath);
1045
- }
1046
- let entries;
1047
- try {
1048
- entries = node_fs.readdirSync(dir);
1049
- }
1050
- catch {
1051
- return results;
1052
- }
1053
- for (const entry of entries) {
1054
- if (entry === '.git' || entry === 'node_modules')
1055
- continue;
1056
- const fullPath = node_path.join(dir, entry);
1057
- try {
1058
- if (node_fs.statSync(fullPath).isDirectory()) {
1059
- results.push(...findGitignoreFiles(fullPath));
1060
- }
1061
- }
1062
- catch {
1063
- // Skip inaccessible entries
1064
- }
1065
- }
1066
- return results;
1067
- }
1068
- /**
1069
- * Parse a `.gitignore` file into an `ignore` instance.
1070
- */
1071
- function parseGitignore(gitignorePath) {
1072
- const content = node_fs.readFileSync(gitignorePath, 'utf8');
1073
- return ignore().add(content);
1074
- }
1075
- /**
1076
- * Normalize a path to use forward slashes (required by `ignore` package).
1077
- */
1078
- function toForwardSlash(p) {
1079
- return p.replace(/\\/g, '/');
1080
- }
1081
- /**
1082
- * Processor-level gitignore filter. Checks file paths against the nearest
1083
- * `.gitignore` chain in git repositories.
1084
- */
1085
- class GitignoreFilter {
1086
- repos = new Map();
1087
- /**
1088
- * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
1089
- *
1090
- * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
1091
- */
1092
- constructor(watchPaths) {
1093
- this.scan(watchPaths);
1094
- }
1095
- /**
1096
- * Scan paths for git repos and their `.gitignore` files.
1097
- */
1098
- scan(watchPaths) {
1099
- this.repos.clear();
1100
- const scannedDirs = new Set();
1101
- for (const watchPath of watchPaths) {
1102
- const scanDir = watchPathToScanDir(watchPath);
1103
- if (!scanDir)
1104
- continue;
1105
- if (scannedDirs.has(scanDir))
1106
- continue;
1107
- scannedDirs.add(scanDir);
1108
- const repoRoot = findRepoRoot(scanDir);
1109
- if (!repoRoot)
1110
- continue;
1111
- if (this.repos.has(repoRoot))
1112
- continue;
1113
- const gitignoreFiles = findGitignoreFiles(repoRoot);
1114
- const entries = gitignoreFiles.map((gf) => ({
1115
- dir: node_path.dirname(gf),
1116
- ig: parseGitignore(gf),
1117
- }));
1118
- // Sort deepest-first so nested `.gitignore` files are checked first
1119
- entries.sort((a, b) => b.dir.length - a.dir.length);
1120
- this.repos.set(repoRoot, { root: repoRoot, entries });
1121
- }
1122
- }
1123
- /**
1124
- * Check whether a file path is ignored by any applicable `.gitignore`.
1125
- *
1126
- * @param filePath - Absolute file path to check.
1127
- * @returns `true` if the file should be ignored.
1128
- */
1129
- isIgnored(filePath) {
1130
- const absPath = node_path.resolve(filePath);
1131
- for (const [, repo] of this.repos) {
1132
- // Check if file is within this repo
1133
- const relToRepo = node_path.relative(repo.root, absPath);
1134
- if (relToRepo.startsWith('..') || relToRepo.startsWith(node_path.resolve('/'))) {
1135
- continue;
1136
- }
1137
- // Check each `.gitignore` entry (deepest-first)
1138
- for (const entry of repo.entries) {
1139
- const relToEntry = node_path.relative(entry.dir, absPath);
1140
- if (relToEntry.startsWith('..'))
1141
- continue;
1142
- const normalized = toForwardSlash(relToEntry);
1143
- if (entry.ig.ignores(normalized)) {
1144
- return true;
1145
- }
1146
- }
1147
- }
1148
- return false;
1149
- }
1150
- /**
1151
- * Invalidate and re-parse a specific `.gitignore` file.
1152
- * Call when a `.gitignore` file is added, changed, or removed.
1153
- *
1154
- * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
1155
- */
1156
- invalidate(gitignorePath) {
1157
- const absPath = node_path.resolve(gitignorePath);
1158
- const gitignoreDir = node_path.dirname(absPath);
1159
- for (const [, repo] of this.repos) {
1160
- const relToRepo = node_path.relative(repo.root, gitignoreDir);
1161
- if (relToRepo.startsWith('..'))
1162
- continue;
1163
- // Remove old entry for this directory
1164
- repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
1165
- // Re-parse if file still exists
1166
- if (node_fs.existsSync(absPath)) {
1167
- repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
1168
- // Re-sort deepest-first
1169
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1170
- }
1171
- return;
1172
- }
1173
- // If not in any known repo, check if it's in a repo we haven't scanned
1174
- const repoRoot = findRepoRoot(gitignoreDir);
1175
- if (repoRoot && node_fs.existsSync(absPath)) {
1176
- const entries = [
1177
- { dir: gitignoreDir, ig: parseGitignore(absPath) },
1178
- ];
1179
- if (this.repos.has(repoRoot)) {
1180
- const repo = this.repos.get(repoRoot);
1181
- repo.entries.push(entries[0]);
1182
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1183
- }
1184
- else {
1185
- this.repos.set(repoRoot, { root: repoRoot, entries });
1445
+ maxDelayMs: 10_000,
1446
+ jitter: 0.2,
1447
+ onRetry: ({ attempt, delayMs, error }) => {
1448
+ log.warn({
1449
+ attempt,
1450
+ delayMs,
1451
+ provider: 'gemini',
1452
+ model: config.model,
1453
+ err: normalizeError(error),
1454
+ }, 'Embedding call failed; will retry');
1455
+ },
1456
+ });
1457
+ // Validate dimensions
1458
+ for (const vector of vectors) {
1459
+ if (vector.length !== dimensions) {
1460
+ throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
1461
+ }
1186
1462
  }
1187
- }
1463
+ return vectors;
1464
+ },
1465
+ };
1466
+ }
1467
/**
 * Build the mock embedding provider from an embedding config.
 *
 * @param config - Embedding configuration; `dimensions` defaults to 768 when
 * it is `null` or `undefined`.
 * @returns Whatever `createMockProvider` returns for those dimensions.
 */
function createMockFromConfig(config) {
    return createMockProvider(config.dimensions ?? 768);
}
1471
/** Lookup table from a `config.provider` name to the factory function that `createEmbeddingProvider` invokes for it. */
+ const embeddingProviderRegistry = new Map([
1472
+ ['mock', createMockFromConfig],
1473
+ ['gemini', createGeminiProvider],
1474
+ ]);
1475
/**
 * Instantiate the embedding provider selected by `config.provider`.
 *
 * The provider name is looked up in `embeddingProviderRegistry` and the
 * matching factory is called with `(config, logger)`. Each provider is
 * responsible for its own default dimensions.
 *
 * @param config - The embedding configuration.
 * @param logger - Optional pino logger for retry warnings.
 * @returns An {@link EmbeddingProvider} instance.
 * @throws If the configured provider is not supported.
 */
function createEmbeddingProvider(config, logger) {
    const build = embeddingProviderRegistry.get(config.provider);
    if (build) {
        return build(config, logger);
    }
    throw new Error(`Unsupported embedding provider: ${config.provider}`);
}
1190
1492
 
1191
1493
  /**
@@ -1417,7 +1719,7 @@ function createJsonMapLib() {
1417
1719
  };
1418
1720
  }
1419
1721
  /**
1420
- * Apply compiled inference rules to file attributes, returning merged metadata.
1722
+ * Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
1421
1723
  *
1422
1724
  * Rules are evaluated in order; later rules override earlier ones.
1423
1725
  * If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
@@ -1427,15 +1729,18 @@ function createJsonMapLib() {
1427
1729
  * @param attributes - The file attributes to match against.
1428
1730
  * @param namedMaps - Optional record of named JsonMap definitions.
1429
1731
  * @param logger - Optional logger for warnings (falls back to console.warn).
1430
- * @returns The merged metadata from all matching rules.
1732
+ * @param templateEngine - Optional template engine for rendering content templates.
1733
+ * @param configDir - Optional config directory for resolving .json map file paths.
1734
+ * @returns The merged metadata and optional rendered content.
1431
1735
  */
1432
- async function applyRules(compiledRules, attributes, namedMaps, logger) {
1736
+ async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
1433
1737
  // JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
1434
1738
  // Our helper functions accept multiple args, which JsonMap supports at runtime.
1435
1739
  const lib = createJsonMapLib();
1436
1740
  let merged = {};
1741
+ let renderedContent = null;
1437
1742
  const log = logger ?? console;
1438
- for (const { rule, validate } of compiledRules) {
1743
+ for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
1439
1744
  if (validate(attributes)) {
1440
1745
  // Apply set resolution
1441
1746
  const setOutput = resolveSet(rule.set, attributes);
@@ -1445,10 +1750,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1445
1750
  let mapDef;
1446
1751
  // Resolve map reference
1447
1752
  if (typeof rule.map === 'string') {
1448
- mapDef = namedMaps?.[rule.map];
1449
- if (!mapDef) {
1450
- log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1451
- continue;
1753
+ if (rule.map.endsWith('.json') && configDir) {
1754
+ // File path: load from .json file
1755
+ try {
1756
+ const mapPath = node_path.resolve(configDir, rule.map);
1757
+ const raw = node_fs.readFileSync(mapPath, 'utf-8');
1758
+ mapDef = JSON.parse(raw);
1759
+ }
1760
+ catch (error) {
1761
+ log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
1762
+ continue;
1763
+ }
1764
+ }
1765
+ else {
1766
+ mapDef = namedMaps?.[rule.map];
1767
+ if (!mapDef) {
1768
+ log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1769
+ continue;
1770
+ }
1452
1771
  }
1453
1772
  }
1454
1773
  else {
@@ -1471,9 +1790,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1471
1790
  log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
1472
1791
  }
1473
1792
  }
1793
+ // Render template if present
1794
+ if (rule.template && templateEngine) {
1795
+ const templateKey = `rule-${String(ruleIndex)}`;
1796
+ // Build template context: attributes (with json spread at top) + map output
1797
+ const context = {
1798
+ ...(attributes.json ?? {}),
1799
+ ...attributes,
1800
+ ...merged,
1801
+ };
1802
+ try {
1803
+ const result = templateEngine.render(templateKey, context);
1804
+ if (result && result.trim()) {
1805
+ renderedContent = result;
1806
+ }
1807
+ else {
1808
+ log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
1809
+ }
1810
+ }
1811
+ catch (error) {
1812
+ log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
1813
+ }
1814
+ }
1474
1815
  }
1475
1816
  }
1476
- return merged;
1817
+ return { metadata: merged, renderedContent };
1477
1818
  }
1478
1819
 
1479
1820
  /**
@@ -1562,23 +1903,32 @@ function compileRules(rules) {
1562
1903
  * @param metadataDir - The metadata directory for enrichment files.
1563
1904
  * @param maps - Optional named JsonMap definitions.
1564
1905
  * @param logger - Optional logger for rule warnings.
1906
+ * @param templateEngine - Optional template engine for content templates.
1907
+ * @param configDir - Optional config directory for resolving file paths.
1565
1908
  * @returns The merged metadata and intermediate data.
1566
1909
  */
1567
- async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
1910
+ async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
1568
1911
  const ext = node_path.extname(filePath);
1569
1912
  const stats = await promises.stat(filePath);
1570
1913
  // 1. Extract text and structured data
1571
1914
  const extracted = await extractText(filePath, ext);
1572
1915
  // 2. Build attributes + apply rules
1573
1916
  const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
1574
- const inferred = await applyRules(compiledRules, attributes, maps, logger);
1917
+ const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
1575
1918
  // 3. Read enrichment metadata (merge, enrichment wins)
1576
1919
  const enrichment = await readMetadata(filePath, metadataDir);
1577
1920
  const metadata = {
1578
1921
  ...inferred,
1579
1922
  ...(enrichment ?? {}),
1580
1923
  };
1581
- return { inferred, enrichment, metadata, attributes, extracted };
1924
+ return {
1925
+ inferred,
1926
+ enrichment,
1927
+ metadata,
1928
+ attributes,
1929
+ extracted,
1930
+ renderedContent,
1931
+ };
1582
1932
  }
1583
1933
 
1584
1934
  /**
@@ -1649,6 +1999,7 @@ class DocumentProcessor {
1649
1999
  vectorStore;
1650
2000
  compiledRules;
1651
2001
  logger;
2002
+ templateEngine;
1652
2003
  /**
1653
2004
  * Create a new DocumentProcessor.
1654
2005
  *
@@ -1657,13 +2008,15 @@ class DocumentProcessor {
1657
2008
  * @param vectorStore - The vector store client.
1658
2009
  * @param compiledRules - The compiled inference rules.
1659
2010
  * @param logger - The logger instance.
2011
+ * @param templateEngine - Optional template engine for content templates.
1660
2012
  */
1661
- constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
2013
+ constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
1662
2014
  this.config = config;
1663
2015
  this.embeddingProvider = embeddingProvider;
1664
2016
  this.vectorStore = vectorStore;
1665
2017
  this.compiledRules = compiledRules;
1666
2018
  this.logger = logger;
2019
+ this.templateEngine = templateEngine;
1667
2020
  }
1668
2021
  /**
1669
2022
  * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
@@ -1674,13 +2027,15 @@ class DocumentProcessor {
1674
2027
  try {
1675
2028
  const ext = node_path.extname(filePath);
1676
2029
  // 1. Build merged metadata + extract text
1677
- const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
1678
- if (!extracted.text.trim()) {
2030
+ const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
2031
+ // Use rendered template content if available, otherwise raw extracted text
2032
+ const textToEmbed = renderedContent ?? extracted.text;
2033
+ if (!textToEmbed.trim()) {
1679
2034
  this.logger.debug({ filePath }, 'Skipping empty file');
1680
2035
  return;
1681
2036
  }
1682
2037
  // 2. Content hash check — skip if unchanged
1683
- const hash = contentHash(extracted.text);
2038
+ const hash = contentHash(textToEmbed);
1684
2039
  const baseId = pointId(filePath, 0);
1685
2040
  const existingPayload = await this.vectorStore.getPayload(baseId);
1686
2041
  if (existingPayload && existingPayload['content_hash'] === hash) {
@@ -1692,7 +2047,7 @@ class DocumentProcessor {
1692
2047
  const chunkSize = this.config.chunkSize ?? 1000;
1693
2048
  const chunkOverlap = this.config.chunkOverlap ?? 200;
1694
2049
  const splitter = createSplitter(ext, chunkSize, chunkOverlap);
1695
- const chunks = await splitter.splitText(extracted.text);
2050
+ const chunks = await splitter.splitText(textToEmbed);
1696
2051
  // 4. Embed all chunks
1697
2052
  const vectors = await this.embeddingProvider.embed(chunks);
1698
2053
  // 5. Upsert all chunk points
@@ -1786,7 +2141,7 @@ class DocumentProcessor {
1786
2141
  return null;
1787
2142
  }
1788
2143
  // Build merged metadata (lightweight — no embedding)
1789
- const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
2144
+ const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
1790
2145
  // Update all chunk payloads
1791
2146
  const totalChunks = getChunkCount(existingPayload);
1792
2147
  const ids = chunkIds(filePath, totalChunks);
@@ -1804,8 +2159,17 @@ class DocumentProcessor {
1804
2159
  *
1805
2160
  * @param compiledRules - The newly compiled rules.
1806
2161
  */
1807
- updateRules(compiledRules) {
2162
+ /**
2163
+ * Update compiled inference rules and optionally the template engine.
2164
+ *
2165
+ * @param compiledRules - The newly compiled rules.
2166
+ * @param templateEngine - Optional updated template engine.
2167
+ */
2168
+ updateRules(compiledRules, templateEngine) {
1808
2169
  this.compiledRules = compiledRules;
2170
+ if (templateEngine) {
2171
+ this.templateEngine = templateEngine;
2172
+ }
1809
2173
  this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
1810
2174
  }
1811
2175
  }
@@ -2334,6 +2698,104 @@ class SystemHealth {
2334
2698
  }
2335
2699
  }
2336
2700
 
2701
+ /**
2702
+ * @module watcher/globToDir
2703
+ * Adapts glob-based watch config to chokidar v4+, which removed glob support
2704
+ * (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
2705
+ * strings, silently producing zero events. This module extracts static directory
2706
+ * roots from glob patterns for chokidar to watch, then filters emitted events
2707
+ * against the original globs via picomatch.
2708
+ */
2709
/**
 * Extract the static directory root from a glob pattern.
 *
 * Backslashes are rewritten to forward slashes, then leading path segments are
 * kept up to (not including) the first segment that contains a glob character
 * (`*`, `?`, `{`, `[`, `]`).
 *
 * Special cases:
 * - A pattern with no static prefix yields `.` when relative and `/` when it
 *   starts with `/` (e.g. `/**\/*.md`), so an absolute pattern is never
 *   silently re-rooted at the working directory.
 * - A prefix that is only a drive letter (`C:` from `C:/*.txt`) is returned
 *   as `C:/`, because a bare `C:` is a drive-relative path on Windows.
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`).
 */
function globRoot(glob) {
    const normalized = glob.replace(/\\/g, '/');
    const staticSegments = [];
    for (const segment of normalized.split('/')) {
        if (/[*?{[\]]/.test(segment))
            break;
        staticSegments.push(segment);
    }
    const root = staticSegments.join('/');
    if (root === '') {
        // Either nothing static at all (relative) or only the leading slash of
        // an absolute pattern survived the split.
        return normalized.startsWith('/') ? '/' : '.';
    }
    if (/^[A-Za-z]:$/.test(root) && normalized.length > root.length) {
        // The pattern continued past the drive letter, so it was drive-rooted.
        return `${root}/`;
    }
    return root;
}
2727
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Backslashes are rewritten to forward slashes. Comparison is case-insensitive
 * and ignores a trailing slash, but the returned paths keep the spelling of
 * their first occurrence: lower-casing the result would point the watcher at
 * directories that do not exist on case-sensitive filesystems.
 *
 * Roots that already end in `/` (such as `/` or `c:/`) correctly subsume their
 * children. The result is ordered by the lower-cased path.
 *
 * NOTE(review): because comparison stays case-insensitive, two roots that
 * differ only by case are still treated as one, even on a case-sensitive
 * filesystem where they are distinct directories.
 *
 * @param roots - Array of directory paths.
 * @returns Deduplicated array with subdirectories removed.
 */
function deduplicateRoots(roots) {
    // Comparison key (lower-cased, always '/'-terminated) → first spelling seen.
    const unique = new Map();
    for (const root of roots) {
        const display = root.replace(/\\/g, '/');
        const folded = display.toLowerCase();
        const key = folded.endsWith('/') ? folded : `${folded}/`;
        if (!unique.has(key)) {
            unique.set(key, { key, folded, display });
        }
    }
    const entries = [...unique.values()].sort((a, b) => {
        if (a.folded < b.folded)
            return -1;
        return a.folded > b.folded ? 1 : 0;
    });
    // Every key ends in '/', so a prefix match can only happen on a whole
    // path segment ('a/' never swallows 'ab/').
    return entries
        .filter(({ key }) => !entries.some((other) => other.key !== key && key.startsWith(other.key)))
        .map(({ display }) => display);
}
2741
/**
 * Compile an array of glob patterns into a single path predicate.
 *
 * Backslashes are rewritten to forward slashes in both the patterns and the
 * tested path. Matching is delegated to picomatch with `dot: true` and
 * `nocase: true`.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function that tests whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const toForwardSlashes = (value) => value.replace(/\\/g, '/');
    const isMatch = picomatch(globs.map((pattern) => toForwardSlashes(pattern)), { dot: true, nocase: true });
    return (filePath) => isMatch(toForwardSlashes(filePath));
}
2757
/**
 * Split glob-based watch config into the two pieces the watcher needs:
 * the deduplicated static directory roots to hand to chokidar, and a
 * predicate that filters emitted paths against the original globs.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
2770
/**
 * Convert ignored glob patterns to picomatch matcher functions.
 *
 * Chokidar v5 replaced the external `anymatch` dependency with an inline
 * implementation that does **exact string equality** for string matchers,
 * breaking glob-based `ignored` patterns. Every string entry is therefore
 * compiled into a picomatch predicate, which chokidar's `createPattern`
 * passes through unchanged (`typeof matcher === 'function'`).
 *
 * Non-string entries (functions, RegExps) are returned as-is.
 *
 * @param ignored - Array of ignored patterns (globs, functions, RegExps).
 * @returns Array with glob strings replaced by picomatch matcher functions.
 */
function resolveIgnored(ignored) {
    const toForwardSlashes = (value) => value.replace(/\\/g, '/');
    return ignored.map((entry) => {
        if (typeof entry === 'string') {
            // All strings are compiled, literal paths included, so every
            // string entry is matched the same way.
            const matcher = picomatch(toForwardSlashes(entry), { dot: true, nocase: true });
            return (filePath) => matcher(toForwardSlashes(filePath));
        }
        return entry;
    });
}
2798
+
2337
2799
  /**
2338
2800
  * @module watcher
2339
2801
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -2348,6 +2810,7 @@ class FileSystemWatcher {
2348
2810
  logger;
2349
2811
  health;
2350
2812
  gitignoreFilter;
2813
+ globMatches;
2351
2814
  watcher;
2352
2815
  /**
2353
2816
  * Create a new FileSystemWatcher.
@@ -2364,6 +2827,7 @@ class FileSystemWatcher {
2364
2827
  this.processor = processor;
2365
2828
  this.logger = logger;
2366
2829
  this.gitignoreFilter = options.gitignoreFilter;
2830
+ this.globMatches = () => true;
2367
2831
  const healthOptions = {
2368
2832
  maxRetries: options.maxRetries,
2369
2833
  maxBackoffMs: options.maxBackoffMs,
@@ -2376,8 +2840,20 @@ class FileSystemWatcher {
2376
2840
  * Start watching the filesystem and processing events.
2377
2841
  */
2378
2842
  start() {
2379
- this.watcher = chokidar.watch(this.config.paths, {
2380
- ignored: this.config.ignored,
2843
+ // Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
2844
+ // Glob patterns are silently treated as literal strings, producing zero
2845
+ // events. We extract static directory roots for chokidar to watch, then
2846
+ // filter emitted events against the original globs via picomatch.
2847
+ const { roots, matches } = resolveWatchPaths(this.config.paths);
2848
+ this.globMatches = matches;
2849
+ // Chokidar v5's inline anymatch does exact string equality for string
2850
+ // matchers, breaking glob-based ignored patterns. Convert to picomatch
2851
+ // functions that chokidar passes through as-is.
2852
+ const ignored = this.config.ignored
2853
+ ? resolveIgnored(this.config.ignored)
2854
+ : undefined;
2855
+ this.watcher = chokidar.watch(roots, {
2856
+ ignored,
2381
2857
  usePolling: this.config.usePolling,
2382
2858
  interval: this.config.pollIntervalMs,
2383
2859
  awaitWriteFinish: this.config.stabilityThresholdMs
@@ -2387,6 +2863,8 @@ class FileSystemWatcher {
2387
2863
  });
2388
2864
  this.watcher.on('add', (path) => {
2389
2865
  this.handleGitignoreChange(path);
2866
+ if (!this.globMatches(path))
2867
+ return;
2390
2868
  if (this.isGitignored(path))
2391
2869
  return;
2392
2870
  this.logger.debug({ path }, 'File added');
@@ -2394,6 +2872,8 @@ class FileSystemWatcher {
2394
2872
  });
2395
2873
  this.watcher.on('change', (path) => {
2396
2874
  this.handleGitignoreChange(path);
2875
+ if (!this.globMatches(path))
2876
+ return;
2397
2877
  if (this.isGitignored(path))
2398
2878
  return;
2399
2879
  this.logger.debug({ path }, 'File changed');
@@ -2401,6 +2881,8 @@ class FileSystemWatcher {
2401
2881
  });
2402
2882
  this.watcher.on('unlink', (path) => {
2403
2883
  this.handleGitignoreChange(path);
2884
+ if (!this.globMatches(path))
2885
+ return;
2404
2886
  if (this.isGitignored(path))
2405
2887
  return;
2406
2888
  this.logger.debug({ path }, 'File removed');
@@ -2473,51 +2955,21 @@ class FileSystemWatcher {
2473
2955
  }
2474
2956
 
2475
2957
  /**
2476
- * @module app/configWatcher
2477
- * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
2478
- */
2479
- /**
2480
- * Debounced config file watcher.
2958
+ * @module app/factories
2959
+ * Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
2481
2960
  */
2482
- class ConfigWatcher {
2483
- options;
2484
- watcher;
2485
- debounce;
2486
- constructor(options) {
2487
- this.options = options;
2488
- }
2489
- start() {
2490
- if (!this.options.enabled)
2491
- return;
2492
- this.watcher = chokidar.watch(this.options.configPath, {
2493
- ignoreInitial: true,
2494
- });
2495
- this.watcher.on('change', () => {
2496
- if (this.debounce)
2497
- clearTimeout(this.debounce);
2498
- this.debounce = setTimeout(() => {
2499
- void this.options.onChange();
2500
- }, this.options.debounceMs);
2501
- });
2502
- this.watcher.on('error', (error) => {
2503
- this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
2504
- });
2505
- this.options.logger.info({
2506
- configPath: this.options.configPath,
2507
- debounceMs: this.options.debounceMs,
2508
- }, 'Config watcher started');
2509
- }
2510
- async stop() {
2511
- if (this.debounce) {
2512
- clearTimeout(this.debounce);
2513
- this.debounce = undefined;
2514
- }
2515
- if (this.watcher) {
2516
- await this.watcher.close();
2517
- this.watcher = undefined;
2518
- }
2519
- }
2520
- }
2961
/**
 * Production wiring for every component the application builds.
 * Plain functions are referenced directly; classes are wrapped so that callers
 * never need `new`.
 */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient: (config, dimensions, logger) => {
        return new VectorStoreClient(config, dimensions, logger);
    },
    compileRules,
    createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) => {
        return new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
    },
    createEventQueue: (options) => {
        return new EventQueue(options);
    },
    createFileSystemWatcher: (config, queue, processor, logger, options) => {
        return new FileSystemWatcher(config, queue, processor, logger, options);
    },
    createApiServer,
};
2521
2973
 
2522
2974
  /**
2523
2975
  * @module app/shutdown
@@ -2537,17 +2989,28 @@ function installShutdownHandlers(stop) {
2537
2989
  process.on('SIGINT', () => void shutdown());
2538
2990
  }
2539
2991
 
2540
- const defaultFactories = {
2541
- loadConfig,
2542
- createLogger,
2543
- createEmbeddingProvider,
2544
- createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
2545
- compileRules,
2546
- createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
2547
- createEventQueue: (options) => new EventQueue(options),
2548
- createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
2549
- createApiServer,
2550
- };
2992
+ /**
2993
+ * @module app/startFromConfig
2994
+ * Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
2995
+ */
2996
/**
 * Load configuration, construct a JeevesWatcher, register shutdown handlers
 * that call its `stop()`, and start it.
 *
 * @param configPath - Optional path to the configuration file.
 * @returns The running JeevesWatcher instance.
 */
async function startFromConfig(configPath) {
    const loadedConfig = await loadConfig(configPath);
    const watcherApp = new JeevesWatcher(loadedConfig, configPath);
    // Handlers are registered before start() so a signal during startup still stops the app.
    installShutdownHandlers(() => watcherApp.stop());
    await watcherApp.start();
    return watcherApp;
}
3009
+
3010
+ /**
3011
+ * @module app
3012
+ * Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
3013
+ */
2551
3014
  /**
2552
3015
  * Main application class that wires together all components.
2553
3016
  */
@@ -2582,56 +3045,26 @@ class JeevesWatcher {
2582
3045
  async start() {
2583
3046
  const logger = this.factories.createLogger(this.config.logging);
2584
3047
  this.logger = logger;
2585
- let embeddingProvider;
2586
- try {
2587
- embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
2588
- }
2589
- catch (error) {
2590
- logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
2591
- throw error;
2592
- }
2593
- const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2594
- await vectorStore.ensureCollection();
3048
+ const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
2595
3049
  const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
2596
- const processorConfig = {
3050
+ const configDir = this.configPath ? node_path.dirname(this.configPath) : '.';
3051
+ const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
3052
+ const processor = this.factories.createDocumentProcessor({
2597
3053
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
2598
3054
  chunkSize: this.config.embedding.chunkSize,
2599
3055
  chunkOverlap: this.config.embedding.chunkOverlap,
2600
3056
  maps: this.config.maps,
2601
- };
2602
- const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
3057
+ configDir,
3058
+ }, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
2603
3059
  this.processor = processor;
2604
- const queue = this.factories.createEventQueue({
3060
+ this.queue = this.factories.createEventQueue({
2605
3061
  debounceMs: this.config.watch.debounceMs ?? 2000,
2606
3062
  concurrency: this.config.embedding.concurrency ?? 5,
2607
3063
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2608
3064
  });
2609
- this.queue = queue;
2610
- const respectGitignore = this.config.watch.respectGitignore ?? true;
2611
- const gitignoreFilter = respectGitignore
2612
- ? new GitignoreFilter(this.config.watch.paths)
2613
- : undefined;
2614
- const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2615
- maxRetries: this.config.maxRetries,
2616
- maxBackoffMs: this.config.maxBackoffMs,
2617
- onFatalError: this.runtimeOptions.onFatalError,
2618
- gitignoreFilter,
2619
- });
2620
- this.watcher = watcher;
2621
- const server = this.factories.createApiServer({
2622
- processor,
2623
- vectorStore,
2624
- embeddingProvider,
2625
- queue,
2626
- config: this.config,
2627
- logger,
2628
- });
2629
- this.server = server;
2630
- await server.listen({
2631
- host: this.config.api?.host ?? '127.0.0.1',
2632
- port: this.config.api?.port ?? 3456,
2633
- });
2634
- watcher.start();
3065
+ this.watcher = this.createWatcher(this.queue, processor, logger);
3066
+ this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
3067
+ this.watcher.start();
2635
3068
  this.startConfigWatch();
2636
3069
  logger.info('jeeves-watcher started');
2637
3070
  }
@@ -2662,22 +3095,61 @@ class JeevesWatcher {
2662
3095
  }
2663
3096
  this.logger?.info('jeeves-watcher stopped');
2664
3097
  }
3098
+ async initEmbeddingAndStore(logger) {
3099
+ let embeddingProvider;
3100
+ try {
3101
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
3102
+ }
3103
+ catch (error) {
3104
+ logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
3105
+ throw error;
3106
+ }
3107
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
3108
+ await vectorStore.ensureCollection();
3109
+ return { embeddingProvider, vectorStore };
3110
+ }
3111
+ createWatcher(queue, processor, logger) {
3112
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
3113
+ const gitignoreFilter = respectGitignore
3114
+ ? new GitignoreFilter(this.config.watch.paths)
3115
+ : undefined;
3116
+ return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
3117
+ maxRetries: this.config.maxRetries,
3118
+ maxBackoffMs: this.config.maxBackoffMs,
3119
+ onFatalError: this.runtimeOptions.onFatalError,
3120
+ gitignoreFilter,
3121
+ });
3122
+ }
3123
+ async startApiServer(processor, vectorStore, embeddingProvider, logger) {
3124
+ const server = this.factories.createApiServer({
3125
+ processor,
3126
+ vectorStore,
3127
+ embeddingProvider,
3128
+ queue: this.queue,
3129
+ config: this.config,
3130
+ logger,
3131
+ });
3132
+ await server.listen({
3133
+ host: this.config.api?.host ?? '127.0.0.1',
3134
+ port: this.config.api?.port ?? 3456,
3135
+ });
3136
+ return server;
3137
+ }
2665
3138
  startConfigWatch() {
2666
3139
  const logger = this.logger;
2667
3140
  if (!logger)
2668
3141
  return;
2669
3142
  const enabled = this.config.configWatch?.enabled ?? true;
2670
- if (!enabled)
2671
- return;
2672
- if (!this.configPath) {
2673
- logger.debug('Config watch enabled, but no config path was provided');
3143
+ if (!enabled || !this.configPath) {
3144
+ if (!this.configPath) {
3145
+ logger.debug('Config watch enabled, but no config path was provided');
3146
+ }
2674
3147
  return;
2675
3148
  }
2676
- const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
2677
3149
  this.configWatcher = new ConfigWatcher({
2678
3150
  configPath: this.configPath,
2679
3151
  enabled,
2680
- debounceMs,
3152
+ debounceMs: this.config.configWatch?.debounceMs ?? 10000,
2681
3153
  logger,
2682
3154
  onChange: async () => this.reloadConfig(),
2683
3155
  });
@@ -2699,7 +3171,9 @@ class JeevesWatcher {
2699
3171
  const newConfig = await this.factories.loadConfig(this.configPath);
2700
3172
  this.config = newConfig;
2701
3173
  const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
2702
- processor.updateRules(compiledRules);
3174
+ const reloadConfigDir = node_path.dirname(this.configPath);
3175
+ const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
3176
+ processor.updateRules(compiledRules, newTemplateEngine);
2703
3177
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
2704
3178
  }
2705
3179
  catch (error) {
@@ -2707,19 +3181,7 @@ class JeevesWatcher {
2707
3181
  }
2708
3182
  }
2709
3183
  }
2710
- /**
2711
- * Create and start a JeevesWatcher from a config file path.
2712
- *
2713
- * @param configPath - Optional path to the configuration file.
2714
- * @returns The running JeevesWatcher instance.
2715
- */
2716
- async function startFromConfig(configPath) {
2717
- const config = await loadConfig(configPath);
2718
- const app = new JeevesWatcher(config, configPath);
2719
- installShutdownHandlers(() => app.stop());
2720
- await app.start();
2721
- return app;
2722
- }
3184
+ // startFromConfig re-exported from ./startFromConfig
2723
3185
 
2724
3186
  exports.DocumentProcessor = DocumentProcessor;
2725
3187
  exports.EventQueue = EventQueue;
@@ -2727,15 +3189,18 @@ exports.FileSystemWatcher = FileSystemWatcher;
2727
3189
  exports.GitignoreFilter = GitignoreFilter;
2728
3190
  exports.JeevesWatcher = JeevesWatcher;
2729
3191
  exports.SystemHealth = SystemHealth;
3192
+ exports.TemplateEngine = TemplateEngine;
2730
3193
  exports.VectorStoreClient = VectorStoreClient;
2731
3194
  exports.apiConfigSchema = apiConfigSchema;
2732
3195
  exports.applyRules = applyRules;
2733
3196
  exports.buildAttributes = buildAttributes;
3197
+ exports.buildTemplateEngine = buildTemplateEngine;
2734
3198
  exports.compileRules = compileRules;
2735
3199
  exports.configWatchConfigSchema = configWatchConfigSchema;
2736
3200
  exports.contentHash = contentHash;
2737
3201
  exports.createApiServer = createApiServer;
2738
3202
  exports.createEmbeddingProvider = createEmbeddingProvider;
3203
+ exports.createHandlebarsInstance = createHandlebarsInstance;
2739
3204
  exports.createLogger = createLogger;
2740
3205
  exports.deleteMetadata = deleteMetadata;
2741
3206
  exports.embeddingConfigSchema = embeddingConfigSchema;
@@ -2743,10 +3208,13 @@ exports.extractText = extractText;
2743
3208
  exports.inferenceRuleSchema = inferenceRuleSchema;
2744
3209
  exports.jeevesWatcherConfigSchema = jeevesWatcherConfigSchema;
2745
3210
  exports.loadConfig = loadConfig;
3211
+ exports.loadCustomHelpers = loadCustomHelpers;
2746
3212
  exports.loggingConfigSchema = loggingConfigSchema;
2747
3213
  exports.metadataPath = metadataPath;
2748
3214
  exports.pointId = pointId;
2749
3215
  exports.readMetadata = readMetadata;
3216
+ exports.registerBuiltinHelpers = registerBuiltinHelpers;
3217
+ exports.resolveTemplateSource = resolveTemplateSource;
2750
3218
  exports.startFromConfig = startFromConfig;
2751
3219
  exports.vectorStoreConfigSchema = vectorStoreConfigSchema;
2752
3220
  exports.watchConfigSchema = watchConfigSchema;