@karmaniverous/jeeves-watcher 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mjs/index.js CHANGED
@@ -2,14 +2,22 @@ import Fastify from 'fastify';
2
2
  import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
3
3
  import { resolve, dirname, join, relative, extname, basename } from 'node:path';
4
4
  import picomatch from 'picomatch';
5
- import { omit, get } from 'radash';
5
+ import { omit, capitalize, title, camel, snake, dash, isEqual, get } from 'radash';
6
6
  import { createHash } from 'node:crypto';
7
+ import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
8
+ import ignore from 'ignore';
9
+ import Handlebars from 'handlebars';
10
+ import dayjs from 'dayjs';
11
+ import { toMdast } from 'hast-util-to-mdast';
12
+ import { fromADF } from 'mdast-util-from-adf';
13
+ import { toMarkdown } from 'mdast-util-to-markdown';
14
+ import rehypeParse from 'rehype-parse';
15
+ import { unified } from 'unified';
16
+ import chokidar from 'chokidar';
7
17
  import { cosmiconfig } from 'cosmiconfig';
8
18
  import { z, ZodError } from 'zod';
9
19
  import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
10
20
  import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
11
- import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
12
- import ignore from 'ignore';
13
21
  import pino from 'pino';
14
22
  import { v5 } from 'uuid';
15
23
  import * as cheerio from 'cheerio';
@@ -19,7 +27,6 @@ import Ajv from 'ajv';
19
27
  import addFormats from 'ajv-formats';
20
28
  import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
21
29
  import { QdrantClient } from '@qdrant/js-client-rest';
22
- import chokidar from 'chokidar';
23
30
 
24
31
  /**
25
32
  * @module util/normalizeError
@@ -417,6 +424,486 @@ function createApiServer(options) {
417
424
  return app;
418
425
  }
419
426
 
427
+ /**
428
+ * @module gitignore
429
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
430
+ */
431
+ /**
432
+ * Find the git repo root by walking up from `startDir` looking for `.git/`.
433
+ * Returns `undefined` if no repo is found.
434
+ */
435
+ function findRepoRoot(startDir) {
436
+ let dir = resolve(startDir);
437
+ const root = resolve('/');
438
+ while (dir !== root) {
439
+ if (existsSync(join(dir, '.git')) &&
440
+ statSync(join(dir, '.git')).isDirectory()) {
441
+ return dir;
442
+ }
443
+ const parent = dirname(dir);
444
+ if (parent === dir)
445
+ break;
446
+ dir = parent;
447
+ }
448
+ return undefined;
449
+ }
450
+ /**
451
+ * Convert a watch path (directory, file path, or glob) to a concrete directory
452
+ * that can be scanned for a repo root.
453
+ */
454
+ function watchPathToScanDir(watchPath) {
455
+ const absPath = resolve(watchPath);
456
+ try {
457
+ return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
458
+ }
459
+ catch {
460
+ // ignore
461
+ }
462
+ // If this is a glob, fall back to the non-glob prefix.
463
+ const globMatch = /[*?[{]/.exec(watchPath);
464
+ if (!globMatch)
465
+ return undefined;
466
+ const prefix = watchPath.slice(0, globMatch.index);
467
+ const trimmed = prefix.trim();
468
+ const baseDir = trimmed.length === 0
469
+ ? '.'
470
+ : trimmed.endsWith('/') || trimmed.endsWith('\\')
471
+ ? trimmed
472
+ : dirname(trimmed);
473
+ const resolved = resolve(baseDir);
474
+ if (!existsSync(resolved))
475
+ return undefined;
476
+ return resolved;
477
+ }
478
+ /**
479
+ * Recursively find all `.gitignore` files under `dir`.
480
+ * Skips `.git` and `node_modules` directories for performance.
481
+ */
482
+ function findGitignoreFiles(dir) {
483
+ const results = [];
484
+ const gitignorePath = join(dir, '.gitignore');
485
+ if (existsSync(gitignorePath)) {
486
+ results.push(gitignorePath);
487
+ }
488
+ let entries;
489
+ try {
490
+ entries = readdirSync(dir);
491
+ }
492
+ catch {
493
+ return results;
494
+ }
495
+ for (const entry of entries) {
496
+ if (entry === '.git' || entry === 'node_modules')
497
+ continue;
498
+ const fullPath = join(dir, entry);
499
+ try {
500
+ if (statSync(fullPath).isDirectory()) {
501
+ results.push(...findGitignoreFiles(fullPath));
502
+ }
503
+ }
504
+ catch {
505
+ // Skip inaccessible entries
506
+ }
507
+ }
508
+ return results;
509
+ }
510
+ /**
511
+ * Parse a `.gitignore` file into an `ignore` instance.
512
+ */
513
+ function parseGitignore(gitignorePath) {
514
+ const content = readFileSync(gitignorePath, 'utf8');
515
+ return ignore().add(content);
516
+ }
517
+ /**
518
+ * Normalize a path to use forward slashes (required by `ignore` package).
519
+ */
520
+ function toForwardSlash(p) {
521
+ return p.replace(/\\/g, '/');
522
+ }
523
+ /**
524
+ * Processor-level gitignore filter. Checks file paths against the nearest
525
+ * `.gitignore` chain in git repositories.
526
+ */
527
+ class GitignoreFilter {
528
+ repos = new Map();
529
+ /**
530
+ * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
531
+ *
532
+ * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
533
+ */
534
+ constructor(watchPaths) {
535
+ this.scan(watchPaths);
536
+ }
537
+ /**
538
+ * Scan paths for git repos and their `.gitignore` files.
539
+ */
540
+ scan(watchPaths) {
541
+ this.repos.clear();
542
+ const scannedDirs = new Set();
543
+ for (const watchPath of watchPaths) {
544
+ const scanDir = watchPathToScanDir(watchPath);
545
+ if (!scanDir)
546
+ continue;
547
+ if (scannedDirs.has(scanDir))
548
+ continue;
549
+ scannedDirs.add(scanDir);
550
+ const repoRoot = findRepoRoot(scanDir);
551
+ if (!repoRoot)
552
+ continue;
553
+ if (this.repos.has(repoRoot))
554
+ continue;
555
+ const gitignoreFiles = findGitignoreFiles(repoRoot);
556
+ const entries = gitignoreFiles.map((gf) => ({
557
+ dir: dirname(gf),
558
+ ig: parseGitignore(gf),
559
+ }));
560
+ // Sort deepest-first so nested `.gitignore` files are checked first
561
+ entries.sort((a, b) => b.dir.length - a.dir.length);
562
+ this.repos.set(repoRoot, { root: repoRoot, entries });
563
+ }
564
+ }
565
+ /**
566
+ * Check whether a file path is ignored by any applicable `.gitignore`.
567
+ *
568
+ * @param filePath - Absolute file path to check.
569
+ * @returns `true` if the file should be ignored.
570
+ */
571
+ isIgnored(filePath) {
572
+ const absPath = resolve(filePath);
573
+ for (const [, repo] of this.repos) {
574
+ // Check if file is within this repo
575
+ const relToRepo = relative(repo.root, absPath);
576
+ // On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
577
+ // an absolute path with a drive letter instead of a relative one. The
578
+ // `ignore` library rejects these with a RangeError. Skip repos on
579
+ // different drives to avoid cross-drive gitignore mismatches.
580
+ if (relToRepo.startsWith('..') ||
581
+ relToRepo.startsWith(resolve('/')) ||
582
+ /^[a-zA-Z]:/.test(relToRepo)) {
583
+ continue;
584
+ }
585
+ // Check each `.gitignore` entry (deepest-first)
586
+ for (const entry of repo.entries) {
587
+ const relToEntry = relative(entry.dir, absPath);
588
+ if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
589
+ continue;
590
+ const normalized = toForwardSlash(relToEntry);
591
+ if (entry.ig.ignores(normalized)) {
592
+ return true;
593
+ }
594
+ }
595
+ }
596
+ return false;
597
+ }
598
+ /**
599
+ * Invalidate and re-parse a specific `.gitignore` file.
600
+ * Call when a `.gitignore` file is added, changed, or removed.
601
+ *
602
+ * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
603
+ */
604
+ invalidate(gitignorePath) {
605
+ const absPath = resolve(gitignorePath);
606
+ const gitignoreDir = dirname(absPath);
607
+ for (const [, repo] of this.repos) {
608
+ const relToRepo = relative(repo.root, gitignoreDir);
609
+ if (relToRepo.startsWith('..'))
610
+ continue;
611
+ // Remove old entry for this directory
612
+ repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
613
+ // Re-parse if file still exists
614
+ if (existsSync(absPath)) {
615
+ repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
616
+ // Re-sort deepest-first
617
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
618
+ }
619
+ return;
620
+ }
621
+ // If not in any known repo, check if it's in a repo we haven't scanned
622
+ const repoRoot = findRepoRoot(gitignoreDir);
623
+ if (repoRoot && existsSync(absPath)) {
624
+ const entries = [
625
+ { dir: gitignoreDir, ig: parseGitignore(absPath) },
626
+ ];
627
+ if (this.repos.has(repoRoot)) {
628
+ const repo = this.repos.get(repoRoot);
629
+ repo.entries.push(entries[0]);
630
+ repo.entries.sort((a, b) => b.dir.length - a.dir.length);
631
+ }
632
+ else {
633
+ this.repos.set(repoRoot, { root: repoRoot, entries });
634
+ }
635
+ }
636
+ }
637
+ }
638
+
639
+ /**
640
+ * @module templates/helpers
641
+ * Registers built-in Handlebars helpers for content templates.
642
+ */
643
+ /** Pre-built rehype parser for HTML → hast conversion. */
644
+ const htmlParser = unified().use(rehypeParse, { fragment: true });
645
+ /**
646
+ * Register all built-in helpers on a Handlebars instance.
647
+ *
648
+ * @param hbs - The Handlebars instance.
649
+ */
650
+ function registerBuiltinHelpers(hbs) {
651
+ // Structural: ADF → Markdown
652
+ hbs.registerHelper('adfToMarkdown', function (adf) {
653
+ if (!adf || typeof adf !== 'object')
654
+ return '';
655
+ try {
656
+ const mdast = fromADF(adf);
657
+ return new hbs.SafeString(toMarkdown(mdast).trim());
658
+ }
659
+ catch {
660
+ return '<!-- ADF conversion failed -->';
661
+ }
662
+ });
663
+ // Structural: HTML → Markdown
664
+ hbs.registerHelper('markdownify', function (html) {
665
+ if (typeof html !== 'string' || !html.trim())
666
+ return '';
667
+ try {
668
+ const hast = htmlParser.parse(html);
669
+ const mdast = toMdast(hast);
670
+ return new hbs.SafeString(toMarkdown(mdast).trim());
671
+ }
672
+ catch {
673
+ return '<!-- HTML conversion failed -->';
674
+ }
675
+ });
676
+ // Formatting: dateFormat
677
+ hbs.registerHelper('dateFormat', function (value, format) {
678
+ if (value === undefined || value === null)
679
+ return '';
680
+ const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
681
+ return dayjs(value).format(fmt);
682
+ });
683
+ // Formatting: join
684
+ hbs.registerHelper('join', function (arr, separator) {
685
+ if (!Array.isArray(arr))
686
+ return '';
687
+ const sep = typeof separator === 'string' ? separator : ', ';
688
+ return arr.join(sep);
689
+ });
690
+ // Formatting: pluck
691
+ hbs.registerHelper('pluck', function (arr, key) {
692
+ if (!Array.isArray(arr) || typeof key !== 'string')
693
+ return [];
694
+ return arr.map((item) => item && typeof item === 'object'
695
+ ? item[key]
696
+ : undefined);
697
+ });
698
+ // String transforms
699
+ hbs.registerHelper('lowercase', (text) => typeof text === 'string' ? text.toLowerCase() : '');
700
+ hbs.registerHelper('uppercase', (text) => typeof text === 'string' ? text.toUpperCase() : '');
701
+ hbs.registerHelper('capitalize', (text) => typeof text === 'string' ? capitalize(text) : '');
702
+ hbs.registerHelper('title', (text) => typeof text === 'string' ? title(text) : '');
703
+ hbs.registerHelper('camel', (text) => typeof text === 'string' ? camel(text) : '');
704
+ hbs.registerHelper('snake', (text) => typeof text === 'string' ? snake(text) : '');
705
+ hbs.registerHelper('dash', (text) => typeof text === 'string' ? dash(text) : '');
706
+ // default helper
707
+ hbs.registerHelper('default', function (value, fallback) {
708
+ return value ?? fallback ?? '';
709
+ });
710
+ // eq helper (deep equality)
711
+ hbs.registerHelper('eq', function (a, b) {
712
+ return isEqual(a, b);
713
+ });
714
+ // json helper
715
+ hbs.registerHelper('json', function (value) {
716
+ return new hbs.SafeString(JSON.stringify(value, null, 2));
717
+ });
718
+ }
719
+
720
+ /**
721
+ * @module templates/engine
722
+ * Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
723
+ */
724
+ /**
725
+ * Resolve a template value to its source string.
726
+ *
727
+ * Resolution order:
728
+ * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
729
+ * 2. Matches a key in namedTemplates → named ref (recursively resolve)
730
+ * 3. Otherwise → inline Handlebars template string
731
+ *
732
+ * @param value - The template reference (inline, file path, or named ref).
733
+ * @param namedTemplates - Named template definitions from config.
734
+ * @param configDir - Directory to resolve relative file paths against.
735
+ * @param visited - Set of visited named refs for cycle detection.
736
+ * @returns The resolved template source string.
737
+ */
738
+ function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
739
+ // File path detection
740
+ if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
741
+ return readFileSync(resolve(configDir, value), 'utf-8');
742
+ }
743
+ // Named ref
744
+ if (namedTemplates?.[value] !== undefined) {
745
+ if (visited.has(value)) {
746
+ throw new Error(`Circular template reference detected: ${value}`);
747
+ }
748
+ visited.add(value);
749
+ return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
750
+ }
751
+ // Inline
752
+ return value;
753
+ }
754
+ /**
755
+ * Create a configured Handlebars instance with built-in helpers registered.
756
+ *
757
+ * @returns A Handlebars instance with helpers.
758
+ */
759
+ function createHandlebarsInstance() {
760
+ const hbs = Handlebars.create();
761
+ registerBuiltinHelpers(hbs);
762
+ return hbs;
763
+ }
764
+ /**
765
+ * Load custom helpers from file paths.
766
+ *
767
+ * Each file should export a default function that receives the Handlebars instance.
768
+ *
769
+ * @param hbs - The Handlebars instance.
770
+ * @param paths - File paths to custom helper modules.
771
+ * @param configDir - Directory to resolve relative paths against.
772
+ */
773
+ async function loadCustomHelpers(hbs, paths, configDir) {
774
+ for (const p of paths) {
775
+ const resolved = resolve(configDir, p);
776
+ const mod = (await import(resolved));
777
+ if (typeof mod.default === 'function') {
778
+ mod.default(hbs);
779
+ }
780
+ }
781
+ }
782
+ /**
783
+ * The template engine: holds compiled templates and renders them against context.
784
+ */
785
+ class TemplateEngine {
786
+ hbs;
787
+ compiled = new Map();
788
+ constructor(hbs) {
789
+ this.hbs = hbs;
790
+ }
791
+ /**
792
+ * Compile and cache a template from its source string.
793
+ *
794
+ * @param key - Cache key (rule index or named template).
795
+ * @param source - Handlebars template source.
796
+ * @returns The compiled template.
797
+ */
798
+ compile(key, source) {
799
+ const fn = this.hbs.compile(source);
800
+ this.compiled.set(key, fn);
801
+ return fn;
802
+ }
803
+ /**
804
+ * Get a previously compiled template by key.
805
+ *
806
+ * @param key - The cache key.
807
+ * @returns The compiled template, or undefined.
808
+ */
809
+ get(key) {
810
+ return this.compiled.get(key);
811
+ }
812
+ /**
813
+ * Render a compiled template against a context.
814
+ *
815
+ * @param key - The cache key of the compiled template.
816
+ * @param context - The data context for rendering.
817
+ * @returns The rendered string, or null if the template was not found.
818
+ */
819
+ render(key, context) {
820
+ const fn = this.compiled.get(key);
821
+ if (!fn)
822
+ return null;
823
+ return fn(context);
824
+ }
825
+ }
826
+
827
+ /**
828
+ * @module templates/buildTemplateEngine
829
+ * Factory to build a TemplateEngine from config, compiling all rule templates at load time.
830
+ */
831
+ /**
832
+ * Build a TemplateEngine from configuration, pre-compiling all rule templates.
833
+ *
834
+ * @param rules - The inference rules (may contain template fields).
835
+ * @param namedTemplates - Named template definitions from config.
836
+ * @param templateHelperPaths - Paths to custom helper modules.
837
+ * @param configDir - Directory to resolve relative paths against.
838
+ * @returns The configured TemplateEngine, or undefined if no templates are used.
839
+ */
840
+ async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
841
+ const rulesWithTemplates = rules.filter((r) => r.template);
842
+ if (rulesWithTemplates.length === 0)
843
+ return undefined;
844
+ const hbs = createHandlebarsInstance();
845
+ // Load custom helpers
846
+ if (templateHelperPaths?.length && configDir) {
847
+ await loadCustomHelpers(hbs, templateHelperPaths, configDir);
848
+ }
849
+ const engine = new TemplateEngine(hbs);
850
+ // Compile all rule templates
851
+ for (const [index, rule] of rules.entries()) {
852
+ if (!rule.template)
853
+ continue;
854
+ const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
855
+ engine.compile(`rule-${String(index)}`, source);
856
+ }
857
+ return engine;
858
+ }
859
+
860
+ /**
861
+ * @module app/configWatcher
862
+ * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
863
+ */
864
+ /**
865
+ * Debounced config file watcher.
866
+ */
867
+ class ConfigWatcher {
868
+ options;
869
+ watcher;
870
+ debounce;
871
+ constructor(options) {
872
+ this.options = options;
873
+ }
874
+ start() {
875
+ if (!this.options.enabled)
876
+ return;
877
+ this.watcher = chokidar.watch(this.options.configPath, {
878
+ ignoreInitial: true,
879
+ });
880
+ this.watcher.on('change', () => {
881
+ if (this.debounce)
882
+ clearTimeout(this.debounce);
883
+ this.debounce = setTimeout(() => {
884
+ void this.options.onChange();
885
+ }, this.options.debounceMs);
886
+ });
887
+ this.watcher.on('error', (error) => {
888
+ this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
889
+ });
890
+ this.options.logger.info({
891
+ configPath: this.options.configPath,
892
+ debounceMs: this.options.debounceMs,
893
+ }, 'Config watcher started');
894
+ }
895
+ async stop() {
896
+ if (this.debounce) {
897
+ clearTimeout(this.debounce);
898
+ this.debounce = undefined;
899
+ }
900
+ if (this.watcher) {
901
+ await this.watcher.close();
902
+ this.watcher = undefined;
903
+ }
904
+ }
905
+ }
906
+
420
907
  /**
421
908
  * @module config/defaults
422
909
  * Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
@@ -618,7 +1105,12 @@ const inferenceRuleSchema = z.object({
618
1105
  map: z
619
1106
  .union([jsonMapMapSchema, z.string()])
620
1107
  .optional()
621
- .describe('JsonMap transformation (inline definition or named map reference).'),
1108
+ .describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
1109
+ /** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
1110
+ template: z
1111
+ .string()
1112
+ .optional()
1113
+ .describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
622
1114
  });
623
1115
  /**
624
1116
  * Top-level configuration for jeeves-watcher.
@@ -656,6 +1148,22 @@ const jeevesWatcherConfigSchema = z.object({
656
1148
  .record(z.string(), jsonMapMapSchema)
657
1149
  .optional()
658
1150
  .describe('Reusable named JsonMap transformations.'),
1151
+ /** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
1152
+ templates: z
1153
+ .record(z.string(), z.string())
1154
+ .optional()
1155
+ .describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
1156
+ /** Custom Handlebars helper registration. */
1157
+ templateHelpers: z
1158
+ .object({
1159
+ /** File paths to custom helper modules. */
1160
+ paths: z
1161
+ .array(z.string())
1162
+ .optional()
1163
+ .describe('File paths to custom helper modules.'),
1164
+ })
1165
+ .optional()
1166
+ .describe('Custom Handlebars helper registration.'),
659
1167
  /** Logging configuration. */
660
1168
  logging: loggingConfigSchema.optional().describe('Logging configuration.'),
661
1169
  /** Timeout in milliseconds for graceful shutdown. */
@@ -913,258 +1421,52 @@ function createGeminiProvider(config, logger) {
913
1421
  }, {
914
1422
  attempts: 5,
915
1423
  baseDelayMs: 500,
916
- maxDelayMs: 10_000,
917
- jitter: 0.2,
918
- onRetry: ({ attempt, delayMs, error }) => {
919
- log.warn({
920
- attempt,
921
- delayMs,
922
- provider: 'gemini',
923
- model: config.model,
924
- err: normalizeError(error),
925
- }, 'Embedding call failed; will retry');
926
- },
927
- });
928
- // Validate dimensions
929
- for (const vector of vectors) {
930
- if (vector.length !== dimensions) {
931
- throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
932
- }
933
- }
934
- return vectors;
935
- },
936
- };
937
- }
938
- function createMockFromConfig(config) {
939
- const dimensions = config.dimensions ?? 768;
940
- return createMockProvider(dimensions);
941
- }
942
- const embeddingProviderRegistry = new Map([
943
- ['mock', createMockFromConfig],
944
- ['gemini', createGeminiProvider],
945
- ]);
946
- /**
947
- * Create an embedding provider based on the given configuration.
948
- *
949
- * Each provider is responsible for its own default dimensions.
950
- *
951
- * @param config - The embedding configuration.
952
- * @param logger - Optional pino logger for retry warnings.
953
- * @returns An {@link EmbeddingProvider} instance.
954
- * @throws If the configured provider is not supported.
955
- */
956
- function createEmbeddingProvider(config, logger) {
957
- const factory = embeddingProviderRegistry.get(config.provider);
958
- if (!factory) {
959
- throw new Error(`Unsupported embedding provider: ${config.provider}`);
960
- }
961
- return factory(config, logger);
962
- }
963
-
964
- /**
965
- * @module gitignore
966
- * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
967
- */
968
- /**
969
- * Find the git repo root by walking up from `startDir` looking for `.git/`.
970
- * Returns `undefined` if no repo is found.
971
- */
972
- function findRepoRoot(startDir) {
973
- let dir = resolve(startDir);
974
- const root = resolve('/');
975
- while (dir !== root) {
976
- if (existsSync(join(dir, '.git')) &&
977
- statSync(join(dir, '.git')).isDirectory()) {
978
- return dir;
979
- }
980
- const parent = dirname(dir);
981
- if (parent === dir)
982
- break;
983
- dir = parent;
984
- }
985
- return undefined;
986
- }
987
- /**
988
- * Convert a watch path (directory, file path, or glob) to a concrete directory
989
- * that can be scanned for a repo root.
990
- */
991
- function watchPathToScanDir(watchPath) {
992
- const absPath = resolve(watchPath);
993
- try {
994
- return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
995
- }
996
- catch {
997
- // ignore
998
- }
999
- // If this is a glob, fall back to the non-glob prefix.
1000
- const globMatch = /[*?[{]/.exec(watchPath);
1001
- if (!globMatch)
1002
- return undefined;
1003
- const prefix = watchPath.slice(0, globMatch.index);
1004
- const trimmed = prefix.trim();
1005
- const baseDir = trimmed.length === 0
1006
- ? '.'
1007
- : trimmed.endsWith('/') || trimmed.endsWith('\\')
1008
- ? trimmed
1009
- : dirname(trimmed);
1010
- const resolved = resolve(baseDir);
1011
- if (!existsSync(resolved))
1012
- return undefined;
1013
- return resolved;
1014
- }
1015
- /**
1016
- * Recursively find all `.gitignore` files under `dir`.
1017
- * Skips `.git` and `node_modules` directories for performance.
1018
- */
1019
- function findGitignoreFiles(dir) {
1020
- const results = [];
1021
- const gitignorePath = join(dir, '.gitignore');
1022
- if (existsSync(gitignorePath)) {
1023
- results.push(gitignorePath);
1024
- }
1025
- let entries;
1026
- try {
1027
- entries = readdirSync(dir);
1028
- }
1029
- catch {
1030
- return results;
1031
- }
1032
- for (const entry of entries) {
1033
- if (entry === '.git' || entry === 'node_modules')
1034
- continue;
1035
- const fullPath = join(dir, entry);
1036
- try {
1037
- if (statSync(fullPath).isDirectory()) {
1038
- results.push(...findGitignoreFiles(fullPath));
1039
- }
1040
- }
1041
- catch {
1042
- // Skip inaccessible entries
1043
- }
1044
- }
1045
- return results;
1046
- }
1047
- /**
1048
- * Parse a `.gitignore` file into an `ignore` instance.
1049
- */
1050
- function parseGitignore(gitignorePath) {
1051
- const content = readFileSync(gitignorePath, 'utf8');
1052
- return ignore().add(content);
1053
- }
1054
- /**
1055
- * Normalize a path to use forward slashes (required by `ignore` package).
1056
- */
1057
- function toForwardSlash(p) {
1058
- return p.replace(/\\/g, '/');
1059
- }
1060
- /**
1061
- * Processor-level gitignore filter. Checks file paths against the nearest
1062
- * `.gitignore` chain in git repositories.
1063
- */
1064
- class GitignoreFilter {
1065
- repos = new Map();
1066
- /**
1067
- * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
1068
- *
1069
- * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
1070
- */
1071
- constructor(watchPaths) {
1072
- this.scan(watchPaths);
1073
- }
1074
- /**
1075
- * Scan paths for git repos and their `.gitignore` files.
1076
- */
1077
- scan(watchPaths) {
1078
- this.repos.clear();
1079
- const scannedDirs = new Set();
1080
- for (const watchPath of watchPaths) {
1081
- const scanDir = watchPathToScanDir(watchPath);
1082
- if (!scanDir)
1083
- continue;
1084
- if (scannedDirs.has(scanDir))
1085
- continue;
1086
- scannedDirs.add(scanDir);
1087
- const repoRoot = findRepoRoot(scanDir);
1088
- if (!repoRoot)
1089
- continue;
1090
- if (this.repos.has(repoRoot))
1091
- continue;
1092
- const gitignoreFiles = findGitignoreFiles(repoRoot);
1093
- const entries = gitignoreFiles.map((gf) => ({
1094
- dir: dirname(gf),
1095
- ig: parseGitignore(gf),
1096
- }));
1097
- // Sort deepest-first so nested `.gitignore` files are checked first
1098
- entries.sort((a, b) => b.dir.length - a.dir.length);
1099
- this.repos.set(repoRoot, { root: repoRoot, entries });
1100
- }
1101
- }
1102
- /**
1103
- * Check whether a file path is ignored by any applicable `.gitignore`.
1104
- *
1105
- * @param filePath - Absolute file path to check.
1106
- * @returns `true` if the file should be ignored.
1107
- */
1108
- isIgnored(filePath) {
1109
- const absPath = resolve(filePath);
1110
- for (const [, repo] of this.repos) {
1111
- // Check if file is within this repo
1112
- const relToRepo = relative(repo.root, absPath);
1113
- if (relToRepo.startsWith('..') || relToRepo.startsWith(resolve('/'))) {
1114
- continue;
1115
- }
1116
- // Check each `.gitignore` entry (deepest-first)
1117
- for (const entry of repo.entries) {
1118
- const relToEntry = relative(entry.dir, absPath);
1119
- if (relToEntry.startsWith('..'))
1120
- continue;
1121
- const normalized = toForwardSlash(relToEntry);
1122
- if (entry.ig.ignores(normalized)) {
1123
- return true;
1124
- }
1125
- }
1126
- }
1127
- return false;
1128
- }
1129
- /**
1130
- * Invalidate and re-parse a specific `.gitignore` file.
1131
- * Call when a `.gitignore` file is added, changed, or removed.
1132
- *
1133
- * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
1134
- */
1135
- invalidate(gitignorePath) {
1136
- const absPath = resolve(gitignorePath);
1137
- const gitignoreDir = dirname(absPath);
1138
- for (const [, repo] of this.repos) {
1139
- const relToRepo = relative(repo.root, gitignoreDir);
1140
- if (relToRepo.startsWith('..'))
1141
- continue;
1142
- // Remove old entry for this directory
1143
- repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
1144
- // Re-parse if file still exists
1145
- if (existsSync(absPath)) {
1146
- repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
1147
- // Re-sort deepest-first
1148
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1149
- }
1150
- return;
1151
- }
1152
- // If not in any known repo, check if it's in a repo we haven't scanned
1153
- const repoRoot = findRepoRoot(gitignoreDir);
1154
- if (repoRoot && existsSync(absPath)) {
1155
- const entries = [
1156
- { dir: gitignoreDir, ig: parseGitignore(absPath) },
1157
- ];
1158
- if (this.repos.has(repoRoot)) {
1159
- const repo = this.repos.get(repoRoot);
1160
- repo.entries.push(entries[0]);
1161
- repo.entries.sort((a, b) => b.dir.length - a.dir.length);
1162
- }
1163
- else {
1164
- this.repos.set(repoRoot, { root: repoRoot, entries });
1424
+ maxDelayMs: 10_000,
1425
+ jitter: 0.2,
1426
+ onRetry: ({ attempt, delayMs, error }) => {
1427
+ log.warn({
1428
+ attempt,
1429
+ delayMs,
1430
+ provider: 'gemini',
1431
+ model: config.model,
1432
+ err: normalizeError(error),
1433
+ }, 'Embedding call failed; will retry');
1434
+ },
1435
+ });
1436
+ // Validate dimensions
1437
+ for (const vector of vectors) {
1438
+ if (vector.length !== dimensions) {
1439
+ throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
1440
+ }
1165
1441
  }
1166
- }
1442
+ return vectors;
1443
+ },
1444
+ };
1445
+ }
1446
+ function createMockFromConfig(config) {
1447
+ const dimensions = config.dimensions ?? 768;
1448
+ return createMockProvider(dimensions);
1449
+ }
1450
+ const embeddingProviderRegistry = new Map([
1451
+ ['mock', createMockFromConfig],
1452
+ ['gemini', createGeminiProvider],
1453
+ ]);
1454
+ /**
1455
+ * Create an embedding provider based on the given configuration.
1456
+ *
1457
+ * Each provider is responsible for its own default dimensions.
1458
+ *
1459
+ * @param config - The embedding configuration.
1460
+ * @param logger - Optional pino logger for retry warnings.
1461
+ * @returns An {@link EmbeddingProvider} instance.
1462
+ * @throws If the configured provider is not supported.
1463
+ */
1464
+ function createEmbeddingProvider(config, logger) {
1465
+ const factory = embeddingProviderRegistry.get(config.provider);
1466
+ if (!factory) {
1467
+ throw new Error(`Unsupported embedding provider: ${config.provider}`);
1167
1468
  }
1469
+ return factory(config, logger);
1168
1470
  }
1169
1471
 
1170
1472
  /**
@@ -1396,7 +1698,7 @@ function createJsonMapLib() {
1396
1698
  };
1397
1699
  }
1398
1700
  /**
1399
- * Apply compiled inference rules to file attributes, returning merged metadata.
1701
+ * Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
1400
1702
  *
1401
1703
  * Rules are evaluated in order; later rules override earlier ones.
1402
1704
  * If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
@@ -1406,15 +1708,18 @@ function createJsonMapLib() {
1406
1708
  * @param attributes - The file attributes to match against.
1407
1709
  * @param namedMaps - Optional record of named JsonMap definitions.
1408
1710
  * @param logger - Optional logger for warnings (falls back to console.warn).
1409
- * @returns The merged metadata from all matching rules.
1711
+ * @param templateEngine - Optional template engine for rendering content templates.
1712
+ * @param configDir - Optional config directory for resolving .json map file paths.
1713
+ * @returns The merged metadata and optional rendered content.
1410
1714
  */
1411
- async function applyRules(compiledRules, attributes, namedMaps, logger) {
1715
+ async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
1412
1716
  // JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
1413
1717
  // Our helper functions accept multiple args, which JsonMap supports at runtime.
1414
1718
  const lib = createJsonMapLib();
1415
1719
  let merged = {};
1720
+ let renderedContent = null;
1416
1721
  const log = logger ?? console;
1417
- for (const { rule, validate } of compiledRules) {
1722
+ for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
1418
1723
  if (validate(attributes)) {
1419
1724
  // Apply set resolution
1420
1725
  const setOutput = resolveSet(rule.set, attributes);
@@ -1424,10 +1729,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1424
1729
  let mapDef;
1425
1730
  // Resolve map reference
1426
1731
  if (typeof rule.map === 'string') {
1427
- mapDef = namedMaps?.[rule.map];
1428
- if (!mapDef) {
1429
- log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1430
- continue;
1732
+ if (rule.map.endsWith('.json') && configDir) {
1733
+ // File path: load from .json file
1734
+ try {
1735
+ const mapPath = resolve(configDir, rule.map);
1736
+ const raw = readFileSync(mapPath, 'utf-8');
1737
+ mapDef = JSON.parse(raw);
1738
+ }
1739
+ catch (error) {
1740
+ log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
1741
+ continue;
1742
+ }
1743
+ }
1744
+ else {
1745
+ mapDef = namedMaps?.[rule.map];
1746
+ if (!mapDef) {
1747
+ log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
1748
+ continue;
1749
+ }
1431
1750
  }
1432
1751
  }
1433
1752
  else {
@@ -1450,9 +1769,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
1450
1769
  log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
1451
1770
  }
1452
1771
  }
1772
+ // Render template if present
1773
+ if (rule.template && templateEngine) {
1774
+ const templateKey = `rule-${String(ruleIndex)}`;
1775
+ // Build template context: attributes (with json spread at top) + map output
1776
+ const context = {
1777
+ ...(attributes.json ?? {}),
1778
+ ...attributes,
1779
+ ...merged,
1780
+ };
1781
+ try {
1782
+ const result = templateEngine.render(templateKey, context);
1783
+ if (result && result.trim()) {
1784
+ renderedContent = result;
1785
+ }
1786
+ else {
1787
+ log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
1788
+ }
1789
+ }
1790
+ catch (error) {
1791
+ log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
1792
+ }
1793
+ }
1453
1794
  }
1454
1795
  }
1455
- return merged;
1796
+ return { metadata: merged, renderedContent };
1456
1797
  }
1457
1798
 
1458
1799
  /**
@@ -1541,23 +1882,32 @@ function compileRules(rules) {
1541
1882
  * @param metadataDir - The metadata directory for enrichment files.
1542
1883
  * @param maps - Optional named JsonMap definitions.
1543
1884
  * @param logger - Optional logger for rule warnings.
1885
+ * @param templateEngine - Optional template engine for content templates.
1886
+ * @param configDir - Optional config directory for resolving file paths.
1544
1887
  * @returns The merged metadata and intermediate data.
1545
1888
  */
1546
- async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
1889
+ async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
1547
1890
  const ext = extname(filePath);
1548
1891
  const stats = await stat(filePath);
1549
1892
  // 1. Extract text and structured data
1550
1893
  const extracted = await extractText(filePath, ext);
1551
1894
  // 2. Build attributes + apply rules
1552
1895
  const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
1553
- const inferred = await applyRules(compiledRules, attributes, maps, logger);
1896
+ const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
1554
1897
  // 3. Read enrichment metadata (merge, enrichment wins)
1555
1898
  const enrichment = await readMetadata(filePath, metadataDir);
1556
1899
  const metadata = {
1557
1900
  ...inferred,
1558
1901
  ...(enrichment ?? {}),
1559
1902
  };
1560
- return { inferred, enrichment, metadata, attributes, extracted };
1903
+ return {
1904
+ inferred,
1905
+ enrichment,
1906
+ metadata,
1907
+ attributes,
1908
+ extracted,
1909
+ renderedContent,
1910
+ };
1561
1911
  }
1562
1912
 
1563
1913
  /**
@@ -1628,6 +1978,7 @@ class DocumentProcessor {
1628
1978
  vectorStore;
1629
1979
  compiledRules;
1630
1980
  logger;
1981
+ templateEngine;
1631
1982
  /**
1632
1983
  * Create a new DocumentProcessor.
1633
1984
  *
@@ -1636,13 +1987,15 @@ class DocumentProcessor {
1636
1987
  * @param vectorStore - The vector store client.
1637
1988
  * @param compiledRules - The compiled inference rules.
1638
1989
  * @param logger - The logger instance.
1990
+ * @param templateEngine - Optional template engine for content templates.
1639
1991
  */
1640
- constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
1992
+ constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
1641
1993
  this.config = config;
1642
1994
  this.embeddingProvider = embeddingProvider;
1643
1995
  this.vectorStore = vectorStore;
1644
1996
  this.compiledRules = compiledRules;
1645
1997
  this.logger = logger;
1998
+ this.templateEngine = templateEngine;
1646
1999
  }
1647
2000
  /**
1648
2001
  * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
@@ -1653,13 +2006,15 @@ class DocumentProcessor {
1653
2006
  try {
1654
2007
  const ext = extname(filePath);
1655
2008
  // 1. Build merged metadata + extract text
1656
- const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
1657
- if (!extracted.text.trim()) {
2009
+ const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
2010
+ // Use rendered template content if available, otherwise raw extracted text
2011
+ const textToEmbed = renderedContent ?? extracted.text;
2012
+ if (!textToEmbed.trim()) {
1658
2013
  this.logger.debug({ filePath }, 'Skipping empty file');
1659
2014
  return;
1660
2015
  }
1661
2016
  // 2. Content hash check — skip if unchanged
1662
- const hash = contentHash(extracted.text);
2017
+ const hash = contentHash(textToEmbed);
1663
2018
  const baseId = pointId(filePath, 0);
1664
2019
  const existingPayload = await this.vectorStore.getPayload(baseId);
1665
2020
  if (existingPayload && existingPayload['content_hash'] === hash) {
@@ -1671,7 +2026,7 @@ class DocumentProcessor {
1671
2026
  const chunkSize = this.config.chunkSize ?? 1000;
1672
2027
  const chunkOverlap = this.config.chunkOverlap ?? 200;
1673
2028
  const splitter = createSplitter(ext, chunkSize, chunkOverlap);
1674
- const chunks = await splitter.splitText(extracted.text);
2029
+ const chunks = await splitter.splitText(textToEmbed);
1675
2030
  // 4. Embed all chunks
1676
2031
  const vectors = await this.embeddingProvider.embed(chunks);
1677
2032
  // 5. Upsert all chunk points
@@ -1765,7 +2120,7 @@ class DocumentProcessor {
1765
2120
  return null;
1766
2121
  }
1767
2122
  // Build merged metadata (lightweight — no embedding)
1768
- const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
2123
+ const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
1769
2124
  // Update all chunk payloads
1770
2125
  const totalChunks = getChunkCount(existingPayload);
1771
2126
  const ids = chunkIds(filePath, totalChunks);
@@ -1783,8 +2138,17 @@ class DocumentProcessor {
1783
2138
  *
1784
2139
  * @param compiledRules - The newly compiled rules.
1785
2140
  */
1786
- updateRules(compiledRules) {
2141
+ /**
2142
+ * Update compiled inference rules and optionally the template engine.
2143
+ *
2144
+ * @param compiledRules - The newly compiled rules.
2145
+ * @param templateEngine - Optional updated template engine.
2146
+ */
2147
+ updateRules(compiledRules, templateEngine) {
1787
2148
  this.compiledRules = compiledRules;
2149
+ if (templateEngine) {
2150
+ this.templateEngine = templateEngine;
2151
+ }
1788
2152
  this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
1789
2153
  }
1790
2154
  }
@@ -2313,6 +2677,104 @@ class SystemHealth {
2313
2677
  }
2314
2678
  }
2315
2679
 
2680
+ /**
2681
+ * @module watcher/globToDir
2682
+ * Adapts glob-based watch config to chokidar v4+, which removed glob support
2683
+ * (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
2684
+ * strings, silently producing zero events. This module extracts static directory
2685
+ * roots from glob patterns for chokidar to watch, then filters emitted events
2686
+ * against the original globs via picomatch.
2687
+ */
2688
/**
 * Return the longest static (glob-free) directory prefix of a pattern.
 *
 * Backslashes are normalized to forward slashes, then segments are scanned
 * left to right; the scan stops at the first segment containing a glob
 * metacharacter (`*`, `?`, `{`, `[`, `]`).
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`), or `.` when
 *   the very first segment is already a glob.
 */
function globRoot(glob) {
    const globChars = /[*?{[\]]/;
    const prefix = [];
    for (const segment of glob.replace(/\\/g, '/').split('/')) {
        if (globChars.test(segment))
            break;
        prefix.push(segment);
    }
    return prefix.join('/') || '.';
}
2706
/**
 * Deduplicate directory roots, removing paths nested under other roots.
 *
 * Each root is normalized (backslashes to forward slashes, lowercased for
 * Windows case-insensitivity) and trailing slashes are stripped so that
 * `a/` and `a` are treated as the same root and correctly subsume `a/b`.
 * Without the trailing-slash normalization, a root like `a/` builds the
 * prefix `a//`, which never matches and leaks nested roots through.
 *
 * @param roots - Array of directory paths.
 * @returns Sorted, deduplicated array with nested subdirectories removed.
 */
function deduplicateRoots(roots) {
    const normalized = roots.map((r) => {
        const slashed = r.replace(/\\/g, '/').toLowerCase();
        // Strip trailing slashes; a bare filesystem root '/' is preserved.
        return slashed.replace(/\/+$/, '') || '/';
    });
    const sorted = [...new Set(normalized)].sort();
    return sorted.filter((root) => {
        const withSlash = root === '/' ? root : root + '/';
        return !sorted.some((other) => {
            if (other === root)
                return false;
            const prefix = other === '/' ? other : other + '/';
            return withSlash.startsWith(prefix);
        });
    });
}
2720
/**
 * Build a single predicate over an array of glob patterns.
 *
 * Both patterns and candidate paths are normalized to forward slashes, and
 * matching is case-insensitive (`nocase`) with dotfiles included (`dot`) to
 * cope with Windows drive letters and mixed-case paths.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function testing whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const isMatch = picomatch(globs.map((g) => g.replace(/\\/g, '/')), { dot: true, nocase: true });
    return (filePath) => isMatch(filePath.replace(/\\/g, '/'));
}
2736
/**
 * Translate glob watch patterns into chokidar-watchable directory roots plus
 * a predicate for filtering emitted events against the original globs.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (directories for chokidar) and `matches`
 *   (filter function).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
2749
/**
 * Convert ignored patterns into forms chokidar v5 can apply correctly.
 *
 * Chokidar v5 replaced the external `anymatch` dependency with an inline
 * implementation that does **exact string equality** for string matchers,
 * breaking glob-based `ignored` patterns. Every string entry (glob or
 * literal path alike, for consistent matching) is therefore wrapped as a
 * picomatch predicate, which chokidar's `createPattern` passes through
 * unchanged (`typeof matcher === 'function'`). Non-string entries
 * (functions, RegExps) are returned as-is.
 *
 * @param ignored - Array of ignored patterns (globs, functions, RegExps).
 * @returns Array with glob strings replaced by picomatch matcher functions.
 */
function resolveIgnored(ignored) {
    return ignored.map((entry) => {
        if (typeof entry !== 'string')
            return entry;
        const matcher = picomatch(entry.replace(/\\/g, '/'), { dot: true, nocase: true });
        return (filePath) => matcher(filePath.replace(/\\/g, '/'));
    });
}
2777
+
2316
2778
  /**
2317
2779
  * @module watcher
2318
2780
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -2327,6 +2789,7 @@ class FileSystemWatcher {
2327
2789
  logger;
2328
2790
  health;
2329
2791
  gitignoreFilter;
2792
+ globMatches;
2330
2793
  watcher;
2331
2794
  /**
2332
2795
  * Create a new FileSystemWatcher.
@@ -2343,6 +2806,7 @@ class FileSystemWatcher {
2343
2806
  this.processor = processor;
2344
2807
  this.logger = logger;
2345
2808
  this.gitignoreFilter = options.gitignoreFilter;
2809
+ this.globMatches = () => true;
2346
2810
  const healthOptions = {
2347
2811
  maxRetries: options.maxRetries,
2348
2812
  maxBackoffMs: options.maxBackoffMs,
@@ -2355,8 +2819,20 @@ class FileSystemWatcher {
2355
2819
  * Start watching the filesystem and processing events.
2356
2820
  */
2357
2821
  start() {
2358
- this.watcher = chokidar.watch(this.config.paths, {
2359
- ignored: this.config.ignored,
2822
+ // Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
2823
+ // Glob patterns are silently treated as literal strings, producing zero
2824
+ // events. We extract static directory roots for chokidar to watch, then
2825
+ // filter emitted events against the original globs via picomatch.
2826
+ const { roots, matches } = resolveWatchPaths(this.config.paths);
2827
+ this.globMatches = matches;
2828
+ // Chokidar v5's inline anymatch does exact string equality for string
2829
+ // matchers, breaking glob-based ignored patterns. Convert to picomatch
2830
+ // functions that chokidar passes through as-is.
2831
+ const ignored = this.config.ignored
2832
+ ? resolveIgnored(this.config.ignored)
2833
+ : undefined;
2834
+ this.watcher = chokidar.watch(roots, {
2835
+ ignored,
2360
2836
  usePolling: this.config.usePolling,
2361
2837
  interval: this.config.pollIntervalMs,
2362
2838
  awaitWriteFinish: this.config.stabilityThresholdMs
@@ -2366,6 +2842,8 @@ class FileSystemWatcher {
2366
2842
  });
2367
2843
  this.watcher.on('add', (path) => {
2368
2844
  this.handleGitignoreChange(path);
2845
+ if (!this.globMatches(path))
2846
+ return;
2369
2847
  if (this.isGitignored(path))
2370
2848
  return;
2371
2849
  this.logger.debug({ path }, 'File added');
@@ -2373,6 +2851,8 @@ class FileSystemWatcher {
2373
2851
  });
2374
2852
  this.watcher.on('change', (path) => {
2375
2853
  this.handleGitignoreChange(path);
2854
+ if (!this.globMatches(path))
2855
+ return;
2376
2856
  if (this.isGitignored(path))
2377
2857
  return;
2378
2858
  this.logger.debug({ path }, 'File changed');
@@ -2380,6 +2860,8 @@ class FileSystemWatcher {
2380
2860
  });
2381
2861
  this.watcher.on('unlink', (path) => {
2382
2862
  this.handleGitignoreChange(path);
2863
+ if (!this.globMatches(path))
2864
+ return;
2383
2865
  if (this.isGitignored(path))
2384
2866
  return;
2385
2867
  this.logger.debug({ path }, 'File removed');
@@ -2452,51 +2934,21 @@ class FileSystemWatcher {
2452
2934
  }
2453
2935
 
2454
2936
  /**
2455
- * @module app/configWatcher
2456
- * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
2457
- */
2458
- /**
2459
- * Debounced config file watcher.
2937
+ * @module app/factories
2938
+ * Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
2460
2939
  */
2461
- class ConfigWatcher {
2462
- options;
2463
- watcher;
2464
- debounce;
2465
- constructor(options) {
2466
- this.options = options;
2467
- }
2468
- start() {
2469
- if (!this.options.enabled)
2470
- return;
2471
- this.watcher = chokidar.watch(this.options.configPath, {
2472
- ignoreInitial: true,
2473
- });
2474
- this.watcher.on('change', () => {
2475
- if (this.debounce)
2476
- clearTimeout(this.debounce);
2477
- this.debounce = setTimeout(() => {
2478
- void this.options.onChange();
2479
- }, this.options.debounceMs);
2480
- });
2481
- this.watcher.on('error', (error) => {
2482
- this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
2483
- });
2484
- this.options.logger.info({
2485
- configPath: this.options.configPath,
2486
- debounceMs: this.options.debounceMs,
2487
- }, 'Config watcher started');
2488
- }
2489
- async stop() {
2490
- if (this.debounce) {
2491
- clearTimeout(this.debounce);
2492
- this.debounce = undefined;
2493
- }
2494
- if (this.watcher) {
2495
- await this.watcher.close();
2496
- this.watcher = undefined;
2497
- }
2498
- }
2499
- }
2940
/** Default component factories wiring real implementations. */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient(config, dimensions, logger) {
        return new VectorStoreClient(config, dimensions, logger);
    },
    compileRules,
    createDocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
        return new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
    },
    createEventQueue(options) {
        return new EventQueue(options);
    },
    createFileSystemWatcher(config, queue, processor, logger, options) {
        return new FileSystemWatcher(config, queue, processor, logger, options);
    },
    createApiServer,
};
2500
2952
 
2501
2953
  /**
2502
2954
  * @module app/shutdown
@@ -2516,17 +2968,28 @@ function installShutdownHandlers(stop) {
2516
2968
  process.on('SIGINT', () => void shutdown());
2517
2969
  }
2518
2970
 
2519
- const defaultFactories = {
2520
- loadConfig,
2521
- createLogger,
2522
- createEmbeddingProvider,
2523
- createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
2524
- compileRules,
2525
- createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
2526
- createEventQueue: (options) => new EventQueue(options),
2527
- createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
2528
- createApiServer,
2529
- };
2971
+ /**
2972
+ * @module app/startFromConfig
2973
+ * Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
2974
+ */
2975
/**
 * Create and start a JeevesWatcher from a config file path.
 *
 * Loads configuration from disk, installs process shutdown handlers, and
 * starts the watcher before returning it.
 *
 * @param configPath - Optional path to the configuration file.
 * @returns The running JeevesWatcher instance.
 */
async function startFromConfig(configPath) {
    const config = await loadConfig(configPath);
    const watcherApp = new JeevesWatcher(config, configPath);
    installShutdownHandlers(() => watcherApp.stop());
    await watcherApp.start();
    return watcherApp;
}
2988
+
2989
+ /**
2990
+ * @module app
2991
+ * Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
2992
+ */
2530
2993
  /**
2531
2994
  * Main application class that wires together all components.
2532
2995
  */
@@ -2561,56 +3024,26 @@ class JeevesWatcher {
2561
3024
  async start() {
2562
3025
  const logger = this.factories.createLogger(this.config.logging);
2563
3026
  this.logger = logger;
2564
- let embeddingProvider;
2565
- try {
2566
- embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
2567
- }
2568
- catch (error) {
2569
- logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
2570
- throw error;
2571
- }
2572
- const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
2573
- await vectorStore.ensureCollection();
3027
+ const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
2574
3028
  const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
2575
- const processorConfig = {
3029
+ const configDir = this.configPath ? dirname(this.configPath) : '.';
3030
+ const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
3031
+ const processor = this.factories.createDocumentProcessor({
2576
3032
  metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
2577
3033
  chunkSize: this.config.embedding.chunkSize,
2578
3034
  chunkOverlap: this.config.embedding.chunkOverlap,
2579
3035
  maps: this.config.maps,
2580
- };
2581
- const processor = this.factories.createDocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
3036
+ configDir,
3037
+ }, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
2582
3038
  this.processor = processor;
2583
- const queue = this.factories.createEventQueue({
3039
+ this.queue = this.factories.createEventQueue({
2584
3040
  debounceMs: this.config.watch.debounceMs ?? 2000,
2585
3041
  concurrency: this.config.embedding.concurrency ?? 5,
2586
3042
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2587
3043
  });
2588
- this.queue = queue;
2589
- const respectGitignore = this.config.watch.respectGitignore ?? true;
2590
- const gitignoreFilter = respectGitignore
2591
- ? new GitignoreFilter(this.config.watch.paths)
2592
- : undefined;
2593
- const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2594
- maxRetries: this.config.maxRetries,
2595
- maxBackoffMs: this.config.maxBackoffMs,
2596
- onFatalError: this.runtimeOptions.onFatalError,
2597
- gitignoreFilter,
2598
- });
2599
- this.watcher = watcher;
2600
- const server = this.factories.createApiServer({
2601
- processor,
2602
- vectorStore,
2603
- embeddingProvider,
2604
- queue,
2605
- config: this.config,
2606
- logger,
2607
- });
2608
- this.server = server;
2609
- await server.listen({
2610
- host: this.config.api?.host ?? '127.0.0.1',
2611
- port: this.config.api?.port ?? 3456,
2612
- });
2613
- watcher.start();
3044
+ this.watcher = this.createWatcher(this.queue, processor, logger);
3045
+ this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
3046
+ this.watcher.start();
2614
3047
  this.startConfigWatch();
2615
3048
  logger.info('jeeves-watcher started');
2616
3049
  }
@@ -2641,22 +3074,61 @@ class JeevesWatcher {
2641
3074
  }
2642
3075
  this.logger?.info('jeeves-watcher stopped');
2643
3076
  }
3077
+ async initEmbeddingAndStore(logger) {
3078
+ let embeddingProvider;
3079
+ try {
3080
+ embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
3081
+ }
3082
+ catch (error) {
3083
+ logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
3084
+ throw error;
3085
+ }
3086
+ const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
3087
+ await vectorStore.ensureCollection();
3088
+ return { embeddingProvider, vectorStore };
3089
+ }
3090
+ createWatcher(queue, processor, logger) {
3091
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
3092
+ const gitignoreFilter = respectGitignore
3093
+ ? new GitignoreFilter(this.config.watch.paths)
3094
+ : undefined;
3095
+ return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
3096
+ maxRetries: this.config.maxRetries,
3097
+ maxBackoffMs: this.config.maxBackoffMs,
3098
+ onFatalError: this.runtimeOptions.onFatalError,
3099
+ gitignoreFilter,
3100
+ });
3101
+ }
3102
+ async startApiServer(processor, vectorStore, embeddingProvider, logger) {
3103
+ const server = this.factories.createApiServer({
3104
+ processor,
3105
+ vectorStore,
3106
+ embeddingProvider,
3107
+ queue: this.queue,
3108
+ config: this.config,
3109
+ logger,
3110
+ });
3111
+ await server.listen({
3112
+ host: this.config.api?.host ?? '127.0.0.1',
3113
+ port: this.config.api?.port ?? 3456,
3114
+ });
3115
+ return server;
3116
+ }
2644
3117
  startConfigWatch() {
2645
3118
  const logger = this.logger;
2646
3119
  if (!logger)
2647
3120
  return;
2648
3121
  const enabled = this.config.configWatch?.enabled ?? true;
2649
- if (!enabled)
2650
- return;
2651
- if (!this.configPath) {
2652
- logger.debug('Config watch enabled, but no config path was provided');
3122
+ if (!enabled || !this.configPath) {
3123
+ if (!this.configPath) {
3124
+ logger.debug('Config watch enabled, but no config path was provided');
3125
+ }
2653
3126
  return;
2654
3127
  }
2655
- const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
2656
3128
  this.configWatcher = new ConfigWatcher({
2657
3129
  configPath: this.configPath,
2658
3130
  enabled,
2659
- debounceMs,
3131
+ debounceMs: this.config.configWatch?.debounceMs ?? 10000,
2660
3132
  logger,
2661
3133
  onChange: async () => this.reloadConfig(),
2662
3134
  });
@@ -2678,7 +3150,9 @@ class JeevesWatcher {
2678
3150
  const newConfig = await this.factories.loadConfig(this.configPath);
2679
3151
  this.config = newConfig;
2680
3152
  const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
2681
- processor.updateRules(compiledRules);
3153
+ const reloadConfigDir = dirname(this.configPath);
3154
+ const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
3155
+ processor.updateRules(compiledRules, newTemplateEngine);
2682
3156
  logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
2683
3157
  }
2684
3158
  catch (error) {
@@ -2686,18 +3160,6 @@ class JeevesWatcher {
2686
3160
  }
2687
3161
  }
2688
3162
  }
2689
- /**
2690
- * Create and start a JeevesWatcher from a config file path.
2691
- *
2692
- * @param configPath - Optional path to the configuration file.
2693
- * @returns The running JeevesWatcher instance.
2694
- */
2695
- async function startFromConfig(configPath) {
2696
- const config = await loadConfig(configPath);
2697
- const app = new JeevesWatcher(config, configPath);
2698
- installShutdownHandlers(() => app.stop());
2699
- await app.start();
2700
- return app;
2701
- }
3163
+ // startFromConfig re-exported from ./startFromConfig
2702
3164
 
2703
- export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
3165
+ export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, TemplateEngine, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };