@karmaniverous/jeeves-watcher 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +69 -14
- package/dist/cjs/index.js +996 -562
- package/dist/cli/jeeves-watcher/index.js +825 -396
- package/dist/index.d.ts +160 -16
- package/dist/index.iife.js +824 -397
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +992 -564
- package/package.json +12 -4
package/dist/index.iife.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
(function (exports, Fastify, promises, node_path, picomatch, radash, node_crypto, cosmiconfig, zod, jsonmap, googleGenai,
|
|
1
|
+
(function (exports, Fastify, promises, node_path, picomatch, radash, node_crypto, node_fs, ignore, Handlebars, dayjs, hastUtilToMdast, mdastUtilFromAdf, mdastUtilToMarkdown, rehypeParse, unified, chokidar, cosmiconfig, zod, jsonmap, googleGenai, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest) {
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
function _interopNamespaceDefault(e) {
|
|
@@ -416,6 +416,486 @@
|
|
|
416
416
|
return app;
|
|
417
417
|
}
|
|
418
418
|
|
|
419
|
+
/**
|
|
420
|
+
* @module gitignore
|
|
421
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
422
|
+
*/
|
|
423
|
+
/**
|
|
424
|
+
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
425
|
+
* Returns `undefined` if no repo is found.
|
|
426
|
+
*/
|
|
427
|
+
function findRepoRoot(startDir) {
|
|
428
|
+
let dir = node_path.resolve(startDir);
|
|
429
|
+
const root = node_path.resolve('/');
|
|
430
|
+
while (dir !== root) {
|
|
431
|
+
if (node_fs.existsSync(node_path.join(dir, '.git')) &&
|
|
432
|
+
node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
|
|
433
|
+
return dir;
|
|
434
|
+
}
|
|
435
|
+
const parent = node_path.dirname(dir);
|
|
436
|
+
if (parent === dir)
|
|
437
|
+
break;
|
|
438
|
+
dir = parent;
|
|
439
|
+
}
|
|
440
|
+
return undefined;
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
444
|
+
* that can be scanned for a repo root.
|
|
445
|
+
*/
|
|
446
|
+
function watchPathToScanDir(watchPath) {
|
|
447
|
+
const absPath = node_path.resolve(watchPath);
|
|
448
|
+
try {
|
|
449
|
+
return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
|
|
450
|
+
}
|
|
451
|
+
catch {
|
|
452
|
+
// ignore
|
|
453
|
+
}
|
|
454
|
+
// If this is a glob, fall back to the non-glob prefix.
|
|
455
|
+
const globMatch = /[*?[{]/.exec(watchPath);
|
|
456
|
+
if (!globMatch)
|
|
457
|
+
return undefined;
|
|
458
|
+
const prefix = watchPath.slice(0, globMatch.index);
|
|
459
|
+
const trimmed = prefix.trim();
|
|
460
|
+
const baseDir = trimmed.length === 0
|
|
461
|
+
? '.'
|
|
462
|
+
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
463
|
+
? trimmed
|
|
464
|
+
: node_path.dirname(trimmed);
|
|
465
|
+
const resolved = node_path.resolve(baseDir);
|
|
466
|
+
if (!node_fs.existsSync(resolved))
|
|
467
|
+
return undefined;
|
|
468
|
+
return resolved;
|
|
469
|
+
}
|
|
470
|
+
/**
|
|
471
|
+
* Recursively find all `.gitignore` files under `dir`.
|
|
472
|
+
* Skips `.git` and `node_modules` directories for performance.
|
|
473
|
+
*/
|
|
474
|
+
function findGitignoreFiles(dir) {
|
|
475
|
+
const results = [];
|
|
476
|
+
const gitignorePath = node_path.join(dir, '.gitignore');
|
|
477
|
+
if (node_fs.existsSync(gitignorePath)) {
|
|
478
|
+
results.push(gitignorePath);
|
|
479
|
+
}
|
|
480
|
+
let entries;
|
|
481
|
+
try {
|
|
482
|
+
entries = node_fs.readdirSync(dir);
|
|
483
|
+
}
|
|
484
|
+
catch {
|
|
485
|
+
return results;
|
|
486
|
+
}
|
|
487
|
+
for (const entry of entries) {
|
|
488
|
+
if (entry === '.git' || entry === 'node_modules')
|
|
489
|
+
continue;
|
|
490
|
+
const fullPath = node_path.join(dir, entry);
|
|
491
|
+
try {
|
|
492
|
+
if (node_fs.statSync(fullPath).isDirectory()) {
|
|
493
|
+
results.push(...findGitignoreFiles(fullPath));
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
catch {
|
|
497
|
+
// Skip inaccessible entries
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
return results;
|
|
501
|
+
}
|
|
502
|
+
/**
|
|
503
|
+
* Parse a `.gitignore` file into an `ignore` instance.
|
|
504
|
+
*/
|
|
505
|
+
function parseGitignore(gitignorePath) {
|
|
506
|
+
const content = node_fs.readFileSync(gitignorePath, 'utf8');
|
|
507
|
+
return ignore().add(content);
|
|
508
|
+
}
|
|
509
|
+
/**
|
|
510
|
+
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
511
|
+
*/
|
|
512
|
+
function toForwardSlash(p) {
|
|
513
|
+
return p.replace(/\\/g, '/');
|
|
514
|
+
}
|
|
515
|
+
/**
|
|
516
|
+
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
517
|
+
* `.gitignore` chain in git repositories.
|
|
518
|
+
*/
|
|
519
|
+
class GitignoreFilter {
|
|
520
|
+
repos = new Map();
|
|
521
|
+
/**
|
|
522
|
+
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
523
|
+
*
|
|
524
|
+
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
525
|
+
*/
|
|
526
|
+
constructor(watchPaths) {
|
|
527
|
+
this.scan(watchPaths);
|
|
528
|
+
}
|
|
529
|
+
/**
|
|
530
|
+
* Scan paths for git repos and their `.gitignore` files.
|
|
531
|
+
*/
|
|
532
|
+
scan(watchPaths) {
|
|
533
|
+
this.repos.clear();
|
|
534
|
+
const scannedDirs = new Set();
|
|
535
|
+
for (const watchPath of watchPaths) {
|
|
536
|
+
const scanDir = watchPathToScanDir(watchPath);
|
|
537
|
+
if (!scanDir)
|
|
538
|
+
continue;
|
|
539
|
+
if (scannedDirs.has(scanDir))
|
|
540
|
+
continue;
|
|
541
|
+
scannedDirs.add(scanDir);
|
|
542
|
+
const repoRoot = findRepoRoot(scanDir);
|
|
543
|
+
if (!repoRoot)
|
|
544
|
+
continue;
|
|
545
|
+
if (this.repos.has(repoRoot))
|
|
546
|
+
continue;
|
|
547
|
+
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
548
|
+
const entries = gitignoreFiles.map((gf) => ({
|
|
549
|
+
dir: node_path.dirname(gf),
|
|
550
|
+
ig: parseGitignore(gf),
|
|
551
|
+
}));
|
|
552
|
+
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
553
|
+
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
554
|
+
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
/**
|
|
558
|
+
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
559
|
+
*
|
|
560
|
+
* @param filePath - Absolute file path to check.
|
|
561
|
+
* @returns `true` if the file should be ignored.
|
|
562
|
+
*/
|
|
563
|
+
isIgnored(filePath) {
|
|
564
|
+
const absPath = node_path.resolve(filePath);
|
|
565
|
+
for (const [, repo] of this.repos) {
|
|
566
|
+
// Check if file is within this repo
|
|
567
|
+
const relToRepo = node_path.relative(repo.root, absPath);
|
|
568
|
+
// On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
|
|
569
|
+
// an absolute path with a drive letter instead of a relative one. The
|
|
570
|
+
// `ignore` library rejects these with a RangeError. Skip repos on
|
|
571
|
+
// different drives to avoid cross-drive gitignore mismatches.
|
|
572
|
+
if (relToRepo.startsWith('..') ||
|
|
573
|
+
relToRepo.startsWith(node_path.resolve('/')) ||
|
|
574
|
+
/^[a-zA-Z]:/.test(relToRepo)) {
|
|
575
|
+
continue;
|
|
576
|
+
}
|
|
577
|
+
// Check each `.gitignore` entry (deepest-first)
|
|
578
|
+
for (const entry of repo.entries) {
|
|
579
|
+
const relToEntry = node_path.relative(entry.dir, absPath);
|
|
580
|
+
if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
|
|
581
|
+
continue;
|
|
582
|
+
const normalized = toForwardSlash(relToEntry);
|
|
583
|
+
if (entry.ig.ignores(normalized)) {
|
|
584
|
+
return true;
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
return false;
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Invalidate and re-parse a specific `.gitignore` file.
|
|
592
|
+
* Call when a `.gitignore` file is added, changed, or removed.
|
|
593
|
+
*
|
|
594
|
+
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
595
|
+
*/
|
|
596
|
+
invalidate(gitignorePath) {
|
|
597
|
+
const absPath = node_path.resolve(gitignorePath);
|
|
598
|
+
const gitignoreDir = node_path.dirname(absPath);
|
|
599
|
+
for (const [, repo] of this.repos) {
|
|
600
|
+
const relToRepo = node_path.relative(repo.root, gitignoreDir);
|
|
601
|
+
if (relToRepo.startsWith('..'))
|
|
602
|
+
continue;
|
|
603
|
+
// Remove old entry for this directory
|
|
604
|
+
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
605
|
+
// Re-parse if file still exists
|
|
606
|
+
if (node_fs.existsSync(absPath)) {
|
|
607
|
+
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
608
|
+
// Re-sort deepest-first
|
|
609
|
+
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
610
|
+
}
|
|
611
|
+
return;
|
|
612
|
+
}
|
|
613
|
+
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
614
|
+
const repoRoot = findRepoRoot(gitignoreDir);
|
|
615
|
+
if (repoRoot && node_fs.existsSync(absPath)) {
|
|
616
|
+
const entries = [
|
|
617
|
+
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
618
|
+
];
|
|
619
|
+
if (this.repos.has(repoRoot)) {
|
|
620
|
+
const repo = this.repos.get(repoRoot);
|
|
621
|
+
repo.entries.push(entries[0]);
|
|
622
|
+
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
623
|
+
}
|
|
624
|
+
else {
|
|
625
|
+
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* @module templates/helpers
|
|
633
|
+
* Registers built-in Handlebars helpers for content templates.
|
|
634
|
+
*/
|
|
635
|
+
/** Pre-built rehype parser for HTML → hast conversion. */
|
|
636
|
+
const htmlParser = unified.unified().use(rehypeParse, { fragment: true });
|
|
637
|
+
/**
|
|
638
|
+
* Register all built-in helpers on a Handlebars instance.
|
|
639
|
+
*
|
|
640
|
+
* @param hbs - The Handlebars instance.
|
|
641
|
+
*/
|
|
642
|
+
function registerBuiltinHelpers(hbs) {
|
|
643
|
+
// Structural: ADF → Markdown
|
|
644
|
+
hbs.registerHelper('adfToMarkdown', function (adf) {
|
|
645
|
+
if (!adf || typeof adf !== 'object')
|
|
646
|
+
return '';
|
|
647
|
+
try {
|
|
648
|
+
const mdast = mdastUtilFromAdf.fromADF(adf);
|
|
649
|
+
return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
|
|
650
|
+
}
|
|
651
|
+
catch {
|
|
652
|
+
return '<!-- ADF conversion failed -->';
|
|
653
|
+
}
|
|
654
|
+
});
|
|
655
|
+
// Structural: HTML → Markdown
|
|
656
|
+
hbs.registerHelper('markdownify', function (html) {
|
|
657
|
+
if (typeof html !== 'string' || !html.trim())
|
|
658
|
+
return '';
|
|
659
|
+
try {
|
|
660
|
+
const hast = htmlParser.parse(html);
|
|
661
|
+
const mdast = hastUtilToMdast.toMdast(hast);
|
|
662
|
+
return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdast).trim());
|
|
663
|
+
}
|
|
664
|
+
catch {
|
|
665
|
+
return '<!-- HTML conversion failed -->';
|
|
666
|
+
}
|
|
667
|
+
});
|
|
668
|
+
// Formatting: dateFormat
|
|
669
|
+
hbs.registerHelper('dateFormat', function (value, format) {
|
|
670
|
+
if (value === undefined || value === null)
|
|
671
|
+
return '';
|
|
672
|
+
const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
|
|
673
|
+
return dayjs(value).format(fmt);
|
|
674
|
+
});
|
|
675
|
+
// Formatting: join
|
|
676
|
+
hbs.registerHelper('join', function (arr, separator) {
|
|
677
|
+
if (!Array.isArray(arr))
|
|
678
|
+
return '';
|
|
679
|
+
const sep = typeof separator === 'string' ? separator : ', ';
|
|
680
|
+
return arr.join(sep);
|
|
681
|
+
});
|
|
682
|
+
// Formatting: pluck
|
|
683
|
+
hbs.registerHelper('pluck', function (arr, key) {
|
|
684
|
+
if (!Array.isArray(arr) || typeof key !== 'string')
|
|
685
|
+
return [];
|
|
686
|
+
return arr.map((item) => item && typeof item === 'object'
|
|
687
|
+
? item[key]
|
|
688
|
+
: undefined);
|
|
689
|
+
});
|
|
690
|
+
// String transforms
|
|
691
|
+
hbs.registerHelper('lowercase', (text) => typeof text === 'string' ? text.toLowerCase() : '');
|
|
692
|
+
hbs.registerHelper('uppercase', (text) => typeof text === 'string' ? text.toUpperCase() : '');
|
|
693
|
+
hbs.registerHelper('capitalize', (text) => typeof text === 'string' ? radash.capitalize(text) : '');
|
|
694
|
+
hbs.registerHelper('title', (text) => typeof text === 'string' ? radash.title(text) : '');
|
|
695
|
+
hbs.registerHelper('camel', (text) => typeof text === 'string' ? radash.camel(text) : '');
|
|
696
|
+
hbs.registerHelper('snake', (text) => typeof text === 'string' ? radash.snake(text) : '');
|
|
697
|
+
hbs.registerHelper('dash', (text) => typeof text === 'string' ? radash.dash(text) : '');
|
|
698
|
+
// default helper
|
|
699
|
+
hbs.registerHelper('default', function (value, fallback) {
|
|
700
|
+
return value ?? fallback ?? '';
|
|
701
|
+
});
|
|
702
|
+
// eq helper (deep equality)
|
|
703
|
+
hbs.registerHelper('eq', function (a, b) {
|
|
704
|
+
return radash.isEqual(a, b);
|
|
705
|
+
});
|
|
706
|
+
// json helper
|
|
707
|
+
hbs.registerHelper('json', function (value) {
|
|
708
|
+
return new hbs.SafeString(JSON.stringify(value, null, 2));
|
|
709
|
+
});
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/**
|
|
713
|
+
* @module templates/engine
|
|
714
|
+
* Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
|
|
715
|
+
*/
|
|
716
|
+
/**
|
|
717
|
+
* Resolve a template value to its source string.
|
|
718
|
+
*
|
|
719
|
+
* Resolution order:
|
|
720
|
+
* 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
|
|
721
|
+
* 2. Matches a key in namedTemplates → named ref (recursively resolve)
|
|
722
|
+
* 3. Otherwise → inline Handlebars template string
|
|
723
|
+
*
|
|
724
|
+
* @param value - The template reference (inline, file path, or named ref).
|
|
725
|
+
* @param namedTemplates - Named template definitions from config.
|
|
726
|
+
* @param configDir - Directory to resolve relative file paths against.
|
|
727
|
+
* @param visited - Set of visited named refs for cycle detection.
|
|
728
|
+
* @returns The resolved template source string.
|
|
729
|
+
*/
|
|
730
|
+
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
|
|
731
|
+
// File path detection
|
|
732
|
+
if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
|
|
733
|
+
return node_fs.readFileSync(node_path.resolve(configDir, value), 'utf-8');
|
|
734
|
+
}
|
|
735
|
+
// Named ref
|
|
736
|
+
if (namedTemplates?.[value] !== undefined) {
|
|
737
|
+
if (visited.has(value)) {
|
|
738
|
+
throw new Error(`Circular template reference detected: ${value}`);
|
|
739
|
+
}
|
|
740
|
+
visited.add(value);
|
|
741
|
+
return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
|
|
742
|
+
}
|
|
743
|
+
// Inline
|
|
744
|
+
return value;
|
|
745
|
+
}
|
|
746
|
+
/**
|
|
747
|
+
* Create a configured Handlebars instance with built-in helpers registered.
|
|
748
|
+
*
|
|
749
|
+
* @returns A Handlebars instance with helpers.
|
|
750
|
+
*/
|
|
751
|
+
function createHandlebarsInstance() {
|
|
752
|
+
const hbs = Handlebars.create();
|
|
753
|
+
registerBuiltinHelpers(hbs);
|
|
754
|
+
return hbs;
|
|
755
|
+
}
|
|
756
|
+
/**
|
|
757
|
+
* Load custom helpers from file paths.
|
|
758
|
+
*
|
|
759
|
+
* Each file should export a default function that receives the Handlebars instance.
|
|
760
|
+
*
|
|
761
|
+
* @param hbs - The Handlebars instance.
|
|
762
|
+
* @param paths - File paths to custom helper modules.
|
|
763
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
764
|
+
*/
|
|
765
|
+
async function loadCustomHelpers(hbs, paths, configDir) {
|
|
766
|
+
for (const p of paths) {
|
|
767
|
+
const resolved = node_path.resolve(configDir, p);
|
|
768
|
+
const mod = (await import(resolved));
|
|
769
|
+
if (typeof mod.default === 'function') {
|
|
770
|
+
mod.default(hbs);
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
/**
|
|
775
|
+
* The template engine: holds compiled templates and renders them against context.
|
|
776
|
+
*/
|
|
777
|
+
class TemplateEngine {
|
|
778
|
+
hbs;
|
|
779
|
+
compiled = new Map();
|
|
780
|
+
constructor(hbs) {
|
|
781
|
+
this.hbs = hbs;
|
|
782
|
+
}
|
|
783
|
+
/**
|
|
784
|
+
* Compile and cache a template from its source string.
|
|
785
|
+
*
|
|
786
|
+
* @param key - Cache key (rule index or named template).
|
|
787
|
+
* @param source - Handlebars template source.
|
|
788
|
+
* @returns The compiled template.
|
|
789
|
+
*/
|
|
790
|
+
compile(key, source) {
|
|
791
|
+
const fn = this.hbs.compile(source);
|
|
792
|
+
this.compiled.set(key, fn);
|
|
793
|
+
return fn;
|
|
794
|
+
}
|
|
795
|
+
/**
|
|
796
|
+
* Get a previously compiled template by key.
|
|
797
|
+
*
|
|
798
|
+
* @param key - The cache key.
|
|
799
|
+
* @returns The compiled template, or undefined.
|
|
800
|
+
*/
|
|
801
|
+
get(key) {
|
|
802
|
+
return this.compiled.get(key);
|
|
803
|
+
}
|
|
804
|
+
/**
|
|
805
|
+
* Render a compiled template against a context.
|
|
806
|
+
*
|
|
807
|
+
* @param key - The cache key of the compiled template.
|
|
808
|
+
* @param context - The data context for rendering.
|
|
809
|
+
* @returns The rendered string, or null if the template was not found.
|
|
810
|
+
*/
|
|
811
|
+
render(key, context) {
|
|
812
|
+
const fn = this.compiled.get(key);
|
|
813
|
+
if (!fn)
|
|
814
|
+
return null;
|
|
815
|
+
return fn(context);
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
/**
|
|
820
|
+
* @module templates/buildTemplateEngine
|
|
821
|
+
* Factory to build a TemplateEngine from config, compiling all rule templates at load time.
|
|
822
|
+
*/
|
|
823
|
+
/**
|
|
824
|
+
* Build a TemplateEngine from configuration, pre-compiling all rule templates.
|
|
825
|
+
*
|
|
826
|
+
* @param rules - The inference rules (may contain template fields).
|
|
827
|
+
* @param namedTemplates - Named template definitions from config.
|
|
828
|
+
* @param templateHelperPaths - Paths to custom helper modules.
|
|
829
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
830
|
+
* @returns The configured TemplateEngine, or undefined if no templates are used.
|
|
831
|
+
*/
|
|
832
|
+
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
|
|
833
|
+
const rulesWithTemplates = rules.filter((r) => r.template);
|
|
834
|
+
if (rulesWithTemplates.length === 0)
|
|
835
|
+
return undefined;
|
|
836
|
+
const hbs = createHandlebarsInstance();
|
|
837
|
+
// Load custom helpers
|
|
838
|
+
if (templateHelperPaths?.length && configDir) {
|
|
839
|
+
await loadCustomHelpers(hbs, templateHelperPaths, configDir);
|
|
840
|
+
}
|
|
841
|
+
const engine = new TemplateEngine(hbs);
|
|
842
|
+
// Compile all rule templates
|
|
843
|
+
for (const [index, rule] of rules.entries()) {
|
|
844
|
+
if (!rule.template)
|
|
845
|
+
continue;
|
|
846
|
+
const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
|
|
847
|
+
engine.compile(`rule-${String(index)}`, source);
|
|
848
|
+
}
|
|
849
|
+
return engine;
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
/**
|
|
853
|
+
* @module app/configWatcher
|
|
854
|
+
* Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
|
|
855
|
+
*/
|
|
856
|
+
/**
|
|
857
|
+
* Debounced config file watcher.
|
|
858
|
+
*/
|
|
859
|
+
class ConfigWatcher {
|
|
860
|
+
options;
|
|
861
|
+
watcher;
|
|
862
|
+
debounce;
|
|
863
|
+
constructor(options) {
|
|
864
|
+
this.options = options;
|
|
865
|
+
}
|
|
866
|
+
start() {
|
|
867
|
+
if (!this.options.enabled)
|
|
868
|
+
return;
|
|
869
|
+
this.watcher = chokidar.watch(this.options.configPath, {
|
|
870
|
+
ignoreInitial: true,
|
|
871
|
+
});
|
|
872
|
+
this.watcher.on('change', () => {
|
|
873
|
+
if (this.debounce)
|
|
874
|
+
clearTimeout(this.debounce);
|
|
875
|
+
this.debounce = setTimeout(() => {
|
|
876
|
+
void this.options.onChange();
|
|
877
|
+
}, this.options.debounceMs);
|
|
878
|
+
});
|
|
879
|
+
this.watcher.on('error', (error) => {
|
|
880
|
+
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
881
|
+
});
|
|
882
|
+
this.options.logger.info({
|
|
883
|
+
configPath: this.options.configPath,
|
|
884
|
+
debounceMs: this.options.debounceMs,
|
|
885
|
+
}, 'Config watcher started');
|
|
886
|
+
}
|
|
887
|
+
async stop() {
|
|
888
|
+
if (this.debounce) {
|
|
889
|
+
clearTimeout(this.debounce);
|
|
890
|
+
this.debounce = undefined;
|
|
891
|
+
}
|
|
892
|
+
if (this.watcher) {
|
|
893
|
+
await this.watcher.close();
|
|
894
|
+
this.watcher = undefined;
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
|
|
419
899
|
/**
|
|
420
900
|
* @module config/defaults
|
|
421
901
|
* Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
|
|
@@ -617,7 +1097,12 @@
|
|
|
617
1097
|
map: zod.z
|
|
618
1098
|
.union([jsonmap.jsonMapMapSchema, zod.z.string()])
|
|
619
1099
|
.optional()
|
|
620
|
-
.describe('JsonMap transformation (inline definition
|
|
1100
|
+
.describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
|
|
1101
|
+
/** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
|
|
1102
|
+
template: zod.z
|
|
1103
|
+
.string()
|
|
1104
|
+
.optional()
|
|
1105
|
+
.describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
|
|
621
1106
|
});
|
|
622
1107
|
/**
|
|
623
1108
|
* Top-level configuration for jeeves-watcher.
|
|
@@ -654,7 +1139,23 @@
|
|
|
654
1139
|
maps: zod.z
|
|
655
1140
|
.record(zod.z.string(), jsonmap.jsonMapMapSchema)
|
|
656
1141
|
.optional()
|
|
657
|
-
.describe('Reusable named JsonMap transformations.'),
|
|
1142
|
+
.describe('Reusable named JsonMap transformations.'),
|
|
1143
|
+
/** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
|
|
1144
|
+
templates: zod.z
|
|
1145
|
+
.record(zod.z.string(), zod.z.string())
|
|
1146
|
+
.optional()
|
|
1147
|
+
.describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
|
|
1148
|
+
/** Custom Handlebars helper registration. */
|
|
1149
|
+
templateHelpers: zod.z
|
|
1150
|
+
.object({
|
|
1151
|
+
/** File paths to custom helper modules. */
|
|
1152
|
+
paths: zod.z
|
|
1153
|
+
.array(zod.z.string())
|
|
1154
|
+
.optional()
|
|
1155
|
+
.describe('File paths to custom helper modules.'),
|
|
1156
|
+
})
|
|
1157
|
+
.optional()
|
|
1158
|
+
.describe('Custom Handlebars helper registration.'),
|
|
658
1159
|
/** Logging configuration. */
|
|
659
1160
|
logging: loggingConfigSchema.optional().describe('Logging configuration.'),
|
|
660
1161
|
/** Timeout in milliseconds for graceful shutdown. */
|
|
@@ -904,266 +1405,60 @@
|
|
|
904
1405
|
dimensions,
|
|
905
1406
|
async embed(texts) {
|
|
906
1407
|
const vectors = await retry(async (attempt) => {
|
|
907
|
-
if (attempt > 1) {
|
|
908
|
-
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
909
|
-
}
|
|
910
|
-
// embedDocuments returns vectors for multiple texts
|
|
911
|
-
return embedder.embedDocuments(texts);
|
|
912
|
-
}, {
|
|
913
|
-
attempts: 5,
|
|
914
|
-
baseDelayMs: 500,
|
|
915
|
-
maxDelayMs: 10_000,
|
|
916
|
-
jitter: 0.2,
|
|
917
|
-
onRetry: ({ attempt, delayMs, error }) => {
|
|
918
|
-
log.warn({
|
|
919
|
-
attempt,
|
|
920
|
-
delayMs,
|
|
921
|
-
provider: 'gemini',
|
|
922
|
-
model: config.model,
|
|
923
|
-
err: normalizeError(error),
|
|
924
|
-
}, 'Embedding call failed; will retry');
|
|
925
|
-
},
|
|
926
|
-
});
|
|
927
|
-
// Validate dimensions
|
|
928
|
-
for (const vector of vectors) {
|
|
929
|
-
if (vector.length !== dimensions) {
|
|
930
|
-
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
931
|
-
}
|
|
932
|
-
}
|
|
933
|
-
return vectors;
|
|
934
|
-
},
|
|
935
|
-
};
|
|
936
|
-
}
|
|
937
|
-
function createMockFromConfig(config) {
|
|
938
|
-
const dimensions = config.dimensions ?? 768;
|
|
939
|
-
return createMockProvider(dimensions);
|
|
940
|
-
}
|
|
941
|
-
const embeddingProviderRegistry = new Map([
|
|
942
|
-
['mock', createMockFromConfig],
|
|
943
|
-
['gemini', createGeminiProvider],
|
|
944
|
-
]);
|
|
945
|
-
/**
|
|
946
|
-
* Create an embedding provider based on the given configuration.
|
|
947
|
-
*
|
|
948
|
-
* Each provider is responsible for its own default dimensions.
|
|
949
|
-
*
|
|
950
|
-
* @param config - The embedding configuration.
|
|
951
|
-
* @param logger - Optional pino logger for retry warnings.
|
|
952
|
-
* @returns An {@link EmbeddingProvider} instance.
|
|
953
|
-
* @throws If the configured provider is not supported.
|
|
954
|
-
*/
|
|
955
|
-
function createEmbeddingProvider(config, logger) {
|
|
956
|
-
const factory = embeddingProviderRegistry.get(config.provider);
|
|
957
|
-
if (!factory) {
|
|
958
|
-
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
959
|
-
}
|
|
960
|
-
return factory(config, logger);
|
|
961
|
-
}
|
|
962
|
-
|
|
963
|
-
/**
|
|
964
|
-
* @module gitignore
|
|
965
|
-
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
966
|
-
*/
|
|
967
|
-
/**
|
|
968
|
-
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
969
|
-
* Returns `undefined` if no repo is found.
|
|
970
|
-
*/
|
|
971
|
-
function findRepoRoot(startDir) {
|
|
972
|
-
let dir = node_path.resolve(startDir);
|
|
973
|
-
const root = node_path.resolve('/');
|
|
974
|
-
while (dir !== root) {
|
|
975
|
-
if (node_fs.existsSync(node_path.join(dir, '.git')) &&
|
|
976
|
-
node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
|
|
977
|
-
return dir;
|
|
978
|
-
}
|
|
979
|
-
const parent = node_path.dirname(dir);
|
|
980
|
-
if (parent === dir)
|
|
981
|
-
break;
|
|
982
|
-
dir = parent;
|
|
983
|
-
}
|
|
984
|
-
return undefined;
|
|
985
|
-
}
|
|
986
|
-
/**
|
|
987
|
-
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
988
|
-
* that can be scanned for a repo root.
|
|
989
|
-
*/
|
|
990
|
-
function watchPathToScanDir(watchPath) {
|
|
991
|
-
const absPath = node_path.resolve(watchPath);
|
|
992
|
-
try {
|
|
993
|
-
return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
|
|
994
|
-
}
|
|
995
|
-
catch {
|
|
996
|
-
// ignore
|
|
997
|
-
}
|
|
998
|
-
// If this is a glob, fall back to the non-glob prefix.
|
|
999
|
-
const globMatch = /[*?[{]/.exec(watchPath);
|
|
1000
|
-
if (!globMatch)
|
|
1001
|
-
return undefined;
|
|
1002
|
-
const prefix = watchPath.slice(0, globMatch.index);
|
|
1003
|
-
const trimmed = prefix.trim();
|
|
1004
|
-
const baseDir = trimmed.length === 0
|
|
1005
|
-
? '.'
|
|
1006
|
-
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
1007
|
-
? trimmed
|
|
1008
|
-
: node_path.dirname(trimmed);
|
|
1009
|
-
const resolved = node_path.resolve(baseDir);
|
|
1010
|
-
if (!node_fs.existsSync(resolved))
|
|
1011
|
-
return undefined;
|
|
1012
|
-
return resolved;
|
|
1013
|
-
}
|
|
1014
|
-
/**
|
|
1015
|
-
* Recursively find all `.gitignore` files under `dir`.
|
|
1016
|
-
* Skips `.git` and `node_modules` directories for performance.
|
|
1017
|
-
*/
|
|
1018
|
-
function findGitignoreFiles(dir) {
|
|
1019
|
-
const results = [];
|
|
1020
|
-
const gitignorePath = node_path.join(dir, '.gitignore');
|
|
1021
|
-
if (node_fs.existsSync(gitignorePath)) {
|
|
1022
|
-
results.push(gitignorePath);
|
|
1023
|
-
}
|
|
1024
|
-
let entries;
|
|
1025
|
-
try {
|
|
1026
|
-
entries = node_fs.readdirSync(dir);
|
|
1027
|
-
}
|
|
1028
|
-
catch {
|
|
1029
|
-
return results;
|
|
1030
|
-
}
|
|
1031
|
-
for (const entry of entries) {
|
|
1032
|
-
if (entry === '.git' || entry === 'node_modules')
|
|
1033
|
-
continue;
|
|
1034
|
-
const fullPath = node_path.join(dir, entry);
|
|
1035
|
-
try {
|
|
1036
|
-
if (node_fs.statSync(fullPath).isDirectory()) {
|
|
1037
|
-
results.push(...findGitignoreFiles(fullPath));
|
|
1038
|
-
}
|
|
1039
|
-
}
|
|
1040
|
-
catch {
|
|
1041
|
-
// Skip inaccessible entries
|
|
1042
|
-
}
|
|
1043
|
-
}
|
|
1044
|
-
return results;
|
|
1045
|
-
}
|
|
1046
|
-
/**
|
|
1047
|
-
* Parse a `.gitignore` file into an `ignore` instance.
|
|
1048
|
-
*/
|
|
1049
|
-
function parseGitignore(gitignorePath) {
|
|
1050
|
-
const content = node_fs.readFileSync(gitignorePath, 'utf8');
|
|
1051
|
-
return ignore().add(content);
|
|
1052
|
-
}
|
|
1053
|
-
/**
|
|
1054
|
-
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
1055
|
-
*/
|
|
1056
|
-
function toForwardSlash(p) {
|
|
1057
|
-
return p.replace(/\\/g, '/');
|
|
1058
|
-
}
|
|
1059
|
-
/**
|
|
1060
|
-
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
1061
|
-
* `.gitignore` chain in git repositories.
|
|
1062
|
-
*/
|
|
1063
|
-
class GitignoreFilter {
|
|
1064
|
-
repos = new Map();
|
|
1065
|
-
/**
|
|
1066
|
-
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
1067
|
-
*
|
|
1068
|
-
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
1069
|
-
*/
|
|
1070
|
-
constructor(watchPaths) {
|
|
1071
|
-
this.scan(watchPaths);
|
|
1072
|
-
}
|
|
1073
|
-
/**
|
|
1074
|
-
* Scan paths for git repos and their `.gitignore` files.
|
|
1075
|
-
*/
|
|
1076
|
-
scan(watchPaths) {
|
|
1077
|
-
this.repos.clear();
|
|
1078
|
-
const scannedDirs = new Set();
|
|
1079
|
-
for (const watchPath of watchPaths) {
|
|
1080
|
-
const scanDir = watchPathToScanDir(watchPath);
|
|
1081
|
-
if (!scanDir)
|
|
1082
|
-
continue;
|
|
1083
|
-
if (scannedDirs.has(scanDir))
|
|
1084
|
-
continue;
|
|
1085
|
-
scannedDirs.add(scanDir);
|
|
1086
|
-
const repoRoot = findRepoRoot(scanDir);
|
|
1087
|
-
if (!repoRoot)
|
|
1088
|
-
continue;
|
|
1089
|
-
if (this.repos.has(repoRoot))
|
|
1090
|
-
continue;
|
|
1091
|
-
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
1092
|
-
const entries = gitignoreFiles.map((gf) => ({
|
|
1093
|
-
dir: node_path.dirname(gf),
|
|
1094
|
-
ig: parseGitignore(gf),
|
|
1095
|
-
}));
|
|
1096
|
-
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
1097
|
-
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1098
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1101
|
-
/**
|
|
1102
|
-
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
1103
|
-
*
|
|
1104
|
-
* @param filePath - Absolute file path to check.
|
|
1105
|
-
* @returns `true` if the file should be ignored.
|
|
1106
|
-
*/
|
|
1107
|
-
isIgnored(filePath) {
|
|
1108
|
-
const absPath = node_path.resolve(filePath);
|
|
1109
|
-
for (const [, repo] of this.repos) {
|
|
1110
|
-
// Check if file is within this repo
|
|
1111
|
-
const relToRepo = node_path.relative(repo.root, absPath);
|
|
1112
|
-
if (relToRepo.startsWith('..') || relToRepo.startsWith(node_path.resolve('/'))) {
|
|
1113
|
-
continue;
|
|
1114
|
-
}
|
|
1115
|
-
// Check each `.gitignore` entry (deepest-first)
|
|
1116
|
-
for (const entry of repo.entries) {
|
|
1117
|
-
const relToEntry = node_path.relative(entry.dir, absPath);
|
|
1118
|
-
if (relToEntry.startsWith('..'))
|
|
1119
|
-
continue;
|
|
1120
|
-
const normalized = toForwardSlash(relToEntry);
|
|
1121
|
-
if (entry.ig.ignores(normalized)) {
|
|
1122
|
-
return true;
|
|
1123
|
-
}
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
return false;
|
|
1127
|
-
}
|
|
1128
|
-
/**
|
|
1129
|
-
* Invalidate and re-parse a specific `.gitignore` file.
|
|
1130
|
-
* Call when a `.gitignore` file is added, changed, or removed.
|
|
1131
|
-
*
|
|
1132
|
-
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
1133
|
-
*/
|
|
1134
|
-
invalidate(gitignorePath) {
|
|
1135
|
-
const absPath = node_path.resolve(gitignorePath);
|
|
1136
|
-
const gitignoreDir = node_path.dirname(absPath);
|
|
1137
|
-
for (const [, repo] of this.repos) {
|
|
1138
|
-
const relToRepo = node_path.relative(repo.root, gitignoreDir);
|
|
1139
|
-
if (relToRepo.startsWith('..'))
|
|
1140
|
-
continue;
|
|
1141
|
-
// Remove old entry for this directory
|
|
1142
|
-
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
1143
|
-
// Re-parse if file still exists
|
|
1144
|
-
if (node_fs.existsSync(absPath)) {
|
|
1145
|
-
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
1146
|
-
// Re-sort deepest-first
|
|
1147
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1148
|
-
}
|
|
1149
|
-
return;
|
|
1150
|
-
}
|
|
1151
|
-
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
1152
|
-
const repoRoot = findRepoRoot(gitignoreDir);
|
|
1153
|
-
if (repoRoot && node_fs.existsSync(absPath)) {
|
|
1154
|
-
const entries = [
|
|
1155
|
-
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
1156
|
-
];
|
|
1157
|
-
if (this.repos.has(repoRoot)) {
|
|
1158
|
-
const repo = this.repos.get(repoRoot);
|
|
1159
|
-
repo.entries.push(entries[0]);
|
|
1160
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1161
|
-
}
|
|
1162
|
-
else {
|
|
1163
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1408
|
+
if (attempt > 1) {
|
|
1409
|
+
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
1410
|
+
}
|
|
1411
|
+
// embedDocuments returns vectors for multiple texts
|
|
1412
|
+
return embedder.embedDocuments(texts);
|
|
1413
|
+
}, {
|
|
1414
|
+
attempts: 5,
|
|
1415
|
+
baseDelayMs: 500,
|
|
1416
|
+
maxDelayMs: 10_000,
|
|
1417
|
+
jitter: 0.2,
|
|
1418
|
+
onRetry: ({ attempt, delayMs, error }) => {
|
|
1419
|
+
log.warn({
|
|
1420
|
+
attempt,
|
|
1421
|
+
delayMs,
|
|
1422
|
+
provider: 'gemini',
|
|
1423
|
+
model: config.model,
|
|
1424
|
+
err: normalizeError(error),
|
|
1425
|
+
}, 'Embedding call failed; will retry');
|
|
1426
|
+
},
|
|
1427
|
+
});
|
|
1428
|
+
// Validate dimensions
|
|
1429
|
+
for (const vector of vectors) {
|
|
1430
|
+
if (vector.length !== dimensions) {
|
|
1431
|
+
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
1432
|
+
}
|
|
1164
1433
|
}
|
|
1165
|
-
|
|
1434
|
+
return vectors;
|
|
1435
|
+
},
|
|
1436
|
+
};
|
|
1437
|
+
}
|
|
1438
|
+
/**
 * Build a mock embedding provider from config.
 * Falls back to 768 dimensions when the config does not specify any.
 */
function createMockFromConfig(config) {
    return createMockProvider(config.dimensions ?? 768);
}
|
|
1442
|
+
// Provider name -> factory. Extend here to support additional providers.
const embeddingProviderRegistry = new Map([
    ['mock', createMockFromConfig],
    ['gemini', createGeminiProvider],
]);
/**
 * Create an embedding provider based on the given configuration.
 *
 * Each provider is responsible for its own default dimensions.
 *
 * @param config - The embedding configuration.
 * @param logger - Optional pino logger for retry warnings.
 * @returns An {@link EmbeddingProvider} instance.
 * @throws If the configured provider is not supported.
 */
function createEmbeddingProvider(config, logger) {
    const factory = embeddingProviderRegistry.get(config.provider);
    if (factory) {
        return factory(config, logger);
    }
    throw new Error(`Unsupported embedding provider: ${config.provider}`);
}
|
|
1168
1463
|
|
|
1169
1464
|
/**
|
|
@@ -1395,7 +1690,7 @@
|
|
|
1395
1690
|
};
|
|
1396
1691
|
}
|
|
1397
1692
|
/**
|
|
1398
|
-
* Apply compiled inference rules to file attributes, returning merged metadata.
|
|
1693
|
+
* Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
|
|
1399
1694
|
*
|
|
1400
1695
|
* Rules are evaluated in order; later rules override earlier ones.
|
|
1401
1696
|
* If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
|
|
@@ -1405,15 +1700,18 @@
|
|
|
1405
1700
|
* @param attributes - The file attributes to match against.
|
|
1406
1701
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1407
1702
|
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1408
|
-
* @
|
|
1703
|
+
* @param templateEngine - Optional template engine for rendering content templates.
|
|
1704
|
+
* @param configDir - Optional config directory for resolving .json map file paths.
|
|
1705
|
+
* @returns The merged metadata and optional rendered content.
|
|
1409
1706
|
*/
|
|
1410
|
-
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
1707
|
+
async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
|
|
1411
1708
|
// JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
|
|
1412
1709
|
// Our helper functions accept multiple args, which JsonMap supports at runtime.
|
|
1413
1710
|
const lib = createJsonMapLib();
|
|
1414
1711
|
let merged = {};
|
|
1712
|
+
let renderedContent = null;
|
|
1415
1713
|
const log = logger ?? console;
|
|
1416
|
-
for (const { rule, validate } of compiledRules) {
|
|
1714
|
+
for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
|
|
1417
1715
|
if (validate(attributes)) {
|
|
1418
1716
|
// Apply set resolution
|
|
1419
1717
|
const setOutput = resolveSet(rule.set, attributes);
|
|
@@ -1423,10 +1721,24 @@
|
|
|
1423
1721
|
let mapDef;
|
|
1424
1722
|
// Resolve map reference
|
|
1425
1723
|
if (typeof rule.map === 'string') {
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1724
|
+
if (rule.map.endsWith('.json') && configDir) {
|
|
1725
|
+
// File path: load from .json file
|
|
1726
|
+
try {
|
|
1727
|
+
const mapPath = node_path.resolve(configDir, rule.map);
|
|
1728
|
+
const raw = node_fs.readFileSync(mapPath, 'utf-8');
|
|
1729
|
+
mapDef = JSON.parse(raw);
|
|
1730
|
+
}
|
|
1731
|
+
catch (error) {
|
|
1732
|
+
log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
|
|
1733
|
+
continue;
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
1736
|
+
else {
|
|
1737
|
+
mapDef = namedMaps?.[rule.map];
|
|
1738
|
+
if (!mapDef) {
|
|
1739
|
+
log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
|
|
1740
|
+
continue;
|
|
1741
|
+
}
|
|
1430
1742
|
}
|
|
1431
1743
|
}
|
|
1432
1744
|
else {
|
|
@@ -1449,9 +1761,31 @@
|
|
|
1449
1761
|
log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1450
1762
|
}
|
|
1451
1763
|
}
|
|
1764
|
+
// Render template if present
|
|
1765
|
+
if (rule.template && templateEngine) {
|
|
1766
|
+
const templateKey = `rule-${String(ruleIndex)}`;
|
|
1767
|
+
// Build template context: attributes (with json spread at top) + map output
|
|
1768
|
+
const context = {
|
|
1769
|
+
...(attributes.json ?? {}),
|
|
1770
|
+
...attributes,
|
|
1771
|
+
...merged,
|
|
1772
|
+
};
|
|
1773
|
+
try {
|
|
1774
|
+
const result = templateEngine.render(templateKey, context);
|
|
1775
|
+
if (result && result.trim()) {
|
|
1776
|
+
renderedContent = result;
|
|
1777
|
+
}
|
|
1778
|
+
else {
|
|
1779
|
+
log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
|
|
1780
|
+
}
|
|
1781
|
+
}
|
|
1782
|
+
catch (error) {
|
|
1783
|
+
log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
|
|
1784
|
+
}
|
|
1785
|
+
}
|
|
1452
1786
|
}
|
|
1453
1787
|
}
|
|
1454
|
-
return merged;
|
|
1788
|
+
return { metadata: merged, renderedContent };
|
|
1455
1789
|
}
|
|
1456
1790
|
|
|
1457
1791
|
/**
|
|
@@ -1540,23 +1874,32 @@
|
|
|
1540
1874
|
* @param metadataDir - The metadata directory for enrichment files.
|
|
1541
1875
|
* @param maps - Optional named JsonMap definitions.
|
|
1542
1876
|
* @param logger - Optional logger for rule warnings.
|
|
1877
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1878
|
+
* @param configDir - Optional config directory for resolving file paths.
|
|
1543
1879
|
* @returns The merged metadata and intermediate data.
|
|
1544
1880
|
*/
|
|
1545
|
-
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
|
|
1881
|
+
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
|
|
1546
1882
|
const ext = node_path.extname(filePath);
|
|
1547
1883
|
const stats = await promises.stat(filePath);
|
|
1548
1884
|
// 1. Extract text and structured data
|
|
1549
1885
|
const extracted = await extractText(filePath, ext);
|
|
1550
1886
|
// 2. Build attributes + apply rules
|
|
1551
1887
|
const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
|
|
1552
|
-
const inferred = await applyRules(compiledRules, attributes, maps, logger);
|
|
1888
|
+
const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
|
|
1553
1889
|
// 3. Read enrichment metadata (merge, enrichment wins)
|
|
1554
1890
|
const enrichment = await readMetadata(filePath, metadataDir);
|
|
1555
1891
|
const metadata = {
|
|
1556
1892
|
...inferred,
|
|
1557
1893
|
...(enrichment ?? {}),
|
|
1558
1894
|
};
|
|
1559
|
-
return {
|
|
1895
|
+
return {
|
|
1896
|
+
inferred,
|
|
1897
|
+
enrichment,
|
|
1898
|
+
metadata,
|
|
1899
|
+
attributes,
|
|
1900
|
+
extracted,
|
|
1901
|
+
renderedContent,
|
|
1902
|
+
};
|
|
1560
1903
|
}
|
|
1561
1904
|
|
|
1562
1905
|
/**
|
|
@@ -1627,6 +1970,7 @@
|
|
|
1627
1970
|
vectorStore;
|
|
1628
1971
|
compiledRules;
|
|
1629
1972
|
logger;
|
|
1973
|
+
templateEngine;
|
|
1630
1974
|
/**
|
|
1631
1975
|
* Create a new DocumentProcessor.
|
|
1632
1976
|
*
|
|
@@ -1635,13 +1979,15 @@
|
|
|
1635
1979
|
* @param vectorStore - The vector store client.
|
|
1636
1980
|
* @param compiledRules - The compiled inference rules.
|
|
1637
1981
|
* @param logger - The logger instance.
|
|
1982
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1638
1983
|
*/
|
|
1639
|
-
constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
|
|
1984
|
+
constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
|
|
1640
1985
|
this.config = config;
|
|
1641
1986
|
this.embeddingProvider = embeddingProvider;
|
|
1642
1987
|
this.vectorStore = vectorStore;
|
|
1643
1988
|
this.compiledRules = compiledRules;
|
|
1644
1989
|
this.logger = logger;
|
|
1990
|
+
this.templateEngine = templateEngine;
|
|
1645
1991
|
}
|
|
1646
1992
|
/**
|
|
1647
1993
|
* Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
|
|
@@ -1652,13 +1998,15 @@
|
|
|
1652
1998
|
try {
|
|
1653
1999
|
const ext = node_path.extname(filePath);
|
|
1654
2000
|
// 1. Build merged metadata + extract text
|
|
1655
|
-
const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
1656
|
-
if
|
|
2001
|
+
const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
2002
|
+
// Use rendered template content if available, otherwise raw extracted text
|
|
2003
|
+
const textToEmbed = renderedContent ?? extracted.text;
|
|
2004
|
+
if (!textToEmbed.trim()) {
|
|
1657
2005
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
1658
2006
|
return;
|
|
1659
2007
|
}
|
|
1660
2008
|
// 2. Content hash check — skip if unchanged
|
|
1661
|
-
const hash = contentHash(
|
|
2009
|
+
const hash = contentHash(textToEmbed);
|
|
1662
2010
|
const baseId = pointId(filePath, 0);
|
|
1663
2011
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
1664
2012
|
if (existingPayload && existingPayload['content_hash'] === hash) {
|
|
@@ -1670,7 +2018,7 @@
|
|
|
1670
2018
|
const chunkSize = this.config.chunkSize ?? 1000;
|
|
1671
2019
|
const chunkOverlap = this.config.chunkOverlap ?? 200;
|
|
1672
2020
|
const splitter = createSplitter(ext, chunkSize, chunkOverlap);
|
|
1673
|
-
const chunks = await splitter.splitText(
|
|
2021
|
+
const chunks = await splitter.splitText(textToEmbed);
|
|
1674
2022
|
// 4. Embed all chunks
|
|
1675
2023
|
const vectors = await this.embeddingProvider.embed(chunks);
|
|
1676
2024
|
// 5. Upsert all chunk points
|
|
@@ -1764,7 +2112,7 @@
|
|
|
1764
2112
|
return null;
|
|
1765
2113
|
}
|
|
1766
2114
|
// Build merged metadata (lightweight — no embedding)
|
|
1767
|
-
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
2115
|
+
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
1768
2116
|
// Update all chunk payloads
|
|
1769
2117
|
const totalChunks = getChunkCount(existingPayload);
|
|
1770
2118
|
const ids = chunkIds(filePath, totalChunks);
|
|
@@ -1782,8 +2130,17 @@
|
|
|
1782
2130
|
*
|
|
1783
2131
|
* @param compiledRules - The newly compiled rules.
|
|
1784
2132
|
*/
|
|
1785
|
-
|
|
2133
|
+
/**
|
|
2134
|
+
* Update compiled inference rules and optionally the template engine.
|
|
2135
|
+
*
|
|
2136
|
+
* @param compiledRules - The newly compiled rules.
|
|
2137
|
+
* @param templateEngine - Optional updated template engine.
|
|
2138
|
+
*/
|
|
2139
|
+
updateRules(compiledRules, templateEngine) {
|
|
1786
2140
|
this.compiledRules = compiledRules;
|
|
2141
|
+
if (templateEngine) {
|
|
2142
|
+
this.templateEngine = templateEngine;
|
|
2143
|
+
}
|
|
1787
2144
|
this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
|
|
1788
2145
|
}
|
|
1789
2146
|
}
|
|
@@ -2312,6 +2669,76 @@
|
|
|
2312
2669
|
}
|
|
2313
2670
|
}
|
|
2314
2671
|
|
|
2672
|
+
/**
|
|
2673
|
+
* @module watcher/globToDir
|
|
2674
|
+
* Adapts glob-based watch config to chokidar v4+, which removed glob support
|
|
2675
|
+
* (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
|
|
2676
|
+
* strings, silently producing zero events. This module extracts static directory
|
|
2677
|
+
* roots from glob patterns for chokidar to watch, then filters emitted events
|
|
2678
|
+
* against the original globs via picomatch.
|
|
2679
|
+
*/
|
|
2680
|
+
/**
|
|
2681
|
+
* Extract the static directory root from a glob pattern.
|
|
2682
|
+
* Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
|
|
2683
|
+
*
|
|
2684
|
+
* @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
|
|
2685
|
+
* @returns The static directory prefix (e.g., `j:/domains`).
|
|
2686
|
+
*/
|
|
2687
|
+
/**
 * Extract the static directory root from a glob pattern.
 * Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`), or `.` when the
 *   pattern has no static prefix at all.
 */
function globRoot(glob) {
    const parts = glob.replace(/\\/g, '/').split('/');
    const firstGlobIndex = parts.findIndex((part) => /[*?{[\]]/.test(part));
    const staticParts = firstGlobIndex === -1 ? parts : parts.slice(0, firstGlobIndex);
    return staticParts.join('/') || '.';
}
|
|
2698
|
+
/**
|
|
2699
|
+
* Deduplicate directory roots, removing paths that are subdirectories of others.
|
|
2700
|
+
*
|
|
2701
|
+
* @param roots - Array of directory paths.
|
|
2702
|
+
* @returns Deduplicated array with subdirectories removed.
|
|
2703
|
+
*/
|
|
2704
|
+
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Comparison is case-insensitive (Windows paths) and slash-normalized, but the
 * returned roots preserve the caller's original casing so they remain valid on
 * case-sensitive filesystems (the previous implementation lowercased the
 * output, which could point chokidar at nonexistent paths on Linux).
 *
 * @param roots - Array of directory paths.
 * @returns Deduplicated array with subdirectories removed, sorted by
 *   case-insensitive key.
 */
function deduplicateRoots(roots) {
    // lowercase, forward-slash key -> first-seen original-cased root
    const byKey = new Map();
    for (const root of roots) {
        const normalized = root.replace(/\\/g, '/');
        const key = normalized.toLowerCase();
        if (!byKey.has(key))
            byKey.set(key, normalized);
    }
    const keys = [...byKey.keys()].sort();
    // Drop any key that is nested under another key.
    const kept = keys.filter((key) => {
        const withSlash = key.endsWith('/') ? key : key + '/';
        return !keys.some((other) => other !== key && withSlash.startsWith(other + '/'));
    });
    return kept.map((key) => byKey.get(key));
}
|
|
2712
|
+
/**
|
|
2713
|
+
* Build a picomatch matcher from an array of glob patterns.
|
|
2714
|
+
* Normalizes Windows paths (backslash → forward slash, lowercase drive letter)
|
|
2715
|
+
* before matching.
|
|
2716
|
+
*
|
|
2717
|
+
* @param globs - Glob patterns to match against.
|
|
2718
|
+
* @returns A function that tests whether a file path matches any of the globs.
|
|
2719
|
+
*/
|
|
2720
|
+
/**
 * Build a picomatch matcher from an array of glob patterns.
 * Normalizes Windows paths (backslash → forward slash) before matching, and
 * matches case-insensitively with dotfiles included.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function that tests whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const toPosix = (p) => p.replace(/\\/g, '/');
    const isMatch = picomatch(globs.map(toPosix), { dot: true, nocase: true });
    return (filePath) => isMatch(toPosix(filePath));
}
|
|
2728
|
+
/**
|
|
2729
|
+
* Convert an array of glob patterns into chokidar-compatible directory roots
|
|
2730
|
+
* and a filter function for post-hoc event filtering.
|
|
2731
|
+
*
|
|
2732
|
+
* @param globs - Glob patterns from the watch config.
|
|
2733
|
+
* @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
|
|
2734
|
+
*/
|
|
2735
|
+
/**
 * Convert an array of glob patterns into chokidar-compatible directory roots
 * and a filter function for post-hoc event filtering.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
|
|
2741
|
+
|
|
2315
2742
|
/**
|
|
2316
2743
|
* @module watcher
|
|
2317
2744
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -2326,6 +2753,7 @@
|
|
|
2326
2753
|
logger;
|
|
2327
2754
|
health;
|
|
2328
2755
|
gitignoreFilter;
|
|
2756
|
+
globMatches;
|
|
2329
2757
|
watcher;
|
|
2330
2758
|
/**
|
|
2331
2759
|
* Create a new FileSystemWatcher.
|
|
@@ -2342,6 +2770,7 @@
|
|
|
2342
2770
|
this.processor = processor;
|
|
2343
2771
|
this.logger = logger;
|
|
2344
2772
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
2773
|
+
this.globMatches = () => true;
|
|
2345
2774
|
const healthOptions = {
|
|
2346
2775
|
maxRetries: options.maxRetries,
|
|
2347
2776
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2354,7 +2783,13 @@
|
|
|
2354
2783
|
* Start watching the filesystem and processing events.
|
|
2355
2784
|
*/
|
|
2356
2785
|
start() {
|
|
2357
|
-
|
|
2786
|
+
// Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
|
|
2787
|
+
// Glob patterns are silently treated as literal strings, producing zero
|
|
2788
|
+
// events. We extract static directory roots for chokidar to watch, then
|
|
2789
|
+
// filter emitted events against the original globs via picomatch.
|
|
2790
|
+
const { roots, matches } = resolveWatchPaths(this.config.paths);
|
|
2791
|
+
this.globMatches = matches;
|
|
2792
|
+
this.watcher = chokidar.watch(roots, {
|
|
2358
2793
|
ignored: this.config.ignored,
|
|
2359
2794
|
usePolling: this.config.usePolling,
|
|
2360
2795
|
interval: this.config.pollIntervalMs,
|
|
@@ -2365,6 +2800,8 @@
|
|
|
2365
2800
|
});
|
|
2366
2801
|
this.watcher.on('add', (path) => {
|
|
2367
2802
|
this.handleGitignoreChange(path);
|
|
2803
|
+
if (!this.globMatches(path))
|
|
2804
|
+
return;
|
|
2368
2805
|
if (this.isGitignored(path))
|
|
2369
2806
|
return;
|
|
2370
2807
|
this.logger.debug({ path }, 'File added');
|
|
@@ -2372,6 +2809,8 @@
|
|
|
2372
2809
|
});
|
|
2373
2810
|
this.watcher.on('change', (path) => {
|
|
2374
2811
|
this.handleGitignoreChange(path);
|
|
2812
|
+
if (!this.globMatches(path))
|
|
2813
|
+
return;
|
|
2375
2814
|
if (this.isGitignored(path))
|
|
2376
2815
|
return;
|
|
2377
2816
|
this.logger.debug({ path }, 'File changed');
|
|
@@ -2379,6 +2818,8 @@
|
|
|
2379
2818
|
});
|
|
2380
2819
|
this.watcher.on('unlink', (path) => {
|
|
2381
2820
|
this.handleGitignoreChange(path);
|
|
2821
|
+
if (!this.globMatches(path))
|
|
2822
|
+
return;
|
|
2382
2823
|
if (this.isGitignored(path))
|
|
2383
2824
|
return;
|
|
2384
2825
|
this.logger.debug({ path }, 'File removed');
|
|
@@ -2451,51 +2892,21 @@
|
|
|
2451
2892
|
}
|
|
2452
2893
|
|
|
2453
2894
|
/**
|
|
2454
|
-
* @module app/
|
|
2455
|
-
*
|
|
2456
|
-
*/
|
|
2457
|
-
/**
|
|
2458
|
-
* Debounced config file watcher.
|
|
2895
|
+
* @module app/factories
|
|
2896
|
+
* Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
|
|
2459
2897
|
*/
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
});
|
|
2473
|
-
this.watcher.on('change', () => {
|
|
2474
|
-
if (this.debounce)
|
|
2475
|
-
clearTimeout(this.debounce);
|
|
2476
|
-
this.debounce = setTimeout(() => {
|
|
2477
|
-
void this.options.onChange();
|
|
2478
|
-
}, this.options.debounceMs);
|
|
2479
|
-
});
|
|
2480
|
-
this.watcher.on('error', (error) => {
|
|
2481
|
-
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2482
|
-
});
|
|
2483
|
-
this.options.logger.info({
|
|
2484
|
-
configPath: this.options.configPath,
|
|
2485
|
-
debounceMs: this.options.debounceMs,
|
|
2486
|
-
}, 'Config watcher started');
|
|
2487
|
-
}
|
|
2488
|
-
async stop() {
|
|
2489
|
-
if (this.debounce) {
|
|
2490
|
-
clearTimeout(this.debounce);
|
|
2491
|
-
this.debounce = undefined;
|
|
2492
|
-
}
|
|
2493
|
-
if (this.watcher) {
|
|
2494
|
-
await this.watcher.close();
|
|
2495
|
-
this.watcher = undefined;
|
|
2496
|
-
}
|
|
2497
|
-
}
|
|
2498
|
-
}
|
|
2898
|
+
/** Default component factories wiring real implementations. */
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    createVectorStoreClient: (cfg, dims, log) => new VectorStoreClient(cfg, dims, log),
    compileRules,
    createDocumentProcessor: (cfg, provider, store, rules, log, templates) => new DocumentProcessor(cfg, provider, store, rules, log, templates),
    createEventQueue: (opts) => new EventQueue(opts),
    createFileSystemWatcher: (cfg, q, proc, log, opts) => new FileSystemWatcher(cfg, q, proc, log, opts),
    createApiServer,
};
|
|
2499
2910
|
|
|
2500
2911
|
/**
|
|
2501
2912
|
* @module app/shutdown
|
|
@@ -2515,17 +2926,28 @@
|
|
|
2515
2926
|
process.on('SIGINT', () => void shutdown());
|
|
2516
2927
|
}
|
|
2517
2928
|
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2929
|
+
/**
|
|
2930
|
+
* @module app/startFromConfig
|
|
2931
|
+
* Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
|
|
2932
|
+
*/
|
|
2933
|
+
/**
|
|
2934
|
+
* Create and start a JeevesWatcher from a config file path.
|
|
2935
|
+
*
|
|
2936
|
+
* @param configPath - Optional path to the configuration file.
|
|
2937
|
+
* @returns The running JeevesWatcher instance.
|
|
2938
|
+
*/
|
|
2939
|
+
/**
 * Create and start a JeevesWatcher from a config file path.
 *
 * @param configPath - Optional path to the configuration file.
 * @returns The running JeevesWatcher instance.
 */
async function startFromConfig(configPath) {
    const config = await loadConfig(configPath);
    const app = new JeevesWatcher(config, configPath);
    // Wire SIGINT/SIGTERM to a graceful stop before the watcher spins up.
    installShutdownHandlers(() => app.stop());
    await app.start();
    return app;
}
|
|
2946
|
+
|
|
2947
|
+
/**
|
|
2948
|
+
* @module app
|
|
2949
|
+
* Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
|
|
2950
|
+
*/
|
|
2529
2951
|
/**
|
|
2530
2952
|
* Main application class that wires together all components.
|
|
2531
2953
|
*/
|
|
@@ -2560,56 +2982,26 @@
|
|
|
2560
2982
|
async start() {
|
|
2561
2983
|
const logger = this.factories.createLogger(this.config.logging);
|
|
2562
2984
|
this.logger = logger;
|
|
2563
|
-
|
|
2564
|
-
try {
|
|
2565
|
-
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
2566
|
-
}
|
|
2567
|
-
catch (error) {
|
|
2568
|
-
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
2569
|
-
throw error;
|
|
2570
|
-
}
|
|
2571
|
-
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
2572
|
-
await vectorStore.ensureCollection();
|
|
2985
|
+
const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
|
|
2573
2986
|
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
2574
|
-
const
|
|
2987
|
+
const configDir = this.configPath ? node_path.dirname(this.configPath) : '.';
|
|
2988
|
+
const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
|
|
2989
|
+
const processor = this.factories.createDocumentProcessor({
|
|
2575
2990
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
2576
2991
|
chunkSize: this.config.embedding.chunkSize,
|
|
2577
2992
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
2578
2993
|
maps: this.config.maps,
|
|
2579
|
-
|
|
2580
|
-
|
|
2994
|
+
configDir,
|
|
2995
|
+
}, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
|
|
2581
2996
|
this.processor = processor;
|
|
2582
|
-
|
|
2997
|
+
this.queue = this.factories.createEventQueue({
|
|
2583
2998
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
2584
2999
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
2585
3000
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2586
3001
|
});
|
|
2587
|
-
this.
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
? new GitignoreFilter(this.config.watch.paths)
|
|
2591
|
-
: undefined;
|
|
2592
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2593
|
-
maxRetries: this.config.maxRetries,
|
|
2594
|
-
maxBackoffMs: this.config.maxBackoffMs,
|
|
2595
|
-
onFatalError: this.runtimeOptions.onFatalError,
|
|
2596
|
-
gitignoreFilter,
|
|
2597
|
-
});
|
|
2598
|
-
this.watcher = watcher;
|
|
2599
|
-
const server = this.factories.createApiServer({
|
|
2600
|
-
processor,
|
|
2601
|
-
vectorStore,
|
|
2602
|
-
embeddingProvider,
|
|
2603
|
-
queue,
|
|
2604
|
-
config: this.config,
|
|
2605
|
-
logger,
|
|
2606
|
-
});
|
|
2607
|
-
this.server = server;
|
|
2608
|
-
await server.listen({
|
|
2609
|
-
host: this.config.api?.host ?? '127.0.0.1',
|
|
2610
|
-
port: this.config.api?.port ?? 3456,
|
|
2611
|
-
});
|
|
2612
|
-
watcher.start();
|
|
3002
|
+
this.watcher = this.createWatcher(this.queue, processor, logger);
|
|
3003
|
+
this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
|
|
3004
|
+
this.watcher.start();
|
|
2613
3005
|
this.startConfigWatch();
|
|
2614
3006
|
logger.info('jeeves-watcher started');
|
|
2615
3007
|
}
|
|
@@ -2640,22 +3032,61 @@
|
|
|
2640
3032
|
}
|
|
2641
3033
|
this.logger?.info('jeeves-watcher stopped');
|
|
2642
3034
|
}
|
|
3035
|
+
async initEmbeddingAndStore(logger) {
|
|
3036
|
+
let embeddingProvider;
|
|
3037
|
+
try {
|
|
3038
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
3039
|
+
}
|
|
3040
|
+
catch (error) {
|
|
3041
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
3042
|
+
throw error;
|
|
3043
|
+
}
|
|
3044
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
3045
|
+
await vectorStore.ensureCollection();
|
|
3046
|
+
return { embeddingProvider, vectorStore };
|
|
3047
|
+
}
|
|
3048
|
+
createWatcher(queue, processor, logger) {
|
|
3049
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
3050
|
+
const gitignoreFilter = respectGitignore
|
|
3051
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
3052
|
+
: undefined;
|
|
3053
|
+
return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
3054
|
+
maxRetries: this.config.maxRetries,
|
|
3055
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
3056
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
3057
|
+
gitignoreFilter,
|
|
3058
|
+
});
|
|
3059
|
+
}
|
|
3060
|
+
async startApiServer(processor, vectorStore, embeddingProvider, logger) {
|
|
3061
|
+
const server = this.factories.createApiServer({
|
|
3062
|
+
processor,
|
|
3063
|
+
vectorStore,
|
|
3064
|
+
embeddingProvider,
|
|
3065
|
+
queue: this.queue,
|
|
3066
|
+
config: this.config,
|
|
3067
|
+
logger,
|
|
3068
|
+
});
|
|
3069
|
+
await server.listen({
|
|
3070
|
+
host: this.config.api?.host ?? '127.0.0.1',
|
|
3071
|
+
port: this.config.api?.port ?? 3456,
|
|
3072
|
+
});
|
|
3073
|
+
return server;
|
|
3074
|
+
}
|
|
2643
3075
|
startConfigWatch() {
|
|
2644
3076
|
const logger = this.logger;
|
|
2645
3077
|
if (!logger)
|
|
2646
3078
|
return;
|
|
2647
3079
|
const enabled = this.config.configWatch?.enabled ?? true;
|
|
2648
|
-
if (!enabled)
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
3080
|
+
if (!enabled || !this.configPath) {
|
|
3081
|
+
if (!this.configPath) {
|
|
3082
|
+
logger.debug('Config watch enabled, but no config path was provided');
|
|
3083
|
+
}
|
|
2652
3084
|
return;
|
|
2653
3085
|
}
|
|
2654
|
-
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
2655
3086
|
this.configWatcher = new ConfigWatcher({
|
|
2656
3087
|
configPath: this.configPath,
|
|
2657
3088
|
enabled,
|
|
2658
|
-
debounceMs,
|
|
3089
|
+
debounceMs: this.config.configWatch?.debounceMs ?? 10000,
|
|
2659
3090
|
logger,
|
|
2660
3091
|
onChange: async () => this.reloadConfig(),
|
|
2661
3092
|
});
|
|
@@ -2677,7 +3108,9 @@
|
|
|
2677
3108
|
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
2678
3109
|
this.config = newConfig;
|
|
2679
3110
|
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
2680
|
-
|
|
3111
|
+
const reloadConfigDir = node_path.dirname(this.configPath);
|
|
3112
|
+
const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
|
|
3113
|
+
processor.updateRules(compiledRules, newTemplateEngine);
|
|
2681
3114
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
2682
3115
|
}
|
|
2683
3116
|
catch (error) {
|
|
@@ -2685,19 +3118,7 @@
|
|
|
2685
3118
|
}
|
|
2686
3119
|
}
|
|
2687
3120
|
}
|
|
2688
|
-
|
|
2689
|
-
* Create and start a JeevesWatcher from a config file path.
|
|
2690
|
-
*
|
|
2691
|
-
* @param configPath - Optional path to the configuration file.
|
|
2692
|
-
* @returns The running JeevesWatcher instance.
|
|
2693
|
-
*/
|
|
2694
|
-
async function startFromConfig(configPath) {
|
|
2695
|
-
const config = await loadConfig(configPath);
|
|
2696
|
-
const app = new JeevesWatcher(config, configPath);
|
|
2697
|
-
installShutdownHandlers(() => app.stop());
|
|
2698
|
-
await app.start();
|
|
2699
|
-
return app;
|
|
2700
|
-
}
|
|
3121
|
+
// startFromConfig re-exported from ./startFromConfig
|
|
2701
3122
|
|
|
2702
3123
|
exports.DocumentProcessor = DocumentProcessor;
|
|
2703
3124
|
exports.EventQueue = EventQueue;
|
|
@@ -2705,15 +3126,18 @@
|
|
|
2705
3126
|
exports.GitignoreFilter = GitignoreFilter;
|
|
2706
3127
|
exports.JeevesWatcher = JeevesWatcher;
|
|
2707
3128
|
exports.SystemHealth = SystemHealth;
|
|
3129
|
+
exports.TemplateEngine = TemplateEngine;
|
|
2708
3130
|
exports.VectorStoreClient = VectorStoreClient;
|
|
2709
3131
|
exports.apiConfigSchema = apiConfigSchema;
|
|
2710
3132
|
exports.applyRules = applyRules;
|
|
2711
3133
|
exports.buildAttributes = buildAttributes;
|
|
3134
|
+
exports.buildTemplateEngine = buildTemplateEngine;
|
|
2712
3135
|
exports.compileRules = compileRules;
|
|
2713
3136
|
exports.configWatchConfigSchema = configWatchConfigSchema;
|
|
2714
3137
|
exports.contentHash = contentHash;
|
|
2715
3138
|
exports.createApiServer = createApiServer;
|
|
2716
3139
|
exports.createEmbeddingProvider = createEmbeddingProvider;
|
|
3140
|
+
exports.createHandlebarsInstance = createHandlebarsInstance;
|
|
2717
3141
|
exports.createLogger = createLogger;
|
|
2718
3142
|
exports.deleteMetadata = deleteMetadata;
|
|
2719
3143
|
exports.embeddingConfigSchema = embeddingConfigSchema;
|
|
@@ -2721,13 +3145,16 @@
|
|
|
2721
3145
|
exports.inferenceRuleSchema = inferenceRuleSchema;
|
|
2722
3146
|
exports.jeevesWatcherConfigSchema = jeevesWatcherConfigSchema;
|
|
2723
3147
|
exports.loadConfig = loadConfig;
|
|
3148
|
+
exports.loadCustomHelpers = loadCustomHelpers;
|
|
2724
3149
|
exports.loggingConfigSchema = loggingConfigSchema;
|
|
2725
3150
|
exports.metadataPath = metadataPath;
|
|
2726
3151
|
exports.pointId = pointId;
|
|
2727
3152
|
exports.readMetadata = readMetadata;
|
|
3153
|
+
exports.registerBuiltinHelpers = registerBuiltinHelpers;
|
|
3154
|
+
exports.resolveTemplateSource = resolveTemplateSource;
|
|
2728
3155
|
exports.startFromConfig = startFromConfig;
|
|
2729
3156
|
exports.vectorStoreConfigSchema = vectorStoreConfigSchema;
|
|
2730
3157
|
exports.watchConfigSchema = watchConfigSchema;
|
|
2731
3158
|
exports.writeMetadata = writeMetadata;
|
|
2732
3159
|
|
|
2733
|
-
})(this["jeeves-watcher"] = this["jeeves-watcher"] || {}, Fastify, promises, node_path, picomatch, radash, node_crypto, cosmiconfig, zod, jsonmap, googleGenai,
|
|
3160
|
+
})(this["jeeves-watcher"] = this["jeeves-watcher"] || {}, Fastify, promises, node_path, picomatch, radash, node_crypto, node_fs, ignore, Handlebars, dayjs, hastUtilToMdast, mdastUtilFromAdf, mdastUtilToMarkdown, rehypeParse, unified, chokidar, cosmiconfig, zod, jsonmap, googleGenai, pino, uuid, cheerio, yaml, mammoth, Ajv, addFormats, textsplitters, jsClientRest);
|