@karmaniverous/jeeves-watcher 0.3.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +69 -14
- package/dist/cjs/index.js +858 -390
- package/dist/cli/jeeves-watcher/index.js +850 -388
- package/dist/index.d.ts +160 -16
- package/dist/index.iife.js +850 -389
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +854 -392
- package/package.json +12 -4
package/dist/mjs/index.js
CHANGED
|
@@ -2,14 +2,22 @@ import Fastify from 'fastify';
|
|
|
2
2
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
3
3
|
import { resolve, dirname, join, relative, extname, basename } from 'node:path';
|
|
4
4
|
import picomatch from 'picomatch';
|
|
5
|
-
import { omit, get } from 'radash';
|
|
5
|
+
import { omit, capitalize, title, camel, snake, dash, isEqual, get } from 'radash';
|
|
6
6
|
import { createHash } from 'node:crypto';
|
|
7
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
8
|
+
import ignore from 'ignore';
|
|
9
|
+
import Handlebars from 'handlebars';
|
|
10
|
+
import dayjs from 'dayjs';
|
|
11
|
+
import { toMdast } from 'hast-util-to-mdast';
|
|
12
|
+
import { fromADF } from 'mdast-util-from-adf';
|
|
13
|
+
import { toMarkdown } from 'mdast-util-to-markdown';
|
|
14
|
+
import rehypeParse from 'rehype-parse';
|
|
15
|
+
import { unified } from 'unified';
|
|
16
|
+
import chokidar from 'chokidar';
|
|
7
17
|
import { cosmiconfig } from 'cosmiconfig';
|
|
8
18
|
import { z, ZodError } from 'zod';
|
|
9
19
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
10
20
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
11
|
-
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
12
|
-
import ignore from 'ignore';
|
|
13
21
|
import pino from 'pino';
|
|
14
22
|
import { v5 } from 'uuid';
|
|
15
23
|
import * as cheerio from 'cheerio';
|
|
@@ -19,7 +27,6 @@ import Ajv from 'ajv';
|
|
|
19
27
|
import addFormats from 'ajv-formats';
|
|
20
28
|
import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
21
29
|
import { QdrantClient } from '@qdrant/js-client-rest';
|
|
22
|
-
import chokidar from 'chokidar';
|
|
23
30
|
|
|
24
31
|
/**
|
|
25
32
|
* @module util/normalizeError
|
|
@@ -417,6 +424,486 @@ function createApiServer(options) {
|
|
|
417
424
|
return app;
|
|
418
425
|
}
|
|
419
426
|
|
|
427
|
+
/**
 * @module gitignore
 * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
 */
/**
 * Locate the root of the enclosing git repository by walking upward from
 * `startDir` until a directory containing a `.git` directory is found.
 *
 * Note: the filesystem root itself is never checked (the walk stops when it
 * is reached), matching the original behavior.
 *
 * @param startDir - Directory to start the upward search from.
 * @returns The absolute repo root path, or `undefined` when no repo encloses it.
 */
function findRepoRoot(startDir) {
    const fsRoot = resolve('/');
    let current = resolve(startDir);
    while (current !== fsRoot) {
        const gitPath = join(current, '.git');
        if (existsSync(gitPath) && statSync(gitPath).isDirectory()) {
            return current;
        }
        const up = dirname(current);
        if (up === current) {
            // Reached a fixed point (e.g. a drive root); nothing above us.
            break;
        }
        current = up;
    }
    return undefined;
}
|
|
450
|
+
/**
 * Convert a watch path (directory, file path, or glob) to a concrete directory
 * that can be scanned for a repo root.
 *
 * @param watchPath - A directory, file path, or glob pattern.
 * @returns An existing absolute directory, or `undefined` if none can be derived.
 */
function watchPathToScanDir(watchPath) {
    const absolute = resolve(watchPath);
    try {
        // Existing path: use it directly (or its parent when it is a file).
        return statSync(absolute).isDirectory() ? absolute : dirname(absolute);
    }
    catch {
        // Path does not exist (or is inaccessible); fall through to glob handling.
    }
    // If this is a glob, fall back to the non-glob prefix.
    const globMatch = /[*?[{]/.exec(watchPath);
    if (!globMatch)
        return undefined;
    const trimmed = watchPath.slice(0, globMatch.index).trim();
    let baseDir;
    if (trimmed.length === 0) {
        baseDir = '.';
    }
    else if (trimmed.endsWith('/') || trimmed.endsWith('\\')) {
        // Prefix ends at a directory separator: the prefix IS the base dir.
        baseDir = trimmed;
    }
    else {
        // Prefix ends mid-segment (e.g. `src/foo*`): use the parent dir.
        baseDir = dirname(trimmed);
    }
    const resolvedBase = resolve(baseDir);
    return existsSync(resolvedBase) ? resolvedBase : undefined;
}
|
|
478
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 * Skips `.git` and `node_modules` directories for performance.
 *
 * @param dir - Directory to search.
 * @returns Paths of every `.gitignore` found (current dir first, then children
 *   in readdir order).
 */
function findGitignoreFiles(dir) {
    const found = [];
    const ownGitignore = join(dir, '.gitignore');
    if (existsSync(ownGitignore))
        found.push(ownGitignore);
    let children;
    try {
        children = readdirSync(dir);
    }
    catch {
        // Directory unreadable; return whatever we collected so far.
        return found;
    }
    for (const child of children) {
        if (child === '.git' || child === 'node_modules')
            continue;
        const childPath = join(dir, child);
        try {
            if (statSync(childPath).isDirectory()) {
                for (const nested of findGitignoreFiles(childPath)) {
                    found.push(nested);
                }
            }
        }
        catch {
            // Skip inaccessible entries
        }
    }
    return found;
}
|
|
510
|
+
/**
 * Parse a `.gitignore` file into an `ignore` instance.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns An `ignore` matcher seeded with the file's patterns.
 */
function parseGitignore(gitignorePath) {
    return ignore().add(readFileSync(gitignorePath, 'utf8'));
}
|
|
517
|
+
/**
 * Normalize a path to use forward slashes (required by `ignore` package).
 *
 * @param p - A path that may contain Windows-style backslash separators.
 * @returns The same path with every backslash replaced by `/`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
523
|
+
/**
 * Processor-level gitignore filter. Checks file paths against the nearest
 * `.gitignore` chain in git repositories.
 */
class GitignoreFilter {
    /** Map of repo root path → { root, entries }; entries sorted deepest-first. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * True when a `path.relative()` result escapes its base directory:
     * `..`-prefixed, or (on Windows, where relative() across drives yields an
     * absolute/drive-letter path that the `ignore` library rejects with a
     * RangeError) an absolute path. Shared by isIgnored() and invalidate()
     * so both apply identical containment rules.
     */
    static escapesBase(relPath) {
        return (relPath.startsWith('..') ||
            relPath.startsWith(resolve('/')) ||
            /^[a-zA-Z]:/.test(relPath));
    }
    /**
     * Scan paths for git repos and their `.gitignore` files.
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir)
                continue;
            if (scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot)
                continue;
            if (this.repos.has(repoRoot))
                continue;
            const gitignoreFiles = findGitignoreFiles(repoRoot);
            const entries = gitignoreFiles.map((gf) => ({
                dir: dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Sort deepest-first so nested `.gitignore` files are checked first
            entries.sort((a, b) => b.dir.length - a.dir.length);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - Absolute file path to check.
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const [, repo] of this.repos) {
            // Skip repos that do not contain the file (including the Windows
            // cross-drive case — see escapesBase()).
            const relToRepo = relative(repo.root, absPath);
            if (GitignoreFilter.escapesBase(relToRepo)) {
                continue;
            }
            // Check each `.gitignore` entry (deepest-first)
            for (const entry of repo.entries) {
                const relToEntry = relative(entry.dir, absPath);
                if (GitignoreFilter.escapesBase(relToEntry))
                    continue;
                const normalized = toForwardSlash(relToEntry);
                if (entry.ig.ignores(normalized)) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        for (const [, repo] of this.repos) {
            const relToRepo = relative(repo.root, gitignoreDir);
            // BUG FIX: this previously skipped only `..`-prefixed paths, unlike
            // isIgnored(); on Windows a `.gitignore` on a different drive could
            // be wrongly attributed to this repo. Use the shared guard instead.
            if (GitignoreFilter.escapesBase(relToRepo))
                continue;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            // Re-parse if file still exists
            if (existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                // Re-sort deepest-first
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            return;
        }
        // If not in any known repo, check if it's in a repo we haven't scanned
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && existsSync(absPath)) {
            const entry = { dir: gitignoreDir, ig: parseGitignore(absPath) };
            const existing = this.repos.get(repoRoot);
            if (existing) {
                existing.entries.push(entry);
                existing.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            else {
                this.repos.set(repoRoot, { root: repoRoot, entries: [entry] });
            }
        }
    }
}
|
|
638
|
+
|
|
639
|
+
/**
 * @module templates/helpers
 * Registers built-in Handlebars helpers for content templates.
 */
/** Pre-built rehype parser for HTML → hast conversion. */
const htmlParser = unified().use(rehypeParse, { fragment: true });
/**
 * Register all built-in helpers on a Handlebars instance.
 *
 * @param hbs - The Handlebars instance.
 */
function registerBuiltinHelpers(hbs) {
    // Structural: ADF → Markdown
    hbs.registerHelper('adfToMarkdown', (adf) => {
        if (!adf || typeof adf !== 'object')
            return '';
        try {
            return new hbs.SafeString(toMarkdown(fromADF(adf)).trim());
        }
        catch {
            return '<!-- ADF conversion failed -->';
        }
    });
    // Structural: HTML → Markdown
    hbs.registerHelper('markdownify', (html) => {
        if (typeof html !== 'string' || !html.trim())
            return '';
        try {
            const tree = toMdast(htmlParser.parse(html));
            return new hbs.SafeString(toMarkdown(tree).trim());
        }
        catch {
            return '<!-- HTML conversion failed -->';
        }
    });
    // Formatting: dateFormat
    hbs.registerHelper('dateFormat', (value, format) => {
        if (value === undefined || value === null)
            return '';
        // Handlebars passes an options object as the trailing argument when
        // no explicit format is supplied, hence the typeof guard.
        return dayjs(value).format(typeof format === 'string' ? format : 'YYYY-MM-DD');
    });
    // Formatting: join
    hbs.registerHelper('join', (arr, separator) => {
        if (!Array.isArray(arr))
            return '';
        return arr.join(typeof separator === 'string' ? separator : ', ');
    });
    // Formatting: pluck
    hbs.registerHelper('pluck', (arr, key) => {
        if (!Array.isArray(arr) || typeof key !== 'string')
            return [];
        return arr.map((item) => (item && typeof item === 'object' ? item[key] : undefined));
    });
    // String transforms — each yields '' for non-string input.
    const stringTransforms = {
        lowercase: (text) => text.toLowerCase(),
        uppercase: (text) => text.toUpperCase(),
        capitalize,
        title,
        camel,
        snake,
        dash,
    };
    for (const [name, transform] of Object.entries(stringTransforms)) {
        hbs.registerHelper(name, (text) => (typeof text === 'string' ? transform(text) : ''));
    }
    // default helper
    hbs.registerHelper('default', (value, fallback) => value ?? fallback ?? '');
    // eq helper (deep equality)
    hbs.registerHelper('eq', (a, b) => isEqual(a, b));
    // json helper
    hbs.registerHelper('json', (value) => new hbs.SafeString(JSON.stringify(value, null, 2)));
}
|
|
719
|
+
|
|
720
|
+
/**
 * @module templates/engine
 * Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
 */
/**
 * Resolve a template value to its source string.
 *
 * Resolution order:
 * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
 * 2. Matches a key in namedTemplates → named ref (recursively resolve)
 * 3. Otherwise → inline Handlebars template string
 *
 * @param value - The template reference (inline, file path, or named ref).
 * @param namedTemplates - Named template definitions from config.
 * @param configDir - Directory to resolve relative file paths against.
 * @param visited - Set of visited named refs for cycle detection.
 * @returns The resolved template source string.
 * @throws When named refs form a cycle.
 */
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
    // File path detection: template-file extensions always mean "read from disk".
    const isFilePath = value.endsWith('.hbs') || value.endsWith('.handlebars');
    if (isFilePath) {
        return readFileSync(resolve(configDir, value), 'utf-8');
    }
    // Named ref: recurse into the referenced template, guarding against cycles.
    const named = namedTemplates?.[value];
    if (named !== undefined) {
        if (visited.has(value)) {
            throw new Error(`Circular template reference detected: ${value}`);
        }
        visited.add(value);
        return resolveTemplateSource(named, namedTemplates, configDir, visited);
    }
    // Inline template string.
    return value;
}
|
|
754
|
+
/**
 * Create a configured Handlebars instance with built-in helpers registered.
 *
 * @returns A Handlebars instance with helpers.
 */
function createHandlebarsInstance() {
    const instance = Handlebars.create();
    registerBuiltinHelpers(instance);
    return instance;
}
|
|
764
|
+
/**
 * Load custom helpers from file paths.
 *
 * Each file should export a default function that receives the Handlebars instance.
 * Modules without a function default export are loaded but otherwise ignored.
 *
 * @param hbs - The Handlebars instance.
 * @param paths - File paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 */
async function loadCustomHelpers(hbs, paths, configDir) {
    for (const helperPath of paths) {
        const mod = await import(resolve(configDir, helperPath));
        if (typeof mod.default === 'function') {
            mod.default(hbs);
        }
    }
}
|
|
782
|
+
/**
 * The template engine: holds compiled templates and renders them against context.
 */
class TemplateEngine {
    /** The Handlebars instance used for compilation. */
    hbs;
    /** Cache of compiled templates, keyed by rule index or named template. */
    compiled = new Map();
    constructor(hbs) {
        this.hbs = hbs;
    }
    /**
     * Compile and cache a template from its source string.
     *
     * @param key - Cache key (rule index or named template).
     * @param source - Handlebars template source.
     * @returns The compiled template.
     */
    compile(key, source) {
        const template = this.hbs.compile(source);
        this.compiled.set(key, template);
        return template;
    }
    /**
     * Get a previously compiled template by key.
     *
     * @param key - The cache key.
     * @returns The compiled template, or undefined.
     */
    get(key) {
        return this.compiled.get(key);
    }
    /**
     * Render a compiled template against a context.
     *
     * @param key - The cache key of the compiled template.
     * @param context - The data context for rendering.
     * @returns The rendered string, or null if the template was not found.
     */
    render(key, context) {
        const template = this.compiled.get(key);
        return template ? template(context) : null;
    }
}
|
|
826
|
+
|
|
827
|
+
/**
 * @module templates/buildTemplateEngine
 * Factory to build a TemplateEngine from config, compiling all rule templates at load time.
 */
/**
 * Build a TemplateEngine from configuration, pre-compiling all rule templates.
 *
 * @param rules - The inference rules (may contain template fields).
 * @param namedTemplates - Named template definitions from config.
 * @param templateHelperPaths - Paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @returns The configured TemplateEngine, or undefined if no templates are used.
 */
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
    // Nothing to do unless at least one rule declares a template.
    if (!rules.some((r) => r.template))
        return undefined;
    const hbs = createHandlebarsInstance();
    // Load custom helpers
    if (templateHelperPaths?.length && configDir) {
        await loadCustomHelpers(hbs, templateHelperPaths, configDir);
    }
    // Compile every rule template up front, keyed by rule index.
    const engine = new TemplateEngine(hbs);
    for (const [index, rule] of rules.entries()) {
        if (!rule.template)
            continue;
        const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
        engine.compile(`rule-${String(index)}`, source);
    }
    return engine;
}
|
|
859
|
+
|
|
860
|
+
/**
 * @module app/configWatcher
 * Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
 */
/**
 * Debounced config file watcher.
 */
class ConfigWatcher {
    /** Watcher options: { enabled, configPath, debounceMs, logger, onChange }. */
    options;
    /** Active chokidar watcher, set while running. */
    watcher;
    /** Pending debounce timer, set while a reload is queued. */
    debounce;
    constructor(options) {
        this.options = options;
    }
    /** Begin watching the config file; no-op when disabled. */
    start() {
        if (!this.options.enabled)
            return;
        const watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        watcher.on('error', (err) => {
            this.options.logger.error({ err: normalizeError(err) }, 'Config watcher error');
        });
        watcher.on('change', () => {
            // Restart the debounce window on every change event.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                void this.options.onChange();
            }, this.options.debounceMs);
        });
        this.watcher = watcher;
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /** Cancel any pending reload and close the underlying watcher. */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        if (this.watcher) {
            await this.watcher.close();
            this.watcher = undefined;
        }
    }
}
|
|
906
|
+
|
|
420
907
|
/**
|
|
421
908
|
* @module config/defaults
|
|
422
909
|
* Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
|
|
@@ -618,7 +1105,12 @@ const inferenceRuleSchema = z.object({
|
|
|
618
1105
|
map: z
|
|
619
1106
|
.union([jsonMapMapSchema, z.string()])
|
|
620
1107
|
.optional()
|
|
621
|
-
.describe('JsonMap transformation (inline definition
|
|
1108
|
+
.describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
|
|
1109
|
+
/** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
|
|
1110
|
+
template: z
|
|
1111
|
+
.string()
|
|
1112
|
+
.optional()
|
|
1113
|
+
.describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
|
|
622
1114
|
});
|
|
623
1115
|
/**
|
|
624
1116
|
* Top-level configuration for jeeves-watcher.
|
|
@@ -656,6 +1148,22 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
656
1148
|
.record(z.string(), jsonMapMapSchema)
|
|
657
1149
|
.optional()
|
|
658
1150
|
.describe('Reusable named JsonMap transformations.'),
|
|
1151
|
+
/** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
|
|
1152
|
+
templates: z
|
|
1153
|
+
.record(z.string(), z.string())
|
|
1154
|
+
.optional()
|
|
1155
|
+
.describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
|
|
1156
|
+
/** Custom Handlebars helper registration. */
|
|
1157
|
+
templateHelpers: z
|
|
1158
|
+
.object({
|
|
1159
|
+
/** File paths to custom helper modules. */
|
|
1160
|
+
paths: z
|
|
1161
|
+
.array(z.string())
|
|
1162
|
+
.optional()
|
|
1163
|
+
.describe('File paths to custom helper modules.'),
|
|
1164
|
+
})
|
|
1165
|
+
.optional()
|
|
1166
|
+
.describe('Custom Handlebars helper registration.'),
|
|
659
1167
|
/** Logging configuration. */
|
|
660
1168
|
logging: loggingConfigSchema.optional().describe('Logging configuration.'),
|
|
661
1169
|
/** Timeout in milliseconds for graceful shutdown. */
|
|
@@ -913,258 +1421,52 @@ function createGeminiProvider(config, logger) {
|
|
|
913
1421
|
}, {
|
|
914
1422
|
attempts: 5,
|
|
915
1423
|
baseDelayMs: 500,
|
|
916
|
-
maxDelayMs: 10_000,
|
|
917
|
-
jitter: 0.2,
|
|
918
|
-
onRetry: ({ attempt, delayMs, error }) => {
|
|
919
|
-
log.warn({
|
|
920
|
-
attempt,
|
|
921
|
-
delayMs,
|
|
922
|
-
provider: 'gemini',
|
|
923
|
-
model: config.model,
|
|
924
|
-
err: normalizeError(error),
|
|
925
|
-
}, 'Embedding call failed; will retry');
|
|
926
|
-
},
|
|
927
|
-
});
|
|
928
|
-
// Validate dimensions
|
|
929
|
-
for (const vector of vectors) {
|
|
930
|
-
if (vector.length !== dimensions) {
|
|
931
|
-
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
932
|
-
}
|
|
933
|
-
}
|
|
934
|
-
return vectors;
|
|
935
|
-
},
|
|
936
|
-
};
|
|
937
|
-
}
|
|
938
|
-
function createMockFromConfig(config) {
|
|
939
|
-
const dimensions = config.dimensions ?? 768;
|
|
940
|
-
return createMockProvider(dimensions);
|
|
941
|
-
}
|
|
942
|
-
const embeddingProviderRegistry = new Map([
|
|
943
|
-
['mock', createMockFromConfig],
|
|
944
|
-
['gemini', createGeminiProvider],
|
|
945
|
-
]);
|
|
946
|
-
/**
|
|
947
|
-
* Create an embedding provider based on the given configuration.
|
|
948
|
-
*
|
|
949
|
-
* Each provider is responsible for its own default dimensions.
|
|
950
|
-
*
|
|
951
|
-
* @param config - The embedding configuration.
|
|
952
|
-
* @param logger - Optional pino logger for retry warnings.
|
|
953
|
-
* @returns An {@link EmbeddingProvider} instance.
|
|
954
|
-
* @throws If the configured provider is not supported.
|
|
955
|
-
*/
|
|
956
|
-
function createEmbeddingProvider(config, logger) {
|
|
957
|
-
const factory = embeddingProviderRegistry.get(config.provider);
|
|
958
|
-
if (!factory) {
|
|
959
|
-
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
960
|
-
}
|
|
961
|
-
return factory(config, logger);
|
|
962
|
-
}
|
|
963
|
-
|
|
964
|
-
/**
|
|
965
|
-
* @module gitignore
|
|
966
|
-
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
967
|
-
*/
|
|
968
|
-
/**
|
|
969
|
-
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
970
|
-
* Returns `undefined` if no repo is found.
|
|
971
|
-
*/
|
|
972
|
-
function findRepoRoot(startDir) {
|
|
973
|
-
let dir = resolve(startDir);
|
|
974
|
-
const root = resolve('/');
|
|
975
|
-
while (dir !== root) {
|
|
976
|
-
if (existsSync(join(dir, '.git')) &&
|
|
977
|
-
statSync(join(dir, '.git')).isDirectory()) {
|
|
978
|
-
return dir;
|
|
979
|
-
}
|
|
980
|
-
const parent = dirname(dir);
|
|
981
|
-
if (parent === dir)
|
|
982
|
-
break;
|
|
983
|
-
dir = parent;
|
|
984
|
-
}
|
|
985
|
-
return undefined;
|
|
986
|
-
}
|
|
987
|
-
/**
|
|
988
|
-
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
989
|
-
* that can be scanned for a repo root.
|
|
990
|
-
*/
|
|
991
|
-
function watchPathToScanDir(watchPath) {
|
|
992
|
-
const absPath = resolve(watchPath);
|
|
993
|
-
try {
|
|
994
|
-
return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
|
|
995
|
-
}
|
|
996
|
-
catch {
|
|
997
|
-
// ignore
|
|
998
|
-
}
|
|
999
|
-
// If this is a glob, fall back to the non-glob prefix.
|
|
1000
|
-
const globMatch = /[*?[{]/.exec(watchPath);
|
|
1001
|
-
if (!globMatch)
|
|
1002
|
-
return undefined;
|
|
1003
|
-
const prefix = watchPath.slice(0, globMatch.index);
|
|
1004
|
-
const trimmed = prefix.trim();
|
|
1005
|
-
const baseDir = trimmed.length === 0
|
|
1006
|
-
? '.'
|
|
1007
|
-
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
1008
|
-
? trimmed
|
|
1009
|
-
: dirname(trimmed);
|
|
1010
|
-
const resolved = resolve(baseDir);
|
|
1011
|
-
if (!existsSync(resolved))
|
|
1012
|
-
return undefined;
|
|
1013
|
-
return resolved;
|
|
1014
|
-
}
|
|
1015
|
-
/**
|
|
1016
|
-
* Recursively find all `.gitignore` files under `dir`.
|
|
1017
|
-
* Skips `.git` and `node_modules` directories for performance.
|
|
1018
|
-
*/
|
|
1019
|
-
function findGitignoreFiles(dir) {
|
|
1020
|
-
const results = [];
|
|
1021
|
-
const gitignorePath = join(dir, '.gitignore');
|
|
1022
|
-
if (existsSync(gitignorePath)) {
|
|
1023
|
-
results.push(gitignorePath);
|
|
1024
|
-
}
|
|
1025
|
-
let entries;
|
|
1026
|
-
try {
|
|
1027
|
-
entries = readdirSync(dir);
|
|
1028
|
-
}
|
|
1029
|
-
catch {
|
|
1030
|
-
return results;
|
|
1031
|
-
}
|
|
1032
|
-
for (const entry of entries) {
|
|
1033
|
-
if (entry === '.git' || entry === 'node_modules')
|
|
1034
|
-
continue;
|
|
1035
|
-
const fullPath = join(dir, entry);
|
|
1036
|
-
try {
|
|
1037
|
-
if (statSync(fullPath).isDirectory()) {
|
|
1038
|
-
results.push(...findGitignoreFiles(fullPath));
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
catch {
|
|
1042
|
-
// Skip inaccessible entries
|
|
1043
|
-
}
|
|
1044
|
-
}
|
|
1045
|
-
return results;
|
|
1046
|
-
}
|
|
1047
|
-
/**
|
|
1048
|
-
* Parse a `.gitignore` file into an `ignore` instance.
|
|
1049
|
-
*/
|
|
1050
|
-
function parseGitignore(gitignorePath) {
|
|
1051
|
-
const content = readFileSync(gitignorePath, 'utf8');
|
|
1052
|
-
return ignore().add(content);
|
|
1053
|
-
}
|
|
1054
|
-
/**
|
|
1055
|
-
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
1056
|
-
*/
|
|
1057
|
-
function toForwardSlash(p) {
|
|
1058
|
-
return p.replace(/\\/g, '/');
|
|
1059
|
-
}
|
|
1060
|
-
/**
|
|
1061
|
-
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
1062
|
-
* `.gitignore` chain in git repositories.
|
|
1063
|
-
*/
|
|
1064
|
-
class GitignoreFilter {
|
|
1065
|
-
repos = new Map();
|
|
1066
|
-
/**
|
|
1067
|
-
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
1068
|
-
*
|
|
1069
|
-
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
1070
|
-
*/
|
|
1071
|
-
constructor(watchPaths) {
|
|
1072
|
-
this.scan(watchPaths);
|
|
1073
|
-
}
|
|
1074
|
-
/**
|
|
1075
|
-
* Scan paths for git repos and their `.gitignore` files.
|
|
1076
|
-
*/
|
|
1077
|
-
scan(watchPaths) {
|
|
1078
|
-
this.repos.clear();
|
|
1079
|
-
const scannedDirs = new Set();
|
|
1080
|
-
for (const watchPath of watchPaths) {
|
|
1081
|
-
const scanDir = watchPathToScanDir(watchPath);
|
|
1082
|
-
if (!scanDir)
|
|
1083
|
-
continue;
|
|
1084
|
-
if (scannedDirs.has(scanDir))
|
|
1085
|
-
continue;
|
|
1086
|
-
scannedDirs.add(scanDir);
|
|
1087
|
-
const repoRoot = findRepoRoot(scanDir);
|
|
1088
|
-
if (!repoRoot)
|
|
1089
|
-
continue;
|
|
1090
|
-
if (this.repos.has(repoRoot))
|
|
1091
|
-
continue;
|
|
1092
|
-
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
1093
|
-
const entries = gitignoreFiles.map((gf) => ({
|
|
1094
|
-
dir: dirname(gf),
|
|
1095
|
-
ig: parseGitignore(gf),
|
|
1096
|
-
}));
|
|
1097
|
-
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
1098
|
-
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1099
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1100
|
-
}
|
|
1101
|
-
}
|
|
1102
|
-
/**
|
|
1103
|
-
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
1104
|
-
*
|
|
1105
|
-
* @param filePath - Absolute file path to check.
|
|
1106
|
-
* @returns `true` if the file should be ignored.
|
|
1107
|
-
*/
|
|
1108
|
-
isIgnored(filePath) {
|
|
1109
|
-
const absPath = resolve(filePath);
|
|
1110
|
-
for (const [, repo] of this.repos) {
|
|
1111
|
-
// Check if file is within this repo
|
|
1112
|
-
const relToRepo = relative(repo.root, absPath);
|
|
1113
|
-
if (relToRepo.startsWith('..') || relToRepo.startsWith(resolve('/'))) {
|
|
1114
|
-
continue;
|
|
1115
|
-
}
|
|
1116
|
-
// Check each `.gitignore` entry (deepest-first)
|
|
1117
|
-
for (const entry of repo.entries) {
|
|
1118
|
-
const relToEntry = relative(entry.dir, absPath);
|
|
1119
|
-
if (relToEntry.startsWith('..'))
|
|
1120
|
-
continue;
|
|
1121
|
-
const normalized = toForwardSlash(relToEntry);
|
|
1122
|
-
if (entry.ig.ignores(normalized)) {
|
|
1123
|
-
return true;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
}
|
|
1127
|
-
return false;
|
|
1128
|
-
}
|
|
1129
|
-
/**
|
|
1130
|
-
* Invalidate and re-parse a specific `.gitignore` file.
|
|
1131
|
-
* Call when a `.gitignore` file is added, changed, or removed.
|
|
1132
|
-
*
|
|
1133
|
-
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
1134
|
-
*/
|
|
1135
|
-
invalidate(gitignorePath) {
|
|
1136
|
-
const absPath = resolve(gitignorePath);
|
|
1137
|
-
const gitignoreDir = dirname(absPath);
|
|
1138
|
-
for (const [, repo] of this.repos) {
|
|
1139
|
-
const relToRepo = relative(repo.root, gitignoreDir);
|
|
1140
|
-
if (relToRepo.startsWith('..'))
|
|
1141
|
-
continue;
|
|
1142
|
-
// Remove old entry for this directory
|
|
1143
|
-
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
1144
|
-
// Re-parse if file still exists
|
|
1145
|
-
if (existsSync(absPath)) {
|
|
1146
|
-
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
1147
|
-
// Re-sort deepest-first
|
|
1148
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1149
|
-
}
|
|
1150
|
-
return;
|
|
1151
|
-
}
|
|
1152
|
-
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
1153
|
-
const repoRoot = findRepoRoot(gitignoreDir);
|
|
1154
|
-
if (repoRoot && existsSync(absPath)) {
|
|
1155
|
-
const entries = [
|
|
1156
|
-
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
1157
|
-
];
|
|
1158
|
-
if (this.repos.has(repoRoot)) {
|
|
1159
|
-
const repo = this.repos.get(repoRoot);
|
|
1160
|
-
repo.entries.push(entries[0]);
|
|
1161
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1162
|
-
}
|
|
1163
|
-
else {
|
|
1164
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1424
|
+
maxDelayMs: 10_000,
|
|
1425
|
+
jitter: 0.2,
|
|
1426
|
+
onRetry: ({ attempt, delayMs, error }) => {
|
|
1427
|
+
log.warn({
|
|
1428
|
+
attempt,
|
|
1429
|
+
delayMs,
|
|
1430
|
+
provider: 'gemini',
|
|
1431
|
+
model: config.model,
|
|
1432
|
+
err: normalizeError(error),
|
|
1433
|
+
}, 'Embedding call failed; will retry');
|
|
1434
|
+
},
|
|
1435
|
+
});
|
|
1436
|
+
// Validate dimensions
|
|
1437
|
+
for (const vector of vectors) {
|
|
1438
|
+
if (vector.length !== dimensions) {
|
|
1439
|
+
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
1440
|
+
}
|
|
1165
1441
|
}
|
|
1166
|
-
|
|
1442
|
+
return vectors;
|
|
1443
|
+
},
|
|
1444
|
+
};
|
|
1445
|
+
}
|
|
1446
|
+
/**
 * Build a mock embedding provider from configuration.
 *
 * @param config - The embedding configuration.
 * @returns A mock {@link EmbeddingProvider}.
 */
function createMockFromConfig(config) {
    // 768 is this provider's default dimensionality when none is configured.
    return createMockProvider(config.dimensions ?? 768);
}
|
|
1450
|
+
// Registry of provider-name → factory; extend here to add new providers.
const embeddingProviderRegistry = new Map(Object.entries({
    mock: createMockFromConfig,
    gemini: createGeminiProvider,
}));
|
|
1454
|
+
/**
|
|
1455
|
+
* Create an embedding provider based on the given configuration.
|
|
1456
|
+
*
|
|
1457
|
+
* Each provider is responsible for its own default dimensions.
|
|
1458
|
+
*
|
|
1459
|
+
* @param config - The embedding configuration.
|
|
1460
|
+
* @param logger - Optional pino logger for retry warnings.
|
|
1461
|
+
* @returns An {@link EmbeddingProvider} instance.
|
|
1462
|
+
* @throws If the configured provider is not supported.
|
|
1463
|
+
*/
|
|
1464
|
+
function createEmbeddingProvider(config, logger) {
    const factory = embeddingProviderRegistry.get(config.provider);
    // An unknown provider name is a configuration error, not a soft failure.
    if (factory === undefined) {
        throw new Error(`Unsupported embedding provider: ${config.provider}`);
    }
    return factory(config, logger);
}
|
|
1169
1471
|
|
|
1170
1472
|
/**
|
|
@@ -1396,7 +1698,7 @@ function createJsonMapLib() {
|
|
|
1396
1698
|
};
|
|
1397
1699
|
}
|
|
1398
1700
|
/**
|
|
1399
|
-
* Apply compiled inference rules to file attributes, returning merged metadata.
|
|
1701
|
+
* Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
|
|
1400
1702
|
*
|
|
1401
1703
|
* Rules are evaluated in order; later rules override earlier ones.
|
|
1402
1704
|
* If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
|
|
@@ -1406,15 +1708,18 @@ function createJsonMapLib() {
|
|
|
1406
1708
|
* @param attributes - The file attributes to match against.
|
|
1407
1709
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1408
1710
|
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1409
|
-
* @
|
|
1711
|
+
* @param templateEngine - Optional template engine for rendering content templates.
|
|
1712
|
+
* @param configDir - Optional config directory for resolving .json map file paths.
|
|
1713
|
+
* @returns The merged metadata and optional rendered content.
|
|
1410
1714
|
*/
|
|
1411
|
-
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
1715
|
+
async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
|
|
1412
1716
|
// JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
|
|
1413
1717
|
// Our helper functions accept multiple args, which JsonMap supports at runtime.
|
|
1414
1718
|
const lib = createJsonMapLib();
|
|
1415
1719
|
let merged = {};
|
|
1720
|
+
let renderedContent = null;
|
|
1416
1721
|
const log = logger ?? console;
|
|
1417
|
-
for (const { rule, validate } of compiledRules) {
|
|
1722
|
+
for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
|
|
1418
1723
|
if (validate(attributes)) {
|
|
1419
1724
|
// Apply set resolution
|
|
1420
1725
|
const setOutput = resolveSet(rule.set, attributes);
|
|
@@ -1424,10 +1729,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1424
1729
|
let mapDef;
|
|
1425
1730
|
// Resolve map reference
|
|
1426
1731
|
if (typeof rule.map === 'string') {
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1732
|
+
if (rule.map.endsWith('.json') && configDir) {
|
|
1733
|
+
// File path: load from .json file
|
|
1734
|
+
try {
|
|
1735
|
+
const mapPath = resolve(configDir, rule.map);
|
|
1736
|
+
const raw = readFileSync(mapPath, 'utf-8');
|
|
1737
|
+
mapDef = JSON.parse(raw);
|
|
1738
|
+
}
|
|
1739
|
+
catch (error) {
|
|
1740
|
+
log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
|
|
1741
|
+
continue;
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
else {
|
|
1745
|
+
mapDef = namedMaps?.[rule.map];
|
|
1746
|
+
if (!mapDef) {
|
|
1747
|
+
log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
|
|
1748
|
+
continue;
|
|
1749
|
+
}
|
|
1431
1750
|
}
|
|
1432
1751
|
}
|
|
1433
1752
|
else {
|
|
@@ -1450,9 +1769,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1450
1769
|
log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1451
1770
|
}
|
|
1452
1771
|
}
|
|
1772
|
+
// Render template if present
|
|
1773
|
+
if (rule.template && templateEngine) {
|
|
1774
|
+
const templateKey = `rule-${String(ruleIndex)}`;
|
|
1775
|
+
// Build template context: attributes (with json spread at top) + map output
|
|
1776
|
+
const context = {
|
|
1777
|
+
...(attributes.json ?? {}),
|
|
1778
|
+
...attributes,
|
|
1779
|
+
...merged,
|
|
1780
|
+
};
|
|
1781
|
+
try {
|
|
1782
|
+
const result = templateEngine.render(templateKey, context);
|
|
1783
|
+
if (result && result.trim()) {
|
|
1784
|
+
renderedContent = result;
|
|
1785
|
+
}
|
|
1786
|
+
else {
|
|
1787
|
+
log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
|
|
1788
|
+
}
|
|
1789
|
+
}
|
|
1790
|
+
catch (error) {
|
|
1791
|
+
log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1453
1794
|
}
|
|
1454
1795
|
}
|
|
1455
|
-
return merged;
|
|
1796
|
+
return { metadata: merged, renderedContent };
|
|
1456
1797
|
}
|
|
1457
1798
|
|
|
1458
1799
|
/**
|
|
@@ -1541,23 +1882,32 @@ function compileRules(rules) {
|
|
|
1541
1882
|
* @param metadataDir - The metadata directory for enrichment files.
|
|
1542
1883
|
* @param maps - Optional named JsonMap definitions.
|
|
1543
1884
|
* @param logger - Optional logger for rule warnings.
|
|
1885
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1886
|
+
* @param configDir - Optional config directory for resolving file paths.
|
|
1544
1887
|
* @returns The merged metadata and intermediate data.
|
|
1545
1888
|
*/
|
|
1546
|
-
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
|
|
1889
|
+
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
    const stats = await stat(filePath);
    // 1. Extract raw text and any structured data (frontmatter / JSON body).
    const extracted = await extractText(filePath, extname(filePath));
    // 2. Derive file attributes and run the inference rules over them.
    const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
    const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
    // 3. Merge in enrichment metadata; enrichment values win over inferred ones.
    const enrichment = await readMetadata(filePath, metadataDir);
    const metadata = { ...inferred, ...(enrichment ?? {}) };
    return { inferred, enrichment, metadata, attributes, extracted, renderedContent };
}
|
|
1562
1912
|
|
|
1563
1913
|
/**
|
|
@@ -1628,6 +1978,7 @@ class DocumentProcessor {
|
|
|
1628
1978
|
vectorStore;
|
|
1629
1979
|
compiledRules;
|
|
1630
1980
|
logger;
|
|
1981
|
+
templateEngine;
|
|
1631
1982
|
/**
|
|
1632
1983
|
* Create a new DocumentProcessor.
|
|
1633
1984
|
*
|
|
@@ -1636,13 +1987,15 @@ class DocumentProcessor {
|
|
|
1636
1987
|
* @param vectorStore - The vector store client.
|
|
1637
1988
|
* @param compiledRules - The compiled inference rules.
|
|
1638
1989
|
* @param logger - The logger instance.
|
|
1990
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1639
1991
|
*/
|
|
1640
|
-
constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
|
|
1992
|
+
constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
|
|
1641
1993
|
this.config = config;
|
|
1642
1994
|
this.embeddingProvider = embeddingProvider;
|
|
1643
1995
|
this.vectorStore = vectorStore;
|
|
1644
1996
|
this.compiledRules = compiledRules;
|
|
1645
1997
|
this.logger = logger;
|
|
1998
|
+
this.templateEngine = templateEngine;
|
|
1646
1999
|
}
|
|
1647
2000
|
/**
|
|
1648
2001
|
* Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
|
|
@@ -1653,13 +2006,15 @@ class DocumentProcessor {
|
|
|
1653
2006
|
try {
|
|
1654
2007
|
const ext = extname(filePath);
|
|
1655
2008
|
// 1. Build merged metadata + extract text
|
|
1656
|
-
const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
1657
|
-
if
|
|
2009
|
+
const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
2010
|
+
// Use rendered template content if available, otherwise raw extracted text
|
|
2011
|
+
const textToEmbed = renderedContent ?? extracted.text;
|
|
2012
|
+
if (!textToEmbed.trim()) {
|
|
1658
2013
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
1659
2014
|
return;
|
|
1660
2015
|
}
|
|
1661
2016
|
// 2. Content hash check — skip if unchanged
|
|
1662
|
-
const hash = contentHash(
|
|
2017
|
+
const hash = contentHash(textToEmbed);
|
|
1663
2018
|
const baseId = pointId(filePath, 0);
|
|
1664
2019
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
1665
2020
|
if (existingPayload && existingPayload['content_hash'] === hash) {
|
|
@@ -1671,7 +2026,7 @@ class DocumentProcessor {
|
|
|
1671
2026
|
const chunkSize = this.config.chunkSize ?? 1000;
|
|
1672
2027
|
const chunkOverlap = this.config.chunkOverlap ?? 200;
|
|
1673
2028
|
const splitter = createSplitter(ext, chunkSize, chunkOverlap);
|
|
1674
|
-
const chunks = await splitter.splitText(
|
|
2029
|
+
const chunks = await splitter.splitText(textToEmbed);
|
|
1675
2030
|
// 4. Embed all chunks
|
|
1676
2031
|
const vectors = await this.embeddingProvider.embed(chunks);
|
|
1677
2032
|
// 5. Upsert all chunk points
|
|
@@ -1765,7 +2120,7 @@ class DocumentProcessor {
|
|
|
1765
2120
|
return null;
|
|
1766
2121
|
}
|
|
1767
2122
|
// Build merged metadata (lightweight — no embedding)
|
|
1768
|
-
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
2123
|
+
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
1769
2124
|
// Update all chunk payloads
|
|
1770
2125
|
const totalChunks = getChunkCount(existingPayload);
|
|
1771
2126
|
const ids = chunkIds(filePath, totalChunks);
|
|
@@ -1783,8 +2138,17 @@ class DocumentProcessor {
|
|
|
1783
2138
|
*
|
|
1784
2139
|
* @param compiledRules - The newly compiled rules.
|
|
1785
2140
|
*/
|
|
1786
|
-
|
|
2141
|
+
/**
|
|
2142
|
+
* Update compiled inference rules and optionally the template engine.
|
|
2143
|
+
*
|
|
2144
|
+
* @param compiledRules - The newly compiled rules.
|
|
2145
|
+
* @param templateEngine - Optional updated template engine.
|
|
2146
|
+
*/
|
|
2147
|
+
updateRules(compiledRules, templateEngine) {
|
|
1787
2148
|
this.compiledRules = compiledRules;
|
|
2149
|
+
if (templateEngine) {
|
|
2150
|
+
this.templateEngine = templateEngine;
|
|
2151
|
+
}
|
|
1788
2152
|
this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
|
|
1789
2153
|
}
|
|
1790
2154
|
}
|
|
@@ -2313,6 +2677,104 @@ class SystemHealth {
|
|
|
2313
2677
|
}
|
|
2314
2678
|
}
|
|
2315
2679
|
|
|
2680
|
+
/**
|
|
2681
|
+
* @module watcher/globToDir
|
|
2682
|
+
* Adapts glob-based watch config to chokidar v4+, which removed glob support
|
|
2683
|
+
* (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
|
|
2684
|
+
* strings, silently producing zero events. This module extracts static directory
|
|
2685
|
+
* roots from glob patterns for chokidar to watch, then filters emitted events
|
|
2686
|
+
* against the original globs via picomatch.
|
|
2687
|
+
*/
|
|
2688
|
+
/**
|
|
2689
|
+
* Extract the static directory root from a glob pattern.
|
|
2690
|
+
* Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
|
|
2691
|
+
*
|
|
2692
|
+
* @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
|
|
2693
|
+
* @returns The static directory prefix (e.g., `j:/domains`).
|
|
2694
|
+
*/
|
|
2695
|
+
/**
 * Extract the static directory root from a glob pattern.
 * Keeps every path segment up to (but excluding) the first one containing a
 * glob metacharacter (`*`, `?`, `{`, `[`, `]`).
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`), or `.` when the
 *   pattern has no static prefix at all.
 */
function globRoot(glob) {
    const segments = glob.replace(/\\/g, '/').split('/');
    const firstGlobIdx = segments.findIndex((seg) => /[*?{[\]]/.test(seg));
    const staticSegments = firstGlobIdx === -1 ? segments : segments.slice(0, firstGlobIdx);
    return staticSegments.join('/') || '.';
}
|
|
2706
|
+
/**
|
|
2707
|
+
* Deduplicate directory roots, removing paths that are subdirectories of others.
|
|
2708
|
+
*
|
|
2709
|
+
* @param roots - Array of directory paths.
|
|
2710
|
+
* @returns Deduplicated array with subdirectories removed.
|
|
2711
|
+
*/
|
|
2712
|
+
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Paths are normalized to forward slashes and lowercased before comparison.
 * NOTE(review): lowercasing assumes case-insensitive filesystem semantics
 * (Windows/macOS) — confirm this is intended on case-sensitive filesystems.
 *
 * @param roots - Array of directory paths.
 * @returns Sorted, deduplicated array with nested subdirectories removed.
 */
function deduplicateRoots(roots) {
    const unique = [...new Set(roots.map((r) => r.replace(/\\/g, '/').toLowerCase()))].sort();
    const isNestedUnder = (root, other) => {
        const prefixed = root.endsWith('/') ? root : `${root}/`;
        return other !== root && prefixed.startsWith(`${other}/`);
    };
    return unique.filter((root) => !unique.some((other) => isNestedUnder(root, other)));
}
|
|
2720
|
+
/**
|
|
2721
|
+
* Build a picomatch matcher from an array of glob patterns.
|
|
2722
|
+
* Normalizes Windows paths (backslash → forward slash, lowercase drive letter)
|
|
2723
|
+
* before matching.
|
|
2724
|
+
*
|
|
2725
|
+
* @param globs - Glob patterns to match against.
|
|
2726
|
+
* @returns A function that tests whether a file path matches any of the globs.
|
|
2727
|
+
*/
|
|
2728
|
+
/**
 * Build a picomatch matcher from an array of glob patterns.
 * Both patterns and candidate paths are normalized to forward slashes, and
 * matching is case-insensitive with dotfiles included.
 *
 * @param globs - Glob patterns to match against.
 * @returns A predicate testing whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const isMatch = picomatch(globs.map((g) => g.replace(/\\/g, '/')), { dot: true, nocase: true });
    return (filePath) => isMatch(filePath.replace(/\\/g, '/'));
}
|
|
2736
|
+
/**
|
|
2737
|
+
* Convert an array of glob patterns into chokidar-compatible directory roots
|
|
2738
|
+
* and a filter function for post-hoc event filtering.
|
|
2739
|
+
*
|
|
2740
|
+
* @param globs - Glob patterns from the watch config.
|
|
2741
|
+
* @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
|
|
2742
|
+
*/
|
|
2743
|
+
/**
 * Convert glob patterns into chokidar-compatible directory roots plus a
 * filter function for post-hoc event filtering.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns `roots` (directories for chokidar) and `matches` (glob predicate).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map(globRoot)),
        matches: buildGlobMatcher(globs),
    };
}
|
|
2749
|
+
/**
|
|
2750
|
+
* Convert ignored glob patterns to picomatch matcher functions.
|
|
2751
|
+
*
|
|
2752
|
+
* Chokidar v5 replaced the external `anymatch` dependency with an inline
|
|
2753
|
+
* implementation that does **exact string equality** for string matchers,
|
|
2754
|
+
* breaking glob-based `ignored` patterns. This function converts glob strings
|
|
2755
|
+
* to picomatch functions that chokidar's `createPattern` passes through
|
|
2756
|
+
* unchanged (`typeof matcher === 'function'`).
|
|
2757
|
+
*
|
|
2758
|
+
* Non-string entries (functions, RegExps) are passed through as-is.
|
|
2759
|
+
*
|
|
2760
|
+
* @param ignored - Array of ignored patterns (globs, functions, RegExps).
|
|
2761
|
+
* @returns Array with glob strings replaced by picomatch matcher functions.
|
|
2762
|
+
*/
|
|
2763
|
+
function resolveIgnored(ignored) {
    return ignored.map((entry) => {
        // Functions and RegExps already work with chokidar; pass them through.
        if (typeof entry !== 'string')
            return entry;
        // Convert every string (glob or literal path) to a picomatch function
        // so chokidar's createPattern passes it through unchanged.
        const matcher = picomatch(entry.replace(/\\/g, '/'), { dot: true, nocase: true });
        return (filePath) => matcher(filePath.replace(/\\/g, '/'));
    });
}
|
|
2777
|
+
|
|
2316
2778
|
/**
|
|
2317
2779
|
* @module watcher
|
|
2318
2780
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -2327,6 +2789,7 @@ class FileSystemWatcher {
|
|
|
2327
2789
|
logger;
|
|
2328
2790
|
health;
|
|
2329
2791
|
gitignoreFilter;
|
|
2792
|
+
globMatches;
|
|
2330
2793
|
watcher;
|
|
2331
2794
|
/**
|
|
2332
2795
|
* Create a new FileSystemWatcher.
|
|
@@ -2343,6 +2806,7 @@ class FileSystemWatcher {
|
|
|
2343
2806
|
this.processor = processor;
|
|
2344
2807
|
this.logger = logger;
|
|
2345
2808
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
2809
|
+
this.globMatches = () => true;
|
|
2346
2810
|
const healthOptions = {
|
|
2347
2811
|
maxRetries: options.maxRetries,
|
|
2348
2812
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2355,8 +2819,20 @@ class FileSystemWatcher {
|
|
|
2355
2819
|
* Start watching the filesystem and processing events.
|
|
2356
2820
|
*/
|
|
2357
2821
|
start() {
|
|
2358
|
-
|
|
2359
|
-
|
|
2822
|
+
// Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
|
|
2823
|
+
// Glob patterns are silently treated as literal strings, producing zero
|
|
2824
|
+
// events. We extract static directory roots for chokidar to watch, then
|
|
2825
|
+
// filter emitted events against the original globs via picomatch.
|
|
2826
|
+
const { roots, matches } = resolveWatchPaths(this.config.paths);
|
|
2827
|
+
this.globMatches = matches;
|
|
2828
|
+
// Chokidar v5's inline anymatch does exact string equality for string
|
|
2829
|
+
// matchers, breaking glob-based ignored patterns. Convert to picomatch
|
|
2830
|
+
// functions that chokidar passes through as-is.
|
|
2831
|
+
const ignored = this.config.ignored
|
|
2832
|
+
? resolveIgnored(this.config.ignored)
|
|
2833
|
+
: undefined;
|
|
2834
|
+
this.watcher = chokidar.watch(roots, {
|
|
2835
|
+
ignored,
|
|
2360
2836
|
usePolling: this.config.usePolling,
|
|
2361
2837
|
interval: this.config.pollIntervalMs,
|
|
2362
2838
|
awaitWriteFinish: this.config.stabilityThresholdMs
|
|
@@ -2366,6 +2842,8 @@ class FileSystemWatcher {
|
|
|
2366
2842
|
});
|
|
2367
2843
|
this.watcher.on('add', (path) => {
|
|
2368
2844
|
this.handleGitignoreChange(path);
|
|
2845
|
+
if (!this.globMatches(path))
|
|
2846
|
+
return;
|
|
2369
2847
|
if (this.isGitignored(path))
|
|
2370
2848
|
return;
|
|
2371
2849
|
this.logger.debug({ path }, 'File added');
|
|
@@ -2373,6 +2851,8 @@ class FileSystemWatcher {
|
|
|
2373
2851
|
});
|
|
2374
2852
|
this.watcher.on('change', (path) => {
|
|
2375
2853
|
this.handleGitignoreChange(path);
|
|
2854
|
+
if (!this.globMatches(path))
|
|
2855
|
+
return;
|
|
2376
2856
|
if (this.isGitignored(path))
|
|
2377
2857
|
return;
|
|
2378
2858
|
this.logger.debug({ path }, 'File changed');
|
|
@@ -2380,6 +2860,8 @@ class FileSystemWatcher {
|
|
|
2380
2860
|
});
|
|
2381
2861
|
this.watcher.on('unlink', (path) => {
|
|
2382
2862
|
this.handleGitignoreChange(path);
|
|
2863
|
+
if (!this.globMatches(path))
|
|
2864
|
+
return;
|
|
2383
2865
|
if (this.isGitignored(path))
|
|
2384
2866
|
return;
|
|
2385
2867
|
this.logger.debug({ path }, 'File removed');
|
|
@@ -2452,51 +2934,21 @@ class FileSystemWatcher {
|
|
|
2452
2934
|
}
|
|
2453
2935
|
|
|
2454
2936
|
/**
|
|
2455
|
-
* @module app/
|
|
2456
|
-
*
|
|
2457
|
-
*/
|
|
2458
|
-
/**
|
|
2459
|
-
* Debounced config file watcher.
|
|
2937
|
+
* @module app/factories
|
|
2938
|
+
* Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
|
|
2460
2939
|
*/
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
});
|
|
2474
|
-
this.watcher.on('change', () => {
|
|
2475
|
-
if (this.debounce)
|
|
2476
|
-
clearTimeout(this.debounce);
|
|
2477
|
-
this.debounce = setTimeout(() => {
|
|
2478
|
-
void this.options.onChange();
|
|
2479
|
-
}, this.options.debounceMs);
|
|
2480
|
-
});
|
|
2481
|
-
this.watcher.on('error', (error) => {
|
|
2482
|
-
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2483
|
-
});
|
|
2484
|
-
this.options.logger.info({
|
|
2485
|
-
configPath: this.options.configPath,
|
|
2486
|
-
debounceMs: this.options.debounceMs,
|
|
2487
|
-
}, 'Config watcher started');
|
|
2488
|
-
}
|
|
2489
|
-
async stop() {
|
|
2490
|
-
if (this.debounce) {
|
|
2491
|
-
clearTimeout(this.debounce);
|
|
2492
|
-
this.debounce = undefined;
|
|
2493
|
-
}
|
|
2494
|
-
if (this.watcher) {
|
|
2495
|
-
await this.watcher.close();
|
|
2496
|
-
this.watcher = undefined;
|
|
2497
|
-
}
|
|
2498
|
-
}
|
|
2499
|
-
}
|
|
2940
|
+
/** Default component factories wiring real implementations. */
|
|
2941
|
+
const defaultFactories = {
    loadConfig,
    createLogger,
    createEmbeddingProvider,
    // Thin constructor adapters so tests can substitute mocks per component.
    createVectorStoreClient: (...args) => new VectorStoreClient(...args),
    compileRules,
    createDocumentProcessor: (...args) => new DocumentProcessor(...args),
    createEventQueue: (options) => new EventQueue(options),
    createFileSystemWatcher: (...args) => new FileSystemWatcher(...args),
    createApiServer,
};
|
|
2500
2952
|
|
|
2501
2953
|
/**
|
|
2502
2954
|
* @module app/shutdown
|
|
@@ -2516,17 +2968,28 @@ function installShutdownHandlers(stop) {
|
|
|
2516
2968
|
process.on('SIGINT', () => void shutdown());
|
|
2517
2969
|
}
|
|
2518
2970
|
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2971
|
+
/**
|
|
2972
|
+
* @module app/startFromConfig
|
|
2973
|
+
* Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
|
|
2974
|
+
*/
|
|
2975
|
+
/**
|
|
2976
|
+
* Create and start a JeevesWatcher from a config file path.
|
|
2977
|
+
*
|
|
2978
|
+
* @param configPath - Optional path to the configuration file.
|
|
2979
|
+
* @returns The running JeevesWatcher instance.
|
|
2980
|
+
*/
|
|
2981
|
+
async function startFromConfig(configPath) {
    const watcherApp = new JeevesWatcher(await loadConfig(configPath), configPath);
    // Wire SIGINT/SIGTERM handling before the watcher begins doing work.
    installShutdownHandlers(() => watcherApp.stop());
    await watcherApp.start();
    return watcherApp;
}
|
|
2988
|
+
|
|
2989
|
+
/**
|
|
2990
|
+
* @module app
|
|
2991
|
+
* Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
|
|
2992
|
+
*/
|
|
2530
2993
|
/**
|
|
2531
2994
|
* Main application class that wires together all components.
|
|
2532
2995
|
*/
|
|
@@ -2561,56 +3024,26 @@ class JeevesWatcher {
|
|
|
2561
3024
|
async start() {
|
|
2562
3025
|
const logger = this.factories.createLogger(this.config.logging);
|
|
2563
3026
|
this.logger = logger;
|
|
2564
|
-
|
|
2565
|
-
try {
|
|
2566
|
-
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
2567
|
-
}
|
|
2568
|
-
catch (error) {
|
|
2569
|
-
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
2570
|
-
throw error;
|
|
2571
|
-
}
|
|
2572
|
-
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
2573
|
-
await vectorStore.ensureCollection();
|
|
3027
|
+
const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
|
|
2574
3028
|
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
2575
|
-
const
|
|
3029
|
+
const configDir = this.configPath ? dirname(this.configPath) : '.';
|
|
3030
|
+
const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
|
|
3031
|
+
const processor = this.factories.createDocumentProcessor({
|
|
2576
3032
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
2577
3033
|
chunkSize: this.config.embedding.chunkSize,
|
|
2578
3034
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
2579
3035
|
maps: this.config.maps,
|
|
2580
|
-
|
|
2581
|
-
|
|
3036
|
+
configDir,
|
|
3037
|
+
}, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
|
|
2582
3038
|
this.processor = processor;
|
|
2583
|
-
|
|
3039
|
+
this.queue = this.factories.createEventQueue({
|
|
2584
3040
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
2585
3041
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
2586
3042
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2587
3043
|
});
|
|
2588
|
-
this.
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
? new GitignoreFilter(this.config.watch.paths)
|
|
2592
|
-
: undefined;
|
|
2593
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2594
|
-
maxRetries: this.config.maxRetries,
|
|
2595
|
-
maxBackoffMs: this.config.maxBackoffMs,
|
|
2596
|
-
onFatalError: this.runtimeOptions.onFatalError,
|
|
2597
|
-
gitignoreFilter,
|
|
2598
|
-
});
|
|
2599
|
-
this.watcher = watcher;
|
|
2600
|
-
const server = this.factories.createApiServer({
|
|
2601
|
-
processor,
|
|
2602
|
-
vectorStore,
|
|
2603
|
-
embeddingProvider,
|
|
2604
|
-
queue,
|
|
2605
|
-
config: this.config,
|
|
2606
|
-
logger,
|
|
2607
|
-
});
|
|
2608
|
-
this.server = server;
|
|
2609
|
-
await server.listen({
|
|
2610
|
-
host: this.config.api?.host ?? '127.0.0.1',
|
|
2611
|
-
port: this.config.api?.port ?? 3456,
|
|
2612
|
-
});
|
|
2613
|
-
watcher.start();
|
|
3044
|
+
this.watcher = this.createWatcher(this.queue, processor, logger);
|
|
3045
|
+
this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
|
|
3046
|
+
this.watcher.start();
|
|
2614
3047
|
this.startConfigWatch();
|
|
2615
3048
|
logger.info('jeeves-watcher started');
|
|
2616
3049
|
}
|
|
@@ -2641,22 +3074,61 @@ class JeevesWatcher {
|
|
|
2641
3074
|
}
|
|
2642
3075
|
this.logger?.info('jeeves-watcher stopped');
|
|
2643
3076
|
}
|
|
3077
|
+
async initEmbeddingAndStore(logger) {
|
|
3078
|
+
let embeddingProvider;
|
|
3079
|
+
try {
|
|
3080
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
3081
|
+
}
|
|
3082
|
+
catch (error) {
|
|
3083
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
3084
|
+
throw error;
|
|
3085
|
+
}
|
|
3086
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
3087
|
+
await vectorStore.ensureCollection();
|
|
3088
|
+
return { embeddingProvider, vectorStore };
|
|
3089
|
+
}
|
|
3090
|
+
createWatcher(queue, processor, logger) {
|
|
3091
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
3092
|
+
const gitignoreFilter = respectGitignore
|
|
3093
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
3094
|
+
: undefined;
|
|
3095
|
+
return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
3096
|
+
maxRetries: this.config.maxRetries,
|
|
3097
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
3098
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
3099
|
+
gitignoreFilter,
|
|
3100
|
+
});
|
|
3101
|
+
}
|
|
3102
|
+
async startApiServer(processor, vectorStore, embeddingProvider, logger) {
|
|
3103
|
+
const server = this.factories.createApiServer({
|
|
3104
|
+
processor,
|
|
3105
|
+
vectorStore,
|
|
3106
|
+
embeddingProvider,
|
|
3107
|
+
queue: this.queue,
|
|
3108
|
+
config: this.config,
|
|
3109
|
+
logger,
|
|
3110
|
+
});
|
|
3111
|
+
await server.listen({
|
|
3112
|
+
host: this.config.api?.host ?? '127.0.0.1',
|
|
3113
|
+
port: this.config.api?.port ?? 3456,
|
|
3114
|
+
});
|
|
3115
|
+
return server;
|
|
3116
|
+
}
|
|
2644
3117
|
startConfigWatch() {
|
|
2645
3118
|
const logger = this.logger;
|
|
2646
3119
|
if (!logger)
|
|
2647
3120
|
return;
|
|
2648
3121
|
const enabled = this.config.configWatch?.enabled ?? true;
|
|
2649
|
-
if (!enabled)
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
3122
|
+
if (!enabled || !this.configPath) {
|
|
3123
|
+
if (!this.configPath) {
|
|
3124
|
+
logger.debug('Config watch enabled, but no config path was provided');
|
|
3125
|
+
}
|
|
2653
3126
|
return;
|
|
2654
3127
|
}
|
|
2655
|
-
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
2656
3128
|
this.configWatcher = new ConfigWatcher({
|
|
2657
3129
|
configPath: this.configPath,
|
|
2658
3130
|
enabled,
|
|
2659
|
-
debounceMs,
|
|
3131
|
+
debounceMs: this.config.configWatch?.debounceMs ?? 10000,
|
|
2660
3132
|
logger,
|
|
2661
3133
|
onChange: async () => this.reloadConfig(),
|
|
2662
3134
|
});
|
|
@@ -2678,7 +3150,9 @@ class JeevesWatcher {
|
|
|
2678
3150
|
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
2679
3151
|
this.config = newConfig;
|
|
2680
3152
|
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
2681
|
-
|
|
3153
|
+
const reloadConfigDir = dirname(this.configPath);
|
|
3154
|
+
const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
|
|
3155
|
+
processor.updateRules(compiledRules, newTemplateEngine);
|
|
2682
3156
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
2683
3157
|
}
|
|
2684
3158
|
catch (error) {
|
|
@@ -2686,18 +3160,6 @@ class JeevesWatcher {
|
|
|
2686
3160
|
}
|
|
2687
3161
|
}
|
|
2688
3162
|
}
|
|
2689
|
-
|
|
2690
|
-
* Create and start a JeevesWatcher from a config file path.
|
|
2691
|
-
*
|
|
2692
|
-
* @param configPath - Optional path to the configuration file.
|
|
2693
|
-
* @returns The running JeevesWatcher instance.
|
|
2694
|
-
*/
|
|
2695
|
-
async function startFromConfig(configPath) {
|
|
2696
|
-
const config = await loadConfig(configPath);
|
|
2697
|
-
const app = new JeevesWatcher(config, configPath);
|
|
2698
|
-
installShutdownHandlers(() => app.stop());
|
|
2699
|
-
await app.start();
|
|
2700
|
-
return app;
|
|
2701
|
-
}
|
|
3163
|
+
// startFromConfig re-exported from ./startFromConfig
|
|
2702
3164
|
|
|
2703
|
-
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
|
3165
|
+
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, TemplateEngine, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|