@karmaniverous/jeeves-watcher 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +69 -14
- package/dist/cjs/index.js +996 -562
- package/dist/cli/jeeves-watcher/index.js +824 -396
- package/dist/index.d.ts +160 -16
- package/dist/index.iife.js +824 -397
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +992 -564
- package/package.json +12 -4
package/dist/cjs/index.js
CHANGED
|
@@ -6,12 +6,20 @@ var node_path = require('node:path');
|
|
|
6
6
|
var picomatch = require('picomatch');
|
|
7
7
|
var radash = require('radash');
|
|
8
8
|
var node_crypto = require('node:crypto');
|
|
9
|
+
var node_fs = require('node:fs');
|
|
10
|
+
var ignore = require('ignore');
|
|
11
|
+
var Handlebars = require('handlebars');
|
|
12
|
+
var dayjs = require('dayjs');
|
|
13
|
+
var hastUtilToMdast = require('hast-util-to-mdast');
|
|
14
|
+
var mdastUtilFromAdf = require('mdast-util-from-adf');
|
|
15
|
+
var mdastUtilToMarkdown = require('mdast-util-to-markdown');
|
|
16
|
+
var rehypeParse = require('rehype-parse');
|
|
17
|
+
var unified = require('unified');
|
|
18
|
+
var chokidar = require('chokidar');
|
|
9
19
|
var cosmiconfig = require('cosmiconfig');
|
|
10
20
|
var zod = require('zod');
|
|
11
21
|
var jsonmap = require('@karmaniverous/jsonmap');
|
|
12
22
|
var googleGenai = require('@langchain/google-genai');
|
|
13
|
-
var node_fs = require('node:fs');
|
|
14
|
-
var ignore = require('ignore');
|
|
15
23
|
var pino = require('pino');
|
|
16
24
|
var uuid = require('uuid');
|
|
17
25
|
var cheerio = require('cheerio');
|
|
@@ -21,7 +29,6 @@ var Ajv = require('ajv');
|
|
|
21
29
|
var addFormats = require('ajv-formats');
|
|
22
30
|
var textsplitters = require('@langchain/textsplitters');
|
|
23
31
|
var jsClientRest = require('@qdrant/js-client-rest');
|
|
24
|
-
var chokidar = require('chokidar');
|
|
25
32
|
|
|
26
33
|
function _interopNamespaceDefault(e) {
|
|
27
34
|
var n = Object.create(null);
|
|
@@ -439,183 +446,663 @@ function createApiServer(options) {
|
|
|
439
446
|
}
|
|
440
447
|
|
|
441
448
|
/**
|
|
442
|
-
* @module
|
|
443
|
-
*
|
|
449
|
+
* @module gitignore
|
|
450
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
444
451
|
*/
|
|
445
|
-
/** Default root-level config values. */
|
|
446
|
-
const ROOT_DEFAULTS = {
|
|
447
|
-
metadataDir: '.jeeves-watcher',
|
|
448
|
-
shutdownTimeoutMs: 10000,
|
|
449
|
-
};
|
|
450
|
-
/** Default configWatch values. */
|
|
451
|
-
const CONFIG_WATCH_DEFAULTS = {
|
|
452
|
-
enabled: true,
|
|
453
|
-
debounceMs: 1000,
|
|
454
|
-
};
|
|
455
|
-
/** Default API values. */
|
|
456
|
-
const API_DEFAULTS = {
|
|
457
|
-
host: '127.0.0.1',
|
|
458
|
-
port: 3456,
|
|
459
|
-
};
|
|
460
|
-
/** Default logging values. */
|
|
461
|
-
const LOGGING_DEFAULTS = {
|
|
462
|
-
level: 'info',
|
|
463
|
-
};
|
|
464
|
-
/** Default watch configuration. */
|
|
465
|
-
const WATCH_DEFAULTS = {
|
|
466
|
-
debounceMs: 300,
|
|
467
|
-
stabilityThresholdMs: 500,
|
|
468
|
-
usePolling: false,
|
|
469
|
-
pollIntervalMs: 1000,
|
|
470
|
-
respectGitignore: true,
|
|
471
|
-
};
|
|
472
|
-
/** Default embedding configuration. */
|
|
473
|
-
const EMBEDDING_DEFAULTS = {
|
|
474
|
-
chunkSize: 1000,
|
|
475
|
-
chunkOverlap: 200,
|
|
476
|
-
dimensions: 3072,
|
|
477
|
-
rateLimitPerMinute: 300,
|
|
478
|
-
concurrency: 5,
|
|
479
|
-
};
|
|
480
|
-
|
|
481
452
|
/**
|
|
482
|
-
*
|
|
453
|
+
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
454
|
+
* Returns `undefined` if no repo is found.
|
|
483
455
|
*/
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
.
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
.
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
.describe('Polling interval in milliseconds when usePolling is enabled.'),
|
|
500
|
-
/** Whether to use polling instead of native watchers. */
|
|
501
|
-
usePolling: zod.z
|
|
502
|
-
.boolean()
|
|
503
|
-
.optional()
|
|
504
|
-
.describe('Use polling instead of native file system events (for network drives).'),
|
|
505
|
-
/** Debounce delay in milliseconds for file change events. */
|
|
506
|
-
debounceMs: zod.z
|
|
507
|
-
.number()
|
|
508
|
-
.optional()
|
|
509
|
-
.describe('Debounce delay in milliseconds for file change events.'),
|
|
510
|
-
/** Time in milliseconds a file must be stable before processing. */
|
|
511
|
-
stabilityThresholdMs: zod.z
|
|
512
|
-
.number()
|
|
513
|
-
.optional()
|
|
514
|
-
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
515
|
-
/** Whether to respect .gitignore files when processing. */
|
|
516
|
-
respectGitignore: zod.z
|
|
517
|
-
.boolean()
|
|
518
|
-
.optional()
|
|
519
|
-
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
520
|
-
});
|
|
456
|
+
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git`
 * directory.
 *
 * Fix: the previous loop condition (`dir !== root`) exited before the
 * filesystem root itself was ever tested, so a repository rooted at `/` (or a
 * drive root on Windows) was never detected. The root is now checked too.
 *
 * @param startDir - Directory to start the upward search from.
 * @returns The absolute repo root path, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    let dir = node_path.resolve(startDir);
    for (;;) {
        const gitPath = node_path.join(dir, '.git');
        try {
            // `.git` may be a plain file (worktrees/submodules); only a
            // directory marks a conventional repo root, matching prior behavior.
            if (node_fs.existsSync(gitPath) &&
                node_fs.statSync(gitPath).isDirectory()) {
                return dir;
            }
        }
        catch {
            // Inaccessible `.git` entry: treat as "not a repo root" and keep walking.
        }
        const parent = node_path.dirname(dir);
        if (parent === dir)
            break; // reached the filesystem root
        dir = parent;
    }
    return undefined;
}
|
|
521
471
|
/**
|
|
522
|
-
*
|
|
472
|
+
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
473
|
+
* that can be scanned for a repo root.
|
|
523
474
|
*/
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
.
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
475
|
+
/**
 * Convert a watch path (directory, file path, or glob) to a concrete directory
 * that can be scanned for a repo root.
 *
 * @param watchPath - A directory, file path, or glob pattern.
 * @returns An absolute directory to scan, or `undefined` when nothing usable
 *   exists on disk.
 */
function watchPathToScanDir(watchPath) {
    const resolvedPath = node_path.resolve(watchPath);
    try {
        const stats = node_fs.statSync(resolvedPath);
        return stats.isDirectory() ? resolvedPath : node_path.dirname(resolvedPath);
    }
    catch {
        // Path does not exist as-is; fall through to glob handling below.
    }
    // If this is a glob, fall back to the non-glob prefix.
    const globMatch = /[*?[{]/.exec(watchPath);
    if (!globMatch)
        return undefined;
    const trimmed = watchPath.slice(0, globMatch.index).trim();
    let baseDir;
    if (trimmed.length === 0) {
        baseDir = '.';
    }
    else if (trimmed.endsWith('/') || trimmed.endsWith('\\')) {
        baseDir = trimmed;
    }
    else {
        baseDir = node_path.dirname(trimmed);
    }
    const resolved = node_path.resolve(baseDir);
    return node_fs.existsSync(resolved) ? resolved : undefined;
}
|
|
536
499
|
/**
|
|
537
|
-
*
|
|
500
|
+
* Recursively find all `.gitignore` files under `dir`.
|
|
501
|
+
* Skips `.git` and `node_modules` directories for performance.
|
|
538
502
|
*/
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
.
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
.
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
.
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
.string()
|
|
568
|
-
.optional()
|
|
569
|
-
.describe('API key for embedding provider (supports ${ENV_VAR} substitution).'),
|
|
570
|
-
/** Maximum embedding requests per minute. */
|
|
571
|
-
rateLimitPerMinute: zod.z
|
|
572
|
-
.number()
|
|
573
|
-
.optional()
|
|
574
|
-
.describe('Maximum embedding API requests per minute (rate limiting).'),
|
|
575
|
-
/** Maximum concurrent embedding requests. */
|
|
576
|
-
concurrency: zod.z
|
|
577
|
-
.number()
|
|
578
|
-
.optional()
|
|
579
|
-
.describe('Maximum concurrent embedding requests.'),
|
|
580
|
-
});
|
|
503
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 *
 * Skips `.git` and `node_modules` directories for performance. Uses
 * `readdirSync(..., { withFileTypes: true })` so that (a) no extra stat()
 * syscall is issued per entry, and (b) directory symlinks are NOT followed —
 * the previous `statSync`-based check followed symlinks and could recurse
 * forever on a symlink cycle.
 *
 * @param dir - Absolute directory to scan.
 * @returns Absolute paths of every `.gitignore` file found.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const gitignorePath = node_path.join(dir, '.gitignore');
    if (node_fs.existsSync(gitignorePath)) {
        results.push(gitignorePath);
    }
    let entries;
    try {
        entries = node_fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        // Unreadable directory: return whatever we have so far.
        return results;
    }
    for (const entry of entries) {
        if (entry.name === '.git' || entry.name === 'node_modules')
            continue;
        // Dirent.isDirectory() reflects the entry itself (lstat semantics), so
        // broken or cyclic symlinks are simply skipped.
        if (entry.isDirectory()) {
            results.push(...findGitignoreFiles(node_path.join(dir, entry.name)));
        }
    }
    return results;
}
|
|
581
531
|
/**
|
|
582
|
-
*
|
|
532
|
+
* Parse a `.gitignore` file into an `ignore` instance.
|
|
583
533
|
*/
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
.describe('Qdrant server URL (e.g., "http://localhost:6333").'),
|
|
589
|
-
/** Qdrant collection name. */
|
|
590
|
-
collectionName: zod.z
|
|
591
|
-
.string()
|
|
592
|
-
.describe('Qdrant collection name for vector storage.'),
|
|
593
|
-
/** Qdrant API key. */
|
|
594
|
-
apiKey: zod.z
|
|
595
|
-
.string()
|
|
596
|
-
.optional()
|
|
597
|
-
.describe('Qdrant API key for authentication (supports ${ENV_VAR} substitution).'),
|
|
598
|
-
});
|
|
534
|
+
/**
 * Parse a `.gitignore` file into an `ignore` matcher instance.
 *
 * @param gitignorePath - Absolute path to the `.gitignore` file.
 * @returns An `ignore` instance loaded with the file's patterns.
 */
function parseGitignore(gitignorePath) {
    const patterns = node_fs.readFileSync(gitignorePath, 'utf8');
    return ignore().add(patterns);
}
|
|
599
538
|
/**
|
|
600
|
-
*
|
|
539
|
+
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
601
540
|
*/
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
.string()
|
|
606
|
-
.optional()
|
|
607
|
-
.describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),
|
|
608
|
-
/** Port to listen on. */
|
|
609
|
-
port: zod.z.number().optional().describe('Port for API server (e.g., 3456).'),
|
|
610
|
-
});
|
|
541
|
+
/**
 * Normalize a path to use forward slashes (required by the `ignore` package).
 *
 * @param p - The path to normalize.
 * @returns The path with every backslash replaced by a forward slash.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
611
544
|
/**
|
|
612
|
-
*
|
|
545
|
+
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
546
|
+
* `.gitignore` chain in git repositories.
|
|
613
547
|
*/
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
548
|
+
class GitignoreFilter {
    // Map of repo root (absolute path) -> { root, entries } where `entries` is
    // a deepest-first list of { dir, ig } parsed `.gitignore` matchers.
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Scan paths for git repos and their `.gitignore` files.
     * Rebuilds the cache from scratch; safe to call again on config reload.
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir)
                continue;
            // Dedupe: several watch globs frequently resolve to the same directory.
            if (scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot)
                continue;
            if (this.repos.has(repoRoot))
                continue;
            const gitignoreFiles = findGitignoreFiles(repoRoot);
            const entries = gitignoreFiles.map((gf) => ({
                dir: node_path.dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Sort deepest-first so nested `.gitignore` files are checked first
            entries.sort((a, b) => b.dir.length - a.dir.length);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * Note: an entry match returns `true` immediately; negated (`!pattern`)
     * re-includes are honored only within a single `.gitignore` file, not
     * across the nested chain.
     *
     * @param filePath - Absolute file path to check.
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = node_path.resolve(filePath);
        for (const [, repo] of this.repos) {
            // Check if file is within this repo
            const relToRepo = node_path.relative(repo.root, absPath);
            // On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
            // an absolute path with a drive letter instead of a relative one. The
            // `ignore` library rejects these with a RangeError. Skip repos on
            // different drives to avoid cross-drive gitignore mismatches.
            if (relToRepo.startsWith('..') ||
                relToRepo.startsWith(node_path.resolve('/')) ||
                /^[a-zA-Z]:/.test(relToRepo)) {
                continue;
            }
            // Check each `.gitignore` entry (deepest-first)
            for (const entry of repo.entries) {
                const relToEntry = node_path.relative(entry.dir, absPath);
                // Files outside this entry's directory are out of scope for it.
                if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
                    continue;
                // `ignore` requires forward-slash relative paths.
                const normalized = toForwardSlash(relToEntry);
                if (entry.ig.ignores(normalized)) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = node_path.resolve(gitignorePath);
        const gitignoreDir = node_path.dirname(absPath);
        for (const [, repo] of this.repos) {
            const relToRepo = node_path.relative(repo.root, gitignoreDir);
            if (relToRepo.startsWith('..'))
                continue;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            // Re-parse if file still exists
            if (node_fs.existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                // Re-sort deepest-first
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            // Only the first repo containing this directory is updated; repos are
            // keyed by root, so the file belongs to exactly one cached repo.
            return;
        }
        // If not in any known repo, check if it's in a repo we haven't scanned
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && node_fs.existsSync(absPath)) {
            const entries = [
                { dir: gitignoreDir, ig: parseGitignore(absPath) },
            ];
            if (this.repos.has(repoRoot)) {
                const repo = this.repos.get(repoRoot);
                repo.entries.push(entries[0]);
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            else {
                this.repos.set(repoRoot, { root: repoRoot, entries });
            }
        }
    }
}
|
|
659
|
+
|
|
660
|
+
/**
|
|
661
|
+
* @module templates/helpers
|
|
662
|
+
* Registers built-in Handlebars helpers for content templates.
|
|
663
|
+
*/
|
|
664
|
+
/** Pre-built rehype parser for HTML → hast conversion (fragment mode: input is parsed as a fragment, not a full document). */
const htmlParser = unified.unified().use(rehypeParse, { fragment: true });
|
|
666
|
+
/**
|
|
667
|
+
* Register all built-in helpers on a Handlebars instance.
|
|
668
|
+
*
|
|
669
|
+
* @param hbs - The Handlebars instance.
|
|
670
|
+
*/
|
|
671
|
+
/**
 * Register all built-in helpers on a Handlebars instance.
 *
 * Structural helpers (`adfToMarkdown`, `markdownify`) return SafeStrings so
 * their Markdown output is not HTML-escaped; on conversion failure they return
 * an HTML comment instead of throwing.
 *
 * @param hbs - The Handlebars instance.
 */
function registerBuiltinHelpers(hbs) {
    // Structural: ADF → Markdown
    hbs.registerHelper('adfToMarkdown', (adf) => {
        if (!adf || typeof adf !== 'object')
            return '';
        try {
            const tree = mdastUtilFromAdf.fromADF(adf);
            return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(tree).trim());
        }
        catch {
            return '<!-- ADF conversion failed -->';
        }
    });
    // Structural: HTML → Markdown
    hbs.registerHelper('markdownify', (html) => {
        if (typeof html !== 'string' || !html.trim())
            return '';
        try {
            const hastTree = htmlParser.parse(html);
            const mdastTree = hastUtilToMdast.toMdast(hastTree);
            return new hbs.SafeString(mdastUtilToMarkdown.toMarkdown(mdastTree).trim());
        }
        catch {
            return '<!-- HTML conversion failed -->';
        }
    });
    // Formatting: dateFormat (format arg is optional; Handlebars passes its
    // options object as the trailing arg, which the typeof check filters out)
    hbs.registerHelper('dateFormat', (value, format) => {
        if (value === undefined || value === null)
            return '';
        const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
        return dayjs(value).format(fmt);
    });
    // Formatting: join
    hbs.registerHelper('join', (arr, separator) => {
        if (!Array.isArray(arr))
            return '';
        const sep = typeof separator === 'string' ? separator : ', ';
        return arr.join(sep);
    });
    // Formatting: pluck
    hbs.registerHelper('pluck', (arr, key) => {
        if (!Array.isArray(arr) || typeof key !== 'string')
            return [];
        return arr.map((item) => (item && typeof item === 'object' ? item[key] : undefined));
    });
    // String transforms (all pass non-strings through as '')
    const stringTransforms = {
        lowercase: (text) => (typeof text === 'string' ? text.toLowerCase() : ''),
        uppercase: (text) => (typeof text === 'string' ? text.toUpperCase() : ''),
        capitalize: (text) => (typeof text === 'string' ? radash.capitalize(text) : ''),
        title: (text) => (typeof text === 'string' ? radash.title(text) : ''),
        camel: (text) => (typeof text === 'string' ? radash.camel(text) : ''),
        snake: (text) => (typeof text === 'string' ? radash.snake(text) : ''),
        dash: (text) => (typeof text === 'string' ? radash.dash(text) : ''),
    };
    for (const [name, fn] of Object.entries(stringTransforms)) {
        hbs.registerHelper(name, fn);
    }
    // default helper (nullish fallback; 0/''/false pass through)
    hbs.registerHelper('default', (value, fallback) => value ?? fallback ?? '');
    // eq helper (deep equality)
    hbs.registerHelper('eq', (a, b) => radash.isEqual(a, b));
    // json helper (pretty-printed, not HTML-escaped)
    hbs.registerHelper('json', (value) => new hbs.SafeString(JSON.stringify(value, null, 2)));
}
|
|
740
|
+
|
|
741
|
+
/**
|
|
742
|
+
* @module templates/engine
|
|
743
|
+
* Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
|
|
744
|
+
*/
|
|
745
|
+
/**
|
|
746
|
+
* Resolve a template value to its source string.
|
|
747
|
+
*
|
|
748
|
+
* Resolution order:
|
|
749
|
+
* 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
|
|
750
|
+
* 2. Matches a key in namedTemplates → named ref (recursively resolve)
|
|
751
|
+
* 3. Otherwise → inline Handlebars template string
|
|
752
|
+
*
|
|
753
|
+
* @param value - The template reference (inline, file path, or named ref).
|
|
754
|
+
* @param namedTemplates - Named template definitions from config.
|
|
755
|
+
* @param configDir - Directory to resolve relative file paths against.
|
|
756
|
+
* @param visited - Set of visited named refs for cycle detection.
|
|
757
|
+
* @returns The resolved template source string.
|
|
758
|
+
*/
|
|
759
|
+
/**
 * Resolve a template value to its source string.
 *
 * Resolution order:
 * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
 * 2. Matches a key in namedTemplates → named ref (recursively resolve)
 * 3. Otherwise → inline Handlebars template string
 *
 * @param value - The template reference (inline, file path, or named ref).
 * @param namedTemplates - Named template definitions from config.
 * @param configDir - Directory to resolve relative file paths against.
 * @param visited - Set of visited named refs for cycle detection (shared
 *   across recursive calls).
 * @returns The resolved template source string.
 * @throws Error on a circular named-template reference.
 */
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
    // 1. Explicit Handlebars extension → read the template from disk.
    const isFilePath = value.endsWith('.hbs') || value.endsWith('.handlebars');
    if (isFilePath) {
        return node_fs.readFileSync(node_path.resolve(configDir, value), 'utf-8');
    }
    // 2. Named ref → recurse, guarding against reference cycles.
    const named = namedTemplates?.[value];
    if (named !== undefined) {
        if (visited.has(value)) {
            throw new Error(`Circular template reference detected: ${value}`);
        }
        visited.add(value);
        return resolveTemplateSource(named, namedTemplates, configDir, visited);
    }
    // 3. Inline template string.
    return value;
}
|
|
775
|
+
/**
|
|
776
|
+
* Create a configured Handlebars instance with built-in helpers registered.
|
|
777
|
+
*
|
|
778
|
+
* @returns A Handlebars instance with helpers.
|
|
779
|
+
*/
|
|
780
|
+
/**
 * Create a configured Handlebars instance with built-in helpers registered.
 *
 * Uses `Handlebars.create()` so helpers are isolated from the global
 * Handlebars environment.
 *
 * @returns A Handlebars instance with helpers.
 */
function createHandlebarsInstance() {
    const instance = Handlebars.create();
    registerBuiltinHelpers(instance);
    return instance;
}
|
|
785
|
+
/**
|
|
786
|
+
* Load custom helpers from file paths.
|
|
787
|
+
*
|
|
788
|
+
* Each file should export a default function that receives the Handlebars instance.
|
|
789
|
+
*
|
|
790
|
+
* @param hbs - The Handlebars instance.
|
|
791
|
+
* @param paths - File paths to custom helper modules.
|
|
792
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
793
|
+
*/
|
|
794
|
+
/**
 * Load custom helpers from file paths.
 *
 * Each file should export a default function that receives the Handlebars
 * instance. Modules without a function default export are silently skipped.
 *
 * Fix: dynamic `import()` must be given a file URL, not a raw absolute path —
 * on Windows a path like `C:\helpers.mjs` is rejected with
 * ERR_UNSUPPORTED_ESM_URL_SCHEME. Paths are now converted via `pathToFileURL`.
 *
 * @param hbs - The Handlebars instance.
 * @param paths - File paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 */
async function loadCustomHelpers(hbs, paths, configDir) {
    const { pathToFileURL } = await import('node:url');
    for (const p of paths) {
        const resolved = node_path.resolve(configDir, p);
        const mod = (await import(pathToFileURL(resolved).href));
        if (typeof mod.default === 'function') {
            mod.default(hbs);
        }
    }
}
|
|
803
|
+
/**
|
|
804
|
+
* The template engine: holds compiled templates and renders them against context.
|
|
805
|
+
*/
|
|
806
|
+
/**
 * The template engine: holds compiled templates and renders them against context.
 */
class TemplateEngine {
    // The Handlebars instance used for compilation.
    hbs;
    // Cache key -> compiled template function.
    compiled = new Map();
    constructor(hbs) {
        this.hbs = hbs;
    }
    /**
     * Compile and cache a template from its source string.
     *
     * @param key - Cache key (rule index or named template).
     * @param source - Handlebars template source.
     * @returns The compiled template.
     */
    compile(key, source) {
        const template = this.hbs.compile(source);
        this.compiled.set(key, template);
        return template;
    }
    /**
     * Get a previously compiled template by key.
     *
     * @param key - The cache key.
     * @returns The compiled template, or undefined.
     */
    get(key) {
        return this.compiled.get(key);
    }
    /**
     * Render a compiled template against a context.
     *
     * @param key - The cache key of the compiled template.
     * @param context - The data context for rendering.
     * @returns The rendered string, or null if the template was not found.
     */
    render(key, context) {
        const template = this.compiled.get(key);
        if (template === undefined)
            return null;
        return template(context);
    }
}
|
|
847
|
+
|
|
848
|
+
/**
|
|
849
|
+
* @module templates/buildTemplateEngine
|
|
850
|
+
* Factory to build a TemplateEngine from config, compiling all rule templates at load time.
|
|
851
|
+
*/
|
|
852
|
+
/**
|
|
853
|
+
* Build a TemplateEngine from configuration, pre-compiling all rule templates.
|
|
854
|
+
*
|
|
855
|
+
* @param rules - The inference rules (may contain template fields).
|
|
856
|
+
* @param namedTemplates - Named template definitions from config.
|
|
857
|
+
* @param templateHelperPaths - Paths to custom helper modules.
|
|
858
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
859
|
+
* @returns The configured TemplateEngine, or undefined if no templates are used.
|
|
860
|
+
*/
|
|
861
|
+
/**
 * Build a TemplateEngine from configuration, pre-compiling all rule templates.
 *
 * Templates are cached under the key `rule-<index>` where `<index>` is the
 * rule's position in `rules`.
 *
 * @param rules - The inference rules (may contain template fields).
 * @param namedTemplates - Named template definitions from config.
 * @param templateHelperPaths - Paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @returns The configured TemplateEngine, or undefined if no templates are used.
 */
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
    // Nothing to do when no rule declares a template.
    if (!rules.some((rule) => rule.template))
        return undefined;
    const hbs = createHandlebarsInstance();
    // Load custom helpers
    if (templateHelperPaths?.length && configDir) {
        await loadCustomHelpers(hbs, templateHelperPaths, configDir);
    }
    const engine = new TemplateEngine(hbs);
    // Compile all rule templates
    for (const [index, rule] of rules.entries()) {
        if (!rule.template)
            continue;
        const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
        engine.compile(`rule-${String(index)}`, source);
    }
    return engine;
}
|
|
880
|
+
|
|
881
|
+
/**
|
|
882
|
+
* @module app/configWatcher
|
|
883
|
+
* Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
|
|
884
|
+
*/
|
|
885
|
+
/**
|
|
886
|
+
* Debounced config file watcher.
|
|
887
|
+
*/
|
|
888
|
+
class ConfigWatcher {
    // Watcher options: { enabled, configPath, debounceMs, logger, onChange }.
    options;
    // Active chokidar watcher, or undefined when stopped.
    watcher;
    // Pending debounce timer handle, or undefined.
    debounce;
    constructor(options) {
        this.options = options;
    }
    /**
     * Start watching the config file. No-op when `options.enabled` is false.
     * Each change event resets the debounce timer; `onChange` fires only after
     * `debounceMs` of quiet. Its returned promise is intentionally not awaited
     * (fire-and-forget via `void`).
     */
    start() {
        if (!this.options.enabled)
            return;
        this.watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        this.watcher.on('change', () => {
            // Coalesce bursts of change events into a single reload.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                void this.options.onChange();
            }, this.options.debounceMs);
        });
        this.watcher.on('error', (error) => {
            this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
        });
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /**
     * Stop watching: cancel any pending debounced reload, then close the
     * underlying watcher. Safe to call when never started.
     */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        if (this.watcher) {
            await this.watcher.close();
            this.watcher = undefined;
        }
    }
}
|
|
927
|
+
|
|
928
|
+
/**
 * @module config/defaults
 * Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
 */
/** Default root-level config values. */
const ROOT_DEFAULTS = {
    // Directory where watcher metadata is stored.
    metadataDir: '.jeeves-watcher',
    // Shutdown timeout in milliseconds.
    shutdownTimeoutMs: 10000,
};
/** Default configWatch values (see configWatchConfigSchema). */
const CONFIG_WATCH_DEFAULTS = {
    // Config-file watching is on unless explicitly disabled.
    enabled: true,
    // Debounce (ms) for config file change detection.
    debounceMs: 1000,
};
/** Default API values. */
const API_DEFAULTS = {
    // Loopback-only by default; use "0.0.0.0" to expose externally.
    host: '127.0.0.1',
    port: 3456,
};
/** Default logging values. */
const LOGGING_DEFAULTS = {
    level: 'info',
};
/** Default watch configuration (see watchConfigSchema). */
const WATCH_DEFAULTS = {
    // Debounce (ms) for file change events.
    debounceMs: 300,
    // Time (ms) a file must remain unchanged before processing.
    stabilityThresholdMs: 500,
    // Native FS events by default; polling is opt-in (e.g. network drives).
    usePolling: false,
    pollIntervalMs: 1000,
    // Skip files ignored by .gitignore in git repositories.
    respectGitignore: true,
};
/** Default embedding configuration (see embeddingConfigSchema). */
const EMBEDDING_DEFAULTS = {
    // Chunk size and overlap are in characters (per the schema descriptions).
    chunkSize: 1000,
    chunkOverlap: 200,
    // Must match the embedding model's output dimensionality.
    dimensions: 3072,
    rateLimitPerMinute: 300,
    concurrency: 5,
};
|
|
967
|
+
|
|
968
|
+
/**
 * Watch configuration for file system monitoring.
 * All tunables are optional here; defaults (WATCH_DEFAULTS) are presumably
 * applied at config load — confirm against the loader.
 */
const watchConfigSchema = zod.z.object({
    /** Glob patterns to watch. */
    paths: zod.z
        .array(zod.z.string())
        .min(1)
        .describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),
    /** Glob patterns to ignore. */
    ignored: zod.z
        .array(zod.z.string())
        .optional()
        .describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),
    /** Polling interval in milliseconds. */
    pollIntervalMs: zod.z
        .number()
        .optional()
        .describe('Polling interval in milliseconds when usePolling is enabled.'),
    /** Whether to use polling instead of native watchers. */
    usePolling: zod.z
        .boolean()
        .optional()
        .describe('Use polling instead of native file system events (for network drives).'),
    /** Debounce delay in milliseconds for file change events. */
    debounceMs: zod.z
        .number()
        .optional()
        .describe('Debounce delay in milliseconds for file change events.'),
    /** Time in milliseconds a file must be stable before processing. */
    stabilityThresholdMs: zod.z
        .number()
        .optional()
        .describe('Time in milliseconds a file must remain unchanged before processing.'),
    /** Whether to respect .gitignore files when processing. */
    respectGitignore: zod.z
        .boolean()
        .optional()
        .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
});
/**
 * Configuration watch settings (hot reload of the config file itself).
 */
const configWatchConfigSchema = zod.z.object({
    /** Whether config file watching is enabled. */
    enabled: zod.z
        .boolean()
        .optional()
        .describe('Enable automatic reloading when config file changes.'),
    /** Debounce delay in milliseconds for config change events. */
    debounceMs: zod.z
        .number()
        .optional()
        .describe('Debounce delay in milliseconds for config file change detection.'),
});
/**
 * Embedding model configuration.
 */
const embeddingConfigSchema = zod.z.object({
    /** The embedding model provider. */
    provider: zod.z
        .string()
        .default('gemini')
        .describe('Embedding provider name (e.g., "gemini", "openai").'),
    /** The embedding model name. */
    model: zod.z
        .string()
        .default('gemini-embedding-001')
        .describe('Embedding model identifier (e.g., "gemini-embedding-001", "text-embedding-3-small").'),
    /** Maximum characters per chunk for splitting (matches the describe() text; the prior "tokens" wording was inconsistent). */
    chunkSize: zod.z
        .number()
        .optional()
        .describe('Maximum chunk size in characters for text splitting.'),
    /** Overlap between chunks in characters. */
    chunkOverlap: zod.z
        .number()
        .optional()
        .describe('Character overlap between consecutive chunks.'),
    /** Embedding vector dimensions. */
    dimensions: zod.z
        .number()
        .optional()
        .describe('Embedding vector dimensions (must match model output).'),
    /** API key for the embedding provider. */
    apiKey: zod.z
        .string()
        .optional()
        .describe('API key for embedding provider (supports ${ENV_VAR} substitution).'),
    /** Maximum embedding requests per minute. */
    rateLimitPerMinute: zod.z
        .number()
        .optional()
        .describe('Maximum embedding API requests per minute (rate limiting).'),
    /** Maximum concurrent embedding requests. */
    concurrency: zod.z
        .number()
        .optional()
        .describe('Maximum concurrent embedding requests.'),
});
/**
 * Vector store configuration for Qdrant.
 */
const vectorStoreConfigSchema = zod.z.object({
    /** Qdrant server URL (required). */
    url: zod.z
        .string()
        .describe('Qdrant server URL (e.g., "http://localhost:6333").'),
    /** Qdrant collection name (required). */
    collectionName: zod.z
        .string()
        .describe('Qdrant collection name for vector storage.'),
    /** Qdrant API key. */
    apiKey: zod.z
        .string()
        .optional()
        .describe('Qdrant API key for authentication (supports ${ENV_VAR} substitution).'),
});
/**
 * API server configuration.
 */
const apiConfigSchema = zod.z.object({
    /** Host to bind to. */
    host: zod.z
        .string()
        .optional()
        .describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),
    /** Port to listen on. */
    port: zod.z.number().optional().describe('Port for API server (e.g., 3456).'),
});
|
|
1098
|
+
/**
|
|
1099
|
+
* Logging configuration.
|
|
1100
|
+
*/
|
|
1101
|
+
const loggingConfigSchema = zod.z.object({
|
|
1102
|
+
/** Log level. */
|
|
1103
|
+
level: zod.z
|
|
1104
|
+
.string()
|
|
1105
|
+
.optional()
|
|
619
1106
|
.describe('Logging level (trace, debug, info, warn, error, fatal).'),
|
|
620
1107
|
/** Log file path. */
|
|
621
1108
|
file: zod.z
|
|
@@ -639,7 +1126,12 @@ const inferenceRuleSchema = zod.z.object({
|
|
|
639
1126
|
map: zod.z
|
|
640
1127
|
.union([jsonmap.jsonMapMapSchema, zod.z.string()])
|
|
641
1128
|
.optional()
|
|
642
|
-
.describe('JsonMap transformation (inline definition
|
|
1129
|
+
.describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
|
|
1130
|
+
/** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
|
|
1131
|
+
template: zod.z
|
|
1132
|
+
.string()
|
|
1133
|
+
.optional()
|
|
1134
|
+
.describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
|
|
643
1135
|
});
|
|
644
1136
|
/**
|
|
645
1137
|
* Top-level configuration for jeeves-watcher.
|
|
@@ -676,7 +1168,23 @@ const jeevesWatcherConfigSchema = zod.z.object({
|
|
|
676
1168
|
maps: zod.z
|
|
677
1169
|
.record(zod.z.string(), jsonmap.jsonMapMapSchema)
|
|
678
1170
|
.optional()
|
|
679
|
-
.describe('Reusable named JsonMap transformations.'),
|
|
1171
|
+
.describe('Reusable named JsonMap transformations.'),
|
|
1172
|
+
/** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
|
|
1173
|
+
templates: zod.z
|
|
1174
|
+
.record(zod.z.string(), zod.z.string())
|
|
1175
|
+
.optional()
|
|
1176
|
+
.describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
|
|
1177
|
+
/** Custom Handlebars helper registration. */
|
|
1178
|
+
templateHelpers: zod.z
|
|
1179
|
+
.object({
|
|
1180
|
+
/** File paths to custom helper modules. */
|
|
1181
|
+
paths: zod.z
|
|
1182
|
+
.array(zod.z.string())
|
|
1183
|
+
.optional()
|
|
1184
|
+
.describe('File paths to custom helper modules.'),
|
|
1185
|
+
})
|
|
1186
|
+
.optional()
|
|
1187
|
+
.describe('Custom Handlebars helper registration.'),
|
|
680
1188
|
/** Logging configuration. */
|
|
681
1189
|
logging: loggingConfigSchema.optional().describe('Logging configuration.'),
|
|
682
1190
|
/** Timeout in milliseconds for graceful shutdown. */
|
|
@@ -926,266 +1434,60 @@ function createGeminiProvider(config, logger) {
|
|
|
926
1434
|
dimensions,
|
|
927
1435
|
async embed(texts) {
|
|
928
1436
|
const vectors = await retry(async (attempt) => {
|
|
929
|
-
if (attempt > 1) {
|
|
930
|
-
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
931
|
-
}
|
|
932
|
-
// embedDocuments returns vectors for multiple texts
|
|
933
|
-
return embedder.embedDocuments(texts);
|
|
934
|
-
}, {
|
|
935
|
-
attempts: 5,
|
|
936
|
-
baseDelayMs: 500,
|
|
937
|
-
maxDelayMs: 10_000,
|
|
938
|
-
jitter: 0.2,
|
|
939
|
-
onRetry: ({ attempt, delayMs, error }) => {
|
|
940
|
-
log.warn({
|
|
941
|
-
attempt,
|
|
942
|
-
delayMs,
|
|
943
|
-
provider: 'gemini',
|
|
944
|
-
model: config.model,
|
|
945
|
-
err: normalizeError(error),
|
|
946
|
-
}, 'Embedding call failed; will retry');
|
|
947
|
-
},
|
|
948
|
-
});
|
|
949
|
-
// Validate dimensions
|
|
950
|
-
for (const vector of vectors) {
|
|
951
|
-
if (vector.length !== dimensions) {
|
|
952
|
-
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
953
|
-
}
|
|
954
|
-
}
|
|
955
|
-
return vectors;
|
|
956
|
-
},
|
|
957
|
-
};
|
|
958
|
-
}
|
|
959
|
-
function createMockFromConfig(config) {
|
|
960
|
-
const dimensions = config.dimensions ?? 768;
|
|
961
|
-
return createMockProvider(dimensions);
|
|
962
|
-
}
|
|
963
|
-
const embeddingProviderRegistry = new Map([
|
|
964
|
-
['mock', createMockFromConfig],
|
|
965
|
-
['gemini', createGeminiProvider],
|
|
966
|
-
]);
|
|
967
|
-
/**
|
|
968
|
-
* Create an embedding provider based on the given configuration.
|
|
969
|
-
*
|
|
970
|
-
* Each provider is responsible for its own default dimensions.
|
|
971
|
-
*
|
|
972
|
-
* @param config - The embedding configuration.
|
|
973
|
-
* @param logger - Optional pino logger for retry warnings.
|
|
974
|
-
* @returns An {@link EmbeddingProvider} instance.
|
|
975
|
-
* @throws If the configured provider is not supported.
|
|
976
|
-
*/
|
|
977
|
-
function createEmbeddingProvider(config, logger) {
|
|
978
|
-
const factory = embeddingProviderRegistry.get(config.provider);
|
|
979
|
-
if (!factory) {
|
|
980
|
-
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
981
|
-
}
|
|
982
|
-
return factory(config, logger);
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
/**
|
|
986
|
-
* @module gitignore
|
|
987
|
-
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
988
|
-
*/
|
|
989
|
-
/**
|
|
990
|
-
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
991
|
-
* Returns `undefined` if no repo is found.
|
|
992
|
-
*/
|
|
993
|
-
function findRepoRoot(startDir) {
|
|
994
|
-
let dir = node_path.resolve(startDir);
|
|
995
|
-
const root = node_path.resolve('/');
|
|
996
|
-
while (dir !== root) {
|
|
997
|
-
if (node_fs.existsSync(node_path.join(dir, '.git')) &&
|
|
998
|
-
node_fs.statSync(node_path.join(dir, '.git')).isDirectory()) {
|
|
999
|
-
return dir;
|
|
1000
|
-
}
|
|
1001
|
-
const parent = node_path.dirname(dir);
|
|
1002
|
-
if (parent === dir)
|
|
1003
|
-
break;
|
|
1004
|
-
dir = parent;
|
|
1005
|
-
}
|
|
1006
|
-
return undefined;
|
|
1007
|
-
}
|
|
1008
|
-
/**
|
|
1009
|
-
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
1010
|
-
* that can be scanned for a repo root.
|
|
1011
|
-
*/
|
|
1012
|
-
function watchPathToScanDir(watchPath) {
|
|
1013
|
-
const absPath = node_path.resolve(watchPath);
|
|
1014
|
-
try {
|
|
1015
|
-
return node_fs.statSync(absPath).isDirectory() ? absPath : node_path.dirname(absPath);
|
|
1016
|
-
}
|
|
1017
|
-
catch {
|
|
1018
|
-
// ignore
|
|
1019
|
-
}
|
|
1020
|
-
// If this is a glob, fall back to the non-glob prefix.
|
|
1021
|
-
const globMatch = /[*?[{]/.exec(watchPath);
|
|
1022
|
-
if (!globMatch)
|
|
1023
|
-
return undefined;
|
|
1024
|
-
const prefix = watchPath.slice(0, globMatch.index);
|
|
1025
|
-
const trimmed = prefix.trim();
|
|
1026
|
-
const baseDir = trimmed.length === 0
|
|
1027
|
-
? '.'
|
|
1028
|
-
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
1029
|
-
? trimmed
|
|
1030
|
-
: node_path.dirname(trimmed);
|
|
1031
|
-
const resolved = node_path.resolve(baseDir);
|
|
1032
|
-
if (!node_fs.existsSync(resolved))
|
|
1033
|
-
return undefined;
|
|
1034
|
-
return resolved;
|
|
1035
|
-
}
|
|
1036
|
-
/**
|
|
1037
|
-
* Recursively find all `.gitignore` files under `dir`.
|
|
1038
|
-
* Skips `.git` and `node_modules` directories for performance.
|
|
1039
|
-
*/
|
|
1040
|
-
function findGitignoreFiles(dir) {
|
|
1041
|
-
const results = [];
|
|
1042
|
-
const gitignorePath = node_path.join(dir, '.gitignore');
|
|
1043
|
-
if (node_fs.existsSync(gitignorePath)) {
|
|
1044
|
-
results.push(gitignorePath);
|
|
1045
|
-
}
|
|
1046
|
-
let entries;
|
|
1047
|
-
try {
|
|
1048
|
-
entries = node_fs.readdirSync(dir);
|
|
1049
|
-
}
|
|
1050
|
-
catch {
|
|
1051
|
-
return results;
|
|
1052
|
-
}
|
|
1053
|
-
for (const entry of entries) {
|
|
1054
|
-
if (entry === '.git' || entry === 'node_modules')
|
|
1055
|
-
continue;
|
|
1056
|
-
const fullPath = node_path.join(dir, entry);
|
|
1057
|
-
try {
|
|
1058
|
-
if (node_fs.statSync(fullPath).isDirectory()) {
|
|
1059
|
-
results.push(...findGitignoreFiles(fullPath));
|
|
1060
|
-
}
|
|
1061
|
-
}
|
|
1062
|
-
catch {
|
|
1063
|
-
// Skip inaccessible entries
|
|
1064
|
-
}
|
|
1065
|
-
}
|
|
1066
|
-
return results;
|
|
1067
|
-
}
|
|
1068
|
-
/**
|
|
1069
|
-
* Parse a `.gitignore` file into an `ignore` instance.
|
|
1070
|
-
*/
|
|
1071
|
-
function parseGitignore(gitignorePath) {
|
|
1072
|
-
const content = node_fs.readFileSync(gitignorePath, 'utf8');
|
|
1073
|
-
return ignore().add(content);
|
|
1074
|
-
}
|
|
1075
|
-
/**
|
|
1076
|
-
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
1077
|
-
*/
|
|
1078
|
-
function toForwardSlash(p) {
|
|
1079
|
-
return p.replace(/\\/g, '/');
|
|
1080
|
-
}
|
|
1081
|
-
/**
|
|
1082
|
-
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
1083
|
-
* `.gitignore` chain in git repositories.
|
|
1084
|
-
*/
|
|
1085
|
-
class GitignoreFilter {
|
|
1086
|
-
repos = new Map();
|
|
1087
|
-
/**
|
|
1088
|
-
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
1089
|
-
*
|
|
1090
|
-
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
1091
|
-
*/
|
|
1092
|
-
constructor(watchPaths) {
|
|
1093
|
-
this.scan(watchPaths);
|
|
1094
|
-
}
|
|
1095
|
-
/**
|
|
1096
|
-
* Scan paths for git repos and their `.gitignore` files.
|
|
1097
|
-
*/
|
|
1098
|
-
scan(watchPaths) {
|
|
1099
|
-
this.repos.clear();
|
|
1100
|
-
const scannedDirs = new Set();
|
|
1101
|
-
for (const watchPath of watchPaths) {
|
|
1102
|
-
const scanDir = watchPathToScanDir(watchPath);
|
|
1103
|
-
if (!scanDir)
|
|
1104
|
-
continue;
|
|
1105
|
-
if (scannedDirs.has(scanDir))
|
|
1106
|
-
continue;
|
|
1107
|
-
scannedDirs.add(scanDir);
|
|
1108
|
-
const repoRoot = findRepoRoot(scanDir);
|
|
1109
|
-
if (!repoRoot)
|
|
1110
|
-
continue;
|
|
1111
|
-
if (this.repos.has(repoRoot))
|
|
1112
|
-
continue;
|
|
1113
|
-
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
1114
|
-
const entries = gitignoreFiles.map((gf) => ({
|
|
1115
|
-
dir: node_path.dirname(gf),
|
|
1116
|
-
ig: parseGitignore(gf),
|
|
1117
|
-
}));
|
|
1118
|
-
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
1119
|
-
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1120
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1121
|
-
}
|
|
1122
|
-
}
|
|
1123
|
-
/**
|
|
1124
|
-
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
1125
|
-
*
|
|
1126
|
-
* @param filePath - Absolute file path to check.
|
|
1127
|
-
* @returns `true` if the file should be ignored.
|
|
1128
|
-
*/
|
|
1129
|
-
isIgnored(filePath) {
|
|
1130
|
-
const absPath = node_path.resolve(filePath);
|
|
1131
|
-
for (const [, repo] of this.repos) {
|
|
1132
|
-
// Check if file is within this repo
|
|
1133
|
-
const relToRepo = node_path.relative(repo.root, absPath);
|
|
1134
|
-
if (relToRepo.startsWith('..') || relToRepo.startsWith(node_path.resolve('/'))) {
|
|
1135
|
-
continue;
|
|
1136
|
-
}
|
|
1137
|
-
// Check each `.gitignore` entry (deepest-first)
|
|
1138
|
-
for (const entry of repo.entries) {
|
|
1139
|
-
const relToEntry = node_path.relative(entry.dir, absPath);
|
|
1140
|
-
if (relToEntry.startsWith('..'))
|
|
1141
|
-
continue;
|
|
1142
|
-
const normalized = toForwardSlash(relToEntry);
|
|
1143
|
-
if (entry.ig.ignores(normalized)) {
|
|
1144
|
-
return true;
|
|
1145
|
-
}
|
|
1146
|
-
}
|
|
1147
|
-
}
|
|
1148
|
-
return false;
|
|
1149
|
-
}
|
|
1150
|
-
/**
|
|
1151
|
-
* Invalidate and re-parse a specific `.gitignore` file.
|
|
1152
|
-
* Call when a `.gitignore` file is added, changed, or removed.
|
|
1153
|
-
*
|
|
1154
|
-
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
1155
|
-
*/
|
|
1156
|
-
invalidate(gitignorePath) {
|
|
1157
|
-
const absPath = node_path.resolve(gitignorePath);
|
|
1158
|
-
const gitignoreDir = node_path.dirname(absPath);
|
|
1159
|
-
for (const [, repo] of this.repos) {
|
|
1160
|
-
const relToRepo = node_path.relative(repo.root, gitignoreDir);
|
|
1161
|
-
if (relToRepo.startsWith('..'))
|
|
1162
|
-
continue;
|
|
1163
|
-
// Remove old entry for this directory
|
|
1164
|
-
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
1165
|
-
// Re-parse if file still exists
|
|
1166
|
-
if (node_fs.existsSync(absPath)) {
|
|
1167
|
-
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
1168
|
-
// Re-sort deepest-first
|
|
1169
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1170
|
-
}
|
|
1171
|
-
return;
|
|
1172
|
-
}
|
|
1173
|
-
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
1174
|
-
const repoRoot = findRepoRoot(gitignoreDir);
|
|
1175
|
-
if (repoRoot && node_fs.existsSync(absPath)) {
|
|
1176
|
-
const entries = [
|
|
1177
|
-
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
1178
|
-
];
|
|
1179
|
-
if (this.repos.has(repoRoot)) {
|
|
1180
|
-
const repo = this.repos.get(repoRoot);
|
|
1181
|
-
repo.entries.push(entries[0]);
|
|
1182
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1183
|
-
}
|
|
1184
|
-
else {
|
|
1185
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1437
|
+
if (attempt > 1) {
|
|
1438
|
+
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
1439
|
+
}
|
|
1440
|
+
// embedDocuments returns vectors for multiple texts
|
|
1441
|
+
return embedder.embedDocuments(texts);
|
|
1442
|
+
}, {
|
|
1443
|
+
attempts: 5,
|
|
1444
|
+
baseDelayMs: 500,
|
|
1445
|
+
maxDelayMs: 10_000,
|
|
1446
|
+
jitter: 0.2,
|
|
1447
|
+
onRetry: ({ attempt, delayMs, error }) => {
|
|
1448
|
+
log.warn({
|
|
1449
|
+
attempt,
|
|
1450
|
+
delayMs,
|
|
1451
|
+
provider: 'gemini',
|
|
1452
|
+
model: config.model,
|
|
1453
|
+
err: normalizeError(error),
|
|
1454
|
+
}, 'Embedding call failed; will retry');
|
|
1455
|
+
},
|
|
1456
|
+
});
|
|
1457
|
+
// Validate dimensions
|
|
1458
|
+
for (const vector of vectors) {
|
|
1459
|
+
if (vector.length !== dimensions) {
|
|
1460
|
+
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
1461
|
+
}
|
|
1186
1462
|
}
|
|
1187
|
-
|
|
1463
|
+
return vectors;
|
|
1464
|
+
},
|
|
1465
|
+
};
|
|
1466
|
+
}
|
|
1467
|
+
function createMockFromConfig(config) {
|
|
1468
|
+
const dimensions = config.dimensions ?? 768;
|
|
1469
|
+
return createMockProvider(dimensions);
|
|
1470
|
+
}
|
|
1471
|
+
const embeddingProviderRegistry = new Map([
|
|
1472
|
+
['mock', createMockFromConfig],
|
|
1473
|
+
['gemini', createGeminiProvider],
|
|
1474
|
+
]);
|
|
1475
|
+
/**
|
|
1476
|
+
* Create an embedding provider based on the given configuration.
|
|
1477
|
+
*
|
|
1478
|
+
* Each provider is responsible for its own default dimensions.
|
|
1479
|
+
*
|
|
1480
|
+
* @param config - The embedding configuration.
|
|
1481
|
+
* @param logger - Optional pino logger for retry warnings.
|
|
1482
|
+
* @returns An {@link EmbeddingProvider} instance.
|
|
1483
|
+
* @throws If the configured provider is not supported.
|
|
1484
|
+
*/
|
|
1485
|
+
function createEmbeddingProvider(config, logger) {
|
|
1486
|
+
const factory = embeddingProviderRegistry.get(config.provider);
|
|
1487
|
+
if (!factory) {
|
|
1488
|
+
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
1188
1489
|
}
|
|
1490
|
+
return factory(config, logger);
|
|
1189
1491
|
}
|
|
1190
1492
|
|
|
1191
1493
|
/**
|
|
@@ -1417,7 +1719,7 @@ function createJsonMapLib() {
|
|
|
1417
1719
|
};
|
|
1418
1720
|
}
|
|
1419
1721
|
/**
|
|
1420
|
-
* Apply compiled inference rules to file attributes, returning merged metadata.
|
|
1722
|
+
* Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
|
|
1421
1723
|
*
|
|
1422
1724
|
* Rules are evaluated in order; later rules override earlier ones.
|
|
1423
1725
|
* If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
|
|
@@ -1427,15 +1729,18 @@ function createJsonMapLib() {
|
|
|
1427
1729
|
* @param attributes - The file attributes to match against.
|
|
1428
1730
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1429
1731
|
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1430
|
-
* @
|
|
1732
|
+
* @param templateEngine - Optional template engine for rendering content templates.
|
|
1733
|
+
* @param configDir - Optional config directory for resolving .json map file paths.
|
|
1734
|
+
* @returns The merged metadata and optional rendered content.
|
|
1431
1735
|
*/
|
|
1432
|
-
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
1736
|
+
async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
|
|
1433
1737
|
// JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
|
|
1434
1738
|
// Our helper functions accept multiple args, which JsonMap supports at runtime.
|
|
1435
1739
|
const lib = createJsonMapLib();
|
|
1436
1740
|
let merged = {};
|
|
1741
|
+
let renderedContent = null;
|
|
1437
1742
|
const log = logger ?? console;
|
|
1438
|
-
for (const { rule, validate } of compiledRules) {
|
|
1743
|
+
for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
|
|
1439
1744
|
if (validate(attributes)) {
|
|
1440
1745
|
// Apply set resolution
|
|
1441
1746
|
const setOutput = resolveSet(rule.set, attributes);
|
|
@@ -1445,10 +1750,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1445
1750
|
let mapDef;
|
|
1446
1751
|
// Resolve map reference
|
|
1447
1752
|
if (typeof rule.map === 'string') {
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1753
|
+
if (rule.map.endsWith('.json') && configDir) {
|
|
1754
|
+
// File path: load from .json file
|
|
1755
|
+
try {
|
|
1756
|
+
const mapPath = node_path.resolve(configDir, rule.map);
|
|
1757
|
+
const raw = node_fs.readFileSync(mapPath, 'utf-8');
|
|
1758
|
+
mapDef = JSON.parse(raw);
|
|
1759
|
+
}
|
|
1760
|
+
catch (error) {
|
|
1761
|
+
log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
|
|
1762
|
+
continue;
|
|
1763
|
+
}
|
|
1764
|
+
}
|
|
1765
|
+
else {
|
|
1766
|
+
mapDef = namedMaps?.[rule.map];
|
|
1767
|
+
if (!mapDef) {
|
|
1768
|
+
log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
|
|
1769
|
+
continue;
|
|
1770
|
+
}
|
|
1452
1771
|
}
|
|
1453
1772
|
}
|
|
1454
1773
|
else {
|
|
@@ -1471,9 +1790,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1471
1790
|
log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1472
1791
|
}
|
|
1473
1792
|
}
|
|
1793
|
+
// Render template if present
|
|
1794
|
+
if (rule.template && templateEngine) {
|
|
1795
|
+
const templateKey = `rule-${String(ruleIndex)}`;
|
|
1796
|
+
// Build template context: attributes (with json spread at top) + map output
|
|
1797
|
+
const context = {
|
|
1798
|
+
...(attributes.json ?? {}),
|
|
1799
|
+
...attributes,
|
|
1800
|
+
...merged,
|
|
1801
|
+
};
|
|
1802
|
+
try {
|
|
1803
|
+
const result = templateEngine.render(templateKey, context);
|
|
1804
|
+
if (result && result.trim()) {
|
|
1805
|
+
renderedContent = result;
|
|
1806
|
+
}
|
|
1807
|
+
else {
|
|
1808
|
+
log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
|
|
1809
|
+
}
|
|
1810
|
+
}
|
|
1811
|
+
catch (error) {
|
|
1812
|
+
log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
|
|
1813
|
+
}
|
|
1814
|
+
}
|
|
1474
1815
|
}
|
|
1475
1816
|
}
|
|
1476
|
-
return merged;
|
|
1817
|
+
return { metadata: merged, renderedContent };
|
|
1477
1818
|
}
|
|
1478
1819
|
|
|
1479
1820
|
/**
|
|
@@ -1562,23 +1903,32 @@ function compileRules(rules) {
|
|
|
1562
1903
|
* @param metadataDir - The metadata directory for enrichment files.
|
|
1563
1904
|
* @param maps - Optional named JsonMap definitions.
|
|
1564
1905
|
* @param logger - Optional logger for rule warnings.
|
|
1906
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1907
|
+
* @param configDir - Optional config directory for resolving file paths.
|
|
1565
1908
|
* @returns The merged metadata and intermediate data.
|
|
1566
1909
|
*/
|
|
1567
|
-
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
|
|
1910
|
+
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
|
|
1568
1911
|
const ext = node_path.extname(filePath);
|
|
1569
1912
|
const stats = await promises.stat(filePath);
|
|
1570
1913
|
// 1. Extract text and structured data
|
|
1571
1914
|
const extracted = await extractText(filePath, ext);
|
|
1572
1915
|
// 2. Build attributes + apply rules
|
|
1573
1916
|
const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
|
|
1574
|
-
const inferred = await applyRules(compiledRules, attributes, maps, logger);
|
|
1917
|
+
const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
|
|
1575
1918
|
// 3. Read enrichment metadata (merge, enrichment wins)
|
|
1576
1919
|
const enrichment = await readMetadata(filePath, metadataDir);
|
|
1577
1920
|
const metadata = {
|
|
1578
1921
|
...inferred,
|
|
1579
1922
|
...(enrichment ?? {}),
|
|
1580
1923
|
};
|
|
1581
|
-
return {
|
|
1924
|
+
return {
|
|
1925
|
+
inferred,
|
|
1926
|
+
enrichment,
|
|
1927
|
+
metadata,
|
|
1928
|
+
attributes,
|
|
1929
|
+
extracted,
|
|
1930
|
+
renderedContent,
|
|
1931
|
+
};
|
|
1582
1932
|
}
|
|
1583
1933
|
|
|
1584
1934
|
/**
|
|
@@ -1649,6 +1999,7 @@ class DocumentProcessor {
|
|
|
1649
1999
|
vectorStore;
|
|
1650
2000
|
compiledRules;
|
|
1651
2001
|
logger;
|
|
2002
|
+
templateEngine;
|
|
1652
2003
|
/**
|
|
1653
2004
|
* Create a new DocumentProcessor.
|
|
1654
2005
|
*
|
|
@@ -1657,13 +2008,15 @@ class DocumentProcessor {
|
|
|
1657
2008
|
* @param vectorStore - The vector store client.
|
|
1658
2009
|
* @param compiledRules - The compiled inference rules.
|
|
1659
2010
|
* @param logger - The logger instance.
|
|
2011
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1660
2012
|
*/
|
|
1661
|
-
constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
|
|
2013
|
+
constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
|
|
1662
2014
|
this.config = config;
|
|
1663
2015
|
this.embeddingProvider = embeddingProvider;
|
|
1664
2016
|
this.vectorStore = vectorStore;
|
|
1665
2017
|
this.compiledRules = compiledRules;
|
|
1666
2018
|
this.logger = logger;
|
|
2019
|
+
this.templateEngine = templateEngine;
|
|
1667
2020
|
}
|
|
1668
2021
|
/**
|
|
1669
2022
|
* Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
|
|
@@ -1674,13 +2027,15 @@ class DocumentProcessor {
|
|
|
1674
2027
|
try {
|
|
1675
2028
|
const ext = node_path.extname(filePath);
|
|
1676
2029
|
// 1. Build merged metadata + extract text
|
|
1677
|
-
const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
1678
|
-
if
|
|
2030
|
+
const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
2031
|
+
// Use rendered template content if available, otherwise raw extracted text
|
|
2032
|
+
const textToEmbed = renderedContent ?? extracted.text;
|
|
2033
|
+
if (!textToEmbed.trim()) {
|
|
1679
2034
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
1680
2035
|
return;
|
|
1681
2036
|
}
|
|
1682
2037
|
// 2. Content hash check — skip if unchanged
|
|
1683
|
-
const hash = contentHash(
|
|
2038
|
+
const hash = contentHash(textToEmbed);
|
|
1684
2039
|
const baseId = pointId(filePath, 0);
|
|
1685
2040
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
1686
2041
|
if (existingPayload && existingPayload['content_hash'] === hash) {
|
|
@@ -1692,7 +2047,7 @@ class DocumentProcessor {
|
|
|
1692
2047
|
const chunkSize = this.config.chunkSize ?? 1000;
|
|
1693
2048
|
const chunkOverlap = this.config.chunkOverlap ?? 200;
|
|
1694
2049
|
const splitter = createSplitter(ext, chunkSize, chunkOverlap);
|
|
1695
|
-
const chunks = await splitter.splitText(
|
|
2050
|
+
const chunks = await splitter.splitText(textToEmbed);
|
|
1696
2051
|
// 4. Embed all chunks
|
|
1697
2052
|
const vectors = await this.embeddingProvider.embed(chunks);
|
|
1698
2053
|
// 5. Upsert all chunk points
|
|
@@ -1786,7 +2141,7 @@ class DocumentProcessor {
|
|
|
1786
2141
|
return null;
|
|
1787
2142
|
}
|
|
1788
2143
|
// Build merged metadata (lightweight — no embedding)
|
|
1789
|
-
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
2144
|
+
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
1790
2145
|
// Update all chunk payloads
|
|
1791
2146
|
const totalChunks = getChunkCount(existingPayload);
|
|
1792
2147
|
const ids = chunkIds(filePath, totalChunks);
|
|
@@ -1804,8 +2159,17 @@ class DocumentProcessor {
|
|
|
1804
2159
|
*
|
|
1805
2160
|
* @param compiledRules - The newly compiled rules.
|
|
1806
2161
|
*/
|
|
1807
|
-
|
|
2162
|
+
/**
|
|
2163
|
+
* Update compiled inference rules and optionally the template engine.
|
|
2164
|
+
*
|
|
2165
|
+
* @param compiledRules - The newly compiled rules.
|
|
2166
|
+
* @param templateEngine - Optional updated template engine.
|
|
2167
|
+
*/
|
|
2168
|
+
updateRules(compiledRules, templateEngine) {
|
|
1808
2169
|
this.compiledRules = compiledRules;
|
|
2170
|
+
if (templateEngine) {
|
|
2171
|
+
this.templateEngine = templateEngine;
|
|
2172
|
+
}
|
|
1809
2173
|
this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
|
|
1810
2174
|
}
|
|
1811
2175
|
}
|
|
@@ -2334,6 +2698,76 @@ class SystemHealth {
|
|
|
2334
2698
|
}
|
|
2335
2699
|
}
|
|
2336
2700
|
|
|
2701
|
+
/**
|
|
2702
|
+
* @module watcher/globToDir
|
|
2703
|
+
* Adapts glob-based watch config to chokidar v4+, which removed glob support
|
|
2704
|
+
* (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
|
|
2705
|
+
* strings, silently producing zero events. This module extracts static directory
|
|
2706
|
+
* roots from glob patterns for chokidar to watch, then filters emitted events
|
|
2707
|
+
* against the original globs via picomatch.
|
|
2708
|
+
*/
|
|
2709
|
+
/**
|
|
2710
|
+
* Extract the static directory root from a glob pattern.
|
|
2711
|
+
* Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
|
|
2712
|
+
*
|
|
2713
|
+
* @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
|
|
2714
|
+
* @returns The static directory prefix (e.g., `j:/domains`).
|
|
2715
|
+
*/
|
|
2716
|
+
function globRoot(glob) {
|
|
2717
|
+
const normalized = glob.replace(/\\/g, '/');
|
|
2718
|
+
const segments = normalized.split('/');
|
|
2719
|
+
const staticSegments = [];
|
|
2720
|
+
for (const seg of segments) {
|
|
2721
|
+
if (/[*?{[\]]/.test(seg))
|
|
2722
|
+
break;
|
|
2723
|
+
staticSegments.push(seg);
|
|
2724
|
+
}
|
|
2725
|
+
return staticSegments.join('/') || '.';
|
|
2726
|
+
}
|
|
2727
|
+
/**
|
|
2728
|
+
* Deduplicate directory roots, removing paths that are subdirectories of others.
|
|
2729
|
+
*
|
|
2730
|
+
* @param roots - Array of directory paths.
|
|
2731
|
+
* @returns Deduplicated array with subdirectories removed.
|
|
2732
|
+
*/
|
|
2733
|
+
function deduplicateRoots(roots) {
|
|
2734
|
+
const normalized = roots.map((r) => r.replace(/\\/g, '/').toLowerCase());
|
|
2735
|
+
const sorted = [...new Set(normalized)].sort();
|
|
2736
|
+
return sorted.filter((root, _i, arr) => {
|
|
2737
|
+
const withSlash = root.endsWith('/') ? root : root + '/';
|
|
2738
|
+
return !arr.some((other) => other !== root && withSlash.startsWith(other + '/'));
|
|
2739
|
+
});
|
|
2740
|
+
}
|
|
2741
|
+
/**
|
|
2742
|
+
* Build a picomatch matcher from an array of glob patterns.
|
|
2743
|
+
* Normalizes Windows paths (backslash → forward slash, lowercase drive letter)
|
|
2744
|
+
* before matching.
|
|
2745
|
+
*
|
|
2746
|
+
* @param globs - Glob patterns to match against.
|
|
2747
|
+
* @returns A function that tests whether a file path matches any of the globs.
|
|
2748
|
+
*/
|
|
2749
|
+
function buildGlobMatcher(globs) {
|
|
2750
|
+
const normalizedGlobs = globs.map((g) => g.replace(/\\/g, '/'));
|
|
2751
|
+
const isMatch = picomatch(normalizedGlobs, { dot: true, nocase: true });
|
|
2752
|
+
return (filePath) => {
|
|
2753
|
+
const normalized = filePath.replace(/\\/g, '/');
|
|
2754
|
+
return isMatch(normalized);
|
|
2755
|
+
};
|
|
2756
|
+
}
|
|
2757
|
+
/**
|
|
2758
|
+
* Convert an array of glob patterns into chokidar-compatible directory roots
|
|
2759
|
+
* and a filter function for post-hoc event filtering.
|
|
2760
|
+
*
|
|
2761
|
+
* @param globs - Glob patterns from the watch config.
|
|
2762
|
+
* @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
|
|
2763
|
+
*/
|
|
2764
|
+
function resolveWatchPaths(globs) {
|
|
2765
|
+
const rawRoots = globs.map(globRoot);
|
|
2766
|
+
const roots = deduplicateRoots(rawRoots);
|
|
2767
|
+
const matches = buildGlobMatcher(globs);
|
|
2768
|
+
return { roots, matches };
|
|
2769
|
+
}
|
|
2770
|
+
|
|
2337
2771
|
/**
|
|
2338
2772
|
* @module watcher
|
|
2339
2773
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -2348,6 +2782,7 @@ class FileSystemWatcher {
|
|
|
2348
2782
|
logger;
|
|
2349
2783
|
health;
|
|
2350
2784
|
gitignoreFilter;
|
|
2785
|
+
globMatches;
|
|
2351
2786
|
watcher;
|
|
2352
2787
|
/**
|
|
2353
2788
|
* Create a new FileSystemWatcher.
|
|
@@ -2364,6 +2799,7 @@ class FileSystemWatcher {
|
|
|
2364
2799
|
this.processor = processor;
|
|
2365
2800
|
this.logger = logger;
|
|
2366
2801
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
2802
|
+
this.globMatches = () => true;
|
|
2367
2803
|
const healthOptions = {
|
|
2368
2804
|
maxRetries: options.maxRetries,
|
|
2369
2805
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2376,7 +2812,13 @@ class FileSystemWatcher {
|
|
|
2376
2812
|
* Start watching the filesystem and processing events.
|
|
2377
2813
|
*/
|
|
2378
2814
|
start() {
|
|
2379
|
-
|
|
2815
|
+
// Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
|
|
2816
|
+
// Glob patterns are silently treated as literal strings, producing zero
|
|
2817
|
+
// events. We extract static directory roots for chokidar to watch, then
|
|
2818
|
+
// filter emitted events against the original globs via picomatch.
|
|
2819
|
+
const { roots, matches } = resolveWatchPaths(this.config.paths);
|
|
2820
|
+
this.globMatches = matches;
|
|
2821
|
+
this.watcher = chokidar.watch(roots, {
|
|
2380
2822
|
ignored: this.config.ignored,
|
|
2381
2823
|
usePolling: this.config.usePolling,
|
|
2382
2824
|
interval: this.config.pollIntervalMs,
|
|
@@ -2387,6 +2829,8 @@ class FileSystemWatcher {
|
|
|
2387
2829
|
});
|
|
2388
2830
|
this.watcher.on('add', (path) => {
|
|
2389
2831
|
this.handleGitignoreChange(path);
|
|
2832
|
+
if (!this.globMatches(path))
|
|
2833
|
+
return;
|
|
2390
2834
|
if (this.isGitignored(path))
|
|
2391
2835
|
return;
|
|
2392
2836
|
this.logger.debug({ path }, 'File added');
|
|
@@ -2394,6 +2838,8 @@ class FileSystemWatcher {
|
|
|
2394
2838
|
});
|
|
2395
2839
|
this.watcher.on('change', (path) => {
|
|
2396
2840
|
this.handleGitignoreChange(path);
|
|
2841
|
+
if (!this.globMatches(path))
|
|
2842
|
+
return;
|
|
2397
2843
|
if (this.isGitignored(path))
|
|
2398
2844
|
return;
|
|
2399
2845
|
this.logger.debug({ path }, 'File changed');
|
|
@@ -2401,6 +2847,8 @@ class FileSystemWatcher {
|
|
|
2401
2847
|
});
|
|
2402
2848
|
this.watcher.on('unlink', (path) => {
|
|
2403
2849
|
this.handleGitignoreChange(path);
|
|
2850
|
+
if (!this.globMatches(path))
|
|
2851
|
+
return;
|
|
2404
2852
|
if (this.isGitignored(path))
|
|
2405
2853
|
return;
|
|
2406
2854
|
this.logger.debug({ path }, 'File removed');
|
|
@@ -2473,51 +2921,21 @@ class FileSystemWatcher {
|
|
|
2473
2921
|
}
|
|
2474
2922
|
|
|
2475
2923
|
/**
|
|
2476
|
-
* @module app/
|
|
2477
|
-
*
|
|
2478
|
-
*/
|
|
2479
|
-
/**
|
|
2480
|
-
* Debounced config file watcher.
|
|
2924
|
+
* @module app/factories
|
|
2925
|
+
* Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
|
|
2481
2926
|
*/
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
});
|
|
2495
|
-
this.watcher.on('change', () => {
|
|
2496
|
-
if (this.debounce)
|
|
2497
|
-
clearTimeout(this.debounce);
|
|
2498
|
-
this.debounce = setTimeout(() => {
|
|
2499
|
-
void this.options.onChange();
|
|
2500
|
-
}, this.options.debounceMs);
|
|
2501
|
-
});
|
|
2502
|
-
this.watcher.on('error', (error) => {
|
|
2503
|
-
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2504
|
-
});
|
|
2505
|
-
this.options.logger.info({
|
|
2506
|
-
configPath: this.options.configPath,
|
|
2507
|
-
debounceMs: this.options.debounceMs,
|
|
2508
|
-
}, 'Config watcher started');
|
|
2509
|
-
}
|
|
2510
|
-
async stop() {
|
|
2511
|
-
if (this.debounce) {
|
|
2512
|
-
clearTimeout(this.debounce);
|
|
2513
|
-
this.debounce = undefined;
|
|
2514
|
-
}
|
|
2515
|
-
if (this.watcher) {
|
|
2516
|
-
await this.watcher.close();
|
|
2517
|
-
this.watcher = undefined;
|
|
2518
|
-
}
|
|
2519
|
-
}
|
|
2520
|
-
}
|
|
2927
|
+
/** Default component factories wiring real implementations. */
|
|
2928
|
+
const defaultFactories = {
|
|
2929
|
+
loadConfig,
|
|
2930
|
+
createLogger,
|
|
2931
|
+
createEmbeddingProvider,
|
|
2932
|
+
createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
|
|
2933
|
+
compileRules,
|
|
2934
|
+
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine),
|
|
2935
|
+
createEventQueue: (options) => new EventQueue(options),
|
|
2936
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2937
|
+
createApiServer,
|
|
2938
|
+
};
|
|
2521
2939
|
|
|
2522
2940
|
/**
|
|
2523
2941
|
* @module app/shutdown
|
|
@@ -2537,17 +2955,28 @@ function installShutdownHandlers(stop) {
|
|
|
2537
2955
|
process.on('SIGINT', () => void shutdown());
|
|
2538
2956
|
}
|
|
2539
2957
|
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2958
|
+
/**
|
|
2959
|
+
* @module app/startFromConfig
|
|
2960
|
+
* Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
|
|
2961
|
+
*/
|
|
2962
|
+
/**
|
|
2963
|
+
* Create and start a JeevesWatcher from a config file path.
|
|
2964
|
+
*
|
|
2965
|
+
* @param configPath - Optional path to the configuration file.
|
|
2966
|
+
* @returns The running JeevesWatcher instance.
|
|
2967
|
+
*/
|
|
2968
|
+
async function startFromConfig(configPath) {
|
|
2969
|
+
const config = await loadConfig(configPath);
|
|
2970
|
+
const app = new JeevesWatcher(config, configPath);
|
|
2971
|
+
installShutdownHandlers(() => app.stop());
|
|
2972
|
+
await app.start();
|
|
2973
|
+
return app;
|
|
2974
|
+
}
|
|
2975
|
+
|
|
2976
|
+
/**
|
|
2977
|
+
* @module app
|
|
2978
|
+
* Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
|
|
2979
|
+
*/
|
|
2551
2980
|
/**
|
|
2552
2981
|
* Main application class that wires together all components.
|
|
2553
2982
|
*/
|
|
@@ -2582,56 +3011,26 @@ class JeevesWatcher {
|
|
|
2582
3011
|
async start() {
|
|
2583
3012
|
const logger = this.factories.createLogger(this.config.logging);
|
|
2584
3013
|
this.logger = logger;
|
|
2585
|
-
|
|
2586
|
-
try {
|
|
2587
|
-
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
2588
|
-
}
|
|
2589
|
-
catch (error) {
|
|
2590
|
-
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
2591
|
-
throw error;
|
|
2592
|
-
}
|
|
2593
|
-
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
2594
|
-
await vectorStore.ensureCollection();
|
|
3014
|
+
const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
|
|
2595
3015
|
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
2596
|
-
const
|
|
3016
|
+
const configDir = this.configPath ? node_path.dirname(this.configPath) : '.';
|
|
3017
|
+
const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
|
|
3018
|
+
const processor = this.factories.createDocumentProcessor({
|
|
2597
3019
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
2598
3020
|
chunkSize: this.config.embedding.chunkSize,
|
|
2599
3021
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
2600
3022
|
maps: this.config.maps,
|
|
2601
|
-
|
|
2602
|
-
|
|
3023
|
+
configDir,
|
|
3024
|
+
}, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
|
|
2603
3025
|
this.processor = processor;
|
|
2604
|
-
|
|
3026
|
+
this.queue = this.factories.createEventQueue({
|
|
2605
3027
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
2606
3028
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
2607
3029
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2608
3030
|
});
|
|
2609
|
-
this.
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
? new GitignoreFilter(this.config.watch.paths)
|
|
2613
|
-
: undefined;
|
|
2614
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2615
|
-
maxRetries: this.config.maxRetries,
|
|
2616
|
-
maxBackoffMs: this.config.maxBackoffMs,
|
|
2617
|
-
onFatalError: this.runtimeOptions.onFatalError,
|
|
2618
|
-
gitignoreFilter,
|
|
2619
|
-
});
|
|
2620
|
-
this.watcher = watcher;
|
|
2621
|
-
const server = this.factories.createApiServer({
|
|
2622
|
-
processor,
|
|
2623
|
-
vectorStore,
|
|
2624
|
-
embeddingProvider,
|
|
2625
|
-
queue,
|
|
2626
|
-
config: this.config,
|
|
2627
|
-
logger,
|
|
2628
|
-
});
|
|
2629
|
-
this.server = server;
|
|
2630
|
-
await server.listen({
|
|
2631
|
-
host: this.config.api?.host ?? '127.0.0.1',
|
|
2632
|
-
port: this.config.api?.port ?? 3456,
|
|
2633
|
-
});
|
|
2634
|
-
watcher.start();
|
|
3031
|
+
this.watcher = this.createWatcher(this.queue, processor, logger);
|
|
3032
|
+
this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
|
|
3033
|
+
this.watcher.start();
|
|
2635
3034
|
this.startConfigWatch();
|
|
2636
3035
|
logger.info('jeeves-watcher started');
|
|
2637
3036
|
}
|
|
@@ -2662,22 +3061,61 @@ class JeevesWatcher {
|
|
|
2662
3061
|
}
|
|
2663
3062
|
this.logger?.info('jeeves-watcher stopped');
|
|
2664
3063
|
}
|
|
3064
|
+
async initEmbeddingAndStore(logger) {
|
|
3065
|
+
let embeddingProvider;
|
|
3066
|
+
try {
|
|
3067
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
3068
|
+
}
|
|
3069
|
+
catch (error) {
|
|
3070
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
3071
|
+
throw error;
|
|
3072
|
+
}
|
|
3073
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
3074
|
+
await vectorStore.ensureCollection();
|
|
3075
|
+
return { embeddingProvider, vectorStore };
|
|
3076
|
+
}
|
|
3077
|
+
createWatcher(queue, processor, logger) {
|
|
3078
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
3079
|
+
const gitignoreFilter = respectGitignore
|
|
3080
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
3081
|
+
: undefined;
|
|
3082
|
+
return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
3083
|
+
maxRetries: this.config.maxRetries,
|
|
3084
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
3085
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
3086
|
+
gitignoreFilter,
|
|
3087
|
+
});
|
|
3088
|
+
}
|
|
3089
|
+
async startApiServer(processor, vectorStore, embeddingProvider, logger) {
|
|
3090
|
+
const server = this.factories.createApiServer({
|
|
3091
|
+
processor,
|
|
3092
|
+
vectorStore,
|
|
3093
|
+
embeddingProvider,
|
|
3094
|
+
queue: this.queue,
|
|
3095
|
+
config: this.config,
|
|
3096
|
+
logger,
|
|
3097
|
+
});
|
|
3098
|
+
await server.listen({
|
|
3099
|
+
host: this.config.api?.host ?? '127.0.0.1',
|
|
3100
|
+
port: this.config.api?.port ?? 3456,
|
|
3101
|
+
});
|
|
3102
|
+
return server;
|
|
3103
|
+
}
|
|
2665
3104
|
startConfigWatch() {
|
|
2666
3105
|
const logger = this.logger;
|
|
2667
3106
|
if (!logger)
|
|
2668
3107
|
return;
|
|
2669
3108
|
const enabled = this.config.configWatch?.enabled ?? true;
|
|
2670
|
-
if (!enabled)
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
3109
|
+
if (!enabled || !this.configPath) {
|
|
3110
|
+
if (!this.configPath) {
|
|
3111
|
+
logger.debug('Config watch enabled, but no config path was provided');
|
|
3112
|
+
}
|
|
2674
3113
|
return;
|
|
2675
3114
|
}
|
|
2676
|
-
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
2677
3115
|
this.configWatcher = new ConfigWatcher({
|
|
2678
3116
|
configPath: this.configPath,
|
|
2679
3117
|
enabled,
|
|
2680
|
-
debounceMs,
|
|
3118
|
+
debounceMs: this.config.configWatch?.debounceMs ?? 10000,
|
|
2681
3119
|
logger,
|
|
2682
3120
|
onChange: async () => this.reloadConfig(),
|
|
2683
3121
|
});
|
|
@@ -2699,7 +3137,9 @@ class JeevesWatcher {
|
|
|
2699
3137
|
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
2700
3138
|
this.config = newConfig;
|
|
2701
3139
|
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
2702
|
-
|
|
3140
|
+
const reloadConfigDir = node_path.dirname(this.configPath);
|
|
3141
|
+
const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
|
|
3142
|
+
processor.updateRules(compiledRules, newTemplateEngine);
|
|
2703
3143
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
2704
3144
|
}
|
|
2705
3145
|
catch (error) {
|
|
@@ -2707,19 +3147,7 @@ class JeevesWatcher {
|
|
|
2707
3147
|
}
|
|
2708
3148
|
}
|
|
2709
3149
|
}
|
|
2710
|
-
|
|
2711
|
-
* Create and start a JeevesWatcher from a config file path.
|
|
2712
|
-
*
|
|
2713
|
-
* @param configPath - Optional path to the configuration file.
|
|
2714
|
-
* @returns The running JeevesWatcher instance.
|
|
2715
|
-
*/
|
|
2716
|
-
async function startFromConfig(configPath) {
|
|
2717
|
-
const config = await loadConfig(configPath);
|
|
2718
|
-
const app = new JeevesWatcher(config, configPath);
|
|
2719
|
-
installShutdownHandlers(() => app.stop());
|
|
2720
|
-
await app.start();
|
|
2721
|
-
return app;
|
|
2722
|
-
}
|
|
3150
|
+
// startFromConfig re-exported from ./startFromConfig
|
|
2723
3151
|
|
|
2724
3152
|
exports.DocumentProcessor = DocumentProcessor;
|
|
2725
3153
|
exports.EventQueue = EventQueue;
|
|
@@ -2727,15 +3155,18 @@ exports.FileSystemWatcher = FileSystemWatcher;
|
|
|
2727
3155
|
exports.GitignoreFilter = GitignoreFilter;
|
|
2728
3156
|
exports.JeevesWatcher = JeevesWatcher;
|
|
2729
3157
|
exports.SystemHealth = SystemHealth;
|
|
3158
|
+
exports.TemplateEngine = TemplateEngine;
|
|
2730
3159
|
exports.VectorStoreClient = VectorStoreClient;
|
|
2731
3160
|
exports.apiConfigSchema = apiConfigSchema;
|
|
2732
3161
|
exports.applyRules = applyRules;
|
|
2733
3162
|
exports.buildAttributes = buildAttributes;
|
|
3163
|
+
exports.buildTemplateEngine = buildTemplateEngine;
|
|
2734
3164
|
exports.compileRules = compileRules;
|
|
2735
3165
|
exports.configWatchConfigSchema = configWatchConfigSchema;
|
|
2736
3166
|
exports.contentHash = contentHash;
|
|
2737
3167
|
exports.createApiServer = createApiServer;
|
|
2738
3168
|
exports.createEmbeddingProvider = createEmbeddingProvider;
|
|
3169
|
+
exports.createHandlebarsInstance = createHandlebarsInstance;
|
|
2739
3170
|
exports.createLogger = createLogger;
|
|
2740
3171
|
exports.deleteMetadata = deleteMetadata;
|
|
2741
3172
|
exports.embeddingConfigSchema = embeddingConfigSchema;
|
|
@@ -2743,10 +3174,13 @@ exports.extractText = extractText;
|
|
|
2743
3174
|
exports.inferenceRuleSchema = inferenceRuleSchema;
|
|
2744
3175
|
exports.jeevesWatcherConfigSchema = jeevesWatcherConfigSchema;
|
|
2745
3176
|
exports.loadConfig = loadConfig;
|
|
3177
|
+
exports.loadCustomHelpers = loadCustomHelpers;
|
|
2746
3178
|
exports.loggingConfigSchema = loggingConfigSchema;
|
|
2747
3179
|
exports.metadataPath = metadataPath;
|
|
2748
3180
|
exports.pointId = pointId;
|
|
2749
3181
|
exports.readMetadata = readMetadata;
|
|
3182
|
+
exports.registerBuiltinHelpers = registerBuiltinHelpers;
|
|
3183
|
+
exports.resolveTemplateSource = resolveTemplateSource;
|
|
2750
3184
|
exports.startFromConfig = startFromConfig;
|
|
2751
3185
|
exports.vectorStoreConfigSchema = vectorStoreConfigSchema;
|
|
2752
3186
|
exports.watchConfigSchema = watchConfigSchema;
|