@karmaniverous/jeeves-watcher 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +69 -14
- package/dist/cjs/index.js +996 -562
- package/dist/cli/jeeves-watcher/index.js +825 -396
- package/dist/index.d.ts +160 -16
- package/dist/index.iife.js +824 -397
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +992 -564
- package/package.json +12 -4
package/dist/mjs/index.js
CHANGED
|
@@ -2,14 +2,22 @@ import Fastify from 'fastify';
|
|
|
2
2
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
3
3
|
import { resolve, dirname, join, relative, extname, basename } from 'node:path';
|
|
4
4
|
import picomatch from 'picomatch';
|
|
5
|
-
import { omit, get } from 'radash';
|
|
5
|
+
import { omit, capitalize, title, camel, snake, dash, isEqual, get } from 'radash';
|
|
6
6
|
import { createHash } from 'node:crypto';
|
|
7
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
8
|
+
import ignore from 'ignore';
|
|
9
|
+
import Handlebars from 'handlebars';
|
|
10
|
+
import dayjs from 'dayjs';
|
|
11
|
+
import { toMdast } from 'hast-util-to-mdast';
|
|
12
|
+
import { fromADF } from 'mdast-util-from-adf';
|
|
13
|
+
import { toMarkdown } from 'mdast-util-to-markdown';
|
|
14
|
+
import rehypeParse from 'rehype-parse';
|
|
15
|
+
import { unified } from 'unified';
|
|
16
|
+
import chokidar from 'chokidar';
|
|
7
17
|
import { cosmiconfig } from 'cosmiconfig';
|
|
8
18
|
import { z, ZodError } from 'zod';
|
|
9
19
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
10
20
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
11
|
-
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
12
|
-
import ignore from 'ignore';
|
|
13
21
|
import pino from 'pino';
|
|
14
22
|
import { v5 } from 'uuid';
|
|
15
23
|
import * as cheerio from 'cheerio';
|
|
@@ -19,7 +27,6 @@ import Ajv from 'ajv';
|
|
|
19
27
|
import addFormats from 'ajv-formats';
|
|
20
28
|
import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
21
29
|
import { QdrantClient } from '@qdrant/js-client-rest';
|
|
22
|
-
import chokidar from 'chokidar';
|
|
23
30
|
|
|
24
31
|
/**
|
|
25
32
|
* @module util/normalizeError
|
|
@@ -418,183 +425,663 @@ function createApiServer(options) {
|
|
|
418
425
|
}
|
|
419
426
|
|
|
420
427
|
/**
|
|
421
|
-
* @module
|
|
422
|
-
*
|
|
428
|
+
* @module gitignore
|
|
429
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
423
430
|
*/
|
|
424
|
-
/** Default root-level config values. */
|
|
425
|
-
const ROOT_DEFAULTS = {
|
|
426
|
-
metadataDir: '.jeeves-watcher',
|
|
427
|
-
shutdownTimeoutMs: 10000,
|
|
428
|
-
};
|
|
429
|
-
/** Default configWatch values. */
|
|
430
|
-
const CONFIG_WATCH_DEFAULTS = {
|
|
431
|
-
enabled: true,
|
|
432
|
-
debounceMs: 1000,
|
|
433
|
-
};
|
|
434
|
-
/** Default API values. */
|
|
435
|
-
const API_DEFAULTS = {
|
|
436
|
-
host: '127.0.0.1',
|
|
437
|
-
port: 3456,
|
|
438
|
-
};
|
|
439
|
-
/** Default logging values. */
|
|
440
|
-
const LOGGING_DEFAULTS = {
|
|
441
|
-
level: 'info',
|
|
442
|
-
};
|
|
443
|
-
/** Default watch configuration. */
|
|
444
|
-
const WATCH_DEFAULTS = {
|
|
445
|
-
debounceMs: 300,
|
|
446
|
-
stabilityThresholdMs: 500,
|
|
447
|
-
usePolling: false,
|
|
448
|
-
pollIntervalMs: 1000,
|
|
449
|
-
respectGitignore: true,
|
|
450
|
-
};
|
|
451
|
-
/** Default embedding configuration. */
|
|
452
|
-
const EMBEDDING_DEFAULTS = {
|
|
453
|
-
chunkSize: 1000,
|
|
454
|
-
chunkOverlap: 200,
|
|
455
|
-
dimensions: 3072,
|
|
456
|
-
rateLimitPerMinute: 300,
|
|
457
|
-
concurrency: 5,
|
|
458
|
-
};
|
|
459
|
-
|
|
460
431
|
/**
|
|
461
|
-
*
|
|
432
|
+
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
433
|
+
* Returns `undefined` if no repo is found.
|
|
462
434
|
*/
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
.
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
.describe('Polling interval in milliseconds when usePolling is enabled.'),
|
|
479
|
-
/** Whether to use polling instead of native watchers. */
|
|
480
|
-
usePolling: z
|
|
481
|
-
.boolean()
|
|
482
|
-
.optional()
|
|
483
|
-
.describe('Use polling instead of native file system events (for network drives).'),
|
|
484
|
-
/** Debounce delay in milliseconds for file change events. */
|
|
485
|
-
debounceMs: z
|
|
486
|
-
.number()
|
|
487
|
-
.optional()
|
|
488
|
-
.describe('Debounce delay in milliseconds for file change events.'),
|
|
489
|
-
/** Time in milliseconds a file must be stable before processing. */
|
|
490
|
-
stabilityThresholdMs: z
|
|
491
|
-
.number()
|
|
492
|
-
.optional()
|
|
493
|
-
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
494
|
-
/** Whether to respect .gitignore files when processing. */
|
|
495
|
-
respectGitignore: z
|
|
496
|
-
.boolean()
|
|
497
|
-
.optional()
|
|
498
|
-
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
499
|
-
});
|
|
435
|
+
/**
 * Locate the root of the git repository that contains `startDir`.
 *
 * Walks upward from `startDir` and returns the first directory holding a
 * `.git` *directory*. A `.git` file (as used by worktrees and submodules) is
 * not treated as a repo marker. Filesystem roots (`/`, `C:\`, ...) are never
 * reported as repo roots, regardless of which drive the process runs on.
 *
 * @param startDir - Directory to start from; resolved against the cwd.
 * @returns Absolute path of the repo root, or `undefined` if none is found.
 */
function findRepoRoot(startDir) {
    let dir = resolve(startDir);
    for (;;) {
        const parent = dirname(dir);
        // `dirname` of a filesystem root is the root itself: nothing left to probe.
        if (parent === dir)
            return undefined;
        let hasGitDir = false;
        try {
            // Single stat call: no window in which `.git` can vanish between an
            // existence check and the stat, and a missing entry does not throw.
            hasGitDir =
                statSync(join(dir, '.git'), { throwIfNoEntry: false })?.isDirectory() ??
                    false;
        }
        catch {
            // Unreadable entry (e.g. permissions): treat as "not a repo root here".
            hasGitDir = false;
        }
        if (hasGitDir)
            return dir;
        dir = parent;
    }
}
|
|
500
450
|
/**
|
|
501
|
-
*
|
|
451
|
+
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
452
|
+
* that can be scanned for a repo root.
|
|
502
453
|
*/
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
.
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
454
|
+
/**
 * Map a watch path (directory, file, or glob) to an existing directory from
 * which a repo-root search can start.
 *
 * - Existing directory: returned as an absolute path.
 * - Existing file: its parent directory.
 * - Glob: the literal prefix before the first glob character, reduced to a
 *   directory; `undefined` if that directory does not exist.
 * - Anything else: `undefined`.
 *
 * @param watchPath - Watch path as configured.
 * @returns Absolute directory path, or `undefined`.
 */
function watchPathToScanDir(watchPath) {
    const target = resolve(watchPath);
    let stats;
    try {
        stats = statSync(target);
    }
    catch {
        // Path does not exist as written; it may still be a glob.
        stats = undefined;
    }
    if (stats) {
        return stats.isDirectory() ? target : dirname(target);
    }
    // Glob fallback: keep only the part before the first glob character.
    const firstGlob = /[*?[{]/.exec(watchPath);
    if (firstGlob === null) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlob.index).trim();
    let baseDir;
    if (literalPrefix.length === 0) {
        baseDir = '.';
    }
    else if (literalPrefix.endsWith('/') || literalPrefix.endsWith('\\')) {
        baseDir = literalPrefix;
    }
    else {
        // Prefix ends mid-segment (e.g. `docs/a` from `docs/a*.md`).
        baseDir = dirname(literalPrefix);
    }
    const scanDir = resolve(baseDir);
    return existsSync(scanDir) ? scanDir : undefined;
}
|
|
515
478
|
/**
|
|
516
|
-
*
|
|
479
|
+
* Recursively find all `.gitignore` files under `dir`.
|
|
480
|
+
* Skips `.git` and `node_modules` directories for performance.
|
|
517
481
|
*/
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
.
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
.
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
.string()
|
|
547
|
-
.optional()
|
|
548
|
-
.describe('API key for embedding provider (supports ${ENV_VAR} substitution).'),
|
|
549
|
-
/** Maximum embedding requests per minute. */
|
|
550
|
-
rateLimitPerMinute: z
|
|
551
|
-
.number()
|
|
552
|
-
.optional()
|
|
553
|
-
.describe('Maximum embedding API requests per minute (rate limiting).'),
|
|
554
|
-
/** Maximum concurrent embedding requests. */
|
|
555
|
-
concurrency: z
|
|
556
|
-
.number()
|
|
557
|
-
.optional()
|
|
558
|
-
.describe('Maximum concurrent embedding requests.'),
|
|
559
|
-
});
|
|
482
|
+
/**
 * Recursively collect the `.gitignore` files under `dir`.
 *
 * A directory's own `.gitignore` is listed before those of its
 * subdirectories. `.git` and `node_modules` are skipped for performance.
 * Directory symlinks are not followed, so symlink cycles cannot cause
 * repeated traversal or duplicate results. Only regular files (or symlinks
 * to files) named `.gitignore` are reported, so a directory with that name
 * is never handed to the parser.
 *
 * @param dir - Directory to scan.
 * @returns Paths of the `.gitignore` files found; empty if `dir` is unreadable.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const walk = (current) => {
        const candidate = join(current, '.gitignore');
        try {
            if (statSync(candidate, { throwIfNoEntry: false })?.isFile()) {
                results.push(candidate);
            }
        }
        catch {
            // Inaccessible `.gitignore`: skip it, keep scanning.
        }
        let entries;
        try {
            entries = readdirSync(current, { withFileTypes: true });
        }
        catch {
            // Unreadable directory: nothing more to collect below it.
            return;
        }
        for (const entry of entries) {
            if (entry.name === '.git' || entry.name === 'node_modules')
                continue;
            // Dirent#isDirectory() is false for symlinks, so links are not followed.
            if (entry.isDirectory()) {
                walk(join(current, entry.name));
            }
        }
    };
    walk(dir);
    return results;
}
|
|
560
510
|
/**
|
|
561
|
-
*
|
|
511
|
+
* Parse a `.gitignore` file into an `ignore` instance.
|
|
562
512
|
*/
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
.describe('Qdrant server URL (e.g., "http://localhost:6333").'),
|
|
568
|
-
/** Qdrant collection name. */
|
|
569
|
-
collectionName: z
|
|
570
|
-
.string()
|
|
571
|
-
.describe('Qdrant collection name for vector storage.'),
|
|
572
|
-
/** Qdrant API key. */
|
|
573
|
-
apiKey: z
|
|
574
|
-
.string()
|
|
575
|
-
.optional()
|
|
576
|
-
.describe('Qdrant API key for authentication (supports ${ENV_VAR} substitution).'),
|
|
577
|
-
});
|
|
513
|
+
/**
 * Read a `.gitignore` file and load its contents into a fresh `ignore` matcher.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read (UTF-8).
 * @returns The `ignore` instance populated with the file's patterns.
 */
function parseGitignore(gitignorePath) {
    const patterns = readFileSync(gitignorePath, 'utf8');
    const matcher = ignore();
    return matcher.add(patterns);
}
|
|
578
517
|
/**
|
|
579
|
-
*
|
|
518
|
+
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
580
519
|
*/
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
.string()
|
|
585
|
-
.optional()
|
|
586
|
-
.describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),
|
|
587
|
-
/** Port to listen on. */
|
|
588
|
-
port: z.number().optional().describe('Port for API server (e.g., 3456).'),
|
|
589
|
-
});
|
|
520
|
+
/**
 * Replace every backslash in `p` with a forward slash.
 *
 * @param p - Path string to normalize.
 * @returns The path with forward slashes only.
 */
function toForwardSlash(p) {
    const segments = p.split('\\');
    return segments.join('/');
}
|
|
590
523
|
/**
|
|
591
|
-
*
|
|
524
|
+
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
525
|
+
* `.gitignore` chain in git repositories.
|
|
592
526
|
*/
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
527
|
+
/**
 * Processor-level gitignore filter. Caches the parsed `.gitignore` files of
 * every git repo reachable from the watched paths and answers whether a
 * given file path is ignored.
 */
class GitignoreFilter {
    /** Known repos keyed by absolute root: `{ root, entries: [{ dir, ig }] }`. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Whether a `path.relative()` result points outside its base directory.
     *
     * Only a real `..` segment counts; a name such as `..hidden` is inside.
     * On Windows, `relative()` across drives yields an absolute path (drive
     * letter or UNC prefix), which also means "outside".
     */
    static #isOutside(rel) {
        return (rel === '..' ||
            rel.startsWith('../') ||
            rel.startsWith('..\\') ||
            rel.startsWith('\\\\') ||
            /^[a-zA-Z]:/.test(rel));
    }
    /** Order entries deepest-first, using directory path length as the depth proxy. */
    static #sortDeepestFirst(entries) {
        entries.sort((a, b) => b.dir.length - a.dir.length);
    }
    /**
     * Rebuild the cache: find the repo of each watch path and parse all of
     * that repo's `.gitignore` files. Previously cached repos are discarded.
     *
     * @param watchPaths - Paths being watched (directories, files or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: dirname(gf),
                ig: parseGitignore(gf),
            }));
            GitignoreFilter.#sortDeepestFirst(entries);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * NOTE(review): a match in any applicable file wins; a negation (`!pat`)
     * in a nested `.gitignore` does not override a shallower file's pattern.
     *
     * @param filePath - File path to check (resolved against the cwd).
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#isOutside(relative(repo.root, absPath)))
                continue;
            for (const entry of repo.entries) {
                const relToEntry = relative(entry.dir, absPath);
                // '' means `absPath` is the `.gitignore`'s own directory; the
                // `ignore` package rejects empty paths, so never pass it on.
                if (relToEntry === '' || GitignoreFilter.#isOutside(relToEntry))
                    continue;
                if (entry.ig.ignores(toForwardSlash(relToEntry))) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every cached repo containing the file is refreshed, so nested repos
     * that share the file do not keep stale patterns.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        let tracked = false;
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#isOutside(relative(repo.root, gitignoreDir)))
                continue;
            tracked = true;
            // Drop the stale entry for this directory, then re-add if the file remains.
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                GitignoreFilter.#sortDeepestFirst(repo.entries);
            }
        }
        if (tracked)
            return;
        // Not inside any cached repo: it may belong to a repo not scanned yet.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (!repoRoot || !existsSync(absPath))
            return;
        const entry = { dir: gitignoreDir, ig: parseGitignore(absPath) };
        const known = this.repos.get(repoRoot);
        if (known) {
            known.entries = known.entries.filter((e) => e.dir !== gitignoreDir);
            known.entries.push(entry);
            GitignoreFilter.#sortDeepestFirst(known.entries);
        }
        else {
            this.repos.set(repoRoot, { root: repoRoot, entries: [entry] });
        }
    }
}
|
|
638
|
+
|
|
639
|
+
/**
|
|
640
|
+
* @module templates/helpers
|
|
641
|
+
* Registers built-in Handlebars helpers for content templates.
|
|
642
|
+
*/
|
|
643
|
+
/** Pre-built rehype parser for HTML → hast conversion. */
|
|
644
|
+
const htmlParser = unified().use(rehypeParse, { fragment: true });
|
|
645
|
+
/**
 * Register all built-in helpers on a Handlebars instance.
 *
 * Helpers: `adfToMarkdown`, `markdownify`, `dateFormat`, `join`, `pluck`,
 * `lowercase`, `uppercase`, `capitalize`, `title`, `camel`, `snake`, `dash`,
 * `default`, `eq`, `json`.
 *
 * Handlebars appends its own options object as the last argument of every
 * helper call; helpers with optional arguments must not mistake it for a value.
 *
 * @param hbs - The Handlebars instance.
 */
function registerBuiltinHelpers(hbs) {
    /** Wrap text so Handlebars emits it without HTML-escaping. */
    const safe = (text) => new hbs.SafeString(text);
    /** Recognize the options object Handlebars passes as the last helper argument. */
    const isHelperOptions = (arg) => typeof arg === 'object' &&
        arg !== null &&
        typeof arg.hash === 'object' &&
        arg.hash !== null;
    /** Build a helper that applies `transform` to strings and yields '' otherwise. */
    const stringHelper = (transform) => (text) => typeof text === 'string' ? transform(text) : '';
    // Structural: ADF → Markdown
    hbs.registerHelper('adfToMarkdown', function (adf) {
        if (!adf || typeof adf !== 'object')
            return '';
        try {
            return safe(toMarkdown(fromADF(adf)).trim());
        }
        catch {
            // SafeString keeps the placeholder a literal comment instead of
            // being HTML-escaped into `&lt;!-- ... --&gt;`.
            return safe('<!-- ADF conversion failed -->');
        }
    });
    // Structural: HTML → Markdown
    hbs.registerHelper('markdownify', function (html) {
        if (typeof html !== 'string' || !html.trim())
            return '';
        try {
            const hast = htmlParser.parse(html);
            return safe(toMarkdown(toMdast(hast)).trim());
        }
        catch {
            return safe('<!-- HTML conversion failed -->');
        }
    });
    // Formatting: dateFormat (format defaults to YYYY-MM-DD)
    hbs.registerHelper('dateFormat', function (value, format) {
        if (value === undefined || value === null)
            return '';
        const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
        return dayjs(value).format(fmt);
    });
    // Formatting: join (separator defaults to ', ')
    hbs.registerHelper('join', function (arr, separator) {
        if (!Array.isArray(arr))
            return '';
        const sep = typeof separator === 'string' ? separator : ', ';
        return arr.join(sep);
    });
    // Formatting: pluck — collect `key` from each object item (undefined for non-objects)
    hbs.registerHelper('pluck', function (arr, key) {
        if (!Array.isArray(arr) || typeof key !== 'string')
            return [];
        return arr.map((item) => item && typeof item === 'object' ? item[key] : undefined);
    });
    // String transforms
    hbs.registerHelper('lowercase', stringHelper((text) => text.toLowerCase()));
    hbs.registerHelper('uppercase', stringHelper((text) => text.toUpperCase()));
    hbs.registerHelper('capitalize', stringHelper((text) => capitalize(text)));
    hbs.registerHelper('title', stringHelper((text) => title(text)));
    hbs.registerHelper('camel', stringHelper((text) => camel(text)));
    hbs.registerHelper('snake', stringHelper((text) => snake(text)));
    hbs.registerHelper('dash', stringHelper((text) => dash(text)));
    // default helper: first non-nullish of value, fallback, ''
    hbs.registerHelper('default', function (...args) {
        // Strip the trailing options object so `{{default x}}` does not
        // return it as the fallback.
        const params = isHelperOptions(args.at(-1)) ? args.slice(0, -1) : args;
        const [value, fallback] = params;
        return value ?? fallback ?? '';
    });
    // eq helper (deep equality)
    hbs.registerHelper('eq', function (a, b) {
        return isEqual(a, b);
    });
    // json helper (pretty-printed, unescaped)
    hbs.registerHelper('json', function (value) {
        // JSON.stringify yields `undefined` for undefined/functions; render ''
        // rather than the literal text "undefined".
        return safe(JSON.stringify(value, null, 2) ?? '');
    });
}
|
|
719
|
+
|
|
720
|
+
/**
|
|
721
|
+
* @module templates/engine
|
|
722
|
+
* Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
|
|
723
|
+
*/
|
|
724
|
+
/**
 * Resolve a template value to its source string.
 *
 * Resolution order:
 * 1. Ends in `.hbs` or `.handlebars` → file path (read relative to configDir)
 * 2. Is an own key of namedTemplates → named ref (resolved recursively)
 * 3. Otherwise → inline Handlebars template string
 *
 * Only own keys of `namedTemplates` count as named refs, so an inline
 * template such as `toString` or `constructor` is not mistaken for one.
 *
 * @param value - The template reference (inline, file path, or named ref).
 * @param namedTemplates - Named template definitions from config.
 * @param configDir - Directory to resolve relative file paths against.
 * @param visited - Set of visited named refs for cycle detection.
 * @returns The resolved template source string.
 * @throws TypeError if `value` (or a named template it leads to) is not a string.
 * @throws Error on a circular named reference; file read errors propagate.
 */
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
    if (typeof value !== 'string') {
        throw new TypeError(`Template reference must be a string, got ${typeof value}`);
    }
    // File path detection
    if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
        return readFileSync(resolve(configDir, value), 'utf-8');
    }
    // Named ref: own, defined keys only
    const isNamedRef = namedTemplates !== undefined &&
        namedTemplates !== null &&
        Object.hasOwn(namedTemplates, value) &&
        namedTemplates[value] !== undefined;
    if (!isNamedRef) {
        // Inline
        return value;
    }
    if (visited.has(value)) {
        throw new Error(`Circular template reference detected: ${value}`);
    }
    visited.add(value);
    return resolveTemplateSource(namedTemplates[value], namedTemplates, configDir, visited);
}
|
|
754
|
+
/**
 * Build an isolated Handlebars environment and register the built-in
 * helpers on it.
 *
 * @returns The new Handlebars instance with built-in helpers registered.
 */
function createHandlebarsInstance() {
    const instance = Handlebars.create();
    registerBuiltinHelpers(instance);
    return instance;
}
|
|
764
|
+
/**
 * Load custom helpers from file paths.
 *
 * Each file should export a default function that receives the Handlebars
 * instance. Modules without a default function export are skipped. Files
 * are loaded one after another, in the order given.
 *
 * @param hbs - The Handlebars instance.
 * @param paths - File paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 */
async function loadCustomHelpers(hbs, paths, configDir) {
    // Imported here so the block needs no extra file-level import.
    const { pathToFileURL } = await import('node:url');
    for (const p of paths) {
        const resolved = resolve(configDir, p);
        // ESM `import()` needs a URL: a raw absolute Windows path such as
        // `C:\helpers\x.js` is rejected as an unsupported URL scheme.
        const mod = await import(pathToFileURL(resolved).href);
        if (typeof mod.default === 'function') {
            mod.default(hbs);
        }
    }
}
|
|
782
|
+
/**
 * Template engine: compiles Handlebars sources, stores the compiled
 * functions under caller-chosen keys, and renders them on demand.
 */
class TemplateEngine {
    hbs;
    compiled = new Map();
    constructor(hbs) {
        this.hbs = hbs;
    }
    /**
     * Compile `source` and store the result under `key`, replacing any
     * template already stored there.
     *
     * @param key - Cache key (rule index or named template).
     * @param source - Handlebars template source.
     * @returns The compiled template function.
     */
    compile(key, source) {
        const template = this.hbs.compile(source);
        this.compiled.set(key, template);
        return template;
    }
    /**
     * Look up a compiled template.
     *
     * @param key - The cache key.
     * @returns The compiled template, or undefined if none is stored.
     */
    get(key) {
        return this.compiled.get(key);
    }
    /**
     * Render the template stored under `key` with the given context.
     *
     * @param key - The cache key of the compiled template.
     * @param context - The data context for rendering.
     * @returns The rendered string, or null if no template is stored under `key`.
     */
    render(key, context) {
        const template = this.compiled.get(key);
        return template ? template(context) : null;
    }
}
|
|
826
|
+
|
|
827
|
+
/**
|
|
828
|
+
* @module templates/buildTemplateEngine
|
|
829
|
+
* Factory to build a TemplateEngine from config, compiling all rule templates at load time.
|
|
830
|
+
*/
|
|
831
|
+
/**
 * Build a TemplateEngine from configuration, pre-compiling all rule templates.
 *
 * Each rule with a `template` is compiled under the key `rule-<index>`,
 * where `<index>` is the rule's position in `rules`. Custom helper paths
 * and template file paths are both resolved against `configDir`, falling
 * back to `'.'` when it is not given, so configured helpers are never
 * skipped silently.
 *
 * @param rules - The inference rules (may contain template fields).
 * @param namedTemplates - Named template definitions from config.
 * @param templateHelperPaths - Paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @returns The configured TemplateEngine, or undefined if no templates are used.
 */
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
    if (!rules.some((rule) => rule.template))
        return undefined;
    const baseDir = configDir ?? '.';
    const hbs = createHandlebarsInstance();
    // Load custom helpers before compiling so templates can reference them.
    if (templateHelperPaths?.length) {
        await loadCustomHelpers(hbs, templateHelperPaths, baseDir);
    }
    const engine = new TemplateEngine(hbs);
    // Compile all rule templates; the key uses the rule's index in `rules`.
    for (const [index, rule] of rules.entries()) {
        if (!rule.template)
            continue;
        const source = resolveTemplateSource(rule.template, namedTemplates, baseDir);
        engine.compile(`rule-${String(index)}`, source);
    }
    return engine;
}
|
|
859
|
+
|
|
860
|
+
/**
|
|
861
|
+
* @module app/configWatcher
|
|
862
|
+
* Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
|
|
863
|
+
*/
|
|
864
|
+
/**
 * Debounced config file watcher.
 *
 * Watches `options.configPath` with chokidar and, once change events have
 * been quiet for `options.debounceMs`, calls `options.onChange()`.
 *
 * Options read by this class: `enabled`, `configPath`, `debounceMs`,
 * `onChange`, `logger` (`info` and `error` methods).
 */
class ConfigWatcher {
    options;
    watcher;
    debounce;
    constructor(options) {
        this.options = options;
    }
    /**
     * Start watching. Does nothing when watching is disabled or a watcher
     * is already running (a repeated call would otherwise leak the first one).
     */
    start() {
        if (!this.options.enabled || this.watcher)
            return;
        this.watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        this.watcher.on('change', () => {
            // Restart the quiet period on every change event.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                this.debounce = undefined;
                // Covers both a synchronous throw and a rejected promise, so a
                // failing reload is logged instead of surfacing as an
                // unhandled rejection.
                Promise.resolve()
                    .then(() => this.options.onChange())
                    .catch((error) => {
                    this.options.logger.error({ err: normalizeError(error) }, 'Config reload failed');
                });
            }, this.options.debounceMs);
        });
        this.watcher.on('error', (error) => {
            this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
        });
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /**
     * Stop watching: cancel any pending debounced reload and close the
     * watcher. Safe to call when not started.
     */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        if (this.watcher) {
            await this.watcher.close();
            this.watcher = undefined;
        }
    }
}
|
|
906
|
+
|
|
907
|
+
/**
|
|
908
|
+
* @module config/defaults
|
|
909
|
+
* Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
|
|
910
|
+
*/
|
|
911
|
+
/** Default root-level config values. */
const ROOT_DEFAULTS = {
    // Metadata directory name; how it is resolved is not visible in this section.
    metadataDir: '.jeeves-watcher',
    // Shutdown timeout in milliseconds (10 seconds).
    shutdownTimeoutMs: 10000,
};
/** Default configWatch values (config file watching on, 1 second debounce). */
const CONFIG_WATCH_DEFAULTS = {
    enabled: true,
    debounceMs: 1000,
};
/** Default API values (loopback address, port 3456). */
const API_DEFAULTS = {
    host: '127.0.0.1',
    port: 3456,
};
/** Default logging values. */
const LOGGING_DEFAULTS = {
    level: 'info',
};
/** Default watch configuration. All durations are in milliseconds. */
const WATCH_DEFAULTS = {
    debounceMs: 300,
    stabilityThresholdMs: 500,
    usePolling: false,
    // Only relevant when polling is enabled, per the watch schema's description.
    pollIntervalMs: 1000,
    respectGitignore: true,
};
/** Default embedding configuration. */
const EMBEDDING_DEFAULTS = {
    // Chunk size and overlap are described in characters by the embedding schema.
    chunkSize: 1000,
    chunkOverlap: 200,
    // Vector dimensions; the schema notes this must match the model output.
    dimensions: 3072,
    rateLimitPerMinute: 300,
    concurrency: 5,
};
|
|
946
|
+
|
|
947
|
+
/**
 * Watch configuration for file system monitoring.
 *
 * Only `paths` is required. The other fields are optional and carry no
 * defaults in this schema.
 */
const watchConfigSchema = z.object({
    /** Glob patterns to watch (at least one). */
    paths: z
        .array(z.string())
        .min(1)
        .describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),
    /** Glob patterns to ignore. */
    ignored: z
        .array(z.string())
        .optional()
        .describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),
    /** Polling interval in milliseconds (used when `usePolling` is enabled). */
    pollIntervalMs: z
        .number()
        .optional()
        .describe('Polling interval in milliseconds when usePolling is enabled.'),
    /** Whether to use polling instead of native watchers. */
    usePolling: z
        .boolean()
        .optional()
        .describe('Use polling instead of native file system events (for network drives).'),
    /** Debounce delay in milliseconds for file change events. */
    debounceMs: z
        .number()
        .optional()
        .describe('Debounce delay in milliseconds for file change events.'),
    /** Time in milliseconds a file must be stable before processing. */
    stabilityThresholdMs: z
        .number()
        .optional()
        .describe('Time in milliseconds a file must remain unchanged before processing.'),
    /** Whether to respect .gitignore files when processing. */
    respectGitignore: z
        .boolean()
        .optional()
        .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
});
|
|
987
|
+
/**
 * Configuration watch settings (automatic reload when the config file
 * changes). Both fields are optional and carry no defaults in this schema.
 */
const configWatchConfigSchema = z.object({
    /** Whether config file watching is enabled. */
    enabled: z
        .boolean()
        .optional()
        .describe('Enable automatic reloading when config file changes.'),
    /** Debounce delay in milliseconds for config change events. */
    debounceMs: z
        .number()
        .optional()
        .describe('Debounce delay in milliseconds for config file change detection.'),
});
|
|
1002
|
+
/**
 * Embedding model configuration.
 *
 * `provider` and `model` have schema-level defaults; every other field is
 * optional with no default in this schema.
 */
const embeddingConfigSchema = z.object({
    /** The embedding model provider (defaults to 'gemini'). */
    provider: z
        .string()
        .default('gemini')
        .describe('Embedding provider name (e.g., "gemini", "openai").'),
    /** The embedding model name (defaults to 'gemini-embedding-001'). */
    model: z
        .string()
        .default('gemini-embedding-001')
        .describe('Embedding model identifier (e.g., "gemini-embedding-001", "text-embedding-3-small").'),
    /** Maximum chunk size in characters for splitting. */
    chunkSize: z
        .number()
        .optional()
        .describe('Maximum chunk size in characters for text splitting.'),
    /** Overlap between consecutive chunks in characters. */
    chunkOverlap: z
        .number()
        .optional()
        .describe('Character overlap between consecutive chunks.'),
    /** Embedding vector dimensions (must match the model's output). */
    dimensions: z
        .number()
        .optional()
        .describe('Embedding vector dimensions (must match model output).'),
    /** API key for the embedding provider. */
    apiKey: z
        .string()
        .optional()
        .describe('API key for embedding provider (supports ${ENV_VAR} substitution).'),
    /** Maximum embedding requests per minute. */
    rateLimitPerMinute: z
        .number()
        .optional()
        .describe('Maximum embedding API requests per minute (rate limiting).'),
    /** Maximum concurrent embedding requests. */
    concurrency: z
        .number()
        .optional()
        .describe('Maximum concurrent embedding requests.'),
});
|
|
1047
|
+
/**
 * Vector store configuration for Qdrant.
 *
 * `url` and `collectionName` are required; `apiKey` is optional.
 */
const vectorStoreConfigSchema = z.object({
    /** Qdrant server URL. */
    url: z
        .string()
        .describe('Qdrant server URL (e.g., "http://localhost:6333").'),
    /** Qdrant collection name. */
    collectionName: z
        .string()
        .describe('Qdrant collection name for vector storage.'),
    /** Qdrant API key. */
    apiKey: z
        .string()
        .optional()
        .describe('Qdrant API key for authentication (supports ${ENV_VAR} substitution).'),
});
|
|
1065
|
+
/**
 * Zod schema for the API server section of the configuration.
 *
 * Both fields are optional.
 */
const apiConfigSchema = z.object({
    // Address the API server binds to.
    host: z.string().optional().describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),
    // TCP port the API server listens on.
    port: z.number().optional().describe('Port for API server (e.g., 3456).'),
});
|
|
1077
|
+
/**
|
|
1078
|
+
* Logging configuration.
|
|
1079
|
+
*/
|
|
1080
|
+
const loggingConfigSchema = z.object({
|
|
1081
|
+
/** Log level. */
|
|
1082
|
+
level: z
|
|
1083
|
+
.string()
|
|
1084
|
+
.optional()
|
|
598
1085
|
.describe('Logging level (trace, debug, info, warn, error, fatal).'),
|
|
599
1086
|
/** Log file path. */
|
|
600
1087
|
file: z
|
|
@@ -618,7 +1105,12 @@ const inferenceRuleSchema = z.object({
|
|
|
618
1105
|
map: z
|
|
619
1106
|
.union([jsonMapMapSchema, z.string()])
|
|
620
1107
|
.optional()
|
|
621
|
-
.describe('JsonMap transformation (inline definition
|
|
1108
|
+
.describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
|
|
1109
|
+
/** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
|
|
1110
|
+
template: z
|
|
1111
|
+
.string()
|
|
1112
|
+
.optional()
|
|
1113
|
+
.describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
|
|
622
1114
|
});
|
|
623
1115
|
/**
|
|
624
1116
|
* Top-level configuration for jeeves-watcher.
|
|
@@ -655,7 +1147,23 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
655
1147
|
maps: z
|
|
656
1148
|
.record(z.string(), jsonMapMapSchema)
|
|
657
1149
|
.optional()
|
|
658
|
-
.describe('Reusable named JsonMap transformations.'),
|
|
1150
|
+
.describe('Reusable named JsonMap transformations.'),
|
|
1151
|
+
/** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
|
|
1152
|
+
templates: z
|
|
1153
|
+
.record(z.string(), z.string())
|
|
1154
|
+
.optional()
|
|
1155
|
+
.describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
|
|
1156
|
+
/** Custom Handlebars helper registration. */
|
|
1157
|
+
templateHelpers: z
|
|
1158
|
+
.object({
|
|
1159
|
+
/** File paths to custom helper modules. */
|
|
1160
|
+
paths: z
|
|
1161
|
+
.array(z.string())
|
|
1162
|
+
.optional()
|
|
1163
|
+
.describe('File paths to custom helper modules.'),
|
|
1164
|
+
})
|
|
1165
|
+
.optional()
|
|
1166
|
+
.describe('Custom Handlebars helper registration.'),
|
|
659
1167
|
/** Logging configuration. */
|
|
660
1168
|
logging: loggingConfigSchema.optional().describe('Logging configuration.'),
|
|
661
1169
|
/** Timeout in milliseconds for graceful shutdown. */
|
|
@@ -905,266 +1413,60 @@ function createGeminiProvider(config, logger) {
|
|
|
905
1413
|
dimensions,
|
|
906
1414
|
async embed(texts) {
|
|
907
1415
|
const vectors = await retry(async (attempt) => {
|
|
908
|
-
if (attempt > 1) {
|
|
909
|
-
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
910
|
-
}
|
|
911
|
-
// embedDocuments returns vectors for multiple texts
|
|
912
|
-
return embedder.embedDocuments(texts);
|
|
913
|
-
}, {
|
|
914
|
-
attempts: 5,
|
|
915
|
-
baseDelayMs: 500,
|
|
916
|
-
maxDelayMs: 10_000,
|
|
917
|
-
jitter: 0.2,
|
|
918
|
-
onRetry: ({ attempt, delayMs, error }) => {
|
|
919
|
-
log.warn({
|
|
920
|
-
attempt,
|
|
921
|
-
delayMs,
|
|
922
|
-
provider: 'gemini',
|
|
923
|
-
model: config.model,
|
|
924
|
-
err: normalizeError(error),
|
|
925
|
-
}, 'Embedding call failed; will retry');
|
|
926
|
-
},
|
|
927
|
-
});
|
|
928
|
-
// Validate dimensions
|
|
929
|
-
for (const vector of vectors) {
|
|
930
|
-
if (vector.length !== dimensions) {
|
|
931
|
-
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
932
|
-
}
|
|
933
|
-
}
|
|
934
|
-
return vectors;
|
|
935
|
-
},
|
|
936
|
-
};
|
|
937
|
-
}
|
|
938
|
-
function createMockFromConfig(config) {
|
|
939
|
-
const dimensions = config.dimensions ?? 768;
|
|
940
|
-
return createMockProvider(dimensions);
|
|
941
|
-
}
|
|
942
|
-
const embeddingProviderRegistry = new Map([
|
|
943
|
-
['mock', createMockFromConfig],
|
|
944
|
-
['gemini', createGeminiProvider],
|
|
945
|
-
]);
|
|
946
|
-
/**
|
|
947
|
-
* Create an embedding provider based on the given configuration.
|
|
948
|
-
*
|
|
949
|
-
* Each provider is responsible for its own default dimensions.
|
|
950
|
-
*
|
|
951
|
-
* @param config - The embedding configuration.
|
|
952
|
-
* @param logger - Optional pino logger for retry warnings.
|
|
953
|
-
* @returns An {@link EmbeddingProvider} instance.
|
|
954
|
-
* @throws If the configured provider is not supported.
|
|
955
|
-
*/
|
|
956
|
-
function createEmbeddingProvider(config, logger) {
|
|
957
|
-
const factory = embeddingProviderRegistry.get(config.provider);
|
|
958
|
-
if (!factory) {
|
|
959
|
-
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
960
|
-
}
|
|
961
|
-
return factory(config, logger);
|
|
962
|
-
}
|
|
963
|
-
|
|
964
|
-
/**
|
|
965
|
-
* @module gitignore
|
|
966
|
-
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
967
|
-
*/
|
|
968
|
-
/**
|
|
969
|
-
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
970
|
-
* Returns `undefined` if no repo is found.
|
|
971
|
-
*/
|
|
972
|
-
function findRepoRoot(startDir) {
|
|
973
|
-
let dir = resolve(startDir);
|
|
974
|
-
const root = resolve('/');
|
|
975
|
-
while (dir !== root) {
|
|
976
|
-
if (existsSync(join(dir, '.git')) &&
|
|
977
|
-
statSync(join(dir, '.git')).isDirectory()) {
|
|
978
|
-
return dir;
|
|
979
|
-
}
|
|
980
|
-
const parent = dirname(dir);
|
|
981
|
-
if (parent === dir)
|
|
982
|
-
break;
|
|
983
|
-
dir = parent;
|
|
984
|
-
}
|
|
985
|
-
return undefined;
|
|
986
|
-
}
|
|
987
|
-
/**
|
|
988
|
-
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
989
|
-
* that can be scanned for a repo root.
|
|
990
|
-
*/
|
|
991
|
-
function watchPathToScanDir(watchPath) {
|
|
992
|
-
const absPath = resolve(watchPath);
|
|
993
|
-
try {
|
|
994
|
-
return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
|
|
995
|
-
}
|
|
996
|
-
catch {
|
|
997
|
-
// ignore
|
|
998
|
-
}
|
|
999
|
-
// If this is a glob, fall back to the non-glob prefix.
|
|
1000
|
-
const globMatch = /[*?[{]/.exec(watchPath);
|
|
1001
|
-
if (!globMatch)
|
|
1002
|
-
return undefined;
|
|
1003
|
-
const prefix = watchPath.slice(0, globMatch.index);
|
|
1004
|
-
const trimmed = prefix.trim();
|
|
1005
|
-
const baseDir = trimmed.length === 0
|
|
1006
|
-
? '.'
|
|
1007
|
-
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
1008
|
-
? trimmed
|
|
1009
|
-
: dirname(trimmed);
|
|
1010
|
-
const resolved = resolve(baseDir);
|
|
1011
|
-
if (!existsSync(resolved))
|
|
1012
|
-
return undefined;
|
|
1013
|
-
return resolved;
|
|
1014
|
-
}
|
|
1015
|
-
/**
|
|
1016
|
-
* Recursively find all `.gitignore` files under `dir`.
|
|
1017
|
-
* Skips `.git` and `node_modules` directories for performance.
|
|
1018
|
-
*/
|
|
1019
|
-
function findGitignoreFiles(dir) {
|
|
1020
|
-
const results = [];
|
|
1021
|
-
const gitignorePath = join(dir, '.gitignore');
|
|
1022
|
-
if (existsSync(gitignorePath)) {
|
|
1023
|
-
results.push(gitignorePath);
|
|
1024
|
-
}
|
|
1025
|
-
let entries;
|
|
1026
|
-
try {
|
|
1027
|
-
entries = readdirSync(dir);
|
|
1028
|
-
}
|
|
1029
|
-
catch {
|
|
1030
|
-
return results;
|
|
1031
|
-
}
|
|
1032
|
-
for (const entry of entries) {
|
|
1033
|
-
if (entry === '.git' || entry === 'node_modules')
|
|
1034
|
-
continue;
|
|
1035
|
-
const fullPath = join(dir, entry);
|
|
1036
|
-
try {
|
|
1037
|
-
if (statSync(fullPath).isDirectory()) {
|
|
1038
|
-
results.push(...findGitignoreFiles(fullPath));
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
catch {
|
|
1042
|
-
// Skip inaccessible entries
|
|
1043
|
-
}
|
|
1044
|
-
}
|
|
1045
|
-
return results;
|
|
1046
|
-
}
|
|
1047
|
-
/**
|
|
1048
|
-
* Parse a `.gitignore` file into an `ignore` instance.
|
|
1049
|
-
*/
|
|
1050
|
-
function parseGitignore(gitignorePath) {
|
|
1051
|
-
const content = readFileSync(gitignorePath, 'utf8');
|
|
1052
|
-
return ignore().add(content);
|
|
1053
|
-
}
|
|
1054
|
-
/**
|
|
1055
|
-
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
1056
|
-
*/
|
|
1057
|
-
function toForwardSlash(p) {
|
|
1058
|
-
return p.replace(/\\/g, '/');
|
|
1059
|
-
}
|
|
1060
|
-
/**
|
|
1061
|
-
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
1062
|
-
* `.gitignore` chain in git repositories.
|
|
1063
|
-
*/
|
|
1064
|
-
class GitignoreFilter {
|
|
1065
|
-
repos = new Map();
|
|
1066
|
-
/**
|
|
1067
|
-
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
1068
|
-
*
|
|
1069
|
-
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
1070
|
-
*/
|
|
1071
|
-
constructor(watchPaths) {
|
|
1072
|
-
this.scan(watchPaths);
|
|
1073
|
-
}
|
|
1074
|
-
/**
|
|
1075
|
-
* Scan paths for git repos and their `.gitignore` files.
|
|
1076
|
-
*/
|
|
1077
|
-
scan(watchPaths) {
|
|
1078
|
-
this.repos.clear();
|
|
1079
|
-
const scannedDirs = new Set();
|
|
1080
|
-
for (const watchPath of watchPaths) {
|
|
1081
|
-
const scanDir = watchPathToScanDir(watchPath);
|
|
1082
|
-
if (!scanDir)
|
|
1083
|
-
continue;
|
|
1084
|
-
if (scannedDirs.has(scanDir))
|
|
1085
|
-
continue;
|
|
1086
|
-
scannedDirs.add(scanDir);
|
|
1087
|
-
const repoRoot = findRepoRoot(scanDir);
|
|
1088
|
-
if (!repoRoot)
|
|
1089
|
-
continue;
|
|
1090
|
-
if (this.repos.has(repoRoot))
|
|
1091
|
-
continue;
|
|
1092
|
-
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
1093
|
-
const entries = gitignoreFiles.map((gf) => ({
|
|
1094
|
-
dir: dirname(gf),
|
|
1095
|
-
ig: parseGitignore(gf),
|
|
1096
|
-
}));
|
|
1097
|
-
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
1098
|
-
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1099
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1100
|
-
}
|
|
1101
|
-
}
|
|
1102
|
-
/**
|
|
1103
|
-
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
1104
|
-
*
|
|
1105
|
-
* @param filePath - Absolute file path to check.
|
|
1106
|
-
* @returns `true` if the file should be ignored.
|
|
1107
|
-
*/
|
|
1108
|
-
isIgnored(filePath) {
|
|
1109
|
-
const absPath = resolve(filePath);
|
|
1110
|
-
for (const [, repo] of this.repos) {
|
|
1111
|
-
// Check if file is within this repo
|
|
1112
|
-
const relToRepo = relative(repo.root, absPath);
|
|
1113
|
-
if (relToRepo.startsWith('..') || relToRepo.startsWith(resolve('/'))) {
|
|
1114
|
-
continue;
|
|
1115
|
-
}
|
|
1116
|
-
// Check each `.gitignore` entry (deepest-first)
|
|
1117
|
-
for (const entry of repo.entries) {
|
|
1118
|
-
const relToEntry = relative(entry.dir, absPath);
|
|
1119
|
-
if (relToEntry.startsWith('..'))
|
|
1120
|
-
continue;
|
|
1121
|
-
const normalized = toForwardSlash(relToEntry);
|
|
1122
|
-
if (entry.ig.ignores(normalized)) {
|
|
1123
|
-
return true;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
}
|
|
1127
|
-
return false;
|
|
1128
|
-
}
|
|
1129
|
-
/**
|
|
1130
|
-
* Invalidate and re-parse a specific `.gitignore` file.
|
|
1131
|
-
* Call when a `.gitignore` file is added, changed, or removed.
|
|
1132
|
-
*
|
|
1133
|
-
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
1134
|
-
*/
|
|
1135
|
-
invalidate(gitignorePath) {
|
|
1136
|
-
const absPath = resolve(gitignorePath);
|
|
1137
|
-
const gitignoreDir = dirname(absPath);
|
|
1138
|
-
for (const [, repo] of this.repos) {
|
|
1139
|
-
const relToRepo = relative(repo.root, gitignoreDir);
|
|
1140
|
-
if (relToRepo.startsWith('..'))
|
|
1141
|
-
continue;
|
|
1142
|
-
// Remove old entry for this directory
|
|
1143
|
-
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
1144
|
-
// Re-parse if file still exists
|
|
1145
|
-
if (existsSync(absPath)) {
|
|
1146
|
-
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
1147
|
-
// Re-sort deepest-first
|
|
1148
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1149
|
-
}
|
|
1150
|
-
return;
|
|
1151
|
-
}
|
|
1152
|
-
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
1153
|
-
const repoRoot = findRepoRoot(gitignoreDir);
|
|
1154
|
-
if (repoRoot && existsSync(absPath)) {
|
|
1155
|
-
const entries = [
|
|
1156
|
-
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
1157
|
-
];
|
|
1158
|
-
if (this.repos.has(repoRoot)) {
|
|
1159
|
-
const repo = this.repos.get(repoRoot);
|
|
1160
|
-
repo.entries.push(entries[0]);
|
|
1161
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1162
|
-
}
|
|
1163
|
-
else {
|
|
1164
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1416
|
+
if (attempt > 1) {
|
|
1417
|
+
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
1418
|
+
}
|
|
1419
|
+
// embedDocuments returns vectors for multiple texts
|
|
1420
|
+
return embedder.embedDocuments(texts);
|
|
1421
|
+
}, {
|
|
1422
|
+
attempts: 5,
|
|
1423
|
+
baseDelayMs: 500,
|
|
1424
|
+
maxDelayMs: 10_000,
|
|
1425
|
+
jitter: 0.2,
|
|
1426
|
+
onRetry: ({ attempt, delayMs, error }) => {
|
|
1427
|
+
log.warn({
|
|
1428
|
+
attempt,
|
|
1429
|
+
delayMs,
|
|
1430
|
+
provider: 'gemini',
|
|
1431
|
+
model: config.model,
|
|
1432
|
+
err: normalizeError(error),
|
|
1433
|
+
}, 'Embedding call failed; will retry');
|
|
1434
|
+
},
|
|
1435
|
+
});
|
|
1436
|
+
// Validate dimensions
|
|
1437
|
+
for (const vector of vectors) {
|
|
1438
|
+
if (vector.length !== dimensions) {
|
|
1439
|
+
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
1440
|
+
}
|
|
1165
1441
|
}
|
|
1166
|
-
|
|
1442
|
+
return vectors;
|
|
1443
|
+
},
|
|
1444
|
+
};
|
|
1445
|
+
}
|
|
1446
|
+
/**
 * Build the mock embedding provider from an embedding config.
 *
 * @param config - The embedding configuration; only `dimensions` is read.
 * @returns The provider produced by `createMockProvider`, sized to
 *   `config.dimensions`, or 768 when that value is null or undefined.
 */
function createMockFromConfig(config) {
    return createMockProvider(config.dimensions ?? 768);
}
|
|
1450
|
+
// Maps a provider name from the config to the factory that builds it.
const embeddingProviderRegistry = new Map()
    .set('mock', createMockFromConfig)
    .set('gemini', createGeminiProvider);
|
|
1454
|
+
/**
 * Look up the factory registered for `config.provider` and invoke it.
 *
 * Default dimensions are left to the individual provider factories.
 *
 * @param config - The embedding configuration.
 * @param logger - Optional pino logger, forwarded to the factory.
 * @returns An {@link EmbeddingProvider} instance.
 * @throws If no factory is registered for the configured provider.
 */
function createEmbeddingProvider(config, logger) {
    const createProvider = embeddingProviderRegistry.get(config.provider);
    if (createProvider) {
        return createProvider(config, logger);
    }
    throw new Error(`Unsupported embedding provider: ${config.provider}`);
}
|
|
1169
1471
|
|
|
1170
1472
|
/**
|
|
@@ -1396,7 +1698,7 @@ function createJsonMapLib() {
|
|
|
1396
1698
|
};
|
|
1397
1699
|
}
|
|
1398
1700
|
/**
|
|
1399
|
-
* Apply compiled inference rules to file attributes, returning merged metadata.
|
|
1701
|
+
* Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
|
|
1400
1702
|
*
|
|
1401
1703
|
* Rules are evaluated in order; later rules override earlier ones.
|
|
1402
1704
|
* If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
|
|
@@ -1406,15 +1708,18 @@ function createJsonMapLib() {
|
|
|
1406
1708
|
* @param attributes - The file attributes to match against.
|
|
1407
1709
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1408
1710
|
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1409
|
-
* @
|
|
1711
|
+
* @param templateEngine - Optional template engine for rendering content templates.
|
|
1712
|
+
* @param configDir - Optional config directory for resolving .json map file paths.
|
|
1713
|
+
* @returns The merged metadata and optional rendered content.
|
|
1410
1714
|
*/
|
|
1411
|
-
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
1715
|
+
async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
|
|
1412
1716
|
// JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
|
|
1413
1717
|
// Our helper functions accept multiple args, which JsonMap supports at runtime.
|
|
1414
1718
|
const lib = createJsonMapLib();
|
|
1415
1719
|
let merged = {};
|
|
1720
|
+
let renderedContent = null;
|
|
1416
1721
|
const log = logger ?? console;
|
|
1417
|
-
for (const { rule, validate } of compiledRules) {
|
|
1722
|
+
for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
|
|
1418
1723
|
if (validate(attributes)) {
|
|
1419
1724
|
// Apply set resolution
|
|
1420
1725
|
const setOutput = resolveSet(rule.set, attributes);
|
|
@@ -1424,10 +1729,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1424
1729
|
let mapDef;
|
|
1425
1730
|
// Resolve map reference
|
|
1426
1731
|
if (typeof rule.map === 'string') {
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1732
|
+
if (rule.map.endsWith('.json') && configDir) {
|
|
1733
|
+
// File path: load from .json file
|
|
1734
|
+
try {
|
|
1735
|
+
const mapPath = resolve(configDir, rule.map);
|
|
1736
|
+
const raw = readFileSync(mapPath, 'utf-8');
|
|
1737
|
+
mapDef = JSON.parse(raw);
|
|
1738
|
+
}
|
|
1739
|
+
catch (error) {
|
|
1740
|
+
log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
|
|
1741
|
+
continue;
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
else {
|
|
1745
|
+
mapDef = namedMaps?.[rule.map];
|
|
1746
|
+
if (!mapDef) {
|
|
1747
|
+
log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
|
|
1748
|
+
continue;
|
|
1749
|
+
}
|
|
1431
1750
|
}
|
|
1432
1751
|
}
|
|
1433
1752
|
else {
|
|
@@ -1450,9 +1769,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1450
1769
|
log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1451
1770
|
}
|
|
1452
1771
|
}
|
|
1772
|
+
// Render template if present
|
|
1773
|
+
if (rule.template && templateEngine) {
|
|
1774
|
+
const templateKey = `rule-${String(ruleIndex)}`;
|
|
1775
|
+
// Build template context: attributes (with json spread at top) + map output
|
|
1776
|
+
const context = {
|
|
1777
|
+
...(attributes.json ?? {}),
|
|
1778
|
+
...attributes,
|
|
1779
|
+
...merged,
|
|
1780
|
+
};
|
|
1781
|
+
try {
|
|
1782
|
+
const result = templateEngine.render(templateKey, context);
|
|
1783
|
+
if (result && result.trim()) {
|
|
1784
|
+
renderedContent = result;
|
|
1785
|
+
}
|
|
1786
|
+
else {
|
|
1787
|
+
log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
|
|
1788
|
+
}
|
|
1789
|
+
}
|
|
1790
|
+
catch (error) {
|
|
1791
|
+
log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1453
1794
|
}
|
|
1454
1795
|
}
|
|
1455
|
-
return merged;
|
|
1796
|
+
return { metadata: merged, renderedContent };
|
|
1456
1797
|
}
|
|
1457
1798
|
|
|
1458
1799
|
/**
|
|
@@ -1541,23 +1882,32 @@ function compileRules(rules) {
|
|
|
1541
1882
|
* @param metadataDir - The metadata directory for enrichment files.
|
|
1542
1883
|
* @param maps - Optional named JsonMap definitions.
|
|
1543
1884
|
* @param logger - Optional logger for rule warnings.
|
|
1885
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1886
|
+
* @param configDir - Optional config directory for resolving file paths.
|
|
1544
1887
|
* @returns The merged metadata and intermediate data.
|
|
1545
1888
|
*/
|
|
1546
|
-
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
|
|
1889
|
+
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
    const extension = extname(filePath);
    const fileStats = await stat(filePath);
    // Step 1: pull text plus any structured data out of the file.
    const extracted = await extractText(filePath, extension);
    // Step 2: derive attributes and run the inference rules over them.
    const attributes = buildAttributes(filePath, fileStats, extracted.frontmatter, extracted.json);
    const ruleOutput = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
    const inferred = ruleOutput.metadata;
    // Step 3: layer enrichment metadata on top; enrichment keys win.
    const enrichment = await readMetadata(filePath, metadataDir);
    // Spreading null/undefined contributes nothing, so no fallback object is needed.
    const metadata = { ...inferred, ...enrichment };
    return {
        inferred,
        enrichment,
        metadata,
        attributes,
        extracted,
        renderedContent: ruleOutput.renderedContent,
    };
}
|
|
1562
1912
|
|
|
1563
1913
|
/**
|
|
@@ -1628,6 +1978,7 @@ class DocumentProcessor {
|
|
|
1628
1978
|
vectorStore;
|
|
1629
1979
|
compiledRules;
|
|
1630
1980
|
logger;
|
|
1981
|
+
templateEngine;
|
|
1631
1982
|
/**
|
|
1632
1983
|
* Create a new DocumentProcessor.
|
|
1633
1984
|
*
|
|
@@ -1636,13 +1987,15 @@ class DocumentProcessor {
|
|
|
1636
1987
|
* @param vectorStore - The vector store client.
|
|
1637
1988
|
* @param compiledRules - The compiled inference rules.
|
|
1638
1989
|
* @param logger - The logger instance.
|
|
1990
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1639
1991
|
*/
|
|
1640
|
-
constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
|
|
1992
|
+
constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
|
|
1641
1993
|
this.config = config;
|
|
1642
1994
|
this.embeddingProvider = embeddingProvider;
|
|
1643
1995
|
this.vectorStore = vectorStore;
|
|
1644
1996
|
this.compiledRules = compiledRules;
|
|
1645
1997
|
this.logger = logger;
|
|
1998
|
+
this.templateEngine = templateEngine;
|
|
1646
1999
|
}
|
|
1647
2000
|
/**
|
|
1648
2001
|
* Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
|
|
@@ -1653,13 +2006,15 @@ class DocumentProcessor {
|
|
|
1653
2006
|
try {
|
|
1654
2007
|
const ext = extname(filePath);
|
|
1655
2008
|
// 1. Build merged metadata + extract text
|
|
1656
|
-
const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
1657
|
-
if
|
|
2009
|
+
const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
2010
|
+
// Use rendered template content if available, otherwise raw extracted text
|
|
2011
|
+
const textToEmbed = renderedContent ?? extracted.text;
|
|
2012
|
+
if (!textToEmbed.trim()) {
|
|
1658
2013
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
1659
2014
|
return;
|
|
1660
2015
|
}
|
|
1661
2016
|
// 2. Content hash check — skip if unchanged
|
|
1662
|
-
const hash = contentHash(
|
|
2017
|
+
const hash = contentHash(textToEmbed);
|
|
1663
2018
|
const baseId = pointId(filePath, 0);
|
|
1664
2019
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
1665
2020
|
if (existingPayload && existingPayload['content_hash'] === hash) {
|
|
@@ -1671,7 +2026,7 @@ class DocumentProcessor {
|
|
|
1671
2026
|
const chunkSize = this.config.chunkSize ?? 1000;
|
|
1672
2027
|
const chunkOverlap = this.config.chunkOverlap ?? 200;
|
|
1673
2028
|
const splitter = createSplitter(ext, chunkSize, chunkOverlap);
|
|
1674
|
-
const chunks = await splitter.splitText(
|
|
2029
|
+
const chunks = await splitter.splitText(textToEmbed);
|
|
1675
2030
|
// 4. Embed all chunks
|
|
1676
2031
|
const vectors = await this.embeddingProvider.embed(chunks);
|
|
1677
2032
|
// 5. Upsert all chunk points
|
|
@@ -1765,7 +2120,7 @@ class DocumentProcessor {
|
|
|
1765
2120
|
return null;
|
|
1766
2121
|
}
|
|
1767
2122
|
// Build merged metadata (lightweight — no embedding)
|
|
1768
|
-
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
2123
|
+
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
1769
2124
|
// Update all chunk payloads
|
|
1770
2125
|
const totalChunks = getChunkCount(existingPayload);
|
|
1771
2126
|
const ids = chunkIds(filePath, totalChunks);
|
|
@@ -1783,8 +2138,17 @@ class DocumentProcessor {
|
|
|
1783
2138
|
*
|
|
1784
2139
|
* @param compiledRules - The newly compiled rules.
|
|
1785
2140
|
*/
|
|
1786
|
-
|
|
2141
|
+
/**
|
|
2142
|
+
* Update compiled inference rules and optionally the template engine.
|
|
2143
|
+
*
|
|
2144
|
+
* @param compiledRules - The newly compiled rules.
|
|
2145
|
+
* @param templateEngine - Optional updated template engine.
|
|
2146
|
+
*/
|
|
2147
|
+
updateRules(compiledRules, templateEngine) {
|
|
1787
2148
|
this.compiledRules = compiledRules;
|
|
2149
|
+
if (templateEngine) {
|
|
2150
|
+
this.templateEngine = templateEngine;
|
|
2151
|
+
}
|
|
1788
2152
|
this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
|
|
1789
2153
|
}
|
|
1790
2154
|
}
|
|
@@ -2313,6 +2677,76 @@ class SystemHealth {
|
|
|
2313
2677
|
}
|
|
2314
2678
|
}
|
|
2315
2679
|
|
|
2680
|
+
/**
|
|
2681
|
+
* @module watcher/globToDir
|
|
2682
|
+
* Adapts glob-based watch config to chokidar v4+, which removed glob support
|
|
2683
|
+
* (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
|
|
2684
|
+
* strings, silently producing zero events. This module extracts static directory
|
|
2685
|
+
* roots from glob patterns for chokidar to watch, then filters emitted events
|
|
2686
|
+
* against the original globs via picomatch.
|
|
2687
|
+
*/
|
|
2688
|
+
/**
 * Extract the static directory root from a glob pattern.
 * Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`, `]`).
 *
 * Backslashes are converted to forward slashes first. A pattern with no glob
 * characters is returned whole (normalized). When the glob starts right at a
 * filesystem root, the root itself is returned (`/` or `c:/`) rather than an
 * empty or drive-relative path that would resolve against the current
 * working directory.
 *
 * @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
 * @returns The static directory prefix (e.g., `j:/domains`), or `.` when the
 *   pattern has no static prefix.
 */
function globRoot(glob) {
    const normalized = glob.replace(/\\/g, '/');
    const segments = normalized.split('/');
    const firstGlobIndex = segments.findIndex((seg) => /[*?{[\]]/.test(seg));
    const staticSegments = firstGlobIndex === -1 ? segments : segments.slice(0, firstGlobIndex);
    const root = staticSegments.join('/');
    if (root === '') {
        // "/**/x" splits into ['', '**', 'x']: the empty prefix means the
        // filesystem root, not the current directory.
        return normalized.startsWith('/') ? '/' : '.';
    }
    if (/^[A-Za-z]:$/.test(root)) {
        // A bare "c:" is drive-relative on Windows; anchor it at the drive root.
        return `${root}/`;
    }
    return root;
}
|
|
2706
|
+
/**
 * Deduplicate directory roots, removing paths that are subdirectories of others.
 *
 * Backslashes are converted to forward slashes. The returned paths keep their
 * original letter case, so they remain valid on case-sensitive filesystems;
 * case is ignored for comparison only on Windows. Paths that differ only by a
 * trailing slash count as the same root (the first spelling is kept), and a
 * root that itself ends in `/` (such as `/` or `c:/`) is recognised as the
 * parent of everything beneath it.
 *
 * @param roots - Array of directory paths.
 * @returns Deduplicated array with subdirectories removed, ordered by comparison key.
 */
function deduplicateRoots(roots) {
    const caseInsensitive = process.platform === 'win32';
    // Comparison key (always ending in '/') -> first normalized path seen for it.
    const pathByKey = new Map();
    for (const root of roots) {
        const path = root.replace(/\\/g, '/');
        const withSlash = path.endsWith('/') ? path : `${path}/`;
        const key = caseInsensitive ? withSlash.toLowerCase() : withSlash;
        if (!pathByKey.has(key)) {
            pathByKey.set(key, path);
        }
    }
    const keys = [...pathByKey.keys()].sort();
    // Every key ends in '/', so a prefix match is a true ancestor match
    // ("/srv/app/" is not a prefix of "/srv/application/").
    return keys
        .filter((key) => !keys.some((other) => other !== key && key.startsWith(other)))
        .map((key) => pathByKey.get(key));
}
|
|
2720
|
+
/**
 * Compile glob patterns into a single picomatch predicate.
 *
 * Backslashes are converted to forward slashes in both the patterns and the
 * tested path. Matching runs with `dot: true` and `nocase: true`, so dotfiles
 * are eligible and letter case is ignored.
 *
 * @param globs - Glob patterns to match against.
 * @returns A function that tests whether a file path matches any of the globs.
 */
function buildGlobMatcher(globs) {
    const toForwardSlashes = (value) => value.replace(/\\/g, '/');
    const matchesAny = picomatch(globs.map((pattern) => toForwardSlashes(pattern)), { dot: true, nocase: true });
    return (filePath) => matchesAny(toForwardSlashes(filePath));
}
|
|
2736
|
+
/**
 * Split glob-based watch config into what chokidar can watch and what must be
 * filtered afterwards.
 *
 * @param globs - Glob patterns from the watch config.
 * @returns Object with `roots` (deduplicated static directories for chokidar)
 *   and `matches` (predicate testing a path against the original globs).
 */
function resolveWatchPaths(globs) {
    return {
        roots: deduplicateRoots(globs.map((pattern) => globRoot(pattern))),
        matches: buildGlobMatcher(globs),
    };
}
|
|
2749
|
+
|
|
2316
2750
|
/**
|
|
2317
2751
|
* @module watcher
|
|
2318
2752
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -2327,6 +2761,7 @@ class FileSystemWatcher {
|
|
|
2327
2761
|
logger;
|
|
2328
2762
|
health;
|
|
2329
2763
|
gitignoreFilter;
|
|
2764
|
+
globMatches;
|
|
2330
2765
|
watcher;
|
|
2331
2766
|
/**
|
|
2332
2767
|
* Create a new FileSystemWatcher.
|
|
@@ -2343,6 +2778,7 @@ class FileSystemWatcher {
|
|
|
2343
2778
|
this.processor = processor;
|
|
2344
2779
|
this.logger = logger;
|
|
2345
2780
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
2781
|
+
this.globMatches = () => true;
|
|
2346
2782
|
const healthOptions = {
|
|
2347
2783
|
maxRetries: options.maxRetries,
|
|
2348
2784
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2355,7 +2791,13 @@ class FileSystemWatcher {
|
|
|
2355
2791
|
* Start watching the filesystem and processing events.
|
|
2356
2792
|
*/
|
|
2357
2793
|
start() {
|
|
2358
|
-
|
|
2794
|
+
// Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
|
|
2795
|
+
// Glob patterns are silently treated as literal strings, producing zero
|
|
2796
|
+
// events. We extract static directory roots for chokidar to watch, then
|
|
2797
|
+
// filter emitted events against the original globs via picomatch.
|
|
2798
|
+
const { roots, matches } = resolveWatchPaths(this.config.paths);
|
|
2799
|
+
this.globMatches = matches;
|
|
2800
|
+
this.watcher = chokidar.watch(roots, {
|
|
2359
2801
|
ignored: this.config.ignored,
|
|
2360
2802
|
usePolling: this.config.usePolling,
|
|
2361
2803
|
interval: this.config.pollIntervalMs,
|
|
@@ -2366,6 +2808,8 @@ class FileSystemWatcher {
|
|
|
2366
2808
|
});
|
|
2367
2809
|
this.watcher.on('add', (path) => {
|
|
2368
2810
|
this.handleGitignoreChange(path);
|
|
2811
|
+
if (!this.globMatches(path))
|
|
2812
|
+
return;
|
|
2369
2813
|
if (this.isGitignored(path))
|
|
2370
2814
|
return;
|
|
2371
2815
|
this.logger.debug({ path }, 'File added');
|
|
@@ -2373,6 +2817,8 @@ class FileSystemWatcher {
|
|
|
2373
2817
|
});
|
|
2374
2818
|
this.watcher.on('change', (path) => {
|
|
2375
2819
|
this.handleGitignoreChange(path);
|
|
2820
|
+
if (!this.globMatches(path))
|
|
2821
|
+
return;
|
|
2376
2822
|
if (this.isGitignored(path))
|
|
2377
2823
|
return;
|
|
2378
2824
|
this.logger.debug({ path }, 'File changed');
|
|
@@ -2380,6 +2826,8 @@ class FileSystemWatcher {
|
|
|
2380
2826
|
});
|
|
2381
2827
|
this.watcher.on('unlink', (path) => {
|
|
2382
2828
|
this.handleGitignoreChange(path);
|
|
2829
|
+
if (!this.globMatches(path))
|
|
2830
|
+
return;
|
|
2383
2831
|
if (this.isGitignored(path))
|
|
2384
2832
|
return;
|
|
2385
2833
|
this.logger.debug({ path }, 'File removed');
|
|
@@ -2452,51 +2900,21 @@ class FileSystemWatcher {
|
|
|
2452
2900
|
}
|
|
2453
2901
|
|
|
2454
2902
|
/**
|
|
2455
|
-
* @module app/
|
|
2456
|
-
*
|
|
2457
|
-
*/
|
|
2458
|
-
/**
|
|
2459
|
-
* Debounced config file watcher.
|
|
2903
|
+
* @module app/factories
|
|
2904
|
+
* Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
|
|
2460
2905
|
*/
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
});
|
|
2474
|
-
this.watcher.on('change', () => {
|
|
2475
|
-
if (this.debounce)
|
|
2476
|
-
clearTimeout(this.debounce);
|
|
2477
|
-
this.debounce = setTimeout(() => {
|
|
2478
|
-
void this.options.onChange();
|
|
2479
|
-
}, this.options.debounceMs);
|
|
2480
|
-
});
|
|
2481
|
-
this.watcher.on('error', (error) => {
|
|
2482
|
-
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2483
|
-
});
|
|
2484
|
-
this.options.logger.info({
|
|
2485
|
-
configPath: this.options.configPath,
|
|
2486
|
-
debounceMs: this.options.debounceMs,
|
|
2487
|
-
}, 'Config watcher started');
|
|
2488
|
-
}
|
|
2489
|
-
async stop() {
|
|
2490
|
-
if (this.debounce) {
|
|
2491
|
-
clearTimeout(this.debounce);
|
|
2492
|
-
this.debounce = undefined;
|
|
2493
|
-
}
|
|
2494
|
-
if (this.watcher) {
|
|
2495
|
-
await this.watcher.close();
|
|
2496
|
-
this.watcher = undefined;
|
|
2497
|
-
}
|
|
2498
|
-
}
|
|
2499
|
-
}
|
|
2906
|
+
/** Default component factories wiring real implementations. */
|
|
2907
|
+
const defaultFactories = {
|
|
2908
|
+
loadConfig,
|
|
2909
|
+
createLogger,
|
|
2910
|
+
createEmbeddingProvider,
|
|
2911
|
+
createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
|
|
2912
|
+
compileRules,
|
|
2913
|
+
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine),
|
|
2914
|
+
createEventQueue: (options) => new EventQueue(options),
|
|
2915
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2916
|
+
createApiServer,
|
|
2917
|
+
};
|
|
2500
2918
|
|
|
2501
2919
|
/**
|
|
2502
2920
|
* @module app/shutdown
|
|
@@ -2516,17 +2934,28 @@ function installShutdownHandlers(stop) {
|
|
|
2516
2934
|
process.on('SIGINT', () => void shutdown());
|
|
2517
2935
|
}
|
|
2518
2936
|
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2937
|
+
/**
|
|
2938
|
+
* @module app/startFromConfig
|
|
2939
|
+
* Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
|
|
2940
|
+
*/
|
|
2941
|
+
/**
|
|
2942
|
+
* Create and start a JeevesWatcher from a config file path.
|
|
2943
|
+
*
|
|
2944
|
+
* @param configPath - Optional path to the configuration file.
|
|
2945
|
+
* @returns The running JeevesWatcher instance.
|
|
2946
|
+
*/
|
|
2947
|
+
async function startFromConfig(configPath) {
|
|
2948
|
+
const config = await loadConfig(configPath);
|
|
2949
|
+
const app = new JeevesWatcher(config, configPath);
|
|
2950
|
+
installShutdownHandlers(() => app.stop());
|
|
2951
|
+
await app.start();
|
|
2952
|
+
return app;
|
|
2953
|
+
}
|
|
2954
|
+
|
|
2955
|
+
/**
|
|
2956
|
+
* @module app
|
|
2957
|
+
* Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
|
|
2958
|
+
*/
|
|
2530
2959
|
/**
|
|
2531
2960
|
* Main application class that wires together all components.
|
|
2532
2961
|
*/
|
|
@@ -2561,56 +2990,26 @@ class JeevesWatcher {
|
|
|
2561
2990
|
async start() {
|
|
2562
2991
|
const logger = this.factories.createLogger(this.config.logging);
|
|
2563
2992
|
this.logger = logger;
|
|
2564
|
-
|
|
2565
|
-
try {
|
|
2566
|
-
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
2567
|
-
}
|
|
2568
|
-
catch (error) {
|
|
2569
|
-
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
2570
|
-
throw error;
|
|
2571
|
-
}
|
|
2572
|
-
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
2573
|
-
await vectorStore.ensureCollection();
|
|
2993
|
+
const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
|
|
2574
2994
|
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
2575
|
-
const
|
|
2995
|
+
const configDir = this.configPath ? dirname(this.configPath) : '.';
|
|
2996
|
+
const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
|
|
2997
|
+
const processor = this.factories.createDocumentProcessor({
|
|
2576
2998
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
2577
2999
|
chunkSize: this.config.embedding.chunkSize,
|
|
2578
3000
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
2579
3001
|
maps: this.config.maps,
|
|
2580
|
-
|
|
2581
|
-
|
|
3002
|
+
configDir,
|
|
3003
|
+
}, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
|
|
2582
3004
|
this.processor = processor;
|
|
2583
|
-
|
|
3005
|
+
this.queue = this.factories.createEventQueue({
|
|
2584
3006
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
2585
3007
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
2586
3008
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2587
3009
|
});
|
|
2588
|
-
this.
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
? new GitignoreFilter(this.config.watch.paths)
|
|
2592
|
-
: undefined;
|
|
2593
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2594
|
-
maxRetries: this.config.maxRetries,
|
|
2595
|
-
maxBackoffMs: this.config.maxBackoffMs,
|
|
2596
|
-
onFatalError: this.runtimeOptions.onFatalError,
|
|
2597
|
-
gitignoreFilter,
|
|
2598
|
-
});
|
|
2599
|
-
this.watcher = watcher;
|
|
2600
|
-
const server = this.factories.createApiServer({
|
|
2601
|
-
processor,
|
|
2602
|
-
vectorStore,
|
|
2603
|
-
embeddingProvider,
|
|
2604
|
-
queue,
|
|
2605
|
-
config: this.config,
|
|
2606
|
-
logger,
|
|
2607
|
-
});
|
|
2608
|
-
this.server = server;
|
|
2609
|
-
await server.listen({
|
|
2610
|
-
host: this.config.api?.host ?? '127.0.0.1',
|
|
2611
|
-
port: this.config.api?.port ?? 3456,
|
|
2612
|
-
});
|
|
2613
|
-
watcher.start();
|
|
3010
|
+
this.watcher = this.createWatcher(this.queue, processor, logger);
|
|
3011
|
+
this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
|
|
3012
|
+
this.watcher.start();
|
|
2614
3013
|
this.startConfigWatch();
|
|
2615
3014
|
logger.info('jeeves-watcher started');
|
|
2616
3015
|
}
|
|
@@ -2641,22 +3040,61 @@ class JeevesWatcher {
|
|
|
2641
3040
|
}
|
|
2642
3041
|
this.logger?.info('jeeves-watcher stopped');
|
|
2643
3042
|
}
|
|
3043
|
+
async initEmbeddingAndStore(logger) {
|
|
3044
|
+
let embeddingProvider;
|
|
3045
|
+
try {
|
|
3046
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
3047
|
+
}
|
|
3048
|
+
catch (error) {
|
|
3049
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
3050
|
+
throw error;
|
|
3051
|
+
}
|
|
3052
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
3053
|
+
await vectorStore.ensureCollection();
|
|
3054
|
+
return { embeddingProvider, vectorStore };
|
|
3055
|
+
}
|
|
3056
|
+
createWatcher(queue, processor, logger) {
|
|
3057
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
3058
|
+
const gitignoreFilter = respectGitignore
|
|
3059
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
3060
|
+
: undefined;
|
|
3061
|
+
return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
3062
|
+
maxRetries: this.config.maxRetries,
|
|
3063
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
3064
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
3065
|
+
gitignoreFilter,
|
|
3066
|
+
});
|
|
3067
|
+
}
|
|
3068
|
+
async startApiServer(processor, vectorStore, embeddingProvider, logger) {
|
|
3069
|
+
const server = this.factories.createApiServer({
|
|
3070
|
+
processor,
|
|
3071
|
+
vectorStore,
|
|
3072
|
+
embeddingProvider,
|
|
3073
|
+
queue: this.queue,
|
|
3074
|
+
config: this.config,
|
|
3075
|
+
logger,
|
|
3076
|
+
});
|
|
3077
|
+
await server.listen({
|
|
3078
|
+
host: this.config.api?.host ?? '127.0.0.1',
|
|
3079
|
+
port: this.config.api?.port ?? 3456,
|
|
3080
|
+
});
|
|
3081
|
+
return server;
|
|
3082
|
+
}
|
|
2644
3083
|
startConfigWatch() {
|
|
2645
3084
|
const logger = this.logger;
|
|
2646
3085
|
if (!logger)
|
|
2647
3086
|
return;
|
|
2648
3087
|
const enabled = this.config.configWatch?.enabled ?? true;
|
|
2649
|
-
if (!enabled)
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
3088
|
+
if (!enabled || !this.configPath) {
|
|
3089
|
+
if (!this.configPath) {
|
|
3090
|
+
logger.debug('Config watch enabled, but no config path was provided');
|
|
3091
|
+
}
|
|
2653
3092
|
return;
|
|
2654
3093
|
}
|
|
2655
|
-
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
2656
3094
|
this.configWatcher = new ConfigWatcher({
|
|
2657
3095
|
configPath: this.configPath,
|
|
2658
3096
|
enabled,
|
|
2659
|
-
debounceMs,
|
|
3097
|
+
debounceMs: this.config.configWatch?.debounceMs ?? 10000,
|
|
2660
3098
|
logger,
|
|
2661
3099
|
onChange: async () => this.reloadConfig(),
|
|
2662
3100
|
});
|
|
@@ -2678,7 +3116,9 @@ class JeevesWatcher {
|
|
|
2678
3116
|
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
2679
3117
|
this.config = newConfig;
|
|
2680
3118
|
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
2681
|
-
|
|
3119
|
+
const reloadConfigDir = dirname(this.configPath);
|
|
3120
|
+
const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
|
|
3121
|
+
processor.updateRules(compiledRules, newTemplateEngine);
|
|
2682
3122
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
2683
3123
|
}
|
|
2684
3124
|
catch (error) {
|
|
@@ -2686,18 +3126,6 @@ class JeevesWatcher {
|
|
|
2686
3126
|
}
|
|
2687
3127
|
}
|
|
2688
3128
|
}
|
|
2689
|
-
|
|
2690
|
-
* Create and start a JeevesWatcher from a config file path.
|
|
2691
|
-
*
|
|
2692
|
-
* @param configPath - Optional path to the configuration file.
|
|
2693
|
-
* @returns The running JeevesWatcher instance.
|
|
2694
|
-
*/
|
|
2695
|
-
async function startFromConfig(configPath) {
|
|
2696
|
-
const config = await loadConfig(configPath);
|
|
2697
|
-
const app = new JeevesWatcher(config, configPath);
|
|
2698
|
-
installShutdownHandlers(() => app.stop());
|
|
2699
|
-
await app.start();
|
|
2700
|
-
return app;
|
|
2701
|
-
}
|
|
3129
|
+
// startFromConfig re-exported from ./startFromConfig
|
|
2702
3130
|
|
|
2703
|
-
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
|
3131
|
+
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, TemplateEngine, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|