@karmaniverous/jeeves-watcher 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +69 -14
- package/dist/cjs/index.js +996 -562
- package/dist/cli/jeeves-watcher/index.js +824 -396
- package/dist/index.d.ts +160 -16
- package/dist/index.iife.js +824 -397
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +992 -564
- package/package.json +12 -4
|
@@ -1,17 +1,25 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from '@commander-js/extra-typings';
|
|
3
|
+
import { dirname, resolve, relative, join, extname, basename } from 'node:path';
|
|
4
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
5
|
+
import ignore from 'ignore';
|
|
6
|
+
import Handlebars from 'handlebars';
|
|
7
|
+
import dayjs from 'dayjs';
|
|
8
|
+
import { toMdast } from 'hast-util-to-mdast';
|
|
9
|
+
import { fromADF } from 'mdast-util-from-adf';
|
|
10
|
+
import { toMarkdown } from 'mdast-util-to-markdown';
|
|
11
|
+
import { capitalize, title, camel, snake, dash, isEqual, omit, get } from 'radash';
|
|
12
|
+
import rehypeParse from 'rehype-parse';
|
|
13
|
+
import { unified } from 'unified';
|
|
14
|
+
import chokidar from 'chokidar';
|
|
3
15
|
import Fastify from 'fastify';
|
|
4
16
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
5
|
-
import { resolve, dirname, join, relative, extname, basename } from 'node:path';
|
|
6
17
|
import picomatch from 'picomatch';
|
|
7
|
-
import { omit, get } from 'radash';
|
|
8
18
|
import { createHash } from 'node:crypto';
|
|
9
19
|
import { cosmiconfig } from 'cosmiconfig';
|
|
10
20
|
import { z, ZodError } from 'zod';
|
|
11
21
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
12
22
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
13
|
-
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
14
|
-
import ignore from 'ignore';
|
|
15
23
|
import pino from 'pino';
|
|
16
24
|
import { v5 } from 'uuid';
|
|
17
25
|
import * as cheerio from 'cheerio';
|
|
@@ -21,7 +29,439 @@ import Ajv from 'ajv';
|
|
|
21
29
|
import addFormats from 'ajv-formats';
|
|
22
30
|
import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
23
31
|
import { QdrantClient } from '@qdrant/js-client-rest';
|
|
24
|
-
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* @module gitignore
|
|
35
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
36
|
+
*/
|
|
37
|
+
/**
|
|
38
|
+
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
39
|
+
* Returns `undefined` if no repo is found.
|
|
40
|
+
*/
|
|
41
|
+
/**
 * Locate the git repository root containing `startDir` by walking up the
 * directory tree until a `.git` directory is found.
 *
 * @param startDir - Directory to start the upward search from.
 * @returns The absolute repo root, or `undefined` when no repo encloses it.
 */
function findRepoRoot(startDir) {
    const fsRoot = resolve('/');
    let current = resolve(startDir);
    while (current !== fsRoot) {
        const gitPath = join(current, '.git');
        // A `.git` *directory* marks the repo root (a `.git` file — e.g. a
        // worktree pointer — is deliberately not treated as a root here).
        if (existsSync(gitPath) && statSync(gitPath).isDirectory()) {
            return current;
        }
        const parent = dirname(current);
        // dirname() of the filesystem root returns itself; stop to avoid looping.
        if (parent === current) {
            break;
        }
        current = parent;
    }
    return undefined;
}
|
|
56
|
+
/**
|
|
57
|
+
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
58
|
+
* that can be scanned for a repo root.
|
|
59
|
+
*/
|
|
60
|
+
/**
 * Convert a watch path (directory, file path, or glob) into a concrete,
 * existing directory that can be scanned for a git repo root.
 *
 * @param watchPath - A directory, file path, or glob pattern.
 * @returns The resolved scan directory, or `undefined` when none exists.
 */
function watchPathToScanDir(watchPath) {
    const absPath = resolve(watchPath);
    try {
        // Existing directory → use it directly; existing file → its parent.
        const stats = statSync(absPath);
        return stats.isDirectory() ? absPath : dirname(absPath);
    }
    catch {
        // Path does not exist on disk; fall through to glob handling.
    }
    // If this is a glob, fall back to the non-glob prefix before the first
    // glob metacharacter.
    const globMatch = /[*?[{]/.exec(watchPath);
    if (!globMatch) {
        return undefined;
    }
    const prefix = watchPath.slice(0, globMatch.index);
    const trimmed = prefix.trim();
    let baseDir;
    if (trimmed.length === 0) {
        baseDir = '.';
    }
    else if (trimmed.endsWith('/') || trimmed.endsWith('\\')) {
        // Prefix ends at a separator → it already names the base directory.
        baseDir = trimmed;
    }
    else {
        // Prefix ends mid-segment (e.g. `src/foo*`) → use the parent.
        baseDir = dirname(trimmed);
    }
    const resolved = resolve(baseDir);
    return existsSync(resolved) ? resolved : undefined;
}
|
|
84
|
+
/**
|
|
85
|
+
* Recursively find all `.gitignore` files under `dir`.
|
|
86
|
+
* Skips `.git` and `node_modules` directories for performance.
|
|
87
|
+
*/
|
|
88
|
+
/**
 * Recursively collect every `.gitignore` file under `dir`.
 * `.git` and `node_modules` directories are skipped for performance.
 *
 * @param dir - Directory to search.
 * @returns Absolute paths of the `.gitignore` files found.
 */
function findGitignoreFiles(dir) {
    const found = [];
    const ownGitignore = join(dir, '.gitignore');
    if (existsSync(ownGitignore)) {
        found.push(ownGitignore);
    }
    let children;
    try {
        children = readdirSync(dir);
    }
    catch {
        // Directory unreadable — return whatever we have so far.
        return found;
    }
    for (const name of children) {
        if (name === '.git' || name === 'node_modules') {
            continue;
        }
        const childPath = join(dir, name);
        try {
            if (statSync(childPath).isDirectory()) {
                for (const nested of findGitignoreFiles(childPath)) {
                    found.push(nested);
                }
            }
        }
        catch {
            // Skip entries we cannot stat (permissions, broken symlinks, races).
        }
    }
    return found;
}
|
|
116
|
+
/**
|
|
117
|
+
* Parse a `.gitignore` file into an `ignore` instance.
|
|
118
|
+
*/
|
|
119
|
+
/**
 * Read a `.gitignore` file and build an `ignore` matcher from its patterns.
 *
 * @param gitignorePath - Absolute path of the `.gitignore` file.
 * @returns A fresh `ignore` instance loaded with the file's patterns.
 */
function parseGitignore(gitignorePath) {
    return ignore().add(readFileSync(gitignorePath, 'utf8'));
}
|
|
123
|
+
/**
|
|
124
|
+
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
125
|
+
*/
|
|
126
|
+
/**
 * Normalize a path to forward slashes — the `ignore` package only
 * understands POSIX-style separators.
 *
 * @param p - Path that may contain backslashes (Windows).
 * @returns The path with every backslash replaced by `/`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
129
|
+
/**
|
|
130
|
+
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
131
|
+
* `.gitignore` chain in git repositories.
|
|
132
|
+
*/
|
|
133
|
+
class GitignoreFilter {
    /** Map of repo root → { root, entries }, where each entry is a parsed `.gitignore` and its directory. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Scan paths for git repos and their `.gitignore` files, replacing any
     * previously cached state.
     *
     * @param watchPaths - Paths (directories, files, or globs) to scan.
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir)
                continue;
            // Avoid rescanning when multiple watch paths share a base directory.
            if (scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot)
                continue;
            if (this.repos.has(repoRoot))
                continue;
            const gitignoreFiles = findGitignoreFiles(repoRoot);
            const entries = gitignoreFiles.map((gf) => ({
                dir: dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Sort deepest-first so nested `.gitignore` files are checked first
            entries.sort((a, b) => b.dir.length - a.dir.length);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - Absolute file path to check.
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const [, repo] of this.repos) {
            // Check if file is within this repo
            const relToRepo = relative(repo.root, absPath);
            // On Windows, path.relative() across drives (e.g. D:\ → J:\) produces
            // an absolute path with a drive letter instead of a relative one. The
            // `ignore` library rejects these with a RangeError. Skip repos on
            // different drives to avoid cross-drive gitignore mismatches.
            if (relToRepo.startsWith('..') ||
                relToRepo.startsWith(resolve('/')) ||
                /^[a-zA-Z]:/.test(relToRepo)) {
                continue;
            }
            // Check each `.gitignore` entry (deepest-first)
            for (const entry of repo.entries) {
                const relToEntry = relative(entry.dir, absPath);
                if (relToEntry.startsWith('..') || /^[a-zA-Z]:/.test(relToEntry))
                    continue;
                const normalized = toForwardSlash(relToEntry);
                if (entry.ig.ignores(normalized)) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * @param gitignorePath - Absolute path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        for (const [, repo] of this.repos) {
            const relToRepo = relative(repo.root, gitignoreDir);
            // FIX: mirror the cross-drive guard used in isIgnored(). On Windows,
            // relative() across drives yields an absolute drive-letter path (not
            // a `..`-prefixed one), so without this check a `.gitignore` on a
            // different drive could be attached to the wrong repo.
            if (relToRepo.startsWith('..') || /^[a-zA-Z]:/.test(relToRepo))
                continue;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            // Re-parse if file still exists
            if (existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                // Re-sort deepest-first
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            return;
        }
        // If not in any known repo, check if it's in a repo we haven't scanned
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && existsSync(absPath)) {
            const entries = [
                { dir: gitignoreDir, ig: parseGitignore(absPath) },
            ];
            if (this.repos.has(repoRoot)) {
                const repo = this.repos.get(repoRoot);
                repo.entries.push(entries[0]);
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
            else {
                this.repos.set(repoRoot, { root: repoRoot, entries });
            }
        }
    }
}
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* @module templates/helpers
|
|
247
|
+
* Registers built-in Handlebars helpers for content templates.
|
|
248
|
+
*/
|
|
249
|
+
/** Pre-built rehype parser for HTML → hast conversion. */
|
|
250
|
+
/** Pre-built rehype parser for HTML → hast conversion. */
const htmlParser = unified().use(rehypeParse, { fragment: true });
/**
 * Register all built-in helpers on a Handlebars instance.
 *
 * @param hbs - The Handlebars instance.
 */
function registerBuiltinHelpers(hbs) {
    // Structural: ADF (Atlassian Document Format) → Markdown.
    hbs.registerHelper('adfToMarkdown', (adf) => {
        if (!adf || typeof adf !== 'object')
            return '';
        try {
            return new hbs.SafeString(toMarkdown(fromADF(adf)).trim());
        }
        catch {
            return '<!-- ADF conversion failed -->';
        }
    });
    // Structural: HTML → Markdown (HTML → hast → mdast → markdown).
    hbs.registerHelper('markdownify', (html) => {
        if (typeof html !== 'string' || !html.trim())
            return '';
        try {
            const mdast = toMdast(htmlParser.parse(html));
            return new hbs.SafeString(toMarkdown(mdast).trim());
        }
        catch {
            return '<!-- HTML conversion failed -->';
        }
    });
    // Formatting: dateFormat. Handlebars appends an options object as the last
    // argument, so `format` is only honored when it is actually a string.
    hbs.registerHelper('dateFormat', (value, format) => {
        if (value === undefined || value === null)
            return '';
        const fmt = typeof format === 'string' ? format : 'YYYY-MM-DD';
        return dayjs(value).format(fmt);
    });
    // Formatting: join an array with a separator (default ', ').
    hbs.registerHelper('join', (arr, separator) => {
        if (!Array.isArray(arr))
            return '';
        return arr.join(typeof separator === 'string' ? separator : ', ');
    });
    // Formatting: pluck a property from each element of an array.
    hbs.registerHelper('pluck', (arr, key) => {
        if (!Array.isArray(arr) || typeof key !== 'string')
            return [];
        return arr.map((item) => (item && typeof item === 'object' ? item[key] : undefined));
    });
    // String transforms — each returns '' for non-string input.
    const stringTransforms = {
        lowercase: (text) => text.toLowerCase(),
        uppercase: (text) => text.toUpperCase(),
        capitalize,
        title,
        camel,
        snake,
        dash,
    };
    for (const [name, fn] of Object.entries(stringTransforms)) {
        hbs.registerHelper(name, (text) => (typeof text === 'string' ? fn(text) : ''));
    }
    // default helper: first non-nullish of value, fallback, ''.
    hbs.registerHelper('default', (value, fallback) => value ?? fallback ?? '');
    // eq helper (deep equality).
    hbs.registerHelper('eq', (a, b) => isEqual(a, b));
    // json helper: pretty-printed JSON, marked safe so it is not HTML-escaped.
    hbs.registerHelper('json', (value) => new hbs.SafeString(JSON.stringify(value, null, 2)));
}
|
|
325
|
+
|
|
326
|
+
/**
|
|
327
|
+
* @module templates/engine
|
|
328
|
+
* Handlebars template compilation, caching, and resolution (file path vs named ref vs inline).
|
|
329
|
+
*/
|
|
330
|
+
/**
|
|
331
|
+
* Resolve a template value to its source string.
|
|
332
|
+
*
|
|
333
|
+
* Resolution order:
|
|
334
|
+
* 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
|
|
335
|
+
* 2. Matches a key in namedTemplates → named ref (recursively resolve)
|
|
336
|
+
* 3. Otherwise → inline Handlebars template string
|
|
337
|
+
*
|
|
338
|
+
* @param value - The template reference (inline, file path, or named ref).
|
|
339
|
+
* @param namedTemplates - Named template definitions from config.
|
|
340
|
+
* @param configDir - Directory to resolve relative file paths against.
|
|
341
|
+
* @param visited - Set of visited named refs for cycle detection.
|
|
342
|
+
* @returns The resolved template source string.
|
|
343
|
+
*/
|
|
344
|
+
/**
 * Resolve a template value to its source string.
 *
 * Resolution order:
 * 1. Ends in `.hbs` or `.handlebars` → file path (resolve relative to configDir)
 * 2. Matches a key in namedTemplates → named ref (recursively resolve)
 * 3. Otherwise → inline Handlebars template string
 *
 * @param value - The template reference (inline, file path, or named ref).
 * @param namedTemplates - Named template definitions from config.
 * @param configDir - Directory to resolve relative file paths against.
 * @param visited - Set of visited named refs for cycle detection.
 * @returns The resolved template source string.
 * @throws When named refs form a cycle.
 */
function resolveTemplateSource(value, namedTemplates, configDir, visited = new Set()) {
    // 1. Explicit template file on disk.
    if (value.endsWith('.hbs') || value.endsWith('.handlebars')) {
        return readFileSync(resolve(configDir, value), 'utf-8');
    }
    // 2. Named ref — may itself be a ref, a file path, or inline source.
    const named = namedTemplates?.[value];
    if (named !== undefined) {
        if (visited.has(value)) {
            throw new Error(`Circular template reference detected: ${value}`);
        }
        visited.add(value);
        return resolveTemplateSource(named, namedTemplates, configDir, visited);
    }
    // 3. Inline template source.
    return value;
}
|
|
360
|
+
/**
|
|
361
|
+
* Create a configured Handlebars instance with built-in helpers registered.
|
|
362
|
+
*
|
|
363
|
+
* @returns A Handlebars instance with helpers.
|
|
364
|
+
*/
|
|
365
|
+
/**
 * Create an isolated Handlebars environment with all built-in helpers
 * registered (does not touch the global Handlebars instance).
 *
 * @returns A Handlebars instance with helpers.
 */
function createHandlebarsInstance() {
    const instance = Handlebars.create();
    registerBuiltinHelpers(instance);
    return instance;
}
|
|
370
|
+
/**
|
|
371
|
+
* Load custom helpers from file paths.
|
|
372
|
+
*
|
|
373
|
+
* Each file should export a default function that receives the Handlebars instance.
|
|
374
|
+
*
|
|
375
|
+
* @param hbs - The Handlebars instance.
|
|
376
|
+
* @param paths - File paths to custom helper modules.
|
|
377
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
378
|
+
*/
|
|
379
|
+
/**
 * Load custom helpers from file paths.
 *
 * Each module is dynamically imported; when it exports a default function,
 * that function is invoked with the Handlebars instance so it can register
 * its helpers.
 *
 * @param hbs - The Handlebars instance.
 * @param paths - File paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 */
async function loadCustomHelpers(hbs, paths, configDir) {
    for (const helperPath of paths) {
        const absolute = resolve(configDir, helperPath);
        const mod = await import(absolute);
        if (typeof mod.default === 'function') {
            mod.default(hbs);
        }
    }
}
|
|
388
|
+
/**
|
|
389
|
+
* The template engine: holds compiled templates and renders them against context.
|
|
390
|
+
*/
|
|
391
|
+
class TemplateEngine {
    /** Handlebars instance used to compile template sources. */
    hbs;
    /** Cache of compiled templates, keyed by rule index or template name. */
    compiled = new Map();
    constructor(hbs) {
        this.hbs = hbs;
    }
    /**
     * Compile and cache a template from its source string.
     *
     * @param key - Cache key (rule index or named template).
     * @param source - Handlebars template source.
     * @returns The compiled template.
     */
    compile(key, source) {
        const template = this.hbs.compile(source);
        this.compiled.set(key, template);
        return template;
    }
    /**
     * Get a previously compiled template by key.
     *
     * @param key - The cache key.
     * @returns The compiled template, or undefined.
     */
    get(key) {
        return this.compiled.get(key);
    }
    /**
     * Render a compiled template against a context.
     *
     * @param key - The cache key of the compiled template.
     * @param context - The data context for rendering.
     * @returns The rendered string, or null if the template was not found.
     */
    render(key, context) {
        const template = this.get(key);
        return template ? template(context) : null;
    }
}
|
|
432
|
+
|
|
433
|
+
/**
|
|
434
|
+
* @module templates/buildTemplateEngine
|
|
435
|
+
* Factory to build a TemplateEngine from config, compiling all rule templates at load time.
|
|
436
|
+
*/
|
|
437
|
+
/**
|
|
438
|
+
* Build a TemplateEngine from configuration, pre-compiling all rule templates.
|
|
439
|
+
*
|
|
440
|
+
* @param rules - The inference rules (may contain template fields).
|
|
441
|
+
* @param namedTemplates - Named template definitions from config.
|
|
442
|
+
* @param templateHelperPaths - Paths to custom helper modules.
|
|
443
|
+
* @param configDir - Directory to resolve relative paths against.
|
|
444
|
+
* @returns The configured TemplateEngine, or undefined if no templates are used.
|
|
445
|
+
*/
|
|
446
|
+
/**
 * Build a TemplateEngine from configuration, pre-compiling all rule templates.
 *
 * @param rules - The inference rules (may contain template fields).
 * @param namedTemplates - Named template definitions from config.
 * @param templateHelperPaths - Paths to custom helper modules.
 * @param configDir - Directory to resolve relative paths against.
 * @returns The configured TemplateEngine, or undefined if no templates are used.
 */
async function buildTemplateEngine(rules, namedTemplates, templateHelperPaths, configDir) {
    // Skip all Handlebars setup when no rule declares a template.
    if (!rules.some((rule) => rule.template))
        return undefined;
    const hbs = createHandlebarsInstance();
    // Register user-supplied helpers before compiling anything.
    if (templateHelperPaths?.length && configDir) {
        await loadCustomHelpers(hbs, templateHelperPaths, configDir);
    }
    const engine = new TemplateEngine(hbs);
    // Pre-compile every rule template, keyed by the rule's position.
    for (const [index, rule] of rules.entries()) {
        if (!rule.template)
            continue;
        const source = resolveTemplateSource(rule.template, namedTemplates, configDir ?? '.');
        engine.compile(`rule-${String(index)}`, source);
    }
    return engine;
}
|
|
25
465
|
|
|
26
466
|
/**
|
|
27
467
|
* @module util/normalizeError
|
|
@@ -48,6 +488,53 @@ function normalizeError(error) {
|
|
|
48
488
|
return normalized;
|
|
49
489
|
}
|
|
50
490
|
|
|
491
|
+
/**
|
|
492
|
+
* @module app/configWatcher
|
|
493
|
+
* Watches the config file for changes and triggers debounced reload. Isolated I/O wrapper around chokidar.
|
|
494
|
+
*/
|
|
495
|
+
/**
|
|
496
|
+
* Debounced config file watcher.
|
|
497
|
+
*/
|
|
498
|
+
class ConfigWatcher {
    /** Watcher options: config path, debounce interval, logger, onChange callback. */
    options;
    /** Underlying chokidar watcher while running; undefined when stopped. */
    watcher;
    /** Pending debounce timer handle, if a reload is queued. */
    debounce;
    constructor(options) {
        this.options = options;
    }
    /**
     * Start watching the config file; no-op when watching is disabled.
     * Change events are debounced so bursts trigger a single reload.
     */
    start() {
        if (!this.options.enabled)
            return;
        const watcher = chokidar.watch(this.options.configPath, {
            ignoreInitial: true,
        });
        watcher.on('change', () => {
            // Reset the timer on each event so only the last one fires onChange.
            if (this.debounce)
                clearTimeout(this.debounce);
            this.debounce = setTimeout(() => {
                // Fire-and-forget: reload errors are the callback's responsibility.
                void this.options.onChange();
            }, this.options.debounceMs);
        });
        watcher.on('error', (error) => {
            this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
        });
        this.watcher = watcher;
        this.options.logger.info({
            configPath: this.options.configPath,
            debounceMs: this.options.debounceMs,
        }, 'Config watcher started');
    }
    /**
     * Cancel any pending reload and close the underlying watcher.
     */
    async stop() {
        if (this.debounce) {
            clearTimeout(this.debounce);
            this.debounce = undefined;
        }
        if (this.watcher) {
            await this.watcher.close();
            this.watcher = undefined;
        }
    }
}
|
|
537
|
+
|
|
51
538
|
/**
|
|
52
539
|
* Best-effort base directory inference for a glob pattern.
|
|
53
540
|
*
|
|
@@ -641,7 +1128,12 @@ const inferenceRuleSchema = z.object({
|
|
|
641
1128
|
map: z
|
|
642
1129
|
.union([jsonMapMapSchema, z.string()])
|
|
643
1130
|
.optional()
|
|
644
|
-
.describe('JsonMap transformation (inline definition
|
|
1131
|
+
.describe('JsonMap transformation (inline definition, named map reference, or .json file path).'),
|
|
1132
|
+
/** Handlebars template (inline string, named ref, or .hbs/.handlebars file path). */
|
|
1133
|
+
template: z
|
|
1134
|
+
.string()
|
|
1135
|
+
.optional()
|
|
1136
|
+
.describe('Handlebars content template (inline string, named ref, or .hbs/.handlebars file path).'),
|
|
645
1137
|
});
|
|
646
1138
|
/**
|
|
647
1139
|
* Top-level configuration for jeeves-watcher.
|
|
@@ -679,6 +1171,22 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
679
1171
|
.record(z.string(), jsonMapMapSchema)
|
|
680
1172
|
.optional()
|
|
681
1173
|
.describe('Reusable named JsonMap transformations.'),
|
|
1174
|
+
/** Reusable named Handlebars templates (inline strings or .hbs/.handlebars file paths). */
|
|
1175
|
+
templates: z
|
|
1176
|
+
.record(z.string(), z.string())
|
|
1177
|
+
.optional()
|
|
1178
|
+
.describe('Named reusable Handlebars templates (inline strings or .hbs/.handlebars file paths).'),
|
|
1179
|
+
/** Custom Handlebars helper registration. */
|
|
1180
|
+
templateHelpers: z
|
|
1181
|
+
.object({
|
|
1182
|
+
/** File paths to custom helper modules. */
|
|
1183
|
+
paths: z
|
|
1184
|
+
.array(z.string())
|
|
1185
|
+
.optional()
|
|
1186
|
+
.describe('File paths to custom helper modules.'),
|
|
1187
|
+
})
|
|
1188
|
+
.optional()
|
|
1189
|
+
.describe('Custom Handlebars helper registration.'),
|
|
682
1190
|
/** Logging configuration. */
|
|
683
1191
|
logging: loggingConfigSchema.optional().describe('Logging configuration.'),
|
|
684
1192
|
/** Timeout in milliseconds for graceful shutdown. */
|
|
@@ -931,263 +1439,57 @@ function createGeminiProvider(config, logger) {
|
|
|
931
1439
|
if (attempt > 1) {
|
|
932
1440
|
log.warn({ attempt, provider: 'gemini', model: config.model }, 'Retrying embedding request');
|
|
933
1441
|
}
|
|
934
|
-
// embedDocuments returns vectors for multiple texts
|
|
935
|
-
return embedder.embedDocuments(texts);
|
|
936
|
-
}, {
|
|
937
|
-
attempts: 5,
|
|
938
|
-
baseDelayMs: 500,
|
|
939
|
-
maxDelayMs: 10_000,
|
|
940
|
-
jitter: 0.2,
|
|
941
|
-
onRetry: ({ attempt, delayMs, error }) => {
|
|
942
|
-
log.warn({
|
|
943
|
-
attempt,
|
|
944
|
-
delayMs,
|
|
945
|
-
provider: 'gemini',
|
|
946
|
-
model: config.model,
|
|
947
|
-
err: normalizeError(error),
|
|
948
|
-
}, 'Embedding call failed; will retry');
|
|
949
|
-
},
|
|
950
|
-
});
|
|
951
|
-
// Validate dimensions
|
|
952
|
-
for (const vector of vectors) {
|
|
953
|
-
if (vector.length !== dimensions) {
|
|
954
|
-
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
955
|
-
}
|
|
956
|
-
}
|
|
957
|
-
return vectors;
|
|
958
|
-
},
|
|
959
|
-
};
|
|
960
|
-
}
|
|
961
|
-
function createMockFromConfig(config) {
|
|
962
|
-
const dimensions = config.dimensions ?? 768;
|
|
963
|
-
return createMockProvider(dimensions);
|
|
964
|
-
}
|
|
965
|
-
const embeddingProviderRegistry = new Map([
|
|
966
|
-
['mock', createMockFromConfig],
|
|
967
|
-
['gemini', createGeminiProvider],
|
|
968
|
-
]);
|
|
969
|
-
/**
|
|
970
|
-
* Create an embedding provider based on the given configuration.
|
|
971
|
-
*
|
|
972
|
-
* Each provider is responsible for its own default dimensions.
|
|
973
|
-
*
|
|
974
|
-
* @param config - The embedding configuration.
|
|
975
|
-
* @param logger - Optional pino logger for retry warnings.
|
|
976
|
-
* @returns An {@link EmbeddingProvider} instance.
|
|
977
|
-
* @throws If the configured provider is not supported.
|
|
978
|
-
*/
|
|
979
|
-
function createEmbeddingProvider(config, logger) {
|
|
980
|
-
const factory = embeddingProviderRegistry.get(config.provider);
|
|
981
|
-
if (!factory) {
|
|
982
|
-
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
983
|
-
}
|
|
984
|
-
return factory(config, logger);
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
/**
|
|
988
|
-
* @module gitignore
|
|
989
|
-
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
990
|
-
*/
|
|
991
|
-
/**
|
|
992
|
-
* Find the git repo root by walking up from `startDir` looking for `.git/`.
|
|
993
|
-
* Returns `undefined` if no repo is found.
|
|
994
|
-
*/
|
|
995
|
-
function findRepoRoot(startDir) {
|
|
996
|
-
let dir = resolve(startDir);
|
|
997
|
-
const root = resolve('/');
|
|
998
|
-
while (dir !== root) {
|
|
999
|
-
if (existsSync(join(dir, '.git')) &&
|
|
1000
|
-
statSync(join(dir, '.git')).isDirectory()) {
|
|
1001
|
-
return dir;
|
|
1002
|
-
}
|
|
1003
|
-
const parent = dirname(dir);
|
|
1004
|
-
if (parent === dir)
|
|
1005
|
-
break;
|
|
1006
|
-
dir = parent;
|
|
1007
|
-
}
|
|
1008
|
-
return undefined;
|
|
1009
|
-
}
|
|
1010
|
-
/**
|
|
1011
|
-
* Convert a watch path (directory, file path, or glob) to a concrete directory
|
|
1012
|
-
* that can be scanned for a repo root.
|
|
1013
|
-
*/
|
|
1014
|
-
function watchPathToScanDir(watchPath) {
|
|
1015
|
-
const absPath = resolve(watchPath);
|
|
1016
|
-
try {
|
|
1017
|
-
return statSync(absPath).isDirectory() ? absPath : dirname(absPath);
|
|
1018
|
-
}
|
|
1019
|
-
catch {
|
|
1020
|
-
// ignore
|
|
1021
|
-
}
|
|
1022
|
-
// If this is a glob, fall back to the non-glob prefix.
|
|
1023
|
-
const globMatch = /[*?[{]/.exec(watchPath);
|
|
1024
|
-
if (!globMatch)
|
|
1025
|
-
return undefined;
|
|
1026
|
-
const prefix = watchPath.slice(0, globMatch.index);
|
|
1027
|
-
const trimmed = prefix.trim();
|
|
1028
|
-
const baseDir = trimmed.length === 0
|
|
1029
|
-
? '.'
|
|
1030
|
-
: trimmed.endsWith('/') || trimmed.endsWith('\\')
|
|
1031
|
-
? trimmed
|
|
1032
|
-
: dirname(trimmed);
|
|
1033
|
-
const resolved = resolve(baseDir);
|
|
1034
|
-
if (!existsSync(resolved))
|
|
1035
|
-
return undefined;
|
|
1036
|
-
return resolved;
|
|
1037
|
-
}
|
|
1038
|
-
/**
|
|
1039
|
-
* Recursively find all `.gitignore` files under `dir`.
|
|
1040
|
-
* Skips `.git` and `node_modules` directories for performance.
|
|
1041
|
-
*/
|
|
1042
|
-
function findGitignoreFiles(dir) {
|
|
1043
|
-
const results = [];
|
|
1044
|
-
const gitignorePath = join(dir, '.gitignore');
|
|
1045
|
-
if (existsSync(gitignorePath)) {
|
|
1046
|
-
results.push(gitignorePath);
|
|
1047
|
-
}
|
|
1048
|
-
let entries;
|
|
1049
|
-
try {
|
|
1050
|
-
entries = readdirSync(dir);
|
|
1051
|
-
}
|
|
1052
|
-
catch {
|
|
1053
|
-
return results;
|
|
1054
|
-
}
|
|
1055
|
-
for (const entry of entries) {
|
|
1056
|
-
if (entry === '.git' || entry === 'node_modules')
|
|
1057
|
-
continue;
|
|
1058
|
-
const fullPath = join(dir, entry);
|
|
1059
|
-
try {
|
|
1060
|
-
if (statSync(fullPath).isDirectory()) {
|
|
1061
|
-
results.push(...findGitignoreFiles(fullPath));
|
|
1062
|
-
}
|
|
1063
|
-
}
|
|
1064
|
-
catch {
|
|
1065
|
-
// Skip inaccessible entries
|
|
1066
|
-
}
|
|
1067
|
-
}
|
|
1068
|
-
return results;
|
|
1069
|
-
}
|
|
1070
|
-
/**
|
|
1071
|
-
* Parse a `.gitignore` file into an `ignore` instance.
|
|
1072
|
-
*/
|
|
1073
|
-
function parseGitignore(gitignorePath) {
|
|
1074
|
-
const content = readFileSync(gitignorePath, 'utf8');
|
|
1075
|
-
return ignore().add(content);
|
|
1076
|
-
}
|
|
1077
|
-
/**
|
|
1078
|
-
* Normalize a path to use forward slashes (required by `ignore` package).
|
|
1079
|
-
*/
|
|
1080
|
-
function toForwardSlash(p) {
|
|
1081
|
-
return p.replace(/\\/g, '/');
|
|
1082
|
-
}
|
|
1083
|
-
/**
|
|
1084
|
-
* Processor-level gitignore filter. Checks file paths against the nearest
|
|
1085
|
-
* `.gitignore` chain in git repositories.
|
|
1086
|
-
*/
|
|
1087
|
-
class GitignoreFilter {
|
|
1088
|
-
repos = new Map();
|
|
1089
|
-
/**
|
|
1090
|
-
* Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
|
|
1091
|
-
*
|
|
1092
|
-
* @param watchPaths - Absolute paths being watched (directories or globs resolved to roots).
|
|
1093
|
-
*/
|
|
1094
|
-
constructor(watchPaths) {
|
|
1095
|
-
this.scan(watchPaths);
|
|
1096
|
-
}
|
|
1097
|
-
/**
|
|
1098
|
-
* Scan paths for git repos and their `.gitignore` files.
|
|
1099
|
-
*/
|
|
1100
|
-
scan(watchPaths) {
|
|
1101
|
-
this.repos.clear();
|
|
1102
|
-
const scannedDirs = new Set();
|
|
1103
|
-
for (const watchPath of watchPaths) {
|
|
1104
|
-
const scanDir = watchPathToScanDir(watchPath);
|
|
1105
|
-
if (!scanDir)
|
|
1106
|
-
continue;
|
|
1107
|
-
if (scannedDirs.has(scanDir))
|
|
1108
|
-
continue;
|
|
1109
|
-
scannedDirs.add(scanDir);
|
|
1110
|
-
const repoRoot = findRepoRoot(scanDir);
|
|
1111
|
-
if (!repoRoot)
|
|
1112
|
-
continue;
|
|
1113
|
-
if (this.repos.has(repoRoot))
|
|
1114
|
-
continue;
|
|
1115
|
-
const gitignoreFiles = findGitignoreFiles(repoRoot);
|
|
1116
|
-
const entries = gitignoreFiles.map((gf) => ({
|
|
1117
|
-
dir: dirname(gf),
|
|
1118
|
-
ig: parseGitignore(gf),
|
|
1119
|
-
}));
|
|
1120
|
-
// Sort deepest-first so nested `.gitignore` files are checked first
|
|
1121
|
-
entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1122
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1123
|
-
}
|
|
1124
|
-
}
|
|
1125
|
-
/**
|
|
1126
|
-
* Check whether a file path is ignored by any applicable `.gitignore`.
|
|
1127
|
-
*
|
|
1128
|
-
* @param filePath - Absolute file path to check.
|
|
1129
|
-
* @returns `true` if the file should be ignored.
|
|
1130
|
-
*/
|
|
1131
|
-
isIgnored(filePath) {
|
|
1132
|
-
const absPath = resolve(filePath);
|
|
1133
|
-
for (const [, repo] of this.repos) {
|
|
1134
|
-
// Check if file is within this repo
|
|
1135
|
-
const relToRepo = relative(repo.root, absPath);
|
|
1136
|
-
if (relToRepo.startsWith('..') || relToRepo.startsWith(resolve('/'))) {
|
|
1137
|
-
continue;
|
|
1138
|
-
}
|
|
1139
|
-
// Check each `.gitignore` entry (deepest-first)
|
|
1140
|
-
for (const entry of repo.entries) {
|
|
1141
|
-
const relToEntry = relative(entry.dir, absPath);
|
|
1142
|
-
if (relToEntry.startsWith('..'))
|
|
1143
|
-
continue;
|
|
1144
|
-
const normalized = toForwardSlash(relToEntry);
|
|
1145
|
-
if (entry.ig.ignores(normalized)) {
|
|
1146
|
-
return true;
|
|
1147
|
-
}
|
|
1148
|
-
}
|
|
1149
|
-
}
|
|
1150
|
-
return false;
|
|
1151
|
-
}
|
|
1152
|
-
/**
|
|
1153
|
-
* Invalidate and re-parse a specific `.gitignore` file.
|
|
1154
|
-
* Call when a `.gitignore` file is added, changed, or removed.
|
|
1155
|
-
*
|
|
1156
|
-
* @param gitignorePath - Absolute path to the `.gitignore` file that changed.
|
|
1157
|
-
*/
|
|
1158
|
-
invalidate(gitignorePath) {
|
|
1159
|
-
const absPath = resolve(gitignorePath);
|
|
1160
|
-
const gitignoreDir = dirname(absPath);
|
|
1161
|
-
for (const [, repo] of this.repos) {
|
|
1162
|
-
const relToRepo = relative(repo.root, gitignoreDir);
|
|
1163
|
-
if (relToRepo.startsWith('..'))
|
|
1164
|
-
continue;
|
|
1165
|
-
// Remove old entry for this directory
|
|
1166
|
-
repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
|
|
1167
|
-
// Re-parse if file still exists
|
|
1168
|
-
if (existsSync(absPath)) {
|
|
1169
|
-
repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
|
|
1170
|
-
// Re-sort deepest-first
|
|
1171
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1172
|
-
}
|
|
1173
|
-
return;
|
|
1174
|
-
}
|
|
1175
|
-
// If not in any known repo, check if it's in a repo we haven't scanned
|
|
1176
|
-
const repoRoot = findRepoRoot(gitignoreDir);
|
|
1177
|
-
if (repoRoot && existsSync(absPath)) {
|
|
1178
|
-
const entries = [
|
|
1179
|
-
{ dir: gitignoreDir, ig: parseGitignore(absPath) },
|
|
1180
|
-
];
|
|
1181
|
-
if (this.repos.has(repoRoot)) {
|
|
1182
|
-
const repo = this.repos.get(repoRoot);
|
|
1183
|
-
repo.entries.push(entries[0]);
|
|
1184
|
-
repo.entries.sort((a, b) => b.dir.length - a.dir.length);
|
|
1185
|
-
}
|
|
1186
|
-
else {
|
|
1187
|
-
this.repos.set(repoRoot, { root: repoRoot, entries });
|
|
1442
|
+
// embedDocuments returns vectors for multiple texts
|
|
1443
|
+
return embedder.embedDocuments(texts);
|
|
1444
|
+
}, {
|
|
1445
|
+
attempts: 5,
|
|
1446
|
+
baseDelayMs: 500,
|
|
1447
|
+
maxDelayMs: 10_000,
|
|
1448
|
+
jitter: 0.2,
|
|
1449
|
+
onRetry: ({ attempt, delayMs, error }) => {
|
|
1450
|
+
log.warn({
|
|
1451
|
+
attempt,
|
|
1452
|
+
delayMs,
|
|
1453
|
+
provider: 'gemini',
|
|
1454
|
+
model: config.model,
|
|
1455
|
+
err: normalizeError(error),
|
|
1456
|
+
}, 'Embedding call failed; will retry');
|
|
1457
|
+
},
|
|
1458
|
+
});
|
|
1459
|
+
// Validate dimensions
|
|
1460
|
+
for (const vector of vectors) {
|
|
1461
|
+
if (vector.length !== dimensions) {
|
|
1462
|
+
throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(dimensions)}, got ${String(vector.length)}`);
|
|
1463
|
+
}
|
|
1188
1464
|
}
|
|
1189
|
-
|
|
1465
|
+
return vectors;
|
|
1466
|
+
},
|
|
1467
|
+
};
|
|
1468
|
+
}
|
|
1469
|
+
function createMockFromConfig(config) {
|
|
1470
|
+
const dimensions = config.dimensions ?? 768;
|
|
1471
|
+
return createMockProvider(dimensions);
|
|
1472
|
+
}
|
|
1473
|
+
const embeddingProviderRegistry = new Map([
|
|
1474
|
+
['mock', createMockFromConfig],
|
|
1475
|
+
['gemini', createGeminiProvider],
|
|
1476
|
+
]);
|
|
1477
|
+
/**
|
|
1478
|
+
* Create an embedding provider based on the given configuration.
|
|
1479
|
+
*
|
|
1480
|
+
* Each provider is responsible for its own default dimensions.
|
|
1481
|
+
*
|
|
1482
|
+
* @param config - The embedding configuration.
|
|
1483
|
+
* @param logger - Optional pino logger for retry warnings.
|
|
1484
|
+
* @returns An {@link EmbeddingProvider} instance.
|
|
1485
|
+
* @throws If the configured provider is not supported.
|
|
1486
|
+
*/
|
|
1487
|
+
function createEmbeddingProvider(config, logger) {
|
|
1488
|
+
const factory = embeddingProviderRegistry.get(config.provider);
|
|
1489
|
+
if (!factory) {
|
|
1490
|
+
throw new Error(`Unsupported embedding provider: ${config.provider}`);
|
|
1190
1491
|
}
|
|
1492
|
+
return factory(config, logger);
|
|
1191
1493
|
}
|
|
1192
1494
|
|
|
1193
1495
|
/**
|
|
@@ -1419,7 +1721,7 @@ function createJsonMapLib() {
|
|
|
1419
1721
|
};
|
|
1420
1722
|
}
|
|
1421
1723
|
/**
|
|
1422
|
-
* Apply compiled inference rules to file attributes, returning merged metadata.
|
|
1724
|
+
* Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
|
|
1423
1725
|
*
|
|
1424
1726
|
* Rules are evaluated in order; later rules override earlier ones.
|
|
1425
1727
|
* If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
|
|
@@ -1429,15 +1731,18 @@ function createJsonMapLib() {
|
|
|
1429
1731
|
* @param attributes - The file attributes to match against.
|
|
1430
1732
|
* @param namedMaps - Optional record of named JsonMap definitions.
|
|
1431
1733
|
* @param logger - Optional logger for warnings (falls back to console.warn).
|
|
1432
|
-
* @
|
|
1734
|
+
* @param templateEngine - Optional template engine for rendering content templates.
|
|
1735
|
+
* @param configDir - Optional config directory for resolving .json map file paths.
|
|
1736
|
+
* @returns The merged metadata and optional rendered content.
|
|
1433
1737
|
*/
|
|
1434
|
-
async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
1738
|
+
async function applyRules(compiledRules, attributes, namedMaps, logger, templateEngine, configDir) {
|
|
1435
1739
|
// JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
|
|
1436
1740
|
// Our helper functions accept multiple args, which JsonMap supports at runtime.
|
|
1437
1741
|
const lib = createJsonMapLib();
|
|
1438
1742
|
let merged = {};
|
|
1743
|
+
let renderedContent = null;
|
|
1439
1744
|
const log = logger ?? console;
|
|
1440
|
-
for (const { rule, validate } of compiledRules) {
|
|
1745
|
+
for (const [ruleIndex, { rule, validate }] of compiledRules.entries()) {
|
|
1441
1746
|
if (validate(attributes)) {
|
|
1442
1747
|
// Apply set resolution
|
|
1443
1748
|
const setOutput = resolveSet(rule.set, attributes);
|
|
@@ -1447,10 +1752,24 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1447
1752
|
let mapDef;
|
|
1448
1753
|
// Resolve map reference
|
|
1449
1754
|
if (typeof rule.map === 'string') {
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1755
|
+
if (rule.map.endsWith('.json') && configDir) {
|
|
1756
|
+
// File path: load from .json file
|
|
1757
|
+
try {
|
|
1758
|
+
const mapPath = resolve(configDir, rule.map);
|
|
1759
|
+
const raw = readFileSync(mapPath, 'utf-8');
|
|
1760
|
+
mapDef = JSON.parse(raw);
|
|
1761
|
+
}
|
|
1762
|
+
catch (error) {
|
|
1763
|
+
log.warn(`Failed to load map file "${rule.map}": ${error instanceof Error ? error.message : String(error)}`);
|
|
1764
|
+
continue;
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
else {
|
|
1768
|
+
mapDef = namedMaps?.[rule.map];
|
|
1769
|
+
if (!mapDef) {
|
|
1770
|
+
log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
|
|
1771
|
+
continue;
|
|
1772
|
+
}
|
|
1454
1773
|
}
|
|
1455
1774
|
}
|
|
1456
1775
|
else {
|
|
@@ -1473,9 +1792,31 @@ async function applyRules(compiledRules, attributes, namedMaps, logger) {
|
|
|
1473
1792
|
log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1474
1793
|
}
|
|
1475
1794
|
}
|
|
1795
|
+
// Render template if present
|
|
1796
|
+
if (rule.template && templateEngine) {
|
|
1797
|
+
const templateKey = `rule-${String(ruleIndex)}`;
|
|
1798
|
+
// Build template context: attributes (with json spread at top) + map output
|
|
1799
|
+
const context = {
|
|
1800
|
+
...(attributes.json ?? {}),
|
|
1801
|
+
...attributes,
|
|
1802
|
+
...merged,
|
|
1803
|
+
};
|
|
1804
|
+
try {
|
|
1805
|
+
const result = templateEngine.render(templateKey, context);
|
|
1806
|
+
if (result && result.trim()) {
|
|
1807
|
+
renderedContent = result;
|
|
1808
|
+
}
|
|
1809
|
+
else {
|
|
1810
|
+
log.warn(`Template for rule ${String(ruleIndex)} rendered empty output. Falling back to raw content.`);
|
|
1811
|
+
}
|
|
1812
|
+
}
|
|
1813
|
+
catch (error) {
|
|
1814
|
+
log.warn(`Template render failed for rule ${String(ruleIndex)}: ${error instanceof Error ? error.message : String(error)}. Falling back to raw content.`);
|
|
1815
|
+
}
|
|
1816
|
+
}
|
|
1476
1817
|
}
|
|
1477
1818
|
}
|
|
1478
|
-
return merged;
|
|
1819
|
+
return { metadata: merged, renderedContent };
|
|
1479
1820
|
}
|
|
1480
1821
|
|
|
1481
1822
|
/**
|
|
@@ -1564,23 +1905,32 @@ function compileRules(rules) {
|
|
|
1564
1905
|
* @param metadataDir - The metadata directory for enrichment files.
|
|
1565
1906
|
* @param maps - Optional named JsonMap definitions.
|
|
1566
1907
|
* @param logger - Optional logger for rule warnings.
|
|
1908
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1909
|
+
* @param configDir - Optional config directory for resolving file paths.
|
|
1567
1910
|
* @returns The merged metadata and intermediate data.
|
|
1568
1911
|
*/
|
|
1569
|
-
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
|
|
1912
|
+
async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir) {
|
|
1570
1913
|
const ext = extname(filePath);
|
|
1571
1914
|
const stats = await stat(filePath);
|
|
1572
1915
|
// 1. Extract text and structured data
|
|
1573
1916
|
const extracted = await extractText(filePath, ext);
|
|
1574
1917
|
// 2. Build attributes + apply rules
|
|
1575
1918
|
const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
|
|
1576
|
-
const inferred = await applyRules(compiledRules, attributes, maps, logger);
|
|
1919
|
+
const { metadata: inferred, renderedContent } = await applyRules(compiledRules, attributes, maps, logger, templateEngine, configDir);
|
|
1577
1920
|
// 3. Read enrichment metadata (merge, enrichment wins)
|
|
1578
1921
|
const enrichment = await readMetadata(filePath, metadataDir);
|
|
1579
1922
|
const metadata = {
|
|
1580
1923
|
...inferred,
|
|
1581
1924
|
...(enrichment ?? {}),
|
|
1582
1925
|
};
|
|
1583
|
-
return {
|
|
1926
|
+
return {
|
|
1927
|
+
inferred,
|
|
1928
|
+
enrichment,
|
|
1929
|
+
metadata,
|
|
1930
|
+
attributes,
|
|
1931
|
+
extracted,
|
|
1932
|
+
renderedContent,
|
|
1933
|
+
};
|
|
1584
1934
|
}
|
|
1585
1935
|
|
|
1586
1936
|
/**
|
|
@@ -1651,6 +2001,7 @@ class DocumentProcessor {
|
|
|
1651
2001
|
vectorStore;
|
|
1652
2002
|
compiledRules;
|
|
1653
2003
|
logger;
|
|
2004
|
+
templateEngine;
|
|
1654
2005
|
/**
|
|
1655
2006
|
* Create a new DocumentProcessor.
|
|
1656
2007
|
*
|
|
@@ -1659,13 +2010,15 @@ class DocumentProcessor {
|
|
|
1659
2010
|
* @param vectorStore - The vector store client.
|
|
1660
2011
|
* @param compiledRules - The compiled inference rules.
|
|
1661
2012
|
* @param logger - The logger instance.
|
|
2013
|
+
* @param templateEngine - Optional template engine for content templates.
|
|
1662
2014
|
*/
|
|
1663
|
-
constructor(config, embeddingProvider, vectorStore, compiledRules, logger) {
|
|
2015
|
+
constructor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) {
|
|
1664
2016
|
this.config = config;
|
|
1665
2017
|
this.embeddingProvider = embeddingProvider;
|
|
1666
2018
|
this.vectorStore = vectorStore;
|
|
1667
2019
|
this.compiledRules = compiledRules;
|
|
1668
2020
|
this.logger = logger;
|
|
2021
|
+
this.templateEngine = templateEngine;
|
|
1669
2022
|
}
|
|
1670
2023
|
/**
|
|
1671
2024
|
* Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
|
|
@@ -1676,13 +2029,15 @@ class DocumentProcessor {
|
|
|
1676
2029
|
try {
|
|
1677
2030
|
const ext = extname(filePath);
|
|
1678
2031
|
// 1. Build merged metadata + extract text
|
|
1679
|
-
const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
1680
|
-
if
|
|
2032
|
+
const { metadata, extracted, renderedContent } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
2033
|
+
// Use rendered template content if available, otherwise raw extracted text
|
|
2034
|
+
const textToEmbed = renderedContent ?? extracted.text;
|
|
2035
|
+
if (!textToEmbed.trim()) {
|
|
1681
2036
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
1682
2037
|
return;
|
|
1683
2038
|
}
|
|
1684
2039
|
// 2. Content hash check — skip if unchanged
|
|
1685
|
-
const hash = contentHash(
|
|
2040
|
+
const hash = contentHash(textToEmbed);
|
|
1686
2041
|
const baseId = pointId(filePath, 0);
|
|
1687
2042
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
1688
2043
|
if (existingPayload && existingPayload['content_hash'] === hash) {
|
|
@@ -1694,7 +2049,7 @@ class DocumentProcessor {
|
|
|
1694
2049
|
const chunkSize = this.config.chunkSize ?? 1000;
|
|
1695
2050
|
const chunkOverlap = this.config.chunkOverlap ?? 200;
|
|
1696
2051
|
const splitter = createSplitter(ext, chunkSize, chunkOverlap);
|
|
1697
|
-
const chunks = await splitter.splitText(
|
|
2052
|
+
const chunks = await splitter.splitText(textToEmbed);
|
|
1698
2053
|
// 4. Embed all chunks
|
|
1699
2054
|
const vectors = await this.embeddingProvider.embed(chunks);
|
|
1700
2055
|
// 5. Upsert all chunk points
|
|
@@ -1788,7 +2143,7 @@ class DocumentProcessor {
|
|
|
1788
2143
|
return null;
|
|
1789
2144
|
}
|
|
1790
2145
|
// Build merged metadata (lightweight — no embedding)
|
|
1791
|
-
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
|
|
2146
|
+
const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger, this.templateEngine, this.config.configDir);
|
|
1792
2147
|
// Update all chunk payloads
|
|
1793
2148
|
const totalChunks = getChunkCount(existingPayload);
|
|
1794
2149
|
const ids = chunkIds(filePath, totalChunks);
|
|
@@ -1806,8 +2161,17 @@ class DocumentProcessor {
|
|
|
1806
2161
|
*
|
|
1807
2162
|
* @param compiledRules - The newly compiled rules.
|
|
1808
2163
|
*/
|
|
1809
|
-
|
|
2164
|
+
/**
|
|
2165
|
+
* Update compiled inference rules and optionally the template engine.
|
|
2166
|
+
*
|
|
2167
|
+
* @param compiledRules - The newly compiled rules.
|
|
2168
|
+
* @param templateEngine - Optional updated template engine.
|
|
2169
|
+
*/
|
|
2170
|
+
updateRules(compiledRules, templateEngine) {
|
|
1810
2171
|
this.compiledRules = compiledRules;
|
|
2172
|
+
if (templateEngine) {
|
|
2173
|
+
this.templateEngine = templateEngine;
|
|
2174
|
+
}
|
|
1811
2175
|
this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
|
|
1812
2176
|
}
|
|
1813
2177
|
}
|
|
@@ -2336,6 +2700,76 @@ class SystemHealth {
|
|
|
2336
2700
|
}
|
|
2337
2701
|
}
|
|
2338
2702
|
|
|
2703
|
+
/**
|
|
2704
|
+
* @module watcher/globToDir
|
|
2705
|
+
* Adapts glob-based watch config to chokidar v4+, which removed glob support
|
|
2706
|
+
* (see paulmillr/chokidar#1350). Chokidar v4 treats glob patterns as literal
|
|
2707
|
+
* strings, silently producing zero events. This module extracts static directory
|
|
2708
|
+
* roots from glob patterns for chokidar to watch, then filters emitted events
|
|
2709
|
+
* against the original globs via picomatch.
|
|
2710
|
+
*/
|
|
2711
|
+
/**
|
|
2712
|
+
* Extract the static directory root from a glob pattern.
|
|
2713
|
+
* Stops at the first segment containing glob characters (`*`, `{`, `?`, `[`).
|
|
2714
|
+
*
|
|
2715
|
+
* @param glob - A glob pattern (e.g., `j:/domains/**\/*.json`).
|
|
2716
|
+
* @returns The static directory prefix (e.g., `j:/domains`).
|
|
2717
|
+
*/
|
|
2718
|
+
function globRoot(glob) {
|
|
2719
|
+
const normalized = glob.replace(/\\/g, '/');
|
|
2720
|
+
const segments = normalized.split('/');
|
|
2721
|
+
const staticSegments = [];
|
|
2722
|
+
for (const seg of segments) {
|
|
2723
|
+
if (/[*?{[\]]/.test(seg))
|
|
2724
|
+
break;
|
|
2725
|
+
staticSegments.push(seg);
|
|
2726
|
+
}
|
|
2727
|
+
return staticSegments.join('/') || '.';
|
|
2728
|
+
}
|
|
2729
|
+
/**
|
|
2730
|
+
* Deduplicate directory roots, removing paths that are subdirectories of others.
|
|
2731
|
+
*
|
|
2732
|
+
* @param roots - Array of directory paths.
|
|
2733
|
+
* @returns Deduplicated array with subdirectories removed.
|
|
2734
|
+
*/
|
|
2735
|
+
function deduplicateRoots(roots) {
|
|
2736
|
+
const normalized = roots.map((r) => r.replace(/\\/g, '/').toLowerCase());
|
|
2737
|
+
const sorted = [...new Set(normalized)].sort();
|
|
2738
|
+
return sorted.filter((root, _i, arr) => {
|
|
2739
|
+
const withSlash = root.endsWith('/') ? root : root + '/';
|
|
2740
|
+
return !arr.some((other) => other !== root && withSlash.startsWith(other + '/'));
|
|
2741
|
+
});
|
|
2742
|
+
}
|
|
2743
|
+
/**
|
|
2744
|
+
* Build a picomatch matcher from an array of glob patterns.
|
|
2745
|
+
* Normalizes Windows paths (backslash → forward slash, lowercase drive letter)
|
|
2746
|
+
* before matching.
|
|
2747
|
+
*
|
|
2748
|
+
* @param globs - Glob patterns to match against.
|
|
2749
|
+
* @returns A function that tests whether a file path matches any of the globs.
|
|
2750
|
+
*/
|
|
2751
|
+
function buildGlobMatcher(globs) {
|
|
2752
|
+
const normalizedGlobs = globs.map((g) => g.replace(/\\/g, '/'));
|
|
2753
|
+
const isMatch = picomatch(normalizedGlobs, { dot: true, nocase: true });
|
|
2754
|
+
return (filePath) => {
|
|
2755
|
+
const normalized = filePath.replace(/\\/g, '/');
|
|
2756
|
+
return isMatch(normalized);
|
|
2757
|
+
};
|
|
2758
|
+
}
|
|
2759
|
+
/**
|
|
2760
|
+
* Convert an array of glob patterns into chokidar-compatible directory roots
|
|
2761
|
+
* and a filter function for post-hoc event filtering.
|
|
2762
|
+
*
|
|
2763
|
+
* @param globs - Glob patterns from the watch config.
|
|
2764
|
+
* @returns Object with `roots` (directories for chokidar) and `matches` (filter function).
|
|
2765
|
+
*/
|
|
2766
|
+
function resolveWatchPaths(globs) {
|
|
2767
|
+
const rawRoots = globs.map(globRoot);
|
|
2768
|
+
const roots = deduplicateRoots(rawRoots);
|
|
2769
|
+
const matches = buildGlobMatcher(globs);
|
|
2770
|
+
return { roots, matches };
|
|
2771
|
+
}
|
|
2772
|
+
|
|
2339
2773
|
/**
|
|
2340
2774
|
* @module watcher
|
|
2341
2775
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -2350,6 +2784,7 @@ class FileSystemWatcher {
|
|
|
2350
2784
|
logger;
|
|
2351
2785
|
health;
|
|
2352
2786
|
gitignoreFilter;
|
|
2787
|
+
globMatches;
|
|
2353
2788
|
watcher;
|
|
2354
2789
|
/**
|
|
2355
2790
|
* Create a new FileSystemWatcher.
|
|
@@ -2366,6 +2801,7 @@ class FileSystemWatcher {
|
|
|
2366
2801
|
this.processor = processor;
|
|
2367
2802
|
this.logger = logger;
|
|
2368
2803
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
2804
|
+
this.globMatches = () => true;
|
|
2369
2805
|
const healthOptions = {
|
|
2370
2806
|
maxRetries: options.maxRetries,
|
|
2371
2807
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2378,7 +2814,13 @@ class FileSystemWatcher {
|
|
|
2378
2814
|
* Start watching the filesystem and processing events.
|
|
2379
2815
|
*/
|
|
2380
2816
|
start() {
|
|
2381
|
-
|
|
2817
|
+
// Chokidar v4+ removed glob support (paulmillr/chokidar#1350).
|
|
2818
|
+
// Glob patterns are silently treated as literal strings, producing zero
|
|
2819
|
+
// events. We extract static directory roots for chokidar to watch, then
|
|
2820
|
+
// filter emitted events against the original globs via picomatch.
|
|
2821
|
+
const { roots, matches } = resolveWatchPaths(this.config.paths);
|
|
2822
|
+
this.globMatches = matches;
|
|
2823
|
+
this.watcher = chokidar.watch(roots, {
|
|
2382
2824
|
ignored: this.config.ignored,
|
|
2383
2825
|
usePolling: this.config.usePolling,
|
|
2384
2826
|
interval: this.config.pollIntervalMs,
|
|
@@ -2389,6 +2831,8 @@ class FileSystemWatcher {
|
|
|
2389
2831
|
});
|
|
2390
2832
|
this.watcher.on('add', (path) => {
|
|
2391
2833
|
this.handleGitignoreChange(path);
|
|
2834
|
+
if (!this.globMatches(path))
|
|
2835
|
+
return;
|
|
2392
2836
|
if (this.isGitignored(path))
|
|
2393
2837
|
return;
|
|
2394
2838
|
this.logger.debug({ path }, 'File added');
|
|
@@ -2396,6 +2840,8 @@ class FileSystemWatcher {
|
|
|
2396
2840
|
});
|
|
2397
2841
|
this.watcher.on('change', (path) => {
|
|
2398
2842
|
this.handleGitignoreChange(path);
|
|
2843
|
+
if (!this.globMatches(path))
|
|
2844
|
+
return;
|
|
2399
2845
|
if (this.isGitignored(path))
|
|
2400
2846
|
return;
|
|
2401
2847
|
this.logger.debug({ path }, 'File changed');
|
|
@@ -2403,6 +2849,8 @@ class FileSystemWatcher {
|
|
|
2403
2849
|
});
|
|
2404
2850
|
this.watcher.on('unlink', (path) => {
|
|
2405
2851
|
this.handleGitignoreChange(path);
|
|
2852
|
+
if (!this.globMatches(path))
|
|
2853
|
+
return;
|
|
2406
2854
|
if (this.isGitignored(path))
|
|
2407
2855
|
return;
|
|
2408
2856
|
this.logger.debug({ path }, 'File removed');
|
|
@@ -2475,51 +2923,21 @@ class FileSystemWatcher {
|
|
|
2475
2923
|
}
|
|
2476
2924
|
|
|
2477
2925
|
/**
|
|
2478
|
-
* @module app/
|
|
2479
|
-
*
|
|
2480
|
-
*/
|
|
2481
|
-
/**
|
|
2482
|
-
* Debounced config file watcher.
|
|
2926
|
+
* @module app/factories
|
|
2927
|
+
* Component factory interfaces and defaults for {@link JeevesWatcher}. Override in tests to inject mocks.
|
|
2483
2928
|
*/
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
|
|
2495
|
-
|
|
2496
|
-
});
|
|
2497
|
-
this.watcher.on('change', () => {
|
|
2498
|
-
if (this.debounce)
|
|
2499
|
-
clearTimeout(this.debounce);
|
|
2500
|
-
this.debounce = setTimeout(() => {
|
|
2501
|
-
void this.options.onChange();
|
|
2502
|
-
}, this.options.debounceMs);
|
|
2503
|
-
});
|
|
2504
|
-
this.watcher.on('error', (error) => {
|
|
2505
|
-
this.options.logger.error({ err: normalizeError(error) }, 'Config watcher error');
|
|
2506
|
-
});
|
|
2507
|
-
this.options.logger.info({
|
|
2508
|
-
configPath: this.options.configPath,
|
|
2509
|
-
debounceMs: this.options.debounceMs,
|
|
2510
|
-
}, 'Config watcher started');
|
|
2511
|
-
}
|
|
2512
|
-
async stop() {
|
|
2513
|
-
if (this.debounce) {
|
|
2514
|
-
clearTimeout(this.debounce);
|
|
2515
|
-
this.debounce = undefined;
|
|
2516
|
-
}
|
|
2517
|
-
if (this.watcher) {
|
|
2518
|
-
await this.watcher.close();
|
|
2519
|
-
this.watcher = undefined;
|
|
2520
|
-
}
|
|
2521
|
-
}
|
|
2522
|
-
}
|
|
2929
|
+
/** Default component factories wiring real implementations. */
|
|
2930
|
+
const defaultFactories = {
|
|
2931
|
+
loadConfig,
|
|
2932
|
+
createLogger,
|
|
2933
|
+
createEmbeddingProvider,
|
|
2934
|
+
createVectorStoreClient: (config, dimensions, logger) => new VectorStoreClient(config, dimensions, logger),
|
|
2935
|
+
compileRules,
|
|
2936
|
+
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine),
|
|
2937
|
+
createEventQueue: (options) => new EventQueue(options),
|
|
2938
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2939
|
+
createApiServer,
|
|
2940
|
+
};
|
|
2523
2941
|
|
|
2524
2942
|
/**
|
|
2525
2943
|
* @module app/shutdown
|
|
@@ -2539,17 +2957,28 @@ function installShutdownHandlers(stop) {
|
|
|
2539
2957
|
process.on('SIGINT', () => void shutdown());
|
|
2540
2958
|
}
|
|
2541
2959
|
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2960
|
+
/**
|
|
2961
|
+
* @module app/startFromConfig
|
|
2962
|
+
* Convenience entry point: loads config from disk and starts a {@link JeevesWatcher}.
|
|
2963
|
+
*/
|
|
2964
|
+
/**
|
|
2965
|
+
* Create and start a JeevesWatcher from a config file path.
|
|
2966
|
+
*
|
|
2967
|
+
* @param configPath - Optional path to the configuration file.
|
|
2968
|
+
* @returns The running JeevesWatcher instance.
|
|
2969
|
+
*/
|
|
2970
|
+
async function startFromConfig(configPath) {
|
|
2971
|
+
const config = await loadConfig(configPath);
|
|
2972
|
+
const app = new JeevesWatcher(config, configPath);
|
|
2973
|
+
installShutdownHandlers(() => app.stop());
|
|
2974
|
+
await app.start();
|
|
2975
|
+
return app;
|
|
2976
|
+
}
|
|
2977
|
+
|
|
2978
|
+
/**
|
|
2979
|
+
* @module app
|
|
2980
|
+
* Main application orchestrator. Wires components, manages lifecycle (start/stop/reload).
|
|
2981
|
+
*/
|
|
2553
2982
|
/**
|
|
2554
2983
|
* Main application class that wires together all components.
|
|
2555
2984
|
*/
|
|
@@ -2584,56 +3013,26 @@ class JeevesWatcher {
|
|
|
2584
3013
|
async start() {
|
|
2585
3014
|
const logger = this.factories.createLogger(this.config.logging);
|
|
2586
3015
|
this.logger = logger;
|
|
2587
|
-
|
|
2588
|
-
try {
|
|
2589
|
-
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
2590
|
-
}
|
|
2591
|
-
catch (error) {
|
|
2592
|
-
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
2593
|
-
throw error;
|
|
2594
|
-
}
|
|
2595
|
-
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
2596
|
-
await vectorStore.ensureCollection();
|
|
3016
|
+
const { embeddingProvider, vectorStore } = await this.initEmbeddingAndStore(logger);
|
|
2597
3017
|
const compiledRules = this.factories.compileRules(this.config.inferenceRules ?? []);
|
|
2598
|
-
const
|
|
3018
|
+
const configDir = this.configPath ? dirname(this.configPath) : '.';
|
|
3019
|
+
const templateEngine = await buildTemplateEngine(this.config.inferenceRules ?? [], this.config.templates, this.config.templateHelpers?.paths, configDir);
|
|
3020
|
+
const processor = this.factories.createDocumentProcessor({
|
|
2599
3021
|
metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
|
|
2600
3022
|
chunkSize: this.config.embedding.chunkSize,
|
|
2601
3023
|
chunkOverlap: this.config.embedding.chunkOverlap,
|
|
2602
3024
|
maps: this.config.maps,
|
|
2603
|
-
|
|
2604
|
-
|
|
3025
|
+
configDir,
|
|
3026
|
+
}, embeddingProvider, vectorStore, compiledRules, logger, templateEngine);
|
|
2605
3027
|
this.processor = processor;
|
|
2606
|
-
|
|
3028
|
+
this.queue = this.factories.createEventQueue({
|
|
2607
3029
|
debounceMs: this.config.watch.debounceMs ?? 2000,
|
|
2608
3030
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
2609
3031
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2610
3032
|
});
|
|
2611
|
-
this.
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
? new GitignoreFilter(this.config.watch.paths)
|
|
2615
|
-
: undefined;
|
|
2616
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2617
|
-
maxRetries: this.config.maxRetries,
|
|
2618
|
-
maxBackoffMs: this.config.maxBackoffMs,
|
|
2619
|
-
onFatalError: this.runtimeOptions.onFatalError,
|
|
2620
|
-
gitignoreFilter,
|
|
2621
|
-
});
|
|
2622
|
-
this.watcher = watcher;
|
|
2623
|
-
const server = this.factories.createApiServer({
|
|
2624
|
-
processor,
|
|
2625
|
-
vectorStore,
|
|
2626
|
-
embeddingProvider,
|
|
2627
|
-
queue,
|
|
2628
|
-
config: this.config,
|
|
2629
|
-
logger,
|
|
2630
|
-
});
|
|
2631
|
-
this.server = server;
|
|
2632
|
-
await server.listen({
|
|
2633
|
-
host: this.config.api?.host ?? '127.0.0.1',
|
|
2634
|
-
port: this.config.api?.port ?? 3456,
|
|
2635
|
-
});
|
|
2636
|
-
watcher.start();
|
|
3033
|
+
this.watcher = this.createWatcher(this.queue, processor, logger);
|
|
3034
|
+
this.server = await this.startApiServer(processor, vectorStore, embeddingProvider, logger);
|
|
3035
|
+
this.watcher.start();
|
|
2637
3036
|
this.startConfigWatch();
|
|
2638
3037
|
logger.info('jeeves-watcher started');
|
|
2639
3038
|
}
|
|
@@ -2664,22 +3063,61 @@ class JeevesWatcher {
|
|
|
2664
3063
|
}
|
|
2665
3064
|
this.logger?.info('jeeves-watcher stopped');
|
|
2666
3065
|
}
|
|
3066
|
+
async initEmbeddingAndStore(logger) {
|
|
3067
|
+
let embeddingProvider;
|
|
3068
|
+
try {
|
|
3069
|
+
embeddingProvider = this.factories.createEmbeddingProvider(this.config.embedding, logger);
|
|
3070
|
+
}
|
|
3071
|
+
catch (error) {
|
|
3072
|
+
logger.fatal({ err: normalizeError(error) }, 'Failed to create embedding provider');
|
|
3073
|
+
throw error;
|
|
3074
|
+
}
|
|
3075
|
+
const vectorStore = this.factories.createVectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
|
|
3076
|
+
await vectorStore.ensureCollection();
|
|
3077
|
+
return { embeddingProvider, vectorStore };
|
|
3078
|
+
}
|
|
3079
|
+
createWatcher(queue, processor, logger) {
|
|
3080
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
3081
|
+
const gitignoreFilter = respectGitignore
|
|
3082
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
3083
|
+
: undefined;
|
|
3084
|
+
return this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
3085
|
+
maxRetries: this.config.maxRetries,
|
|
3086
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
3087
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
3088
|
+
gitignoreFilter,
|
|
3089
|
+
});
|
|
3090
|
+
}
|
|
3091
|
+
async startApiServer(processor, vectorStore, embeddingProvider, logger) {
|
|
3092
|
+
const server = this.factories.createApiServer({
|
|
3093
|
+
processor,
|
|
3094
|
+
vectorStore,
|
|
3095
|
+
embeddingProvider,
|
|
3096
|
+
queue: this.queue,
|
|
3097
|
+
config: this.config,
|
|
3098
|
+
logger,
|
|
3099
|
+
});
|
|
3100
|
+
await server.listen({
|
|
3101
|
+
host: this.config.api?.host ?? '127.0.0.1',
|
|
3102
|
+
port: this.config.api?.port ?? 3456,
|
|
3103
|
+
});
|
|
3104
|
+
return server;
|
|
3105
|
+
}
|
|
2667
3106
|
startConfigWatch() {
|
|
2668
3107
|
const logger = this.logger;
|
|
2669
3108
|
if (!logger)
|
|
2670
3109
|
return;
|
|
2671
3110
|
const enabled = this.config.configWatch?.enabled ?? true;
|
|
2672
|
-
if (!enabled)
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
3111
|
+
if (!enabled || !this.configPath) {
|
|
3112
|
+
if (!this.configPath) {
|
|
3113
|
+
logger.debug('Config watch enabled, but no config path was provided');
|
|
3114
|
+
}
|
|
2676
3115
|
return;
|
|
2677
3116
|
}
|
|
2678
|
-
const debounceMs = this.config.configWatch?.debounceMs ?? 10000;
|
|
2679
3117
|
this.configWatcher = new ConfigWatcher({
|
|
2680
3118
|
configPath: this.configPath,
|
|
2681
3119
|
enabled,
|
|
2682
|
-
debounceMs,
|
|
3120
|
+
debounceMs: this.config.configWatch?.debounceMs ?? 10000,
|
|
2683
3121
|
logger,
|
|
2684
3122
|
onChange: async () => this.reloadConfig(),
|
|
2685
3123
|
});
|
|
@@ -2701,7 +3139,9 @@ class JeevesWatcher {
|
|
|
2701
3139
|
const newConfig = await this.factories.loadConfig(this.configPath);
|
|
2702
3140
|
this.config = newConfig;
|
|
2703
3141
|
const compiledRules = this.factories.compileRules(newConfig.inferenceRules ?? []);
|
|
2704
|
-
|
|
3142
|
+
const reloadConfigDir = dirname(this.configPath);
|
|
3143
|
+
const newTemplateEngine = await buildTemplateEngine(newConfig.inferenceRules ?? [], newConfig.templates, newConfig.templateHelpers?.paths, reloadConfigDir);
|
|
3144
|
+
processor.updateRules(compiledRules, newTemplateEngine);
|
|
2705
3145
|
logger.info({ configPath: this.configPath, rules: compiledRules.length }, 'Config reloaded');
|
|
2706
3146
|
}
|
|
2707
3147
|
catch (error) {
|
|
@@ -2709,19 +3149,7 @@ class JeevesWatcher {
|
|
|
2709
3149
|
}
|
|
2710
3150
|
}
|
|
2711
3151
|
}
|
|
2712
|
-
|
|
2713
|
-
* Create and start a JeevesWatcher from a config file path.
|
|
2714
|
-
*
|
|
2715
|
-
* @param configPath - Optional path to the configuration file.
|
|
2716
|
-
* @returns The running JeevesWatcher instance.
|
|
2717
|
-
*/
|
|
2718
|
-
async function startFromConfig(configPath) {
|
|
2719
|
-
const config = await loadConfig(configPath);
|
|
2720
|
-
const app = new JeevesWatcher(config, configPath);
|
|
2721
|
-
installShutdownHandlers(() => app.stop());
|
|
2722
|
-
await app.start();
|
|
2723
|
-
return app;
|
|
2724
|
-
}
|
|
3152
|
+
// startFromConfig re-exported from ./startFromConfig
|
|
2725
3153
|
|
|
2726
3154
|
/**
|
|
2727
3155
|
* @module cli/jeeves-watcher/defaults
|