@karmaniverous/jeeves-watcher 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/config.schema.json +117 -81
- package/dist/cjs/index.js +422 -10
- package/dist/cli/jeeves-watcher/index.js +421 -11
- package/dist/index.d.ts +150 -5
- package/dist/index.iife.js +422 -12
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +422 -12
- package/package.json +2 -1
package/dist/cjs/index.js
CHANGED
|
@@ -10,6 +10,8 @@ var cosmiconfig = require('cosmiconfig');
|
|
|
10
10
|
var zod = require('zod');
|
|
11
11
|
var jsonmap = require('@karmaniverous/jsonmap');
|
|
12
12
|
var googleGenai = require('@langchain/google-genai');
|
|
13
|
+
var node_fs = require('node:fs');
|
|
14
|
+
var ignore = require('ignore');
|
|
13
15
|
var pino = require('pino');
|
|
14
16
|
var uuid = require('uuid');
|
|
15
17
|
var cheerio = require('cheerio');
|
|
@@ -454,6 +456,7 @@ const WATCH_DEFAULTS = {
|
|
|
454
456
|
stabilityThresholdMs: 500,
|
|
455
457
|
usePolling: false,
|
|
456
458
|
pollIntervalMs: 1000,
|
|
459
|
+
respectGitignore: true,
|
|
457
460
|
};
|
|
458
461
|
/** Default embedding configuration. */
|
|
459
462
|
const EMBEDDING_DEFAULTS = {
|
|
@@ -498,6 +501,11 @@ const watchConfigSchema = zod.z.object({
|
|
|
498
501
|
.number()
|
|
499
502
|
.optional()
|
|
500
503
|
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
504
|
+
/** Whether to respect .gitignore files when processing. */
|
|
505
|
+
respectGitignore: zod.z
|
|
506
|
+
.boolean()
|
|
507
|
+
.optional()
|
|
508
|
+
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
501
509
|
});
|
|
502
510
|
/**
|
|
503
511
|
* Configuration watch settings.
|
|
@@ -665,6 +673,16 @@ const jeevesWatcherConfigSchema = zod.z.object({
|
|
|
665
673
|
.number()
|
|
666
674
|
.optional()
|
|
667
675
|
.describe('Timeout in milliseconds for graceful shutdown.'),
|
|
676
|
+
/** Maximum consecutive system-level failures before triggering fatal error. Default: Infinity. */
|
|
677
|
+
maxRetries: zod.z
|
|
678
|
+
.number()
|
|
679
|
+
.optional()
|
|
680
|
+
.describe('Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.'),
|
|
681
|
+
/** Maximum backoff delay in milliseconds for system errors. Default: 60000. */
|
|
682
|
+
maxBackoffMs: zod.z
|
|
683
|
+
.number()
|
|
684
|
+
.optional()
|
|
685
|
+
.describe('Maximum backoff delay in milliseconds for system errors. Default: 60000.'),
|
|
668
686
|
});
|
|
669
687
|
|
|
670
688
|
/**
|
|
@@ -953,6 +971,212 @@ function createEmbeddingProvider(config, logger) {
|
|
|
953
971
|
return factory(config, logger);
|
|
954
972
|
}
|
|
955
973
|
|
|
974
|
+
/**
|
|
975
|
+
* @module gitignore
|
|
976
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
977
|
+
*/
|
|
978
|
+
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git/` directory.
 *
 * Only a `.git` entry that is a directory counts; a `.git` file is not treated as a repo marker.
 * The directory that `'/'` resolves to is never inspected, so it is never returned.
 *
 * @param startDir - Directory to start from (resolved to an absolute path).
 * @returns The absolute repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    const root = node_path.resolve('/');
    let dir = node_path.resolve(startDir);
    while (dir !== root) {
        let hasGitDir = false;
        try {
            // A single stat avoids the exists-then-stat race of checking twice.
            hasGitDir = node_fs.statSync(node_path.join(dir, '.git')).isDirectory();
        }
        catch {
            // Missing or unreadable `.git`: this level is not a repo root, keep climbing.
        }
        if (hasGitDir)
            return dir;
        const parent = node_path.dirname(dir);
        // `dirname` is a fixed point at a filesystem root (e.g. another drive on Windows).
        if (parent === dir)
            break;
        dir = parent;
    }
    return undefined;
}
|
|
997
|
+
/**
 * Map a watch path (directory, file path, or glob) onto a concrete directory
 * from which a repo-root search can start.
 *
 * An existing directory is returned as-is (absolute); an existing file yields
 * its parent directory. Otherwise the path is treated as a glob and the literal
 * prefix before the first glob character is used.
 *
 * @param watchPath - Watch path as configured.
 * @returns An absolute directory, or `undefined` when nothing usable exists.
 */
function watchPathToScanDir(watchPath) {
    const absolute = node_path.resolve(watchPath);
    let stats;
    try {
        stats = node_fs.statSync(absolute);
    }
    catch {
        // Not an existing path; it may still be a glob.
        stats = undefined;
    }
    if (stats) {
        return stats.isDirectory() ? absolute : node_path.dirname(absolute);
    }
    // If this is a glob, fall back to the non-glob prefix.
    const firstGlobChar = watchPath.search(/[*?[{]/);
    if (firstGlobChar === -1) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlobChar).trim();
    let baseDir;
    if (literalPrefix.length === 0) {
        baseDir = '.';
    }
    else if (['/', '\\'].some((separator) => literalPrefix.endsWith(separator))) {
        baseDir = literalPrefix;
    }
    else {
        // The prefix ends mid-segment (e.g. `src/fo*`): use its directory part.
        baseDir = node_path.dirname(literalPrefix);
    }
    const candidate = node_path.resolve(baseDir);
    return node_fs.existsSync(candidate) ? candidate : undefined;
}
|
|
1025
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 * Skips `.git` and `node_modules` directories for performance.
 *
 * Symlinked directories are still followed, but a directory whose real path is
 * already on the current descent is not entered again, so symlink cycles
 * terminate instead of recursing until the OS path limit. Only regular files
 * named `.gitignore` are reported (a directory with that name is skipped).
 *
 * @param dir - Directory to scan.
 * @param ancestors - Internal: real paths of the directories currently being descended.
 * @returns Paths of the `.gitignore` files found, each built by joining onto `dir`.
 */
function findGitignoreFiles(dir, ancestors = new Set()) {
    const results = [];
    let realDir;
    try {
        realDir = node_fs.realpathSync(dir);
    }
    catch {
        // Missing or inaccessible directory: nothing to report.
        return results;
    }
    if (ancestors.has(realDir))
        return results;
    ancestors.add(realDir);
    try {
        const gitignorePath = node_path.join(dir, '.gitignore');
        try {
            if (node_fs.statSync(gitignorePath).isFile())
                results.push(gitignorePath);
        }
        catch {
            // No readable `.gitignore` at this level.
        }
        let entries;
        try {
            entries = node_fs.readdirSync(dir);
        }
        catch {
            return results;
        }
        for (const entry of entries) {
            if (entry === '.git' || entry === 'node_modules')
                continue;
            const fullPath = node_path.join(dir, entry);
            let isDirectory = false;
            try {
                isDirectory = node_fs.statSync(fullPath).isDirectory();
            }
            catch {
                // Skip inaccessible entries
            }
            if (!isDirectory)
                continue;
            for (const found of findGitignoreFiles(fullPath, ancestors)) {
                results.push(found);
            }
        }
        return results;
    }
    finally {
        // Only the current descent matters; siblings may legitimately reach the same target.
        ancestors.delete(realDir);
    }
}
|
|
1057
|
+
/**
 * Parse a `.gitignore` file into an `ignore` instance.
 *
 * A leading UTF-8 byte-order mark is removed first; otherwise it would become
 * part of the first pattern line and that pattern would never match.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns An `ignore` instance loaded with the file's patterns.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseGitignore(gitignorePath) {
    const content = node_fs.readFileSync(gitignorePath, 'utf8');
    return ignore().add(content.replace(/^\uFEFF/, ''));
}
|
|
1064
|
+
/**
 * Rewrite every backslash in `p` as a forward slash, the separator the
 * `ignore` package expects in the paths it is asked about.
 *
 * @param p - Path string to convert.
 * @returns The same path with `/` in place of each `\`.
 */
function toForwardSlash(p) {
    const segments = p.split('\\');
    return segments.join('/');
}
|
|
1070
|
+
/**
 * Compute `targetPath` relative to `baseDir`.
 *
 * @returns The relative path (`''` when both are the same), or `undefined`
 * when `targetPath` lies outside `baseDir`. Names that merely begin with two
 * dots (e.g. `..cache`) are inside; a result that is still absolute (another
 * drive on Windows) is outside.
 */
function relativeWithin(baseDir, targetPath) {
    const rel = node_path.relative(baseDir, targetPath);
    const isOutside = rel === '..' ||
        rel.startsWith(`..${node_path.sep}`) ||
        node_path.isAbsolute(rel);
    return isOutside ? undefined : rel;
}
/**
 * Comparator ordering `.gitignore` entries by directory path length, longest first.
 * Among the ancestors of any one file, a longer path is a deeper directory.
 */
function compareDeepestFirst(a, b) {
    return b.dir.length - a.dir.length;
}
/**
 * Processor-level gitignore filter. Checks file paths against the nearest
 * `.gitignore` chain in git repositories.
 *
 * State: `repos` maps a repo root to `{ root, entries }`, where each entry is
 * `{ dir, ig }`: the directory holding a `.gitignore` and its parsed rules.
 */
class GitignoreFilter {
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Watched paths (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Scan paths for git repos and their `.gitignore` files.
     * Discards any previously cached state first.
     *
     * @param watchPaths - Watched paths (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            // Several watch paths may share a repo; scan each repo only once.
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: node_path.dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Sort deepest-first so nested `.gitignore` files are checked first
            entries.sort(compareDeepestFirst);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * Within a repo, entries are consulted deepest-first and the first one with
     * an opinion wins: a match ignores the file, while a negated (`!pattern`)
     * match in a nested `.gitignore` re-includes it despite rules higher up.
     *
     * @param filePath - File path to check (resolved to an absolute path).
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = node_path.resolve(filePath);
        for (const repo of this.repos.values()) {
            if (relativeWithin(repo.root, absPath) === undefined)
                continue;
            for (const entry of repo.entries) {
                const relToEntry = relativeWithin(entry.dir, absPath);
                // `undefined`: entry does not govern this path. `''`: the path is the
                // entry's own directory, and `ignore` rejects empty paths.
                if (!relToEntry)
                    continue;
                const verdict = entry.ig.test(toForwardSlash(relToEntry));
                if (verdict.ignored)
                    return true;
                // Explicitly re-included at this level; shallower rules of this repo no longer apply.
                if (verdict.unignored)
                    break;
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every known repo containing the file is refreshed. If no known repo
     * contains it but it sits in a git repo and still exists, that repo is
     * registered with this single entry.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = node_path.resolve(gitignorePath);
        const gitignoreDir = node_path.dirname(absPath);
        let matchedKnownRepo = false;
        for (const repo of this.repos.values()) {
            if (relativeWithin(repo.root, gitignoreDir) === undefined)
                continue;
            matchedKnownRepo = true;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            // Re-parse if file still exists
            if (node_fs.existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                // Re-sort deepest-first
                repo.entries.sort(compareDeepestFirst);
            }
        }
        if (matchedKnownRepo)
            return;
        // If not in any known repo, check if it's in a repo we haven't scanned
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && node_fs.existsSync(absPath)) {
            this.repos.set(repoRoot, {
                root: repoRoot,
                entries: [{ dir: gitignoreDir, ig: parseGitignore(absPath) }],
            });
        }
    }
}
|
|
1179
|
+
|
|
956
1180
|
/**
|
|
957
1181
|
* @module logger
|
|
958
1182
|
* Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
|
|
@@ -1069,11 +1293,11 @@ async function extractMarkdown(filePath) {
|
|
|
1069
1293
|
}
|
|
1070
1294
|
async function extractPlaintext(filePath) {
|
|
1071
1295
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1072
|
-
return { text: raw };
|
|
1296
|
+
return { text: raw.replace(/^\uFEFF/, '') };
|
|
1073
1297
|
}
|
|
1074
1298
|
async function extractJson(filePath) {
|
|
1075
1299
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1076
|
-
const parsed = JSON.parse(raw);
|
|
1300
|
+
const parsed = JSON.parse(raw.replace(/^\uFEFF/, ''));
|
|
1077
1301
|
const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
1078
1302
|
? parsed
|
|
1079
1303
|
: undefined;
|
|
@@ -1095,7 +1319,7 @@ async function extractDocx(filePath) {
|
|
|
1095
1319
|
}
|
|
1096
1320
|
async function extractHtml(filePath) {
|
|
1097
1321
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1098
|
-
const $ = cheerio__namespace.load(raw);
|
|
1322
|
+
const $ = cheerio__namespace.load(raw.replace(/^\uFEFF/, ''));
|
|
1099
1323
|
$('script, style').remove();
|
|
1100
1324
|
const text = $('body').text().trim() || $.text().trim();
|
|
1101
1325
|
return { text };
|
|
@@ -1925,6 +2149,112 @@ class VectorStoreClient {
|
|
|
1925
2149
|
}
|
|
1926
2150
|
}
|
|
1927
2151
|
|
|
2152
|
+
/**
|
|
2153
|
+
* @module health
|
|
2154
|
+
* Tracks consecutive system-level failures and applies exponential backoff.
|
|
2155
|
+
* Triggers fatal error callback when maxRetries is exceeded.
|
|
2156
|
+
*/
|
|
2157
|
+
/**
 * Tracks system health via consecutive failure count and exponential backoff.
 *
 * Options read by the constructor: `maxRetries` (default `Infinity`),
 * `maxBackoffMs` (default 60000), `baseDelayMs` (default 1000),
 * `onFatalError` (optional callback) and `logger`.
 */
class SystemHealth {
    consecutiveFailures = 0;
    maxRetries;
    maxBackoffMs;
    baseDelayMs;
    onFatalError;
    logger;
    constructor(options) {
        this.maxRetries = options.maxRetries ?? Number.POSITIVE_INFINITY;
        this.maxBackoffMs = options.maxBackoffMs ?? 60_000;
        this.baseDelayMs = options.baseDelayMs ?? 1000;
        this.onFatalError = options.onFatalError;
        this.logger = options.logger;
    }
    /**
     * Record a successful system operation. Resets the failure counter.
     */
    recordSuccess() {
        if (this.consecutiveFailures > 0) {
            this.logger.info({ previousFailures: this.consecutiveFailures }, 'System health recovered');
        }
        this.consecutiveFailures = 0;
    }
    /**
     * Record a system-level failure. Once the consecutive failure count reaches
     * `maxRetries`, triggers the fatal error path.
     *
     * @param error - The error that occurred.
     * @returns Whether the watcher should continue (false = fatal).
     * @throws The error itself (wrapped in an `Error` if it is not one) when the
     * limit is reached and no `onFatalError` callback was supplied.
     */
    recordFailure(error) {
        this.consecutiveFailures += 1;
        this.logger.error({
            consecutiveFailures: this.consecutiveFailures,
            maxRetries: this.maxRetries,
            err: normalizeError(error),
        }, 'System-level failure recorded');
        if (this.consecutiveFailures >= this.maxRetries) {
            this.logger.fatal({ consecutiveFailures: this.consecutiveFailures }, 'Maximum retries exceeded, triggering fatal error');
            if (this.onFatalError) {
                this.onFatalError(error);
                return false;
            }
            throw error instanceof Error
                ? error
                : new Error(`Fatal system error: ${String(error)}`);
        }
        return true;
    }
    /**
     * Compute the current backoff delay based on consecutive failures:
     * `baseDelayMs` doubled for each failure after the first, capped at `maxBackoffMs`.
     *
     * @returns Delay in milliseconds (0 when there are no failures).
     */
    get currentBackoffMs() {
        if (this.consecutiveFailures === 0)
            return 0;
        const exp = Math.max(0, this.consecutiveFailures - 1);
        return Math.min(this.maxBackoffMs, this.baseDelayMs * 2 ** exp);
    }
    /**
     * Sleep for the current backoff duration.
     *
     * @param signal - Optional abort signal; aborting rejects with `Error('Backoff aborted')`.
     */
    async backoff(signal) {
        // Timers take a 32-bit signed delay; a larger value would fire almost
        // immediately and defeat the backoff, so cap it at the largest safe value.
        const MAX_TIMER_DELAY_MS = 2_147_483_647;
        const delay = Math.min(this.currentBackoffMs, MAX_TIMER_DELAY_MS);
        if (delay <= 0)
            return;
        this.logger.warn({ delayMs: delay, consecutiveFailures: this.consecutiveFailures }, 'Backing off before next attempt');
        await new Promise((resolve, reject) => {
            const timer = setTimeout(() => {
                cleanup();
                resolve();
            }, delay);
            const onAbort = () => {
                cleanup();
                reject(new Error('Backoff aborted'));
            };
            // Releases both the timer and the abort listener, whichever path settles first.
            const cleanup = () => {
                clearTimeout(timer);
                if (signal)
                    signal.removeEventListener('abort', onAbort);
            };
            if (signal) {
                if (signal.aborted) {
                    onAbort();
                    return;
                }
                signal.addEventListener('abort', onAbort, { once: true });
            }
        });
    }
    /** Current consecutive failure count. */
    get failures() {
        return this.consecutiveFailures;
    }
}
|
|
2257
|
+
|
|
1928
2258
|
/**
|
|
1929
2259
|
* @module watcher
|
|
1930
2260
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -1937,6 +2267,8 @@ class FileSystemWatcher {
|
|
|
1937
2267
|
queue;
|
|
1938
2268
|
processor;
|
|
1939
2269
|
logger;
|
|
2270
|
+
health;
|
|
2271
|
+
gitignoreFilter;
|
|
1940
2272
|
watcher;
|
|
1941
2273
|
/**
|
|
1942
2274
|
* Create a new FileSystemWatcher.
|
|
@@ -1945,12 +2277,21 @@ class FileSystemWatcher {
|
|
|
1945
2277
|
* @param queue - The event queue.
|
|
1946
2278
|
* @param processor - The document processor.
|
|
1947
2279
|
* @param logger - The logger instance.
|
|
2280
|
+
* @param options - Optional health/fatal error options.
|
|
1948
2281
|
*/
|
|
1949
|
-
constructor(config, queue, processor, logger) {
|
|
2282
|
+
constructor(config, queue, processor, logger, options = {}) {
|
|
1950
2283
|
this.config = config;
|
|
1951
2284
|
this.queue = queue;
|
|
1952
2285
|
this.processor = processor;
|
|
1953
2286
|
this.logger = logger;
|
|
2287
|
+
this.gitignoreFilter = options.gitignoreFilter;
|
|
2288
|
+
const healthOptions = {
|
|
2289
|
+
maxRetries: options.maxRetries,
|
|
2290
|
+
maxBackoffMs: options.maxBackoffMs,
|
|
2291
|
+
onFatalError: options.onFatalError,
|
|
2292
|
+
logger,
|
|
2293
|
+
};
|
|
2294
|
+
this.health = new SystemHealth(healthOptions);
|
|
1954
2295
|
}
|
|
1955
2296
|
/**
|
|
1956
2297
|
* Start watching the filesystem and processing events.
|
|
@@ -1966,19 +2307,29 @@ class FileSystemWatcher {
|
|
|
1966
2307
|
ignoreInitial: false,
|
|
1967
2308
|
});
|
|
1968
2309
|
this.watcher.on('add', (path) => {
|
|
2310
|
+
this.handleGitignoreChange(path);
|
|
2311
|
+
if (this.isGitignored(path))
|
|
2312
|
+
return;
|
|
1969
2313
|
this.logger.debug({ path }, 'File added');
|
|
1970
|
-
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2314
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1971
2315
|
});
|
|
1972
2316
|
this.watcher.on('change', (path) => {
|
|
2317
|
+
this.handleGitignoreChange(path);
|
|
2318
|
+
if (this.isGitignored(path))
|
|
2319
|
+
return;
|
|
1973
2320
|
this.logger.debug({ path }, 'File changed');
|
|
1974
|
-
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2321
|
+
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1975
2322
|
});
|
|
1976
2323
|
this.watcher.on('unlink', (path) => {
|
|
2324
|
+
this.handleGitignoreChange(path);
|
|
2325
|
+
if (this.isGitignored(path))
|
|
2326
|
+
return;
|
|
1977
2327
|
this.logger.debug({ path }, 'File removed');
|
|
1978
|
-
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
|
|
2328
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
1979
2329
|
});
|
|
1980
2330
|
this.watcher.on('error', (error) => {
|
|
1981
2331
|
this.logger.error({ err: normalizeError(error) }, 'Watcher error');
|
|
2332
|
+
this.health.recordFailure(error);
|
|
1982
2333
|
});
|
|
1983
2334
|
this.queue.process();
|
|
1984
2335
|
this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
|
|
@@ -1993,6 +2344,53 @@ class FileSystemWatcher {
|
|
|
1993
2344
|
this.logger.info('Filesystem watcher stopped');
|
|
1994
2345
|
}
|
|
1995
2346
|
}
|
|
2347
|
+
/**
|
|
2348
|
+
* Get the system health tracker.
|
|
2349
|
+
*/
|
|
2350
|
+
get systemHealth() {
|
|
2351
|
+
return this.health;
|
|
2352
|
+
}
|
|
2353
|
+
/**
|
|
2354
|
+
* Check if a path is gitignored and should be skipped.
|
|
2355
|
+
*/
|
|
2356
|
+
isGitignored(path) {
|
|
2357
|
+
if (!this.gitignoreFilter)
|
|
2358
|
+
return false;
|
|
2359
|
+
const ignored = this.gitignoreFilter.isIgnored(path);
|
|
2360
|
+
if (ignored) {
|
|
2361
|
+
this.logger.debug({ path }, 'Skipping gitignored file');
|
|
2362
|
+
}
|
|
2363
|
+
return ignored;
|
|
2364
|
+
}
|
|
2365
|
+
/**
|
|
2366
|
+
* If the changed file is a `.gitignore`, invalidate the filter cache.
|
|
2367
|
+
*/
|
|
2368
|
+
handleGitignoreChange(path) {
|
|
2369
|
+
if (!this.gitignoreFilter)
|
|
2370
|
+
return;
|
|
2371
|
+
if (path.endsWith('.gitignore')) {
|
|
2372
|
+
this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
|
|
2373
|
+
this.gitignoreFilter.invalidate(path);
|
|
2374
|
+
}
|
|
2375
|
+
}
|
|
2376
|
+
/**
|
|
2377
|
+
* Wrap a processing operation with health tracking.
|
|
2378
|
+
* On success, resets the failure counter.
|
|
2379
|
+
* On failure, records the failure and applies backoff.
|
|
2380
|
+
*/
|
|
2381
|
+
async wrapProcessing(fn) {
|
|
2382
|
+
try {
|
|
2383
|
+
await this.health.backoff();
|
|
2384
|
+
await fn();
|
|
2385
|
+
this.health.recordSuccess();
|
|
2386
|
+
}
|
|
2387
|
+
catch (error) {
|
|
2388
|
+
const shouldContinue = this.health.recordFailure(error);
|
|
2389
|
+
if (!shouldContinue) {
|
|
2390
|
+
await this.stop();
|
|
2391
|
+
}
|
|
2392
|
+
}
|
|
2393
|
+
}
|
|
1996
2394
|
}
|
|
1997
2395
|
|
|
1998
2396
|
/**
|
|
@@ -2068,7 +2466,7 @@ const defaultFactories = {
|
|
|
2068
2466
|
compileRules,
|
|
2069
2467
|
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
|
|
2070
2468
|
createEventQueue: (options) => new EventQueue(options),
|
|
2071
|
-
createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
|
|
2469
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2072
2470
|
createApiServer,
|
|
2073
2471
|
};
|
|
2074
2472
|
/**
|
|
@@ -2078,6 +2476,7 @@ class JeevesWatcher {
|
|
|
2078
2476
|
config;
|
|
2079
2477
|
configPath;
|
|
2080
2478
|
factories;
|
|
2479
|
+
runtimeOptions;
|
|
2081
2480
|
logger;
|
|
2082
2481
|
watcher;
|
|
2083
2482
|
queue;
|
|
@@ -2090,11 +2489,13 @@ class JeevesWatcher {
|
|
|
2090
2489
|
* @param config - The application configuration.
|
|
2091
2490
|
* @param configPath - Optional config file path to watch for changes.
|
|
2092
2491
|
* @param factories - Optional component factories (for dependency injection).
|
|
2492
|
+
* @param runtimeOptions - Optional runtime-only options (e.g., onFatalError).
|
|
2093
2493
|
*/
|
|
2094
|
-
constructor(config, configPath, factories = {}) {
|
|
2494
|
+
constructor(config, configPath, factories = {}, runtimeOptions = {}) {
|
|
2095
2495
|
this.config = config;
|
|
2096
2496
|
this.configPath = configPath;
|
|
2097
2497
|
this.factories = { ...defaultFactories, ...factories };
|
|
2498
|
+
this.runtimeOptions = runtimeOptions;
|
|
2098
2499
|
}
|
|
2099
2500
|
/**
|
|
2100
2501
|
* Start the watcher, API server, and all components.
|
|
@@ -2127,7 +2528,16 @@ class JeevesWatcher {
|
|
|
2127
2528
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2128
2529
|
});
|
|
2129
2530
|
this.queue = queue;
|
|
2130
|
-
const
|
|
2531
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
2532
|
+
const gitignoreFilter = respectGitignore
|
|
2533
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
2534
|
+
: undefined;
|
|
2535
|
+
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2536
|
+
maxRetries: this.config.maxRetries,
|
|
2537
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
2538
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
2539
|
+
gitignoreFilter,
|
|
2540
|
+
});
|
|
2131
2541
|
this.watcher = watcher;
|
|
2132
2542
|
const server = this.factories.createApiServer({
|
|
2133
2543
|
processor,
|
|
@@ -2235,7 +2645,9 @@ async function startFromConfig(configPath) {
|
|
|
2235
2645
|
exports.DocumentProcessor = DocumentProcessor;
|
|
2236
2646
|
exports.EventQueue = EventQueue;
|
|
2237
2647
|
exports.FileSystemWatcher = FileSystemWatcher;
|
|
2648
|
+
exports.GitignoreFilter = GitignoreFilter;
|
|
2238
2649
|
exports.JeevesWatcher = JeevesWatcher;
|
|
2650
|
+
exports.SystemHealth = SystemHealth;
|
|
2239
2651
|
exports.VectorStoreClient = VectorStoreClient;
|
|
2240
2652
|
exports.apiConfigSchema = apiConfigSchema;
|
|
2241
2653
|
exports.applyRules = applyRules;
|