@karmaniverous/jeeves-watcher 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/config.schema.json +117 -81
- package/dist/cjs/index.js +422 -10
- package/dist/cli/jeeves-watcher/index.js +421 -11
- package/dist/index.d.ts +150 -5
- package/dist/index.iife.js +422 -12
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +422 -12
- package/package.json +2 -1
package/dist/mjs/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Fastify from 'fastify';
|
|
2
2
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
3
|
-
import { resolve, dirname, join, extname, basename } from 'node:path';
|
|
3
|
+
import { resolve, dirname, join, relative, extname, basename } from 'node:path';
|
|
4
4
|
import picomatch from 'picomatch';
|
|
5
5
|
import { omit, get } from 'radash';
|
|
6
6
|
import { createHash } from 'node:crypto';
|
|
@@ -8,6 +8,8 @@ import { cosmiconfig } from 'cosmiconfig';
|
|
|
8
8
|
import { z, ZodError } from 'zod';
|
|
9
9
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
10
10
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
11
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
12
|
+
import ignore from 'ignore';
|
|
11
13
|
import pino from 'pino';
|
|
12
14
|
import { v5 } from 'uuid';
|
|
13
15
|
import * as cheerio from 'cheerio';
|
|
@@ -433,6 +435,7 @@ const WATCH_DEFAULTS = {
|
|
|
433
435
|
stabilityThresholdMs: 500,
|
|
434
436
|
usePolling: false,
|
|
435
437
|
pollIntervalMs: 1000,
|
|
438
|
+
respectGitignore: true,
|
|
436
439
|
};
|
|
437
440
|
/** Default embedding configuration. */
|
|
438
441
|
const EMBEDDING_DEFAULTS = {
|
|
@@ -477,6 +480,11 @@ const watchConfigSchema = z.object({
|
|
|
477
480
|
.number()
|
|
478
481
|
.optional()
|
|
479
482
|
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
483
|
+
/** Whether to respect .gitignore files when processing. */
|
|
484
|
+
respectGitignore: z
|
|
485
|
+
.boolean()
|
|
486
|
+
.optional()
|
|
487
|
+
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
480
488
|
});
|
|
481
489
|
/**
|
|
482
490
|
* Configuration watch settings.
|
|
@@ -644,6 +652,16 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
644
652
|
.number()
|
|
645
653
|
.optional()
|
|
646
654
|
.describe('Timeout in milliseconds for graceful shutdown.'),
|
|
655
|
+
/** Maximum consecutive system-level failures before triggering fatal error. Default: Infinity. */
|
|
656
|
+
maxRetries: z
|
|
657
|
+
.number()
|
|
658
|
+
.optional()
|
|
659
|
+
.describe('Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.'),
|
|
660
|
+
/** Maximum backoff delay in milliseconds for system errors. Default: 60000. */
|
|
661
|
+
maxBackoffMs: z
|
|
662
|
+
.number()
|
|
663
|
+
.optional()
|
|
664
|
+
.describe('Maximum backoff delay in milliseconds for system errors. Default: 60000.'),
|
|
647
665
|
});
|
|
648
666
|
|
|
649
667
|
/**
|
|
@@ -932,6 +950,212 @@ function createEmbeddingProvider(config, logger) {
|
|
|
932
950
|
return factory(config, logger);
|
|
933
951
|
}
|
|
934
952
|
|
|
953
|
+
/**
|
|
954
|
+
* @module gitignore
|
|
955
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
956
|
+
*/
|
|
957
|
+
/**
 * Locate the root of the git repository that contains `startDir`.
 *
 * Climbs from `startDir` towards the top of the filesystem and returns the
 * first directory that holds a `.git` *directory* (a `.git` regular file does
 * not count). The walk stops before testing `resolve('/')` itself.
 *
 * @param startDir - Directory to start from; resolved to an absolute path.
 * @returns The repository root, or `undefined` when no repo is found.
 */
function findRepoRoot(startDir) {
    const fsRoot = resolve('/');
    for (let current = resolve(startDir); current !== fsRoot;) {
        const marker = join(current, '.git');
        if (existsSync(marker) && statSync(marker).isDirectory()) {
            return current;
        }
        const up = dirname(current);
        // `dirname` is a fixed point at the top of a path; stop rather than spin.
        if (up === current) {
            return undefined;
        }
        current = up;
    }
    return undefined;
}
|
|
976
|
+
/**
 * Map a watch path (directory, file path, or glob) to a concrete directory
 * from which a repo root can be searched.
 *
 * - An existing directory maps to itself; any other existing entry maps to
 *   its parent directory.
 * - Otherwise the path is treated as a glob: the text before the first
 *   `*`, `?`, `[` or `{` is trimmed and reduced to a directory, which is
 *   returned only if it exists.
 *
 * @param watchPath - Watch path as configured.
 * @returns An absolute directory path, or `undefined` when none can be derived.
 */
function watchPathToScanDir(watchPath) {
    const absolute = resolve(watchPath);
    try {
        if (statSync(absolute).isDirectory()) {
            return absolute;
        }
        return dirname(absolute);
    }
    catch {
        // Not a concrete filesystem entry; it may still be a glob.
    }
    const firstGlobChar = /[*?[{]/.exec(watchPath);
    if (firstGlobChar === null) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlobChar.index).trim();
    let candidate;
    if (literalPrefix.length === 0) {
        // The glob starts immediately, so it is relative to the working directory.
        candidate = '.';
    }
    else if (literalPrefix.endsWith('/') || literalPrefix.endsWith('\\')) {
        candidate = literalPrefix;
    }
    else {
        // The prefix ends mid-segment (e.g. `docs/file*`); drop the partial segment.
        candidate = dirname(literalPrefix);
    }
    const scanDir = resolve(candidate);
    return existsSync(scanDir) ? scanDir : undefined;
}
|
|
1004
|
+
/**
 * Recursively collect the paths of all `.gitignore` files at or below `dir`.
 *
 * Directories named `.git` or `node_modules` are not descended into. Only
 * regular files are returned: an entry named `.gitignore` that is a directory
 * (or that cannot be stat'ed) is left out, since a path returned from here is
 * meant to be read as a text file. Unreadable directories and inaccessible
 * entries are skipped silently; the scan is best-effort.
 *
 * @param dir - Directory to scan.
 * @returns Paths of the `.gitignore` files found; `dir`'s own file, if any, comes first.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const gitignorePath = join(dir, '.gitignore');
    try {
        // `existsSync` would also accept a directory named `.gitignore`,
        // which cannot be read as a pattern file.
        if (statSync(gitignorePath).isFile()) {
            results.push(gitignorePath);
        }
    }
    catch {
        // Absent or inaccessible: this directory contributes no `.gitignore`.
    }
    let entries;
    try {
        entries = readdirSync(dir);
    }
    catch {
        // Unreadable (or not a directory): nothing further to scan here.
        return results;
    }
    for (const entry of entries) {
        if (entry === '.git' || entry === 'node_modules') {
            continue;
        }
        const fullPath = join(dir, entry);
        try {
            if (statSync(fullPath).isDirectory()) {
                results.push(...findGitignoreFiles(fullPath));
            }
        }
        catch {
            // Skip inaccessible entries
        }
    }
    return results;
}
|
|
1036
|
+
/**
 * Parse a `.gitignore` file into an `ignore` instance.
 *
 * The file is read as UTF-8. A leading byte-order mark is removed first so it
 * does not become part of the first pattern.
 *
 * @param gitignorePath - Path to the `.gitignore` file.
 * @returns An `ignore` instance loaded with the file's patterns.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseGitignore(gitignorePath) {
    const content = readFileSync(gitignorePath, 'utf8').replace(/^\uFEFF/, '');
    return ignore().add(content);
}
|
|
1043
|
+
/**
 * Rewrite every backslash in `p` as a forward slash, the separator the
 * `ignore` package expects in the paths it is asked about.
 *
 * @param p - Path string to normalize.
 * @returns `p` with each `\` replaced by `/`.
 */
function toForwardSlash(p) {
    const segments = p.split('\\');
    return segments.join('/');
}
|
|
1049
|
+
/**
 * Express `targetPath` relative to `baseDir`, but only when it lies at or
 * below `baseDir`.
 *
 * @param baseDir - Absolute directory acting as the container.
 * @param targetPath - Absolute path to test.
 * @returns The relative path (`''` when both are the same path), or
 *   `undefined` when `targetPath` is outside `baseDir`.
 */
function relativeIfInside(baseDir, targetPath) {
    const rel = relative(baseDir, targetPath);
    // Match a whole leading `..` segment only, so that names which merely
    // begin with two dots (e.g. `..cache`) still count as inside.
    if (rel === '..' || rel.startsWith('../') || rel.startsWith('..\\')) {
        return undefined;
    }
    // On Windows `relative()` returns an absolute path when the two paths
    // share no root (different drives). `resolve()` leaves such a path
    // unchanged, whereas a genuinely relative path gets the cwd prepended.
    if (rel !== '' && resolve(rel) === rel) {
        return undefined;
    }
    return rel;
}
/**
 * Processor-level gitignore filter. Checks file paths against the
 * `.gitignore` files found in the git repositories that contain the watched
 * paths.
 *
 * NOTE(review): a match in any applicable `.gitignore` marks the path as
 * ignored, so a negation (`!pattern`) in a deeper `.gitignore` cannot
 * re-include a file that a shallower one ignores. Confirm whether that
 * deviation from git's own precedence rules is acceptable.
 */
class GitignoreFilter {
    /**
     * Known repositories keyed by repo root. Each value is
     * `{ root, entries }`, where `entries` is a list of `{ dir, ig }` pairs
     * (`dir`: directory holding a `.gitignore`, `ig`: its parsed patterns),
     * kept sorted by descending `dir` length.
     */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Discard all cached state and rebuild it from `watchPaths`: each path is
     * reduced to a directory, its repo root is located, and every
     * `.gitignore` below that root is parsed.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir)) {
                continue;
            }
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot)) {
                continue;
            }
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: dirname(gf),
                ig: parseGitignore(gf),
            }));
            // Sort deepest-first so nested `.gitignore` files are checked first
            entries.sort((a, b) => b.dir.length - a.dir.length);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check; resolved to an absolute path.
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const repo of this.repos.values()) {
            // Only repos that actually contain the path are relevant.
            if (relativeIfInside(repo.root, absPath) === undefined) {
                continue;
            }
            // Check each `.gitignore` entry (deepest-first)
            for (const entry of repo.entries) {
                const relToEntry = relativeIfInside(entry.dir, absPath);
                // `''` means the path is the `.gitignore`'s own directory; the
                // `ignore` package throws on an empty path, and a directory
                // cannot be matched by its own `.gitignore` anyway.
                if (relToEntry === undefined || relToEntry === '') {
                    continue;
                }
                if (entry.ig.ignores(toForwardSlash(relToEntry))) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every known repo that contains the file's directory is refreshed, so a
     * repo nested inside another tracked repo does not keep a stale copy. If
     * no known repo contains it, the file's own repo (when one exists and the
     * file is present) is registered with this single entry.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        const deepestFirst = (a, b) => b.dir.length - a.dir.length;
        // Parse at most once, and only when some repo needs the result.
        let fresh;
        const loadFresh = () => {
            if (!fresh) {
                fresh = { dir: gitignoreDir, ig: parseGitignore(absPath) };
            }
            return fresh;
        };
        let refreshed = false;
        for (const repo of this.repos.values()) {
            if (relativeIfInside(repo.root, gitignoreDir) === undefined) {
                continue;
            }
            refreshed = true;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            // Re-parse if file still exists
            if (existsSync(absPath)) {
                repo.entries.push(loadFresh());
                repo.entries.sort(deepestFirst);
            }
        }
        if (refreshed) {
            return;
        }
        // If not in any known repo, check if it's in a repo we haven't scanned
        const repoRoot = findRepoRoot(gitignoreDir);
        if (!repoRoot || !existsSync(absPath)) {
            return;
        }
        const known = this.repos.get(repoRoot);
        if (known) {
            known.entries.push(loadFresh());
            known.entries.sort(deepestFirst);
        }
        else {
            this.repos.set(repoRoot, { root: repoRoot, entries: [loadFresh()] });
        }
    }
}
|
|
1158
|
+
|
|
935
1159
|
/**
|
|
936
1160
|
* @module logger
|
|
937
1161
|
* Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
|
|
@@ -1048,11 +1272,11 @@ async function extractMarkdown(filePath) {
|
|
|
1048
1272
|
}
|
|
1049
1273
|
async function extractPlaintext(filePath) {
|
|
1050
1274
|
const raw = await readFile(filePath, 'utf8');
|
|
1051
|
-
return { text: raw };
|
|
1275
|
+
return { text: raw.replace(/^\uFEFF/, '') };
|
|
1052
1276
|
}
|
|
1053
1277
|
async function extractJson(filePath) {
|
|
1054
1278
|
const raw = await readFile(filePath, 'utf8');
|
|
1055
|
-
const parsed = JSON.parse(raw);
|
|
1279
|
+
const parsed = JSON.parse(raw.replace(/^\uFEFF/, ''));
|
|
1056
1280
|
const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
1057
1281
|
? parsed
|
|
1058
1282
|
: undefined;
|
|
@@ -1074,7 +1298,7 @@ async function extractDocx(filePath) {
|
|
|
1074
1298
|
}
|
|
1075
1299
|
async function extractHtml(filePath) {
|
|
1076
1300
|
const raw = await readFile(filePath, 'utf8');
|
|
1077
|
-
const $ = cheerio.load(raw);
|
|
1301
|
+
const $ = cheerio.load(raw.replace(/^\uFEFF/, ''));
|
|
1078
1302
|
$('script, style').remove();
|
|
1079
1303
|
const text = $('body').text().trim() || $.text().trim();
|
|
1080
1304
|
return { text };
|
|
@@ -1904,6 +2128,112 @@ class VectorStoreClient {
|
|
|
1904
2128
|
}
|
|
1905
2129
|
}
|
|
1906
2130
|
|
|
2131
|
+
/**
|
|
2132
|
+
* @module health
|
|
2133
|
+
* Tracks consecutive system-level failures and applies exponential backoff.
|
|
2134
|
+
* Triggers fatal error callback when maxRetries is exceeded.
|
|
2135
|
+
*/
|
|
2136
|
+
/**
 * Counts consecutive system-level failures and derives an exponential
 * backoff delay from that count.
 */
class SystemHealth {
    consecutiveFailures = 0;
    maxRetries;
    maxBackoffMs;
    baseDelayMs;
    onFatalError;
    logger;
    /**
     * @param options - `logger` plus optional `maxRetries` (default: no
     *   limit), `maxBackoffMs` (default 60000), `baseDelayMs` (default 1000)
     *   and `onFatalError` callback.
     */
    constructor(options) {
        const { maxRetries, maxBackoffMs, baseDelayMs, onFatalError, logger } = options;
        this.maxRetries = maxRetries ?? Number.POSITIVE_INFINITY;
        this.maxBackoffMs = maxBackoffMs ?? 60_000;
        this.baseDelayMs = baseDelayMs ?? 1000;
        this.onFatalError = onFatalError;
        this.logger = logger;
    }
    /**
     * Note a successful operation: logs a recovery message if failures were
     * pending, then clears the failure counter.
     */
    recordSuccess() {
        const previousFailures = this.consecutiveFailures;
        if (previousFailures > 0) {
            this.logger.info({ previousFailures }, 'System health recovered');
        }
        this.consecutiveFailures = 0;
    }
    /**
     * Note a system-level failure. Once the counter reaches `maxRetries` the
     * failure is fatal: `onFatalError` is invoked when provided, otherwise an
     * error is thrown.
     *
     * @param error - The error that occurred.
     * @returns `true` to keep going; `false` after `onFatalError` has been called.
     * @throws The error itself (wrapped in an `Error` if it is not one) when
     *   the failure is fatal and no `onFatalError` callback exists.
     */
    recordFailure(error) {
        this.consecutiveFailures += 1;
        const details = {
            consecutiveFailures: this.consecutiveFailures,
            maxRetries: this.maxRetries,
            err: normalizeError(error),
        };
        this.logger.error(details, 'System-level failure recorded');
        const limitReached = this.consecutiveFailures >= this.maxRetries;
        if (!limitReached) {
            return true;
        }
        this.logger.fatal({ consecutiveFailures: this.consecutiveFailures }, 'Maximum retries exceeded, triggering fatal error');
        if (this.onFatalError) {
            this.onFatalError(error);
            return false;
        }
        if (error instanceof Error) {
            throw error;
        }
        throw new Error(`Fatal system error: ${String(error)}`);
    }
    /**
     * Delay implied by the current failure count: zero with no failures,
     * otherwise `baseDelayMs` doubled once per failure after the first and
     * capped at `maxBackoffMs`.
     *
     * @returns Delay in milliseconds.
     */
    get currentBackoffMs() {
        const failureCount = this.consecutiveFailures;
        if (failureCount === 0) {
            return 0;
        }
        const doublings = Math.max(0, failureCount - 1);
        const uncapped = this.baseDelayMs * 2 ** doublings;
        return Math.min(this.maxBackoffMs, uncapped);
    }
    /**
     * Wait for the current backoff delay. Returns immediately when the delay
     * is zero or negative.
     *
     * @param signal - Optional abort signal; aborting rejects the wait with
     *   `Error('Backoff aborted')`.
     */
    async backoff(signal) {
        const delayMs = this.currentBackoffMs;
        if (delayMs <= 0) {
            return;
        }
        this.logger.warn({ delayMs, consecutiveFailures: this.consecutiveFailures }, 'Backing off before next attempt');
        await new Promise((done, fail) => {
            let timer;
            // Undo both the timer and the abort subscription, whichever fired.
            const release = () => {
                clearTimeout(timer);
                if (signal) {
                    signal.removeEventListener('abort', abortHandler);
                }
            };
            const abortHandler = () => {
                release();
                fail(new Error('Backoff aborted'));
            };
            timer = setTimeout(() => {
                release();
                done();
            }, delayMs);
            if (!signal) {
                return;
            }
            if (signal.aborted) {
                abortHandler();
                return;
            }
            signal.addEventListener('abort', abortHandler, { once: true });
        });
    }
    /** Current consecutive failure count. */
    get failures() {
        return this.consecutiveFailures;
    }
}
|
|
2236
|
+
|
|
1907
2237
|
/**
|
|
1908
2238
|
* @module watcher
|
|
1909
2239
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -1916,6 +2246,8 @@ class FileSystemWatcher {
|
|
|
1916
2246
|
queue;
|
|
1917
2247
|
processor;
|
|
1918
2248
|
logger;
|
|
2249
|
+
health;
|
|
2250
|
+
gitignoreFilter;
|
|
1919
2251
|
watcher;
|
|
1920
2252
|
/**
|
|
1921
2253
|
* Create a new FileSystemWatcher.
|
|
@@ -1924,12 +2256,21 @@ class FileSystemWatcher {
|
|
|
1924
2256
|
* @param queue - The event queue.
|
|
1925
2257
|
* @param processor - The document processor.
|
|
1926
2258
|
* @param logger - The logger instance.
|
|
2259
|
+
* @param options - Optional health/fatal error options.
|
|
1927
2260
|
*/
|
|
1928
|
-
constructor(config, queue, processor, logger) {
|
|
2261
|
+
constructor(config, queue, processor, logger, options = {}) {
|
|
1929
2262
|
this.config = config;
|
|
1930
2263
|
this.queue = queue;
|
|
1931
2264
|
this.processor = processor;
|
|
1932
2265
|
this.logger = logger;
|
|
2266
|
+
this.gitignoreFilter = options.gitignoreFilter;
|
|
2267
|
+
const healthOptions = {
|
|
2268
|
+
maxRetries: options.maxRetries,
|
|
2269
|
+
maxBackoffMs: options.maxBackoffMs,
|
|
2270
|
+
onFatalError: options.onFatalError,
|
|
2271
|
+
logger,
|
|
2272
|
+
};
|
|
2273
|
+
this.health = new SystemHealth(healthOptions);
|
|
1933
2274
|
}
|
|
1934
2275
|
/**
|
|
1935
2276
|
* Start watching the filesystem and processing events.
|
|
@@ -1945,19 +2286,29 @@ class FileSystemWatcher {
|
|
|
1945
2286
|
ignoreInitial: false,
|
|
1946
2287
|
});
|
|
1947
2288
|
this.watcher.on('add', (path) => {
|
|
2289
|
+
this.handleGitignoreChange(path);
|
|
2290
|
+
if (this.isGitignored(path))
|
|
2291
|
+
return;
|
|
1948
2292
|
this.logger.debug({ path }, 'File added');
|
|
1949
|
-
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2293
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1950
2294
|
});
|
|
1951
2295
|
this.watcher.on('change', (path) => {
|
|
2296
|
+
this.handleGitignoreChange(path);
|
|
2297
|
+
if (this.isGitignored(path))
|
|
2298
|
+
return;
|
|
1952
2299
|
this.logger.debug({ path }, 'File changed');
|
|
1953
|
-
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2300
|
+
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1954
2301
|
});
|
|
1955
2302
|
this.watcher.on('unlink', (path) => {
|
|
2303
|
+
this.handleGitignoreChange(path);
|
|
2304
|
+
if (this.isGitignored(path))
|
|
2305
|
+
return;
|
|
1956
2306
|
this.logger.debug({ path }, 'File removed');
|
|
1957
|
-
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
|
|
2307
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
1958
2308
|
});
|
|
1959
2309
|
this.watcher.on('error', (error) => {
|
|
1960
2310
|
this.logger.error({ err: normalizeError(error) }, 'Watcher error');
|
|
2311
|
+
this.health.recordFailure(error);
|
|
1961
2312
|
});
|
|
1962
2313
|
this.queue.process();
|
|
1963
2314
|
this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
|
|
@@ -1972,6 +2323,53 @@ class FileSystemWatcher {
|
|
|
1972
2323
|
this.logger.info('Filesystem watcher stopped');
|
|
1973
2324
|
}
|
|
1974
2325
|
}
|
|
2326
|
+
/**
|
|
2327
|
+
* Get the system health tracker.
|
|
2328
|
+
*/
|
|
2329
|
+
get systemHealth() {
|
|
2330
|
+
return this.health;
|
|
2331
|
+
}
|
|
2332
|
+
/**
|
|
2333
|
+
* Check if a path is gitignored and should be skipped.
|
|
2334
|
+
*/
|
|
2335
|
+
isGitignored(path) {
|
|
2336
|
+
if (!this.gitignoreFilter)
|
|
2337
|
+
return false;
|
|
2338
|
+
const ignored = this.gitignoreFilter.isIgnored(path);
|
|
2339
|
+
if (ignored) {
|
|
2340
|
+
this.logger.debug({ path }, 'Skipping gitignored file');
|
|
2341
|
+
}
|
|
2342
|
+
return ignored;
|
|
2343
|
+
}
|
|
2344
|
+
/**
|
|
2345
|
+
* If the changed file is a `.gitignore`, invalidate the filter cache.
|
|
2346
|
+
*/
|
|
2347
|
+
handleGitignoreChange(path) {
|
|
2348
|
+
if (!this.gitignoreFilter)
|
|
2349
|
+
return;
|
|
2350
|
+
if (path.endsWith('.gitignore')) {
|
|
2351
|
+
this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
|
|
2352
|
+
this.gitignoreFilter.invalidate(path);
|
|
2353
|
+
}
|
|
2354
|
+
}
|
|
2355
|
+
/**
|
|
2356
|
+
* Wrap a processing operation with health tracking.
|
|
2357
|
+
* On success, resets the failure counter.
|
|
2358
|
+
* On failure, records the failure and applies backoff.
|
|
2359
|
+
*/
|
|
2360
|
+
async wrapProcessing(fn) {
|
|
2361
|
+
try {
|
|
2362
|
+
await this.health.backoff();
|
|
2363
|
+
await fn();
|
|
2364
|
+
this.health.recordSuccess();
|
|
2365
|
+
}
|
|
2366
|
+
catch (error) {
|
|
2367
|
+
const shouldContinue = this.health.recordFailure(error);
|
|
2368
|
+
if (!shouldContinue) {
|
|
2369
|
+
await this.stop();
|
|
2370
|
+
}
|
|
2371
|
+
}
|
|
2372
|
+
}
|
|
1975
2373
|
}
|
|
1976
2374
|
|
|
1977
2375
|
/**
|
|
@@ -2047,7 +2445,7 @@ const defaultFactories = {
|
|
|
2047
2445
|
compileRules,
|
|
2048
2446
|
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
|
|
2049
2447
|
createEventQueue: (options) => new EventQueue(options),
|
|
2050
|
-
createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
|
|
2448
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2051
2449
|
createApiServer,
|
|
2052
2450
|
};
|
|
2053
2451
|
/**
|
|
@@ -2057,6 +2455,7 @@ class JeevesWatcher {
|
|
|
2057
2455
|
config;
|
|
2058
2456
|
configPath;
|
|
2059
2457
|
factories;
|
|
2458
|
+
runtimeOptions;
|
|
2060
2459
|
logger;
|
|
2061
2460
|
watcher;
|
|
2062
2461
|
queue;
|
|
@@ -2069,11 +2468,13 @@ class JeevesWatcher {
|
|
|
2069
2468
|
* @param config - The application configuration.
|
|
2070
2469
|
* @param configPath - Optional config file path to watch for changes.
|
|
2071
2470
|
* @param factories - Optional component factories (for dependency injection).
|
|
2471
|
+
* @param runtimeOptions - Optional runtime-only options (e.g., onFatalError).
|
|
2072
2472
|
*/
|
|
2073
|
-
constructor(config, configPath, factories = {}) {
|
|
2473
|
+
constructor(config, configPath, factories = {}, runtimeOptions = {}) {
|
|
2074
2474
|
this.config = config;
|
|
2075
2475
|
this.configPath = configPath;
|
|
2076
2476
|
this.factories = { ...defaultFactories, ...factories };
|
|
2477
|
+
this.runtimeOptions = runtimeOptions;
|
|
2077
2478
|
}
|
|
2078
2479
|
/**
|
|
2079
2480
|
* Start the watcher, API server, and all components.
|
|
@@ -2106,7 +2507,16 @@ class JeevesWatcher {
|
|
|
2106
2507
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2107
2508
|
});
|
|
2108
2509
|
this.queue = queue;
|
|
2109
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger);
|
|
2510
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
2511
|
+
const gitignoreFilter = respectGitignore
|
|
2512
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
2513
|
+
: undefined;
|
|
2514
|
+
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2515
|
+
maxRetries: this.config.maxRetries,
|
|
2516
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
2517
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
2518
|
+
gitignoreFilter,
|
|
2519
|
+
});
|
|
2110
2520
|
this.watcher = watcher;
|
|
2111
2521
|
const server = this.factories.createApiServer({
|
|
2112
2522
|
processor,
|
|
@@ -2211,4 +2621,4 @@ async function startFromConfig(configPath) {
|
|
|
2211
2621
|
return app;
|
|
2212
2622
|
}
|
|
2213
2623
|
|
|
2214
|
-
export { DocumentProcessor, EventQueue, FileSystemWatcher, JeevesWatcher, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
|
2624
|
+
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
package/package.json
CHANGED
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"commander": "^14.0.3",
|
|
27
27
|
"cosmiconfig": "*",
|
|
28
28
|
"fastify": "*",
|
|
29
|
+
"ignore": "^7.0.5",
|
|
29
30
|
"js-yaml": "*",
|
|
30
31
|
"json5": "*",
|
|
31
32
|
"mammoth": "^1.11.0",
|
|
@@ -171,5 +172,5 @@
|
|
|
171
172
|
},
|
|
172
173
|
"type": "module",
|
|
173
174
|
"types": "dist/index.d.ts",
|
|
174
|
-
"version": "0.2.3"
|
|
175
|
+
"version": "0.2.5"
|
|
175
176
|
}
|