@karmaniverous/jeeves-watcher 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/config.schema.json +22 -0
- package/dist/cjs/index.js +171 -15
- package/dist/cli/jeeves-watcher/index.js +170 -15
- package/dist/index.d.ts +101 -5
- package/dist/index.iife.js +171 -15
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +171 -16
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -94,6 +94,20 @@ The watcher will:
|
|
|
94
94
|
|
|
95
95
|
## Configuration
|
|
96
96
|
|
|
97
|
+
### Environment Variable Substitution
|
|
98
|
+
|
|
99
|
+
Config strings support `${VAR_NAME}` syntax for environment variable injection:
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"embedding": {
|
|
104
|
+
"apiKey": "${GOOGLE_API_KEY}"
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
If `GOOGLE_API_KEY` is set in the environment, the value is substituted at config load time. **Unresolvable expressions are left untouched** — this allows `${...}` template syntax used in inference rule `set` values (e.g. `${frontmatter.title}`, `${file.path}`) to pass through for later resolution by the rules engine.
|
|
110
|
+
|
|
97
111
|
### Watch Paths
|
|
98
112
|
|
|
99
113
|
```json
|
package/config.schema.json
CHANGED
|
@@ -141,6 +141,22 @@
|
|
|
141
141
|
"$ref": "#/definitions/__schema52"
|
|
142
142
|
}
|
|
143
143
|
]
|
|
144
|
+
},
|
|
145
|
+
"maxRetries": {
|
|
146
|
+
"description": "Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.",
|
|
147
|
+
"allOf": [
|
|
148
|
+
{
|
|
149
|
+
"$ref": "#/definitions/__schema53"
|
|
150
|
+
}
|
|
151
|
+
]
|
|
152
|
+
},
|
|
153
|
+
"maxBackoffMs": {
|
|
154
|
+
"description": "Maximum backoff delay in milliseconds for system errors. Default: 60000.",
|
|
155
|
+
"allOf": [
|
|
156
|
+
{
|
|
157
|
+
"$ref": "#/definitions/__schema54"
|
|
158
|
+
}
|
|
159
|
+
]
|
|
144
160
|
}
|
|
145
161
|
},
|
|
146
162
|
"required": [
|
|
@@ -572,6 +588,12 @@
|
|
|
572
588
|
},
|
|
573
589
|
"__schema52": {
|
|
574
590
|
"type": "number"
|
|
591
|
+
},
|
|
592
|
+
"__schema53": {
|
|
593
|
+
"type": "number"
|
|
594
|
+
},
|
|
595
|
+
"__schema54": {
|
|
596
|
+
"type": "number"
|
|
575
597
|
}
|
|
576
598
|
}
|
|
577
599
|
}
|
package/dist/cjs/index.js
CHANGED
|
@@ -665,6 +665,16 @@ const jeevesWatcherConfigSchema = zod.z.object({
|
|
|
665
665
|
.number()
|
|
666
666
|
.optional()
|
|
667
667
|
.describe('Timeout in milliseconds for graceful shutdown.'),
|
|
668
|
+
/** Maximum consecutive system-level failures before triggering fatal error. Default: Infinity. */
|
|
669
|
+
maxRetries: zod.z
|
|
670
|
+
.number()
|
|
671
|
+
.optional()
|
|
672
|
+
.describe('Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.'),
|
|
673
|
+
/** Maximum backoff delay in milliseconds for system errors. Default: 60000. */
|
|
674
|
+
maxBackoffMs: zod.z
|
|
675
|
+
.number()
|
|
676
|
+
.optional()
|
|
677
|
+
.describe('Maximum backoff delay in milliseconds for system errors. Default: 60000.'),
|
|
668
678
|
});
|
|
669
679
|
|
|
670
680
|
/**
|
|
@@ -677,15 +687,13 @@ const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
|
677
687
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
678
688
|
*
|
|
679
689
|
* @param value - The string to process.
|
|
680
|
-
* @returns The string with env vars
|
|
681
|
-
* @throws If a referenced env var is not set.
|
|
690
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
682
691
|
*/
|
|
683
692
|
function substituteString(value) {
|
|
684
693
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
685
694
|
const envValue = process.env[varName];
|
|
686
|
-
if (envValue === undefined)
|
|
687
|
-
|
|
688
|
-
}
|
|
695
|
+
if (envValue === undefined)
|
|
696
|
+
return match;
|
|
689
697
|
return envValue;
|
|
690
698
|
});
|
|
691
699
|
}
|
|
@@ -1071,11 +1079,11 @@ async function extractMarkdown(filePath) {
|
|
|
1071
1079
|
}
|
|
1072
1080
|
async function extractPlaintext(filePath) {
|
|
1073
1081
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1074
|
-
return { text: raw };
|
|
1082
|
+
return { text: raw.replace(/^\uFEFF/, '') };
|
|
1075
1083
|
}
|
|
1076
1084
|
async function extractJson(filePath) {
|
|
1077
1085
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1078
|
-
const parsed = JSON.parse(raw);
|
|
1086
|
+
const parsed = JSON.parse(raw.replace(/^\uFEFF/, ''));
|
|
1079
1087
|
const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
1080
1088
|
? parsed
|
|
1081
1089
|
: undefined;
|
|
@@ -1097,7 +1105,7 @@ async function extractDocx(filePath) {
|
|
|
1097
1105
|
}
|
|
1098
1106
|
async function extractHtml(filePath) {
|
|
1099
1107
|
const raw = await promises.readFile(filePath, 'utf8');
|
|
1100
|
-
const $ = cheerio__namespace.load(raw);
|
|
1108
|
+
const $ = cheerio__namespace.load(raw.replace(/^\uFEFF/, ''));
|
|
1101
1109
|
$('script, style').remove();
|
|
1102
1110
|
const text = $('body').text().trim() || $.text().trim();
|
|
1103
1111
|
return { text };
|
|
@@ -1927,6 +1935,112 @@ class VectorStoreClient {
|
|
|
1927
1935
|
}
|
|
1928
1936
|
}
|
|
1929
1937
|
|
|
1938
|
+
/**
|
|
1939
|
+
* @module health
|
|
1940
|
+
* Tracks consecutive system-level failures and applies exponential backoff.
|
|
1941
|
+
* Triggers fatal error callback when maxRetries is exceeded.
|
|
1942
|
+
*/
|
|
1943
|
+
/**
|
|
1944
|
+
* Tracks system health via consecutive failure count and exponential backoff.
|
|
1945
|
+
*/
|
|
1946
|
+
class SystemHealth {
|
|
1947
|
+
consecutiveFailures = 0;
|
|
1948
|
+
maxRetries;
|
|
1949
|
+
maxBackoffMs;
|
|
1950
|
+
baseDelayMs;
|
|
1951
|
+
onFatalError;
|
|
1952
|
+
logger;
|
|
1953
|
+
constructor(options) {
|
|
1954
|
+
this.maxRetries = options.maxRetries ?? Number.POSITIVE_INFINITY;
|
|
1955
|
+
this.maxBackoffMs = options.maxBackoffMs ?? 60_000;
|
|
1956
|
+
this.baseDelayMs = options.baseDelayMs ?? 1000;
|
|
1957
|
+
this.onFatalError = options.onFatalError;
|
|
1958
|
+
this.logger = options.logger;
|
|
1959
|
+
}
|
|
1960
|
+
/**
|
|
1961
|
+
* Record a successful system operation. Resets the failure counter.
|
|
1962
|
+
*/
|
|
1963
|
+
recordSuccess() {
|
|
1964
|
+
if (this.consecutiveFailures > 0) {
|
|
1965
|
+
this.logger.info({ previousFailures: this.consecutiveFailures }, 'System health recovered');
|
|
1966
|
+
}
|
|
1967
|
+
this.consecutiveFailures = 0;
|
|
1968
|
+
}
|
|
1969
|
+
/**
|
|
1970
|
+
* Record a system-level failure. If maxRetries is exceeded, triggers fatal error.
|
|
1971
|
+
*
|
|
1972
|
+
* @param error - The error that occurred.
|
|
1973
|
+
* @returns Whether the watcher should continue (false = fatal).
|
|
1974
|
+
*/
|
|
1975
|
+
recordFailure(error) {
|
|
1976
|
+
this.consecutiveFailures += 1;
|
|
1977
|
+
this.logger.error({
|
|
1978
|
+
consecutiveFailures: this.consecutiveFailures,
|
|
1979
|
+
maxRetries: this.maxRetries,
|
|
1980
|
+
err: normalizeError(error),
|
|
1981
|
+
}, 'System-level failure recorded');
|
|
1982
|
+
if (this.consecutiveFailures >= this.maxRetries) {
|
|
1983
|
+
this.logger.fatal({ consecutiveFailures: this.consecutiveFailures }, 'Maximum retries exceeded, triggering fatal error');
|
|
1984
|
+
if (this.onFatalError) {
|
|
1985
|
+
this.onFatalError(error);
|
|
1986
|
+
return false;
|
|
1987
|
+
}
|
|
1988
|
+
throw error instanceof Error
|
|
1989
|
+
? error
|
|
1990
|
+
: new Error(`Fatal system error: ${String(error)}`);
|
|
1991
|
+
}
|
|
1992
|
+
return true;
|
|
1993
|
+
}
|
|
1994
|
+
/**
|
|
1995
|
+
* Compute the current backoff delay based on consecutive failures.
|
|
1996
|
+
*
|
|
1997
|
+
* @returns Delay in milliseconds.
|
|
1998
|
+
*/
|
|
1999
|
+
get currentBackoffMs() {
|
|
2000
|
+
if (this.consecutiveFailures === 0)
|
|
2001
|
+
return 0;
|
|
2002
|
+
const exp = Math.max(0, this.consecutiveFailures - 1);
|
|
2003
|
+
return Math.min(this.maxBackoffMs, this.baseDelayMs * 2 ** exp);
|
|
2004
|
+
}
|
|
2005
|
+
/**
|
|
2006
|
+
* Sleep for the current backoff duration.
|
|
2007
|
+
*
|
|
2008
|
+
* @param signal - Optional abort signal.
|
|
2009
|
+
*/
|
|
2010
|
+
async backoff(signal) {
|
|
2011
|
+
const delay = this.currentBackoffMs;
|
|
2012
|
+
if (delay <= 0)
|
|
2013
|
+
return;
|
|
2014
|
+
this.logger.warn({ delayMs: delay, consecutiveFailures: this.consecutiveFailures }, 'Backing off before next attempt');
|
|
2015
|
+
await new Promise((resolve, reject) => {
|
|
2016
|
+
const timer = setTimeout(() => {
|
|
2017
|
+
cleanup();
|
|
2018
|
+
resolve();
|
|
2019
|
+
}, delay);
|
|
2020
|
+
const onAbort = () => {
|
|
2021
|
+
cleanup();
|
|
2022
|
+
reject(new Error('Backoff aborted'));
|
|
2023
|
+
};
|
|
2024
|
+
const cleanup = () => {
|
|
2025
|
+
clearTimeout(timer);
|
|
2026
|
+
if (signal)
|
|
2027
|
+
signal.removeEventListener('abort', onAbort);
|
|
2028
|
+
};
|
|
2029
|
+
if (signal) {
|
|
2030
|
+
if (signal.aborted) {
|
|
2031
|
+
onAbort();
|
|
2032
|
+
return;
|
|
2033
|
+
}
|
|
2034
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
2035
|
+
}
|
|
2036
|
+
});
|
|
2037
|
+
}
|
|
2038
|
+
/** Current consecutive failure count. */
|
|
2039
|
+
get failures() {
|
|
2040
|
+
return this.consecutiveFailures;
|
|
2041
|
+
}
|
|
2042
|
+
}
|
|
2043
|
+
|
|
1930
2044
|
/**
|
|
1931
2045
|
* @module watcher
|
|
1932
2046
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -1939,6 +2053,7 @@ class FileSystemWatcher {
|
|
|
1939
2053
|
queue;
|
|
1940
2054
|
processor;
|
|
1941
2055
|
logger;
|
|
2056
|
+
health;
|
|
1942
2057
|
watcher;
|
|
1943
2058
|
/**
|
|
1944
2059
|
* Create a new FileSystemWatcher.
|
|
@@ -1947,12 +2062,20 @@ class FileSystemWatcher {
|
|
|
1947
2062
|
* @param queue - The event queue.
|
|
1948
2063
|
* @param processor - The document processor.
|
|
1949
2064
|
* @param logger - The logger instance.
|
|
2065
|
+
* @param options - Optional health/fatal error options.
|
|
1950
2066
|
*/
|
|
1951
|
-
constructor(config, queue, processor, logger) {
|
|
2067
|
+
constructor(config, queue, processor, logger, options = {}) {
|
|
1952
2068
|
this.config = config;
|
|
1953
2069
|
this.queue = queue;
|
|
1954
2070
|
this.processor = processor;
|
|
1955
2071
|
this.logger = logger;
|
|
2072
|
+
const healthOptions = {
|
|
2073
|
+
maxRetries: options.maxRetries,
|
|
2074
|
+
maxBackoffMs: options.maxBackoffMs,
|
|
2075
|
+
onFatalError: options.onFatalError,
|
|
2076
|
+
logger,
|
|
2077
|
+
};
|
|
2078
|
+
this.health = new SystemHealth(healthOptions);
|
|
1956
2079
|
}
|
|
1957
2080
|
/**
|
|
1958
2081
|
* Start watching the filesystem and processing events.
|
|
@@ -1969,18 +2092,19 @@ class FileSystemWatcher {
|
|
|
1969
2092
|
});
|
|
1970
2093
|
this.watcher.on('add', (path) => {
|
|
1971
2094
|
this.logger.debug({ path }, 'File added');
|
|
1972
|
-
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2095
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1973
2096
|
});
|
|
1974
2097
|
this.watcher.on('change', (path) => {
|
|
1975
2098
|
this.logger.debug({ path }, 'File changed');
|
|
1976
|
-
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2099
|
+
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1977
2100
|
});
|
|
1978
2101
|
this.watcher.on('unlink', (path) => {
|
|
1979
2102
|
this.logger.debug({ path }, 'File removed');
|
|
1980
|
-
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
|
|
2103
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
1981
2104
|
});
|
|
1982
2105
|
this.watcher.on('error', (error) => {
|
|
1983
2106
|
this.logger.error({ err: normalizeError(error) }, 'Watcher error');
|
|
2107
|
+
this.health.recordFailure(error);
|
|
1984
2108
|
});
|
|
1985
2109
|
this.queue.process();
|
|
1986
2110
|
this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
|
|
@@ -1995,6 +2119,30 @@ class FileSystemWatcher {
|
|
|
1995
2119
|
this.logger.info('Filesystem watcher stopped');
|
|
1996
2120
|
}
|
|
1997
2121
|
}
|
|
2122
|
+
/**
|
|
2123
|
+
* Get the system health tracker.
|
|
2124
|
+
*/
|
|
2125
|
+
get systemHealth() {
|
|
2126
|
+
return this.health;
|
|
2127
|
+
}
|
|
2128
|
+
/**
|
|
2129
|
+
* Wrap a processing operation with health tracking.
|
|
2130
|
+
* On success, resets the failure counter.
|
|
2131
|
+
* On failure, records the failure and applies backoff.
|
|
2132
|
+
*/
|
|
2133
|
+
async wrapProcessing(fn) {
|
|
2134
|
+
try {
|
|
2135
|
+
await this.health.backoff();
|
|
2136
|
+
await fn();
|
|
2137
|
+
this.health.recordSuccess();
|
|
2138
|
+
}
|
|
2139
|
+
catch (error) {
|
|
2140
|
+
const shouldContinue = this.health.recordFailure(error);
|
|
2141
|
+
if (!shouldContinue) {
|
|
2142
|
+
await this.stop();
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2145
|
+
}
|
|
1998
2146
|
}
|
|
1999
2147
|
|
|
2000
2148
|
/**
|
|
@@ -2070,7 +2218,7 @@ const defaultFactories = {
|
|
|
2070
2218
|
compileRules,
|
|
2071
2219
|
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
|
|
2072
2220
|
createEventQueue: (options) => new EventQueue(options),
|
|
2073
|
-
createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
|
|
2221
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2074
2222
|
createApiServer,
|
|
2075
2223
|
};
|
|
2076
2224
|
/**
|
|
@@ -2080,6 +2228,7 @@ class JeevesWatcher {
|
|
|
2080
2228
|
config;
|
|
2081
2229
|
configPath;
|
|
2082
2230
|
factories;
|
|
2231
|
+
runtimeOptions;
|
|
2083
2232
|
logger;
|
|
2084
2233
|
watcher;
|
|
2085
2234
|
queue;
|
|
@@ -2092,11 +2241,13 @@ class JeevesWatcher {
|
|
|
2092
2241
|
* @param config - The application configuration.
|
|
2093
2242
|
* @param configPath - Optional config file path to watch for changes.
|
|
2094
2243
|
* @param factories - Optional component factories (for dependency injection).
|
|
2244
|
+
* @param runtimeOptions - Optional runtime-only options (e.g., onFatalError).
|
|
2095
2245
|
*/
|
|
2096
|
-
constructor(config, configPath, factories = {}) {
|
|
2246
|
+
constructor(config, configPath, factories = {}, runtimeOptions = {}) {
|
|
2097
2247
|
this.config = config;
|
|
2098
2248
|
this.configPath = configPath;
|
|
2099
2249
|
this.factories = { ...defaultFactories, ...factories };
|
|
2250
|
+
this.runtimeOptions = runtimeOptions;
|
|
2100
2251
|
}
|
|
2101
2252
|
/**
|
|
2102
2253
|
* Start the watcher, API server, and all components.
|
|
@@ -2129,7 +2280,11 @@ class JeevesWatcher {
|
|
|
2129
2280
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2130
2281
|
});
|
|
2131
2282
|
this.queue = queue;
|
|
2132
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger
|
|
2283
|
+
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2284
|
+
maxRetries: this.config.maxRetries,
|
|
2285
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
2286
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
2287
|
+
});
|
|
2133
2288
|
this.watcher = watcher;
|
|
2134
2289
|
const server = this.factories.createApiServer({
|
|
2135
2290
|
processor,
|
|
@@ -2238,6 +2393,7 @@ exports.DocumentProcessor = DocumentProcessor;
|
|
|
2238
2393
|
exports.EventQueue = EventQueue;
|
|
2239
2394
|
exports.FileSystemWatcher = FileSystemWatcher;
|
|
2240
2395
|
exports.JeevesWatcher = JeevesWatcher;
|
|
2396
|
+
exports.SystemHealth = SystemHealth;
|
|
2241
2397
|
exports.VectorStoreClient = VectorStoreClient;
|
|
2242
2398
|
exports.apiConfigSchema = apiConfigSchema;
|
|
2243
2399
|
exports.applyRules = applyRules;
|
|
@@ -668,6 +668,16 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
668
668
|
.number()
|
|
669
669
|
.optional()
|
|
670
670
|
.describe('Timeout in milliseconds for graceful shutdown.'),
|
|
671
|
+
/** Maximum consecutive system-level failures before triggering fatal error. Default: Infinity. */
|
|
672
|
+
maxRetries: z
|
|
673
|
+
.number()
|
|
674
|
+
.optional()
|
|
675
|
+
.describe('Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.'),
|
|
676
|
+
/** Maximum backoff delay in milliseconds for system errors. Default: 60000. */
|
|
677
|
+
maxBackoffMs: z
|
|
678
|
+
.number()
|
|
679
|
+
.optional()
|
|
680
|
+
.describe('Maximum backoff delay in milliseconds for system errors. Default: 60000.'),
|
|
671
681
|
});
|
|
672
682
|
|
|
673
683
|
/**
|
|
@@ -680,15 +690,13 @@ const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
|
680
690
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
681
691
|
*
|
|
682
692
|
* @param value - The string to process.
|
|
683
|
-
* @returns The string with env vars
|
|
684
|
-
* @throws If a referenced env var is not set.
|
|
693
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
685
694
|
*/
|
|
686
695
|
function substituteString(value) {
|
|
687
696
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
688
697
|
const envValue = process.env[varName];
|
|
689
|
-
if (envValue === undefined)
|
|
690
|
-
|
|
691
|
-
}
|
|
698
|
+
if (envValue === undefined)
|
|
699
|
+
return match;
|
|
692
700
|
return envValue;
|
|
693
701
|
});
|
|
694
702
|
}
|
|
@@ -1074,11 +1082,11 @@ async function extractMarkdown(filePath) {
|
|
|
1074
1082
|
}
|
|
1075
1083
|
async function extractPlaintext(filePath) {
|
|
1076
1084
|
const raw = await readFile(filePath, 'utf8');
|
|
1077
|
-
return { text: raw };
|
|
1085
|
+
return { text: raw.replace(/^\uFEFF/, '') };
|
|
1078
1086
|
}
|
|
1079
1087
|
async function extractJson(filePath) {
|
|
1080
1088
|
const raw = await readFile(filePath, 'utf8');
|
|
1081
|
-
const parsed = JSON.parse(raw);
|
|
1089
|
+
const parsed = JSON.parse(raw.replace(/^\uFEFF/, ''));
|
|
1082
1090
|
const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
|
|
1083
1091
|
? parsed
|
|
1084
1092
|
: undefined;
|
|
@@ -1100,7 +1108,7 @@ async function extractDocx(filePath) {
|
|
|
1100
1108
|
}
|
|
1101
1109
|
async function extractHtml(filePath) {
|
|
1102
1110
|
const raw = await readFile(filePath, 'utf8');
|
|
1103
|
-
const $ = cheerio.load(raw);
|
|
1111
|
+
const $ = cheerio.load(raw.replace(/^\uFEFF/, ''));
|
|
1104
1112
|
$('script, style').remove();
|
|
1105
1113
|
const text = $('body').text().trim() || $.text().trim();
|
|
1106
1114
|
return { text };
|
|
@@ -1930,6 +1938,112 @@ class VectorStoreClient {
|
|
|
1930
1938
|
}
|
|
1931
1939
|
}
|
|
1932
1940
|
|
|
1941
|
+
/**
|
|
1942
|
+
* @module health
|
|
1943
|
+
* Tracks consecutive system-level failures and applies exponential backoff.
|
|
1944
|
+
* Triggers fatal error callback when maxRetries is exceeded.
|
|
1945
|
+
*/
|
|
1946
|
+
/**
|
|
1947
|
+
* Tracks system health via consecutive failure count and exponential backoff.
|
|
1948
|
+
*/
|
|
1949
|
+
class SystemHealth {
|
|
1950
|
+
consecutiveFailures = 0;
|
|
1951
|
+
maxRetries;
|
|
1952
|
+
maxBackoffMs;
|
|
1953
|
+
baseDelayMs;
|
|
1954
|
+
onFatalError;
|
|
1955
|
+
logger;
|
|
1956
|
+
constructor(options) {
|
|
1957
|
+
this.maxRetries = options.maxRetries ?? Number.POSITIVE_INFINITY;
|
|
1958
|
+
this.maxBackoffMs = options.maxBackoffMs ?? 60_000;
|
|
1959
|
+
this.baseDelayMs = options.baseDelayMs ?? 1000;
|
|
1960
|
+
this.onFatalError = options.onFatalError;
|
|
1961
|
+
this.logger = options.logger;
|
|
1962
|
+
}
|
|
1963
|
+
/**
|
|
1964
|
+
* Record a successful system operation. Resets the failure counter.
|
|
1965
|
+
*/
|
|
1966
|
+
recordSuccess() {
|
|
1967
|
+
if (this.consecutiveFailures > 0) {
|
|
1968
|
+
this.logger.info({ previousFailures: this.consecutiveFailures }, 'System health recovered');
|
|
1969
|
+
}
|
|
1970
|
+
this.consecutiveFailures = 0;
|
|
1971
|
+
}
|
|
1972
|
+
/**
|
|
1973
|
+
* Record a system-level failure. If maxRetries is exceeded, triggers fatal error.
|
|
1974
|
+
*
|
|
1975
|
+
* @param error - The error that occurred.
|
|
1976
|
+
* @returns Whether the watcher should continue (false = fatal).
|
|
1977
|
+
*/
|
|
1978
|
+
recordFailure(error) {
|
|
1979
|
+
this.consecutiveFailures += 1;
|
|
1980
|
+
this.logger.error({
|
|
1981
|
+
consecutiveFailures: this.consecutiveFailures,
|
|
1982
|
+
maxRetries: this.maxRetries,
|
|
1983
|
+
err: normalizeError(error),
|
|
1984
|
+
}, 'System-level failure recorded');
|
|
1985
|
+
if (this.consecutiveFailures >= this.maxRetries) {
|
|
1986
|
+
this.logger.fatal({ consecutiveFailures: this.consecutiveFailures }, 'Maximum retries exceeded, triggering fatal error');
|
|
1987
|
+
if (this.onFatalError) {
|
|
1988
|
+
this.onFatalError(error);
|
|
1989
|
+
return false;
|
|
1990
|
+
}
|
|
1991
|
+
throw error instanceof Error
|
|
1992
|
+
? error
|
|
1993
|
+
: new Error(`Fatal system error: ${String(error)}`);
|
|
1994
|
+
}
|
|
1995
|
+
return true;
|
|
1996
|
+
}
|
|
1997
|
+
/**
|
|
1998
|
+
* Compute the current backoff delay based on consecutive failures.
|
|
1999
|
+
*
|
|
2000
|
+
* @returns Delay in milliseconds.
|
|
2001
|
+
*/
|
|
2002
|
+
get currentBackoffMs() {
|
|
2003
|
+
if (this.consecutiveFailures === 0)
|
|
2004
|
+
return 0;
|
|
2005
|
+
const exp = Math.max(0, this.consecutiveFailures - 1);
|
|
2006
|
+
return Math.min(this.maxBackoffMs, this.baseDelayMs * 2 ** exp);
|
|
2007
|
+
}
|
|
2008
|
+
/**
|
|
2009
|
+
* Sleep for the current backoff duration.
|
|
2010
|
+
*
|
|
2011
|
+
* @param signal - Optional abort signal.
|
|
2012
|
+
*/
|
|
2013
|
+
async backoff(signal) {
|
|
2014
|
+
const delay = this.currentBackoffMs;
|
|
2015
|
+
if (delay <= 0)
|
|
2016
|
+
return;
|
|
2017
|
+
this.logger.warn({ delayMs: delay, consecutiveFailures: this.consecutiveFailures }, 'Backing off before next attempt');
|
|
2018
|
+
await new Promise((resolve, reject) => {
|
|
2019
|
+
const timer = setTimeout(() => {
|
|
2020
|
+
cleanup();
|
|
2021
|
+
resolve();
|
|
2022
|
+
}, delay);
|
|
2023
|
+
const onAbort = () => {
|
|
2024
|
+
cleanup();
|
|
2025
|
+
reject(new Error('Backoff aborted'));
|
|
2026
|
+
};
|
|
2027
|
+
const cleanup = () => {
|
|
2028
|
+
clearTimeout(timer);
|
|
2029
|
+
if (signal)
|
|
2030
|
+
signal.removeEventListener('abort', onAbort);
|
|
2031
|
+
};
|
|
2032
|
+
if (signal) {
|
|
2033
|
+
if (signal.aborted) {
|
|
2034
|
+
onAbort();
|
|
2035
|
+
return;
|
|
2036
|
+
}
|
|
2037
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
2038
|
+
}
|
|
2039
|
+
});
|
|
2040
|
+
}
|
|
2041
|
+
/** Current consecutive failure count. */
|
|
2042
|
+
get failures() {
|
|
2043
|
+
return this.consecutiveFailures;
|
|
2044
|
+
}
|
|
2045
|
+
}
|
|
2046
|
+
|
|
1933
2047
|
/**
|
|
1934
2048
|
* @module watcher
|
|
1935
2049
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -1942,6 +2056,7 @@ class FileSystemWatcher {
|
|
|
1942
2056
|
queue;
|
|
1943
2057
|
processor;
|
|
1944
2058
|
logger;
|
|
2059
|
+
health;
|
|
1945
2060
|
watcher;
|
|
1946
2061
|
/**
|
|
1947
2062
|
* Create a new FileSystemWatcher.
|
|
@@ -1950,12 +2065,20 @@ class FileSystemWatcher {
|
|
|
1950
2065
|
* @param queue - The event queue.
|
|
1951
2066
|
* @param processor - The document processor.
|
|
1952
2067
|
* @param logger - The logger instance.
|
|
2068
|
+
* @param options - Optional health/fatal error options.
|
|
1953
2069
|
*/
|
|
1954
|
-
constructor(config, queue, processor, logger) {
|
|
2070
|
+
constructor(config, queue, processor, logger, options = {}) {
|
|
1955
2071
|
this.config = config;
|
|
1956
2072
|
this.queue = queue;
|
|
1957
2073
|
this.processor = processor;
|
|
1958
2074
|
this.logger = logger;
|
|
2075
|
+
const healthOptions = {
|
|
2076
|
+
maxRetries: options.maxRetries,
|
|
2077
|
+
maxBackoffMs: options.maxBackoffMs,
|
|
2078
|
+
onFatalError: options.onFatalError,
|
|
2079
|
+
logger,
|
|
2080
|
+
};
|
|
2081
|
+
this.health = new SystemHealth(healthOptions);
|
|
1959
2082
|
}
|
|
1960
2083
|
/**
|
|
1961
2084
|
* Start watching the filesystem and processing events.
|
|
@@ -1972,18 +2095,19 @@ class FileSystemWatcher {
|
|
|
1972
2095
|
});
|
|
1973
2096
|
this.watcher.on('add', (path) => {
|
|
1974
2097
|
this.logger.debug({ path }, 'File added');
|
|
1975
|
-
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2098
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1976
2099
|
});
|
|
1977
2100
|
this.watcher.on('change', (path) => {
|
|
1978
2101
|
this.logger.debug({ path }, 'File changed');
|
|
1979
|
-
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.processor.processFile(path));
|
|
2102
|
+
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
1980
2103
|
});
|
|
1981
2104
|
this.watcher.on('unlink', (path) => {
|
|
1982
2105
|
this.logger.debug({ path }, 'File removed');
|
|
1983
|
-
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.processor.deleteFile(path));
|
|
2106
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
1984
2107
|
});
|
|
1985
2108
|
this.watcher.on('error', (error) => {
|
|
1986
2109
|
this.logger.error({ err: normalizeError(error) }, 'Watcher error');
|
|
2110
|
+
this.health.recordFailure(error);
|
|
1987
2111
|
});
|
|
1988
2112
|
this.queue.process();
|
|
1989
2113
|
this.logger.info({ paths: this.config.paths }, 'Filesystem watcher started');
|
|
@@ -1998,6 +2122,30 @@ class FileSystemWatcher {
|
|
|
1998
2122
|
this.logger.info('Filesystem watcher stopped');
|
|
1999
2123
|
}
|
|
2000
2124
|
}
|
|
2125
|
+
/**
|
|
2126
|
+
* Get the system health tracker.
|
|
2127
|
+
*/
|
|
2128
|
+
get systemHealth() {
|
|
2129
|
+
return this.health;
|
|
2130
|
+
}
|
|
2131
|
+
/**
|
|
2132
|
+
* Wrap a processing operation with health tracking.
|
|
2133
|
+
* On success, resets the failure counter.
|
|
2134
|
+
* On failure, records the failure and applies backoff.
|
|
2135
|
+
*/
|
|
2136
|
+
async wrapProcessing(fn) {
|
|
2137
|
+
try {
|
|
2138
|
+
await this.health.backoff();
|
|
2139
|
+
await fn();
|
|
2140
|
+
this.health.recordSuccess();
|
|
2141
|
+
}
|
|
2142
|
+
catch (error) {
|
|
2143
|
+
const shouldContinue = this.health.recordFailure(error);
|
|
2144
|
+
if (!shouldContinue) {
|
|
2145
|
+
await this.stop();
|
|
2146
|
+
}
|
|
2147
|
+
}
|
|
2148
|
+
}
|
|
2001
2149
|
}
|
|
2002
2150
|
|
|
2003
2151
|
/**
|
|
@@ -2073,7 +2221,7 @@ const defaultFactories = {
|
|
|
2073
2221
|
compileRules,
|
|
2074
2222
|
createDocumentProcessor: (config, embeddingProvider, vectorStore, compiledRules, logger) => new DocumentProcessor(config, embeddingProvider, vectorStore, compiledRules, logger),
|
|
2075
2223
|
createEventQueue: (options) => new EventQueue(options),
|
|
2076
|
-
createFileSystemWatcher: (config, queue, processor, logger) => new FileSystemWatcher(config, queue, processor, logger),
|
|
2224
|
+
createFileSystemWatcher: (config, queue, processor, logger, options) => new FileSystemWatcher(config, queue, processor, logger, options),
|
|
2077
2225
|
createApiServer,
|
|
2078
2226
|
};
|
|
2079
2227
|
/**
|
|
@@ -2083,6 +2231,7 @@ class JeevesWatcher {
|
|
|
2083
2231
|
config;
|
|
2084
2232
|
configPath;
|
|
2085
2233
|
factories;
|
|
2234
|
+
runtimeOptions;
|
|
2086
2235
|
logger;
|
|
2087
2236
|
watcher;
|
|
2088
2237
|
queue;
|
|
@@ -2095,11 +2244,13 @@ class JeevesWatcher {
|
|
|
2095
2244
|
* @param config - The application configuration.
|
|
2096
2245
|
* @param configPath - Optional config file path to watch for changes.
|
|
2097
2246
|
* @param factories - Optional component factories (for dependency injection).
|
|
2247
|
+
* @param runtimeOptions - Optional runtime-only options (e.g., onFatalError).
|
|
2098
2248
|
*/
|
|
2099
|
-
constructor(config, configPath, factories = {}) {
|
|
2249
|
+
constructor(config, configPath, factories = {}, runtimeOptions = {}) {
|
|
2100
2250
|
this.config = config;
|
|
2101
2251
|
this.configPath = configPath;
|
|
2102
2252
|
this.factories = { ...defaultFactories, ...factories };
|
|
2253
|
+
this.runtimeOptions = runtimeOptions;
|
|
2103
2254
|
}
|
|
2104
2255
|
/**
|
|
2105
2256
|
* Start the watcher, API server, and all components.
|
|
@@ -2132,7 +2283,11 @@ class JeevesWatcher {
|
|
|
2132
2283
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2133
2284
|
});
|
|
2134
2285
|
this.queue = queue;
|
|
2135
|
-
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger
|
|
2286
|
+
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2287
|
+
maxRetries: this.config.maxRetries,
|
|
2288
|
+
maxBackoffMs: this.config.maxBackoffMs,
|
|
2289
|
+
onFatalError: this.runtimeOptions.onFatalError,
|
|
2290
|
+
});
|
|
2136
2291
|
this.watcher = watcher;
|
|
2137
2292
|
const server = this.factories.createApiServer({
|
|
2138
2293
|
processor,
|