@karmaniverous/jeeves-watcher 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/config.schema.json +99 -85
- package/dist/cjs/index.js +254 -0
- package/dist/cli/jeeves-watcher/index.js +254 -1
- package/dist/index.d.ts +50 -1
- package/dist/index.iife.js +254 -2
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +255 -2
- package/package.json +2 -1
package/dist/mjs/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Fastify from 'fastify';
|
|
2
2
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
3
|
-
import { resolve, dirname, join, extname, basename } from 'node:path';
|
|
3
|
+
import { resolve, dirname, join, relative, extname, basename } from 'node:path';
|
|
4
4
|
import picomatch from 'picomatch';
|
|
5
5
|
import { omit, get } from 'radash';
|
|
6
6
|
import { createHash } from 'node:crypto';
|
|
@@ -8,6 +8,8 @@ import { cosmiconfig } from 'cosmiconfig';
|
|
|
8
8
|
import { z, ZodError } from 'zod';
|
|
9
9
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
10
10
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
11
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
12
|
+
import ignore from 'ignore';
|
|
11
13
|
import pino from 'pino';
|
|
12
14
|
import { v5 } from 'uuid';
|
|
13
15
|
import * as cheerio from 'cheerio';
|
|
@@ -433,6 +435,7 @@ const WATCH_DEFAULTS = {
|
|
|
433
435
|
stabilityThresholdMs: 500,
|
|
434
436
|
usePolling: false,
|
|
435
437
|
pollIntervalMs: 1000,
|
|
438
|
+
respectGitignore: true,
|
|
436
439
|
};
|
|
437
440
|
/** Default embedding configuration. */
|
|
438
441
|
const EMBEDDING_DEFAULTS = {
|
|
@@ -477,6 +480,11 @@ const watchConfigSchema = z.object({
|
|
|
477
480
|
.number()
|
|
478
481
|
.optional()
|
|
479
482
|
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
483
|
+
/** Whether to respect .gitignore files when processing. */
|
|
484
|
+
respectGitignore: z
|
|
485
|
+
.boolean()
|
|
486
|
+
.optional()
|
|
487
|
+
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
480
488
|
});
|
|
481
489
|
/**
|
|
482
490
|
* Configuration watch settings.
|
|
@@ -942,6 +950,212 @@ function createEmbeddingProvider(config, logger) {
|
|
|
942
950
|
return factory(config, logger);
|
|
943
951
|
}
|
|
944
952
|
|
|
953
|
+
/**
|
|
954
|
+
* @module gitignore
|
|
955
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
956
|
+
*/
|
|
957
|
+
/**
 * Find the git repo root by walking up from `startDir` looking for `.git/`.
 *
 * Only a `.git` entry that is a directory marks a repo root; a `.git` entry
 * of any other kind is skipped and the walk continues upward. Every level is
 * examined, including the filesystem root.
 *
 * @param startDir - Directory to start from (resolved against the current working directory).
 * @returns Absolute path of the nearest enclosing repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    let dir = resolve(startDir);
    for (;;) {
        let hasGitDir = false;
        try {
            // A single stat call: no window between an existence check and the
            // type check, and a missing entry yields `undefined` instead of throwing.
            hasGitDir =
                statSync(join(dir, '.git'), { throwIfNoEntry: false })?.isDirectory() ?? false;
        }
        catch {
            // Unreadable entry (e.g. permission denied): not usable as a repo marker.
        }
        if (hasGitDir)
            return dir;
        const parent = dirname(dir);
        // `dirname` returns its input once the top of the tree is reached, so
        // this ends the walk after the root itself has been checked.
        if (parent === dir)
            return undefined;
        dir = parent;
    }
}
|
|
976
|
+
/**
 * Map a configured watch path to an existing directory from which a repo
 * root search can start.
 *
 * An existing directory is returned as-is and an existing file yields its
 * parent directory. Otherwise the path is treated as a glob and the literal
 * text before the first glob character (`*`, `?`, `[`, `{`) is used.
 *
 * @param watchPath - Directory, file path, or glob from the watch configuration.
 * @returns Absolute directory path, or `undefined` when nothing usable exists on disk.
 */
function watchPathToScanDir(watchPath) {
    const target = resolve(watchPath);
    try {
        const info = statSync(target);
        if (info.isDirectory())
            return target;
        return dirname(target);
    }
    catch {
        // Not an existing path; it may still be a glob.
    }
    const firstGlobChar = watchPath.search(/[*?[{]/);
    if (firstGlobChar === -1)
        return undefined;
    const literalPart = watchPath.slice(0, firstGlobChar).trim();
    let base;
    if (literalPart === '') {
        // Glob starts immediately (e.g. `*.md`): scan from the working directory.
        base = '.';
    }
    else if (/[\\/]$/.test(literalPart)) {
        // Literal part already names a directory (`docs/` in `docs/**`).
        base = literalPart;
    }
    else {
        // Literal part stops mid-segment (`docs/no` in `docs/no*.md`).
        base = dirname(literalPart);
    }
    const scanDir = resolve(base);
    return existsSync(scanDir) ? scanDir : undefined;
}
|
|
1004
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 * Skips `.git` and `node_modules` directories for performance.
 *
 * Only entries that are files are reported, so a directory that happens to
 * be named `.gitignore` is never handed to the parser. Directory symlinks are
 * not descended into, which keeps the walk inside `dir` and makes symlink
 * cycles harmless.
 *
 * @param dir - Directory to search.
 * @returns Paths of the `.gitignore` files found, parents before their descendants.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const visit = (current) => {
        const gitignorePath = join(current, '.gitignore');
        try {
            if (statSync(gitignorePath, { throwIfNoEntry: false })?.isFile()) {
                results.push(gitignorePath);
            }
        }
        catch {
            // Skip an inaccessible `.gitignore`
        }
        let entries;
        try {
            // Dirent objects carry the entry type, avoiding one stat call per entry.
            entries = readdirSync(current, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const entry of entries) {
            // A symlink is reported as a symlink rather than as its target, so
            // `isDirectory()` is false for it and it is not followed.
            if (!entry.isDirectory())
                continue;
            if (entry.name === '.git' || entry.name === 'node_modules')
                continue;
            visit(join(current, entry.name));
        }
    };
    visit(dir);
    return results;
}
|
|
1036
|
+
/**
 * Read a `.gitignore` file and load its rules into a new `ignore` matcher.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read (as UTF-8).
 * @returns The matcher holding the file's patterns.
 */
function parseGitignore(gitignorePath) {
    const rules = readFileSync(gitignorePath, 'utf8');
    const matcher = ignore();
    return matcher.add(rules);
}
|
|
1043
|
+
/**
 * Replace every backslash in a path with a forward slash, the separator
 * the `ignore` package expects.
 *
 * @param p - Path string to convert.
 * @returns The same path with `/` in place of each `\`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
1049
|
+
/**
 * Report whether a `path.relative()` result points outside its base directory.
 *
 * @param rel - Value returned by `relative(base, target)`.
 * @returns `true` when `target` is not `base` itself or one of its descendants.
 */
function relativePathEscapes(rel) {
    // Only a leading `..` *segment* climbs out of the base; a name that merely
    // begins with two dots (e.g. `..cache`) is still inside it.
    if (rel === '..' || rel.startsWith('../') || rel.startsWith('..\\'))
        return true;
    // `relative()` can return an absolute path when base and target share no
    // common root (e.g. different Windows drives). An absolute, normalized
    // path resolves to itself, whereas a relative one resolves against the cwd.
    return rel !== '' && resolve(rel) === rel;
}
/**
 * Parse one `.gitignore` file into a cache entry.
 *
 * @param gitignorePath - Absolute path of the `.gitignore` file.
 * @returns `{ dir, ig }`, or `undefined` when the file is missing or unreadable.
 */
function readGitignoreEntry(gitignorePath) {
    try {
        return { dir: dirname(gitignorePath), ig: parseGitignore(gitignorePath) };
    }
    catch {
        // Best-effort: a file that vanished or cannot be read contributes no
        // rules instead of aborting the scan or the watcher callback.
        return undefined;
    }
}
/**
 * Load every `.gitignore` under `repoRoot`, sorted deepest directory first.
 *
 * @param repoRoot - Absolute path of the repo root.
 * @returns Cache entries for the repo.
 */
function loadGitignoreEntries(repoRoot) {
    const entries = [];
    for (const gitignorePath of findGitignoreFiles(repoRoot)) {
        const entry = readGitignoreEntry(gitignorePath);
        if (entry)
            entries.push(entry);
    }
    return entries.sort((a, b) => b.dir.length - a.dir.length);
}
/**
 * Processor-level gitignore filter. Checks file paths against the
 * `.gitignore` files of the git repositories that contain them.
 *
 * A path is reported as ignored as soon as any applicable `.gitignore`
 * ignores it, so a negated pattern in one file cannot re-include a path that
 * another file ignores.
 */
class GitignoreFilter {
    /** Cached repos keyed by root path; each value is `{ root, entries }` with `entries` sorted deepest-first. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Discard the cache and rescan paths for git repos and their `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            this.repos.set(repoRoot, {
                root: repoRoot,
                entries: loadGitignoreEntries(repoRoot),
            });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check (resolved to an absolute path).
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const repo of this.repos.values()) {
            if (relativePathEscapes(relative(repo.root, absPath)))
                continue;
            // Entries are sorted deepest-first
            for (const entry of repo.entries) {
                const relToEntry = relative(entry.dir, absPath);
                // An empty result means the path IS the directory holding this
                // `.gitignore`; its rules cover descendants only, and the
                // matcher does not accept an empty path.
                if (relToEntry === '' || relativePathEscapes(relToEntry))
                    continue;
                if (entry.ig.ignores(toForwardSlash(relToEntry)))
                    return true;
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every cached repo that contains the file is refreshed. If no cached repo
     * contains it but it lies inside a git repo, that repo is registered with
     * all of its `.gitignore` files.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        let refreshed = false;
        for (const repo of this.repos.values()) {
            if (relativePathEscapes(relative(repo.root, gitignoreDir)))
                continue;
            refreshed = true;
            // Drop the stale entry for this directory, then add the current
            // one unless the file is gone.
            const entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            const fresh = readGitignoreEntry(absPath);
            if (fresh)
                entries.push(fresh);
            repo.entries = entries.sort((a, b) => b.dir.length - a.dir.length);
        }
        if (refreshed)
            return;
        // Not inside any cached repo: it may belong to one not seen by `scan`.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot) {
            this.repos.set(repoRoot, {
                root: repoRoot,
                entries: loadGitignoreEntries(repoRoot),
            });
        }
    }
}
|
|
1158
|
+
|
|
945
1159
|
/**
|
|
946
1160
|
* @module logger
|
|
947
1161
|
* Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
|
|
@@ -2033,6 +2247,7 @@ class FileSystemWatcher {
|
|
|
2033
2247
|
processor;
|
|
2034
2248
|
logger;
|
|
2035
2249
|
health;
|
|
2250
|
+
gitignoreFilter;
|
|
2036
2251
|
watcher;
|
|
2037
2252
|
/**
|
|
2038
2253
|
* Create a new FileSystemWatcher.
|
|
@@ -2048,6 +2263,7 @@ class FileSystemWatcher {
|
|
|
2048
2263
|
this.queue = queue;
|
|
2049
2264
|
this.processor = processor;
|
|
2050
2265
|
this.logger = logger;
|
|
2266
|
+
this.gitignoreFilter = options.gitignoreFilter;
|
|
2051
2267
|
const healthOptions = {
|
|
2052
2268
|
maxRetries: options.maxRetries,
|
|
2053
2269
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2070,14 +2286,23 @@ class FileSystemWatcher {
|
|
|
2070
2286
|
ignoreInitial: false,
|
|
2071
2287
|
});
|
|
2072
2288
|
this.watcher.on('add', (path) => {
|
|
2289
|
+
this.handleGitignoreChange(path);
|
|
2290
|
+
if (this.isGitignored(path))
|
|
2291
|
+
return;
|
|
2073
2292
|
this.logger.debug({ path }, 'File added');
|
|
2074
2293
|
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2075
2294
|
});
|
|
2076
2295
|
this.watcher.on('change', (path) => {
|
|
2296
|
+
this.handleGitignoreChange(path);
|
|
2297
|
+
if (this.isGitignored(path))
|
|
2298
|
+
return;
|
|
2077
2299
|
this.logger.debug({ path }, 'File changed');
|
|
2078
2300
|
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2079
2301
|
});
|
|
2080
2302
|
this.watcher.on('unlink', (path) => {
|
|
2303
|
+
this.handleGitignoreChange(path);
|
|
2304
|
+
if (this.isGitignored(path))
|
|
2305
|
+
return;
|
|
2081
2306
|
this.logger.debug({ path }, 'File removed');
|
|
2082
2307
|
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
2083
2308
|
});
|
|
@@ -2104,6 +2329,29 @@ class FileSystemWatcher {
|
|
|
2104
2329
|
get systemHealth() {
|
|
2105
2330
|
return this.health;
|
|
2106
2331
|
}
|
|
2332
|
+
/**
|
|
2333
|
+
* Check if a path is gitignored and should be skipped.
|
|
2334
|
+
*/
|
|
2335
|
+
isGitignored(path) {
|
|
2336
|
+
if (!this.gitignoreFilter)
|
|
2337
|
+
return false;
|
|
2338
|
+
const ignored = this.gitignoreFilter.isIgnored(path);
|
|
2339
|
+
if (ignored) {
|
|
2340
|
+
this.logger.debug({ path }, 'Skipping gitignored file');
|
|
2341
|
+
}
|
|
2342
|
+
return ignored;
|
|
2343
|
+
}
|
|
2344
|
+
/**
|
|
2345
|
+
* If the changed file is a `.gitignore`, invalidate the filter cache.
|
|
2346
|
+
*/
|
|
2347
|
+
handleGitignoreChange(path) {
|
|
2348
|
+
if (!this.gitignoreFilter)
|
|
2349
|
+
return;
|
|
2350
|
+
if (path.endsWith('.gitignore')) {
|
|
2351
|
+
this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
|
|
2352
|
+
this.gitignoreFilter.invalidate(path);
|
|
2353
|
+
}
|
|
2354
|
+
}
|
|
2107
2355
|
/**
|
|
2108
2356
|
* Wrap a processing operation with health tracking.
|
|
2109
2357
|
* On success, resets the failure counter.
|
|
@@ -2259,10 +2507,15 @@ class JeevesWatcher {
|
|
|
2259
2507
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2260
2508
|
});
|
|
2261
2509
|
this.queue = queue;
|
|
2510
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
2511
|
+
const gitignoreFilter = respectGitignore
|
|
2512
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
2513
|
+
: undefined;
|
|
2262
2514
|
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2263
2515
|
maxRetries: this.config.maxRetries,
|
|
2264
2516
|
maxBackoffMs: this.config.maxBackoffMs,
|
|
2265
2517
|
onFatalError: this.runtimeOptions.onFatalError,
|
|
2518
|
+
gitignoreFilter,
|
|
2266
2519
|
});
|
|
2267
2520
|
this.watcher = watcher;
|
|
2268
2521
|
const server = this.factories.createApiServer({
|
|
@@ -2368,4 +2621,4 @@ async function startFromConfig(configPath) {
|
|
|
2368
2621
|
return app;
|
|
2369
2622
|
}
|
|
2370
2623
|
|
|
2371
|
-
export { DocumentProcessor, EventQueue, FileSystemWatcher, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
|
2624
|
+
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
package/package.json
CHANGED
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"commander": "^14.0.3",
|
|
27
27
|
"cosmiconfig": "*",
|
|
28
28
|
"fastify": "*",
|
|
29
|
+
"ignore": "^7.0.5",
|
|
29
30
|
"js-yaml": "*",
|
|
30
31
|
"json5": "*",
|
|
31
32
|
"mammoth": "^1.11.0",
|
|
@@ -171,5 +172,5 @@
|
|
|
171
172
|
},
|
|
172
173
|
"type": "module",
|
|
173
174
|
"types": "dist/index.d.ts",
|
|
174
|
-
"version": "0.2.4"
|
|
175
|
+
"version": "0.2.5"
|
|
175
176
|
}
|