@karmaniverous/jeeves-watcher 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/config.schema.json +99 -85
- package/dist/cjs/index.js +254 -0
- package/dist/cli/jeeves-watcher/index.js +254 -3
- package/dist/index.d.ts +50 -1
- package/dist/index.iife.js +254 -2
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +255 -2
- package/package.json +2 -1
package/dist/cjs/index.js
CHANGED
|
@@ -10,6 +10,8 @@ var cosmiconfig = require('cosmiconfig');
|
|
|
10
10
|
var zod = require('zod');
|
|
11
11
|
var jsonmap = require('@karmaniverous/jsonmap');
|
|
12
12
|
var googleGenai = require('@langchain/google-genai');
|
|
13
|
+
var node_fs = require('node:fs');
|
|
14
|
+
var ignore = require('ignore');
|
|
13
15
|
var pino = require('pino');
|
|
14
16
|
var uuid = require('uuid');
|
|
15
17
|
var cheerio = require('cheerio');
|
|
@@ -454,6 +456,7 @@ const WATCH_DEFAULTS = {
|
|
|
454
456
|
stabilityThresholdMs: 500,
|
|
455
457
|
usePolling: false,
|
|
456
458
|
pollIntervalMs: 1000,
|
|
459
|
+
respectGitignore: true,
|
|
457
460
|
};
|
|
458
461
|
/** Default embedding configuration. */
|
|
459
462
|
const EMBEDDING_DEFAULTS = {
|
|
@@ -498,6 +501,11 @@ const watchConfigSchema = zod.z.object({
|
|
|
498
501
|
.number()
|
|
499
502
|
.optional()
|
|
500
503
|
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
504
|
+
/** Whether to respect .gitignore files when processing. */
|
|
505
|
+
respectGitignore: zod.z
|
|
506
|
+
.boolean()
|
|
507
|
+
.optional()
|
|
508
|
+
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
501
509
|
});
|
|
502
510
|
/**
|
|
503
511
|
* Configuration watch settings.
|
|
@@ -963,6 +971,212 @@ function createEmbeddingProvider(config, logger) {
|
|
|
963
971
|
return factory(config, logger);
|
|
964
972
|
}
|
|
965
973
|
|
|
974
|
+
/**
|
|
975
|
+
* @module gitignore
|
|
976
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
977
|
+
*/
|
|
978
|
+
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git/` directory.
 *
 * Every ancestor is examined, including the filesystem root itself. A `.git`
 * entry that is not a directory, or that cannot be inspected, is skipped and
 * the walk continues upward.
 *
 * @param startDir - Directory to start from; resolved against the current working directory.
 * @returns Absolute path of the nearest enclosing repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    let dir = node_path.resolve(startDir);
    for (;;) {
        // A single stat (rather than exists + stat) leaves no window in which
        // `.git` can disappear between the two calls and throw out of the walk.
        let stats;
        try {
            stats = node_fs.statSync(node_path.join(dir, '.git'));
        }
        catch {
            stats = undefined;
        }
        if (stats?.isDirectory())
            return dir;
        const parent = node_path.dirname(dir);
        // `dirname` returns its input only at the filesystem root: nothing left to check.
        if (parent === dir)
            return undefined;
        dir = parent;
    }
}
|
|
997
|
+
/**
 * Map a watch path (directory, file path, or glob) to an existing directory
 * from which a repo-root search can start.
 *
 * An existing directory maps to itself and an existing file to its parent.
 * Otherwise, if the path contains a glob character (`*`, `?`, `[`, `{`), the
 * literal prefix before the first one is used.
 *
 * @param watchPath - Watch path as configured.
 * @returns An absolute directory path, or `undefined` if none can be derived.
 */
function watchPathToScanDir(watchPath) {
    const absolute = node_path.resolve(watchPath);
    let stats;
    try {
        stats = node_fs.statSync(absolute);
    }
    catch {
        // Not an existing path; it may still be a glob, handled below.
        stats = undefined;
    }
    if (stats) {
        return stats.isDirectory() ? absolute : node_path.dirname(absolute);
    }
    const firstGlobChar = watchPath.search(/[*?[{]/);
    if (firstGlobChar === -1) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlobChar).trim();
    let baseDir;
    if (literalPrefix.length === 0) {
        baseDir = '.';
    }
    else if (literalPrefix.endsWith('/') || literalPrefix.endsWith('\\')) {
        baseDir = literalPrefix;
    }
    else {
        // The prefix stops mid-segment (e.g. `src/foo*`): step back to the containing directory.
        baseDir = node_path.dirname(literalPrefix);
    }
    const candidate = node_path.resolve(baseDir);
    return node_fs.existsSync(candidate) ? candidate : undefined;
}
|
|
1025
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 *
 * Skips `.git` and `node_modules` directories for performance. Symbolic links
 * to directories are not descended into, so a link pointing back at an
 * ancestor cannot cause the same tree to be walked repeatedly. A `.gitignore`
 * entry that is not a readable file (for example a directory of that name) is
 * not reported.
 *
 * @param dir - Directory to search.
 * @returns Paths of the `.gitignore` files found; empty if `dir` cannot be read.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const gitignorePath = node_path.join(dir, '.gitignore');
    let isFile = false;
    try {
        isFile = node_fs.statSync(gitignorePath).isFile();
    }
    catch {
        // Absent or inaccessible: nothing to report for this directory.
    }
    if (isFile) {
        results.push(gitignorePath);
    }
    let entries;
    try {
        // Dirents describe the entry itself, so symlinks are reported as links, not as their targets.
        entries = node_fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        return results;
    }
    for (const entry of entries) {
        if (entry.name === '.git' || entry.name === 'node_modules')
            continue;
        if (!entry.isDirectory())
            continue;
        for (const found of findGitignoreFiles(node_path.join(dir, entry.name))) {
            results.push(found);
        }
    }
    return results;
}
|
|
1057
|
+
/**
 * Parse a `.gitignore` file into an `ignore` instance.
 *
 * A leading UTF-8 byte-order mark is dropped before parsing so it does not
 * become part of the first pattern.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns An `ignore` instance loaded with the file's patterns.
 * @throws If the file cannot be read.
 */
function parseGitignore(gitignorePath) {
    const raw = node_fs.readFileSync(gitignorePath, 'utf8');
    // Decoding as 'utf8' keeps a BOM as U+FEFF at index 0.
    const content = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    return ignore().add(content);
}
|
|
1064
|
+
/**
 * Replace every backslash in `p` with a forward slash, the separator the
 * `ignore` package expects.
 *
 * @param p - Path string to normalize.
 * @returns The path with `/` as its only separator.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
1070
|
+
/**
 * Processor-level gitignore filter. Decides whether a path is excluded by the
 * `.gitignore` files of the git repositories containing the watched paths.
 *
 * Precedence follows gitignore rules: for each path the nearest `.gitignore`
 * holding a matching rule decides, so a nested negation (`!keep.log`)
 * overrides a broader rule from a parent directory. The exception is a path
 * whose parent directory is itself excluded: nothing below an excluded
 * directory can be re-included.
 */
class GitignoreFilter {
    /** Repo root → `{ root, entries }`; `entries` is a list of `{ dir, ig }` sorted deepest-first. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Discard all cached state, then scan paths for git repos and their `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: node_path.dirname(gf),
                ig: parseGitignore(gf),
            }));
            entries.sort(GitignoreFilter.#deepestFirst);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check; resolved against the current working directory.
     * @returns `true` if the file should be ignored. Paths outside every known
     *   repo, and a repo root itself, are never ignored.
     */
    isIgnored(filePath) {
        const absPath = node_path.resolve(filePath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeInside(repo.root, absPath) === undefined)
                continue;
            if (GitignoreFilter.#isIgnoredInRepo(repo, absPath))
                return true;
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = node_path.resolve(gitignorePath);
        const gitignoreDir = node_path.dirname(absPath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeInside(repo.root, gitignoreDir) === undefined)
                continue;
            // Drop the stale rules for this directory, then reload them if the file still exists.
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (node_fs.existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                repo.entries.sort(GitignoreFilter.#deepestFirst);
            }
            return;
        }
        // Not inside any known repo: register the enclosing repo, if there is one,
        // with this single `.gitignore`. Had that root already been known, the
        // loop above would have matched and returned.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && node_fs.existsSync(absPath)) {
            this.repos.set(repoRoot, {
                root: repoRoot,
                entries: [{ dir: gitignoreDir, ig: parseGitignore(absPath) }],
            });
        }
    }
    /** Comparator placing longer (hence deeper, among ancestors of one path) directories first. */
    static #deepestFirst(a, b) {
        return b.dir.length - a.dir.length;
    }
    /**
     * Path of `child` relative to `parent`, or `undefined` when `child` lies
     * outside `parent`. Returns `''` when both are the same path.
     */
    static #relativeInside(parent, child) {
        const rel = node_path.relative(parent, child);
        // `relative` yields an absolute path when the two are on different Windows drives.
        if (node_path.isAbsolute(rel))
            return undefined;
        // Only a leading `..` *segment* escapes; a name such as `..notes` does not.
        return /^\.\.(?:[\\/]|$)/.test(rel) ? undefined : rel;
    }
    /**
     * Decide whether `absPath` (known to be inside `repo.root`) is ignored, by
     * checking each of its parent directories from the top down and finally the
     * path itself. The first ignored component settles the answer.
     */
    static #isIgnoredInRepo(repo, absPath) {
        // Only `.gitignore` files located in an ancestor directory of the path can apply.
        const applicable = repo.entries.filter((entry) => GitignoreFilter.#relativeInside(entry.dir, absPath) !== undefined);
        if (applicable.length === 0)
            return false;
        const chain = [];
        let current = absPath;
        while (current !== repo.root) {
            chain.unshift(current);
            const parent = node_path.dirname(current);
            if (parent === current)
                break; // reached the filesystem root; guards against a non-normalized repo root
            current = parent;
        }
        const last = chain.length - 1;
        return chain.some((candidate, index) => GitignoreFilter.#decide(applicable, candidate, index < last) === true);
    }
    /**
     * Ask each applicable `.gitignore`, nearest first, about `target`.
     *
     * @returns `true` if ignored, `false` if explicitly re-included, `undefined` if no rule matched.
     */
    static #decide(entries, target, isDirectory) {
        for (const entry of entries) {
            const rel = GitignoreFilter.#relativeInside(entry.dir, target);
            // Skip files that sit below `target`, and the directory holding the
            // `.gitignore` itself (`ignore` rejects an empty path).
            if (!rel)
                continue;
            // A trailing slash lets directory-only patterns (`build/`) match.
            const probe = isDirectory ? `${toForwardSlash(rel)}/` : toForwardSlash(rel);
            const { ignored, unignored } = entry.ig.test(probe);
            if (ignored)
                return true;
            if (unignored)
                return false;
        }
        return undefined;
    }
}
|
|
1179
|
+
|
|
966
1180
|
/**
|
|
967
1181
|
* @module logger
|
|
968
1182
|
* Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
|
|
@@ -2054,6 +2268,7 @@ class FileSystemWatcher {
|
|
|
2054
2268
|
processor;
|
|
2055
2269
|
logger;
|
|
2056
2270
|
health;
|
|
2271
|
+
gitignoreFilter;
|
|
2057
2272
|
watcher;
|
|
2058
2273
|
/**
|
|
2059
2274
|
* Create a new FileSystemWatcher.
|
|
@@ -2069,6 +2284,7 @@ class FileSystemWatcher {
|
|
|
2069
2284
|
this.queue = queue;
|
|
2070
2285
|
this.processor = processor;
|
|
2071
2286
|
this.logger = logger;
|
|
2287
|
+
this.gitignoreFilter = options.gitignoreFilter;
|
|
2072
2288
|
const healthOptions = {
|
|
2073
2289
|
maxRetries: options.maxRetries,
|
|
2074
2290
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2091,14 +2307,23 @@ class FileSystemWatcher {
|
|
|
2091
2307
|
ignoreInitial: false,
|
|
2092
2308
|
});
|
|
2093
2309
|
this.watcher.on('add', (path) => {
|
|
2310
|
+
this.handleGitignoreChange(path);
|
|
2311
|
+
if (this.isGitignored(path))
|
|
2312
|
+
return;
|
|
2094
2313
|
this.logger.debug({ path }, 'File added');
|
|
2095
2314
|
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2096
2315
|
});
|
|
2097
2316
|
this.watcher.on('change', (path) => {
|
|
2317
|
+
this.handleGitignoreChange(path);
|
|
2318
|
+
if (this.isGitignored(path))
|
|
2319
|
+
return;
|
|
2098
2320
|
this.logger.debug({ path }, 'File changed');
|
|
2099
2321
|
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2100
2322
|
});
|
|
2101
2323
|
this.watcher.on('unlink', (path) => {
|
|
2324
|
+
this.handleGitignoreChange(path);
|
|
2325
|
+
if (this.isGitignored(path))
|
|
2326
|
+
return;
|
|
2102
2327
|
this.logger.debug({ path }, 'File removed');
|
|
2103
2328
|
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
2104
2329
|
});
|
|
@@ -2125,6 +2350,29 @@ class FileSystemWatcher {
|
|
|
2125
2350
|
get systemHealth() {
|
|
2126
2351
|
return this.health;
|
|
2127
2352
|
}
|
|
2353
|
+
/**
|
|
2354
|
+
* Check if a path is gitignored and should be skipped.
|
|
2355
|
+
*/
|
|
2356
|
+
isGitignored(path) {
|
|
2357
|
+
if (!this.gitignoreFilter)
|
|
2358
|
+
return false;
|
|
2359
|
+
const ignored = this.gitignoreFilter.isIgnored(path);
|
|
2360
|
+
if (ignored) {
|
|
2361
|
+
this.logger.debug({ path }, 'Skipping gitignored file');
|
|
2362
|
+
}
|
|
2363
|
+
return ignored;
|
|
2364
|
+
}
|
|
2365
|
+
/**
|
|
2366
|
+
* If the changed file is a `.gitignore`, invalidate the filter cache.
|
|
2367
|
+
*/
|
|
2368
|
+
handleGitignoreChange(path) {
|
|
2369
|
+
if (!this.gitignoreFilter)
|
|
2370
|
+
return;
|
|
2371
|
+
if (path.endsWith('.gitignore')) {
|
|
2372
|
+
this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
|
|
2373
|
+
this.gitignoreFilter.invalidate(path);
|
|
2374
|
+
}
|
|
2375
|
+
}
|
|
2128
2376
|
/**
|
|
2129
2377
|
* Wrap a processing operation with health tracking.
|
|
2130
2378
|
* On success, resets the failure counter.
|
|
@@ -2280,10 +2528,15 @@ class JeevesWatcher {
|
|
|
2280
2528
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2281
2529
|
});
|
|
2282
2530
|
this.queue = queue;
|
|
2531
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
2532
|
+
const gitignoreFilter = respectGitignore
|
|
2533
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
2534
|
+
: undefined;
|
|
2283
2535
|
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2284
2536
|
maxRetries: this.config.maxRetries,
|
|
2285
2537
|
maxBackoffMs: this.config.maxBackoffMs,
|
|
2286
2538
|
onFatalError: this.runtimeOptions.onFatalError,
|
|
2539
|
+
gitignoreFilter,
|
|
2287
2540
|
});
|
|
2288
2541
|
this.watcher = watcher;
|
|
2289
2542
|
const server = this.factories.createApiServer({
|
|
@@ -2392,6 +2645,7 @@ async function startFromConfig(configPath) {
|
|
|
2392
2645
|
exports.DocumentProcessor = DocumentProcessor;
|
|
2393
2646
|
exports.EventQueue = EventQueue;
|
|
2394
2647
|
exports.FileSystemWatcher = FileSystemWatcher;
|
|
2648
|
+
exports.GitignoreFilter = GitignoreFilter;
|
|
2395
2649
|
exports.JeevesWatcher = JeevesWatcher;
|
|
2396
2650
|
exports.SystemHealth = SystemHealth;
|
|
2397
2651
|
exports.VectorStoreClient = VectorStoreClient;
|
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
#!/usr/bin/env node
|
|
3
1
|
import { Command } from '@commander-js/extra-typings';
|
|
4
2
|
import Fastify from 'fastify';
|
|
5
3
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
6
|
-
import { resolve, dirname, join, extname, basename } from 'node:path';
|
|
4
|
+
import { resolve, dirname, join, relative, isAbsolute, extname, basename } from 'node:path';
|
|
7
5
|
import picomatch from 'picomatch';
|
|
8
6
|
import { omit, get } from 'radash';
|
|
9
7
|
import { createHash } from 'node:crypto';
|
|
@@ -11,6 +9,8 @@ import { cosmiconfig } from 'cosmiconfig';
|
|
|
11
9
|
import { z, ZodError } from 'zod';
|
|
12
10
|
import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
|
|
13
11
|
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
|
|
12
|
+
import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
|
|
13
|
+
import ignore from 'ignore';
|
|
14
14
|
import pino from 'pino';
|
|
15
15
|
import { v5 } from 'uuid';
|
|
16
16
|
import * as cheerio from 'cheerio';
|
|
@@ -436,6 +436,7 @@ const WATCH_DEFAULTS = {
|
|
|
436
436
|
stabilityThresholdMs: 500,
|
|
437
437
|
usePolling: false,
|
|
438
438
|
pollIntervalMs: 1000,
|
|
439
|
+
respectGitignore: true,
|
|
439
440
|
};
|
|
440
441
|
/** Default embedding configuration. */
|
|
441
442
|
const EMBEDDING_DEFAULTS = {
|
|
@@ -501,6 +502,11 @@ const watchConfigSchema = z.object({
|
|
|
501
502
|
.number()
|
|
502
503
|
.optional()
|
|
503
504
|
.describe('Time in milliseconds a file must remain unchanged before processing.'),
|
|
505
|
+
/** Whether to respect .gitignore files when processing. */
|
|
506
|
+
respectGitignore: z
|
|
507
|
+
.boolean()
|
|
508
|
+
.optional()
|
|
509
|
+
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
504
510
|
});
|
|
505
511
|
/**
|
|
506
512
|
* Configuration watch settings.
|
|
@@ -966,6 +972,212 @@ function createEmbeddingProvider(config, logger) {
|
|
|
966
972
|
return factory(config, logger);
|
|
967
973
|
}
|
|
968
974
|
|
|
975
|
+
/**
|
|
976
|
+
* @module gitignore
|
|
977
|
+
* Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
|
|
978
|
+
*/
|
|
979
|
+
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git/` directory.
 *
 * Every ancestor is examined, including the filesystem root itself. A `.git`
 * entry that is not a directory, or that cannot be inspected, is skipped and
 * the walk continues upward.
 *
 * @param startDir - Directory to start from; resolved against the current working directory.
 * @returns Absolute path of the nearest enclosing repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    let dir = resolve(startDir);
    for (;;) {
        // A single stat (rather than exists + stat) leaves no window in which
        // `.git` can disappear between the two calls and throw out of the walk.
        let stats;
        try {
            stats = statSync(join(dir, '.git'));
        }
        catch {
            stats = undefined;
        }
        if (stats?.isDirectory())
            return dir;
        const parent = dirname(dir);
        // `dirname` returns its input only at the filesystem root: nothing left to check.
        if (parent === dir)
            return undefined;
        dir = parent;
    }
}
|
|
998
|
+
/**
 * Map a watch path (directory, file path, or glob) to an existing directory
 * from which a repo-root search can start.
 *
 * An existing directory maps to itself and an existing file to its parent.
 * Otherwise, if the path contains a glob character (`*`, `?`, `[`, `{`), the
 * literal prefix before the first one is used.
 *
 * @param watchPath - Watch path as configured.
 * @returns An absolute directory path, or `undefined` if none can be derived.
 */
function watchPathToScanDir(watchPath) {
    const absolute = resolve(watchPath);
    let stats;
    try {
        stats = statSync(absolute);
    }
    catch {
        // Not an existing path; it may still be a glob, handled below.
        stats = undefined;
    }
    if (stats) {
        return stats.isDirectory() ? absolute : dirname(absolute);
    }
    const firstGlobChar = watchPath.search(/[*?[{]/);
    if (firstGlobChar === -1) {
        return undefined;
    }
    const literalPrefix = watchPath.slice(0, firstGlobChar).trim();
    let baseDir;
    if (literalPrefix.length === 0) {
        baseDir = '.';
    }
    else if (literalPrefix.endsWith('/') || literalPrefix.endsWith('\\')) {
        baseDir = literalPrefix;
    }
    else {
        // The prefix stops mid-segment (e.g. `src/foo*`): step back to the containing directory.
        baseDir = dirname(literalPrefix);
    }
    const candidate = resolve(baseDir);
    return existsSync(candidate) ? candidate : undefined;
}
|
|
1026
|
+
/**
 * Recursively find all `.gitignore` files under `dir`.
 *
 * Skips `.git` and `node_modules` directories for performance. Symbolic links
 * to directories are not descended into, so a link pointing back at an
 * ancestor cannot cause the same tree to be walked repeatedly. A `.gitignore`
 * entry that is not a readable file (for example a directory of that name) is
 * not reported.
 *
 * @param dir - Directory to search.
 * @returns Paths of the `.gitignore` files found; empty if `dir` cannot be read.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const gitignorePath = join(dir, '.gitignore');
    let isFile = false;
    try {
        isFile = statSync(gitignorePath).isFile();
    }
    catch {
        // Absent or inaccessible: nothing to report for this directory.
    }
    if (isFile) {
        results.push(gitignorePath);
    }
    let entries;
    try {
        // Dirents describe the entry itself, so symlinks are reported as links, not as their targets.
        entries = readdirSync(dir, { withFileTypes: true });
    }
    catch {
        return results;
    }
    for (const entry of entries) {
        if (entry.name === '.git' || entry.name === 'node_modules')
            continue;
        if (!entry.isDirectory())
            continue;
        for (const found of findGitignoreFiles(join(dir, entry.name))) {
            results.push(found);
        }
    }
    return results;
}
|
|
1058
|
+
/**
 * Parse a `.gitignore` file into an `ignore` instance.
 *
 * A leading UTF-8 byte-order mark is dropped before parsing so it does not
 * become part of the first pattern.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns An `ignore` instance loaded with the file's patterns.
 * @throws If the file cannot be read.
 */
function parseGitignore(gitignorePath) {
    const raw = readFileSync(gitignorePath, 'utf8');
    // Decoding as 'utf8' keeps a BOM as U+FEFF at index 0.
    const content = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    return ignore().add(content);
}
|
|
1065
|
+
/**
 * Replace every backslash in `p` with a forward slash, the separator the
 * `ignore` package expects.
 *
 * @param p - Path string to normalize.
 * @returns The path with `/` as its only separator.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
|
|
1071
|
+
/**
 * Processor-level gitignore filter. Decides whether a path is excluded by the
 * `.gitignore` files of the git repositories containing the watched paths.
 *
 * Precedence follows gitignore rules: for each path the nearest `.gitignore`
 * holding a matching rule decides, so a nested negation (`!keep.log`)
 * overrides a broader rule from a parent directory. The exception is a path
 * whose parent directory is itself excluded: nothing below an excluded
 * directory can be re-included.
 */
class GitignoreFilter {
    /** Repo root → `{ root, entries }`; `entries` is a list of `{ dir, ig }` sorted deepest-first. */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Discard all cached state, then scan paths for git repos and their `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            const entries = findGitignoreFiles(repoRoot).map((gf) => ({
                dir: dirname(gf),
                ig: parseGitignore(gf),
            }));
            entries.sort(GitignoreFilter.#deepestFirst);
            this.repos.set(repoRoot, { root: repoRoot, entries });
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check; resolved against the current working directory.
     * @returns `true` if the file should be ignored. Paths outside every known
     *   repo, and a repo root itself, are never ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeInside(repo.root, absPath) === undefined)
                continue;
            if (GitignoreFilter.#isIgnoredInRepo(repo, absPath))
                return true;
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeInside(repo.root, gitignoreDir) === undefined)
                continue;
            // Drop the stale rules for this directory, then reload them if the file still exists.
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (existsSync(absPath)) {
                repo.entries.push({ dir: gitignoreDir, ig: parseGitignore(absPath) });
                repo.entries.sort(GitignoreFilter.#deepestFirst);
            }
            return;
        }
        // Not inside any known repo: register the enclosing repo, if there is one,
        // with this single `.gitignore`. Had that root already been known, the
        // loop above would have matched and returned.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot && existsSync(absPath)) {
            this.repos.set(repoRoot, {
                root: repoRoot,
                entries: [{ dir: gitignoreDir, ig: parseGitignore(absPath) }],
            });
        }
    }
    /** Comparator placing longer (hence deeper, among ancestors of one path) directories first. */
    static #deepestFirst(a, b) {
        return b.dir.length - a.dir.length;
    }
    /**
     * Path of `child` relative to `parent`, or `undefined` when `child` lies
     * outside `parent`. Returns `''` when both are the same path.
     */
    static #relativeInside(parent, child) {
        const rel = relative(parent, child);
        // `relative` yields an absolute path when the two are on different Windows drives.
        if (isAbsolute(rel))
            return undefined;
        // Only a leading `..` *segment* escapes; a name such as `..notes` does not.
        return /^\.\.(?:[\\/]|$)/.test(rel) ? undefined : rel;
    }
    /**
     * Decide whether `absPath` (known to be inside `repo.root`) is ignored, by
     * checking each of its parent directories from the top down and finally the
     * path itself. The first ignored component settles the answer.
     */
    static #isIgnoredInRepo(repo, absPath) {
        // Only `.gitignore` files located in an ancestor directory of the path can apply.
        const applicable = repo.entries.filter((entry) => GitignoreFilter.#relativeInside(entry.dir, absPath) !== undefined);
        if (applicable.length === 0)
            return false;
        const chain = [];
        let current = absPath;
        while (current !== repo.root) {
            chain.unshift(current);
            const parent = dirname(current);
            if (parent === current)
                break; // reached the filesystem root; guards against a non-normalized repo root
            current = parent;
        }
        const last = chain.length - 1;
        return chain.some((candidate, index) => GitignoreFilter.#decide(applicable, candidate, index < last) === true);
    }
    /**
     * Ask each applicable `.gitignore`, nearest first, about `target`.
     *
     * @returns `true` if ignored, `false` if explicitly re-included, `undefined` if no rule matched.
     */
    static #decide(entries, target, isDirectory) {
        for (const entry of entries) {
            const rel = GitignoreFilter.#relativeInside(entry.dir, target);
            // Skip files that sit below `target`, and the directory holding the
            // `.gitignore` itself (`ignore` rejects an empty path).
            if (!rel)
                continue;
            // A trailing slash lets directory-only patterns (`build/`) match.
            const probe = isDirectory ? `${toForwardSlash(rel)}/` : toForwardSlash(rel);
            const { ignored, unignored } = entry.ig.test(probe);
            if (ignored)
                return true;
            if (unignored)
                return false;
        }
        return undefined;
    }
}
|
|
1180
|
+
|
|
969
1181
|
/**
|
|
970
1182
|
* @module logger
|
|
971
1183
|
* Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
|
|
@@ -2057,6 +2269,7 @@ class FileSystemWatcher {
|
|
|
2057
2269
|
processor;
|
|
2058
2270
|
logger;
|
|
2059
2271
|
health;
|
|
2272
|
+
gitignoreFilter;
|
|
2060
2273
|
watcher;
|
|
2061
2274
|
/**
|
|
2062
2275
|
* Create a new FileSystemWatcher.
|
|
@@ -2072,6 +2285,7 @@ class FileSystemWatcher {
|
|
|
2072
2285
|
this.queue = queue;
|
|
2073
2286
|
this.processor = processor;
|
|
2074
2287
|
this.logger = logger;
|
|
2288
|
+
this.gitignoreFilter = options.gitignoreFilter;
|
|
2075
2289
|
const healthOptions = {
|
|
2076
2290
|
maxRetries: options.maxRetries,
|
|
2077
2291
|
maxBackoffMs: options.maxBackoffMs,
|
|
@@ -2094,14 +2308,23 @@ class FileSystemWatcher {
|
|
|
2094
2308
|
ignoreInitial: false,
|
|
2095
2309
|
});
|
|
2096
2310
|
this.watcher.on('add', (path) => {
|
|
2311
|
+
this.handleGitignoreChange(path);
|
|
2312
|
+
if (this.isGitignored(path))
|
|
2313
|
+
return;
|
|
2097
2314
|
this.logger.debug({ path }, 'File added');
|
|
2098
2315
|
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2099
2316
|
});
|
|
2100
2317
|
this.watcher.on('change', (path) => {
|
|
2318
|
+
this.handleGitignoreChange(path);
|
|
2319
|
+
if (this.isGitignored(path))
|
|
2320
|
+
return;
|
|
2101
2321
|
this.logger.debug({ path }, 'File changed');
|
|
2102
2322
|
this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
2103
2323
|
});
|
|
2104
2324
|
this.watcher.on('unlink', (path) => {
|
|
2325
|
+
this.handleGitignoreChange(path);
|
|
2326
|
+
if (this.isGitignored(path))
|
|
2327
|
+
return;
|
|
2105
2328
|
this.logger.debug({ path }, 'File removed');
|
|
2106
2329
|
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
2107
2330
|
});
|
|
@@ -2128,6 +2351,29 @@ class FileSystemWatcher {
|
|
|
2128
2351
|
get systemHealth() {
|
|
2129
2352
|
return this.health;
|
|
2130
2353
|
}
|
|
2354
|
+
/**
|
|
2355
|
+
* Check if a path is gitignored and should be skipped.
|
|
2356
|
+
*/
|
|
2357
|
+
isGitignored(path) {
|
|
2358
|
+
if (!this.gitignoreFilter)
|
|
2359
|
+
return false;
|
|
2360
|
+
const ignored = this.gitignoreFilter.isIgnored(path);
|
|
2361
|
+
if (ignored) {
|
|
2362
|
+
this.logger.debug({ path }, 'Skipping gitignored file');
|
|
2363
|
+
}
|
|
2364
|
+
return ignored;
|
|
2365
|
+
}
|
|
2366
|
+
/**
|
|
2367
|
+
* If the changed file is a `.gitignore`, invalidate the filter cache.
|
|
2368
|
+
*/
|
|
2369
|
+
handleGitignoreChange(path) {
|
|
2370
|
+
if (!this.gitignoreFilter)
|
|
2371
|
+
return;
|
|
2372
|
+
if (path.endsWith('.gitignore')) {
|
|
2373
|
+
this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
|
|
2374
|
+
this.gitignoreFilter.invalidate(path);
|
|
2375
|
+
}
|
|
2376
|
+
}
|
|
2131
2377
|
/**
|
|
2132
2378
|
* Wrap a processing operation with health tracking.
|
|
2133
2379
|
* On success, resets the failure counter.
|
|
@@ -2283,10 +2529,15 @@ class JeevesWatcher {
|
|
|
2283
2529
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
2284
2530
|
});
|
|
2285
2531
|
this.queue = queue;
|
|
2532
|
+
const respectGitignore = this.config.watch.respectGitignore ?? true;
|
|
2533
|
+
const gitignoreFilter = respectGitignore
|
|
2534
|
+
? new GitignoreFilter(this.config.watch.paths)
|
|
2535
|
+
: undefined;
|
|
2286
2536
|
const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
|
|
2287
2537
|
maxRetries: this.config.maxRetries,
|
|
2288
2538
|
maxBackoffMs: this.config.maxBackoffMs,
|
|
2289
2539
|
onFatalError: this.runtimeOptions.onFatalError,
|
|
2540
|
+
gitignoreFilter,
|
|
2290
2541
|
});
|
|
2291
2542
|
this.watcher = watcher;
|
|
2292
2543
|
const server = this.factories.createApiServer({
|