@karmaniverous/jeeves-watcher 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -10,6 +10,8 @@ var cosmiconfig = require('cosmiconfig');
10
10
  var zod = require('zod');
11
11
  var jsonmap = require('@karmaniverous/jsonmap');
12
12
  var googleGenai = require('@langchain/google-genai');
13
+ var node_fs = require('node:fs');
14
+ var ignore = require('ignore');
13
15
  var pino = require('pino');
14
16
  var uuid = require('uuid');
15
17
  var cheerio = require('cheerio');
@@ -454,6 +456,7 @@ const WATCH_DEFAULTS = {
454
456
  stabilityThresholdMs: 500,
455
457
  usePolling: false,
456
458
  pollIntervalMs: 1000,
459
+ respectGitignore: true,
457
460
  };
458
461
  /** Default embedding configuration. */
459
462
  const EMBEDDING_DEFAULTS = {
@@ -498,6 +501,11 @@ const watchConfigSchema = zod.z.object({
498
501
  .number()
499
502
  .optional()
500
503
  .describe('Time in milliseconds a file must remain unchanged before processing.'),
504
+ /** Whether to respect .gitignore files when processing. */
505
+ respectGitignore: zod.z
506
+ .boolean()
507
+ .optional()
508
+ .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
501
509
  });
502
510
  /**
503
511
  * Configuration watch settings.
@@ -963,6 +971,212 @@ function createEmbeddingProvider(config, logger) {
963
971
  return factory(config, logger);
964
972
  }
965
973
 
974
+ /**
975
+ * @module gitignore
976
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
977
+ */
978
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git`
 * directory.
 *
 * Only a `.git` *directory* counts; a `.git` regular file does not mark a
 * repo root here. The filesystem root itself is never tested.
 *
 * @param startDir - Directory to start from (resolved to an absolute path).
 * @returns The absolute repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    const fsRoot = node_path.resolve('/');
    let dir = node_path.resolve(startDir);
    while (dir !== fsRoot) {
        // One guarded stat instead of exists-then-stat: the entry can vanish
        // between the two calls, and that must not abort the walk.
        let gitStat;
        try {
            gitStat = node_fs.statSync(node_path.join(dir, '.git'));
        }
        catch {
            gitStat = undefined;
        }
        if (gitStat !== undefined && gitStat.isDirectory()) {
            return dir;
        }
        const parent = node_path.dirname(dir);
        // `dirname` is a fixed point at a drive/volume root.
        if (parent === dir)
            break;
        dir = parent;
    }
    return undefined;
}
997
/**
 * Convert a watch path (directory, file path, or glob) to a concrete directory
 * that can be scanned for a repo root.
 *
 * Resolution order:
 * 1. If the path exists as written, use it (directory) or its parent (file).
 * 2. Otherwise, for a glob, start from the non-glob prefix; for a literal
 *    path, start from the path itself.
 * 3. From that starting point, climb to the nearest ancestor that exists and
 *    is a directory, so a watch target that has not been created yet still
 *    resolves to a directory inside its enclosing repo.
 *
 * @param watchPath - Watch path as configured (absolute or relative to cwd).
 * @returns An existing absolute directory, or `undefined` if none is found.
 */
function watchPathToScanDir(watchPath) {
    // Stat that reports "missing/inaccessible" as undefined instead of throwing.
    const statOrUndefined = (target) => {
        try {
            return node_fs.statSync(target);
        }
        catch {
            return undefined;
        }
    };
    const absPath = node_path.resolve(watchPath);
    const literalStat = statOrUndefined(absPath);
    if (literalStat !== undefined) {
        return literalStat.isDirectory() ? absPath : node_path.dirname(absPath);
    }
    let candidate = absPath;
    const globMatch = /[*?[{]/.exec(watchPath);
    if (globMatch) {
        // Keep only the part before the first glob metacharacter.
        const prefix = watchPath.slice(0, globMatch.index).trim();
        let baseDir;
        if (prefix.length === 0) {
            baseDir = '.';
        }
        else if (prefix.endsWith('/') || prefix.endsWith('\\')) {
            baseDir = prefix;
        }
        else {
            // The prefix ends mid-segment (e.g. `src/foo*`): drop the partial name.
            baseDir = node_path.dirname(prefix);
        }
        candidate = node_path.resolve(baseDir);
    }
    // Climb until an existing directory is found or the root is exhausted.
    for (;;) {
        const candidateStat = statOrUndefined(candidate);
        if (candidateStat !== undefined && candidateStat.isDirectory()) {
            return candidate;
        }
        const parent = node_path.dirname(candidate);
        if (parent === candidate)
            return undefined;
        candidate = parent;
    }
}
1025
/**
 * Recursively find all `.gitignore` files under `dir`.
 *
 * Skips `.git` and `node_modules` directories for performance. Symbolic links
 * to directories are not followed, so a link pointing back at an ancestor
 * cannot cause the same tree to be walked repeatedly. A `.gitignore` entry
 * that is not a regular file (for example a directory with that name) is not
 * reported. Unreadable directories are skipped silently.
 *
 * @param dir - Directory to start from.
 * @returns Paths of the `.gitignore` files found, parents before children.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const visit = (current) => {
        const gitignorePath = node_path.join(current, '.gitignore');
        try {
            if (node_fs.statSync(gitignorePath).isFile()) {
                results.push(gitignorePath);
            }
        }
        catch {
            // No usable `.gitignore` in this directory.
        }
        let entries;
        try {
            // Dirent types come from the directory listing itself and describe
            // the link, not its target, so symlinked directories are not entered.
            entries = node_fs.readdirSync(current, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const entry of entries) {
            if (entry.name === '.git' || entry.name === 'node_modules')
                continue;
            if (entry.isDirectory()) {
                visit(node_path.join(current, entry.name));
            }
        }
    };
    visit(dir);
    return results;
}
1057
/**
 * Read a `.gitignore` file as UTF-8 and load its patterns into a fresh
 * `ignore` matcher.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns The matcher returned by `ignore().add(...)`.
 */
function parseGitignore(gitignorePath) {
    const patterns = node_fs.readFileSync(gitignorePath, 'utf8');
    const matcher = ignore();
    return matcher.add(patterns);
}
1064
/**
 * Replace every backslash in `p` with a forward slash, the separator the
 * `ignore` package expects.
 *
 * @param p - Path string to normalize.
 * @returns `p` with all `\` characters turned into `/`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
1070
/**
 * Processor-level gitignore filter. Checks file paths against the
 * `.gitignore` files of the git repositories that contain them.
 *
 * Matching is "any match wins": a path is reported as ignored as soon as one
 * applicable `.gitignore` ignores it. A negation (`!pattern`) in a nested
 * `.gitignore` therefore does not re-include a path that a shallower
 * `.gitignore` ignores.
 */
class GitignoreFilter {
    /**
     * Known repositories keyed by absolute repo root. Each value is
     * `{ root, entries }`, where `entries` holds one `{ dir, ig }` per
     * `.gitignore` file, sorted deepest directory first.
     */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Return `target` relative to `baseDir`, or `undefined` when `target` lies
     * outside `baseDir`. An empty string means the two are the same path.
     */
    static #relativeWithin(baseDir, target) {
        const rel = node_path.relative(baseDir, target);
        // Only a leading `..` *segment* escapes; a name such as `..cache` does
        // not. An absolute result means a different drive/volume.
        const escapes = rel === '..' ||
            rel.startsWith(`..${node_path.sep}`) ||
            node_path.isAbsolute(rel);
        return escapes ? undefined : rel;
    }
    /**
     * Parse one `.gitignore`, returning `undefined` when it is missing or
     * cannot be read, so a single bad file cannot abort a scan or a watcher
     * event handler.
     */
    static #tryParse(gitignorePath) {
        try {
            return parseGitignore(gitignorePath);
        }
        catch {
            return undefined;
        }
    }
    /**
     * Build the `{ root, entries }` record for a repo from every readable
     * `.gitignore` under its root.
     */
    static #loadRepo(repoRoot) {
        const entries = [];
        for (const gitignoreFile of findGitignoreFiles(repoRoot)) {
            const ig = GitignoreFilter.#tryParse(gitignoreFile);
            if (ig !== undefined) {
                entries.push({ dir: node_path.dirname(gitignoreFile), ig });
            }
        }
        // Sort deepest-first so nested `.gitignore` files are checked first
        entries.sort((a, b) => b.dir.length - a.dir.length);
        return { root: repoRoot, entries };
    }
    /**
     * Scan paths for git repos and their `.gitignore` files, replacing any
     * previously cached state.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            this.repos.set(repoRoot, GitignoreFilter.#loadRepo(repoRoot));
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check (resolved to an absolute path).
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = node_path.resolve(filePath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeWithin(repo.root, absPath) === undefined) {
                continue;
            }
            for (const entry of repo.entries) {
                const relToEntry = GitignoreFilter.#relativeWithin(entry.dir, absPath);
                // Skip entries that do not contain the path, and the case where
                // the path *is* the entry directory: the `ignore` matcher
                // rejects an empty path.
                if (!relToEntry)
                    continue;
                if (entry.ig.ignores(toForwardSlash(relToEntry))) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every known repo containing the file is updated (nested repos share
     * entries for the same directory). If no known repo contains it but the
     * file sits inside a git repo, that repo is loaded in full.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = node_path.resolve(gitignorePath);
        const gitignoreDir = node_path.dirname(absPath);
        // `undefined` when the file was removed or cannot be read.
        const ig = GitignoreFilter.#tryParse(absPath);
        let handled = false;
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeWithin(repo.root, gitignoreDir) === undefined) {
                continue;
            }
            handled = true;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (ig !== undefined) {
                repo.entries.push({ dir: gitignoreDir, ig });
                // Re-sort deepest-first
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
        }
        if (handled || ig === undefined)
            return;
        // Not in any known repo: pick up a repo that has not been scanned yet.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot) {
            this.repos.set(repoRoot, GitignoreFilter.#loadRepo(repoRoot));
        }
    }
}
1179
+
966
1180
  /**
967
1181
  * @module logger
968
1182
  * Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
@@ -2054,6 +2268,7 @@ class FileSystemWatcher {
2054
2268
  processor;
2055
2269
  logger;
2056
2270
  health;
2271
+ gitignoreFilter;
2057
2272
  watcher;
2058
2273
  /**
2059
2274
  * Create a new FileSystemWatcher.
@@ -2069,6 +2284,7 @@ class FileSystemWatcher {
2069
2284
  this.queue = queue;
2070
2285
  this.processor = processor;
2071
2286
  this.logger = logger;
2287
+ this.gitignoreFilter = options.gitignoreFilter;
2072
2288
  const healthOptions = {
2073
2289
  maxRetries: options.maxRetries,
2074
2290
  maxBackoffMs: options.maxBackoffMs,
@@ -2091,14 +2307,23 @@ class FileSystemWatcher {
2091
2307
  ignoreInitial: false,
2092
2308
  });
2093
2309
  this.watcher.on('add', (path) => {
2310
+ this.handleGitignoreChange(path);
2311
+ if (this.isGitignored(path))
2312
+ return;
2094
2313
  this.logger.debug({ path }, 'File added');
2095
2314
  this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
2096
2315
  });
2097
2316
  this.watcher.on('change', (path) => {
2317
+ this.handleGitignoreChange(path);
2318
+ if (this.isGitignored(path))
2319
+ return;
2098
2320
  this.logger.debug({ path }, 'File changed');
2099
2321
  this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
2100
2322
  });
2101
2323
  this.watcher.on('unlink', (path) => {
2324
+ this.handleGitignoreChange(path);
2325
+ if (this.isGitignored(path))
2326
+ return;
2102
2327
  this.logger.debug({ path }, 'File removed');
2103
2328
  this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
2104
2329
  });
@@ -2125,6 +2350,29 @@ class FileSystemWatcher {
2125
2350
  get systemHealth() {
2126
2351
  return this.health;
2127
2352
  }
2353
+ /**
2354
+ * Check if a path is gitignored and should be skipped.
2355
+ */
2356
+ isGitignored(path) {
2357
+ if (!this.gitignoreFilter)
2358
+ return false;
2359
+ const ignored = this.gitignoreFilter.isIgnored(path);
2360
+ if (ignored) {
2361
+ this.logger.debug({ path }, 'Skipping gitignored file');
2362
+ }
2363
+ return ignored;
2364
+ }
2365
+ /**
2366
+ * If the changed file is a `.gitignore`, invalidate the filter cache.
2367
+ */
2368
+ handleGitignoreChange(path) {
2369
+ if (!this.gitignoreFilter)
2370
+ return;
2371
+ if (path.endsWith('.gitignore')) {
2372
+ this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
2373
+ this.gitignoreFilter.invalidate(path);
2374
+ }
2375
+ }
2128
2376
  /**
2129
2377
  * Wrap a processing operation with health tracking.
2130
2378
  * On success, resets the failure counter.
@@ -2280,10 +2528,15 @@ class JeevesWatcher {
2280
2528
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2281
2529
  });
2282
2530
  this.queue = queue;
2531
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
2532
+ const gitignoreFilter = respectGitignore
2533
+ ? new GitignoreFilter(this.config.watch.paths)
2534
+ : undefined;
2283
2535
  const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2284
2536
  maxRetries: this.config.maxRetries,
2285
2537
  maxBackoffMs: this.config.maxBackoffMs,
2286
2538
  onFatalError: this.runtimeOptions.onFatalError,
2539
+ gitignoreFilter,
2287
2540
  });
2288
2541
  this.watcher = watcher;
2289
2542
  const server = this.factories.createApiServer({
@@ -2392,6 +2645,7 @@ async function startFromConfig(configPath) {
2392
2645
  exports.DocumentProcessor = DocumentProcessor;
2393
2646
  exports.EventQueue = EventQueue;
2394
2647
  exports.FileSystemWatcher = FileSystemWatcher;
2648
+ exports.GitignoreFilter = GitignoreFilter;
2395
2649
  exports.JeevesWatcher = JeevesWatcher;
2396
2650
  exports.SystemHealth = SystemHealth;
2397
2651
  exports.VectorStoreClient = VectorStoreClient;
@@ -3,7 +3,7 @@
3
3
  import { Command } from '@commander-js/extra-typings';
4
4
  import Fastify from 'fastify';
5
5
  import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
6
- import { resolve, dirname, join, extname, basename } from 'node:path';
6
+ import { resolve, dirname, join, relative, extname, basename } from 'node:path';
7
7
  import picomatch from 'picomatch';
8
8
  import { omit, get } from 'radash';
9
9
  import { createHash } from 'node:crypto';
@@ -11,6 +11,8 @@ import { cosmiconfig } from 'cosmiconfig';
11
11
  import { z, ZodError } from 'zod';
12
12
  import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
13
13
  import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
14
+ import { existsSync, statSync, readdirSync, readFileSync } from 'node:fs';
15
+ import ignore from 'ignore';
14
16
  import pino from 'pino';
15
17
  import { v5 } from 'uuid';
16
18
  import * as cheerio from 'cheerio';
@@ -436,6 +438,7 @@ const WATCH_DEFAULTS = {
436
438
  stabilityThresholdMs: 500,
437
439
  usePolling: false,
438
440
  pollIntervalMs: 1000,
441
+ respectGitignore: true,
439
442
  };
440
443
  /** Default embedding configuration. */
441
444
  const EMBEDDING_DEFAULTS = {
@@ -501,6 +504,11 @@ const watchConfigSchema = z.object({
501
504
  .number()
502
505
  .optional()
503
506
  .describe('Time in milliseconds a file must remain unchanged before processing.'),
507
+ /** Whether to respect .gitignore files when processing. */
508
+ respectGitignore: z
509
+ .boolean()
510
+ .optional()
511
+ .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
504
512
  });
505
513
  /**
506
514
  * Configuration watch settings.
@@ -966,6 +974,212 @@ function createEmbeddingProvider(config, logger) {
966
974
  return factory(config, logger);
967
975
  }
968
976
 
977
+ /**
978
+ * @module gitignore
979
+ * Processor-level gitignore filtering. Scans watched paths for `.gitignore` files in git repos, caches parsed patterns, and exposes `isIgnored()` for path checking.
980
+ */
981
/**
 * Find the git repo root by walking up from `startDir` looking for a `.git`
 * directory.
 *
 * Only a `.git` *directory* counts; a `.git` regular file does not mark a
 * repo root here. The filesystem root itself is never tested.
 *
 * @param startDir - Directory to start from (resolved to an absolute path).
 * @returns The absolute repo root, or `undefined` if no repo is found.
 */
function findRepoRoot(startDir) {
    const fsRoot = resolve('/');
    let dir = resolve(startDir);
    while (dir !== fsRoot) {
        // One guarded stat instead of exists-then-stat: the entry can vanish
        // between the two calls, and that must not abort the walk.
        let gitStat;
        try {
            gitStat = statSync(join(dir, '.git'));
        }
        catch {
            gitStat = undefined;
        }
        if (gitStat !== undefined && gitStat.isDirectory()) {
            return dir;
        }
        const parent = dirname(dir);
        // `dirname` is a fixed point at a drive/volume root.
        if (parent === dir)
            break;
        dir = parent;
    }
    return undefined;
}
1000
/**
 * Convert a watch path (directory, file path, or glob) to a concrete directory
 * that can be scanned for a repo root.
 *
 * Resolution order:
 * 1. If the path exists as written, use it (directory) or its parent (file).
 * 2. Otherwise, for a glob, start from the non-glob prefix; for a literal
 *    path, start from the path itself.
 * 3. From that starting point, climb to the nearest ancestor that exists and
 *    is a directory, so a watch target that has not been created yet still
 *    resolves to a directory inside its enclosing repo.
 *
 * @param watchPath - Watch path as configured (absolute or relative to cwd).
 * @returns An existing absolute directory, or `undefined` if none is found.
 */
function watchPathToScanDir(watchPath) {
    // Stat that reports "missing/inaccessible" as undefined instead of throwing.
    const statOrUndefined = (target) => {
        try {
            return statSync(target);
        }
        catch {
            return undefined;
        }
    };
    const absPath = resolve(watchPath);
    const literalStat = statOrUndefined(absPath);
    if (literalStat !== undefined) {
        return literalStat.isDirectory() ? absPath : dirname(absPath);
    }
    let candidate = absPath;
    const globMatch = /[*?[{]/.exec(watchPath);
    if (globMatch) {
        // Keep only the part before the first glob metacharacter.
        const prefix = watchPath.slice(0, globMatch.index).trim();
        let baseDir;
        if (prefix.length === 0) {
            baseDir = '.';
        }
        else if (prefix.endsWith('/') || prefix.endsWith('\\')) {
            baseDir = prefix;
        }
        else {
            // The prefix ends mid-segment (e.g. `src/foo*`): drop the partial name.
            baseDir = dirname(prefix);
        }
        candidate = resolve(baseDir);
    }
    // Climb until an existing directory is found or the root is exhausted.
    for (;;) {
        const candidateStat = statOrUndefined(candidate);
        if (candidateStat !== undefined && candidateStat.isDirectory()) {
            return candidate;
        }
        const parent = dirname(candidate);
        if (parent === candidate)
            return undefined;
        candidate = parent;
    }
}
1028
/**
 * Recursively find all `.gitignore` files under `dir`.
 *
 * Skips `.git` and `node_modules` directories for performance. Symbolic links
 * to directories are not followed, so a link pointing back at an ancestor
 * cannot cause the same tree to be walked repeatedly. A `.gitignore` entry
 * that is not a regular file (for example a directory with that name) is not
 * reported. Unreadable directories are skipped silently.
 *
 * @param dir - Directory to start from.
 * @returns Paths of the `.gitignore` files found, parents before children.
 */
function findGitignoreFiles(dir) {
    const results = [];
    const visit = (current) => {
        const gitignorePath = join(current, '.gitignore');
        try {
            if (statSync(gitignorePath).isFile()) {
                results.push(gitignorePath);
            }
        }
        catch {
            // No usable `.gitignore` in this directory.
        }
        let entries;
        try {
            // Dirent types come from the directory listing itself and describe
            // the link, not its target, so symlinked directories are not entered.
            entries = readdirSync(current, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const entry of entries) {
            if (entry.name === '.git' || entry.name === 'node_modules')
                continue;
            if (entry.isDirectory()) {
                visit(join(current, entry.name));
            }
        }
    };
    visit(dir);
    return results;
}
1060
/**
 * Read a `.gitignore` file as UTF-8 and load its patterns into a fresh
 * `ignore` matcher.
 *
 * @param gitignorePath - Path of the `.gitignore` file to read.
 * @returns The matcher returned by `ignore().add(...)`.
 */
function parseGitignore(gitignorePath) {
    const patterns = readFileSync(gitignorePath, 'utf8');
    const matcher = ignore();
    return matcher.add(patterns);
}
1067
/**
 * Replace every backslash in `p` with a forward slash, the separator the
 * `ignore` package expects.
 *
 * @param p - Path string to normalize.
 * @returns `p` with all `\` characters turned into `/`.
 */
function toForwardSlash(p) {
    return p.split('\\').join('/');
}
1073
/**
 * Processor-level gitignore filter. Checks file paths against the
 * `.gitignore` files of the git repositories that contain them.
 *
 * Matching is "any match wins": a path is reported as ignored as soon as one
 * applicable `.gitignore` ignores it. A negation (`!pattern`) in a nested
 * `.gitignore` therefore does not re-include a path that a shallower
 * `.gitignore` ignores.
 */
class GitignoreFilter {
    /**
     * Known repositories keyed by absolute repo root. Each value is
     * `{ root, entries }`, where `entries` holds one `{ dir, ig }` per
     * `.gitignore` file, sorted deepest directory first.
     */
    repos = new Map();
    /**
     * Create a GitignoreFilter by scanning watched paths for `.gitignore` files.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    constructor(watchPaths) {
        this.scan(watchPaths);
    }
    /**
     * Return `target` relative to `baseDir`, or `undefined` when `target` lies
     * outside `baseDir`. An empty string means the two are the same path.
     */
    static #relativeWithin(baseDir, target) {
        const rel = relative(baseDir, target);
        // Only a leading `..` *segment* escapes; a name such as `..cache` does not.
        const [firstSegment] = rel.split(/[\\/]/, 1);
        if (firstSegment === '..')
            return undefined;
        // `relative` yields an absolute path when the two are on different
        // drives/volumes; an absolute path is unchanged by `resolve`.
        if (rel !== '' && resolve(rel) === rel)
            return undefined;
        return rel;
    }
    /**
     * Parse one `.gitignore`, returning `undefined` when it is missing or
     * cannot be read, so a single bad file cannot abort a scan or a watcher
     * event handler.
     */
    static #tryParse(gitignorePath) {
        try {
            return parseGitignore(gitignorePath);
        }
        catch {
            return undefined;
        }
    }
    /**
     * Build the `{ root, entries }` record for a repo from every readable
     * `.gitignore` under its root.
     */
    static #loadRepo(repoRoot) {
        const entries = [];
        for (const gitignoreFile of findGitignoreFiles(repoRoot)) {
            const ig = GitignoreFilter.#tryParse(gitignoreFile);
            if (ig !== undefined) {
                entries.push({ dir: dirname(gitignoreFile), ig });
            }
        }
        // Sort deepest-first so nested `.gitignore` files are checked first
        entries.sort((a, b) => b.dir.length - a.dir.length);
        return { root: repoRoot, entries };
    }
    /**
     * Scan paths for git repos and their `.gitignore` files, replacing any
     * previously cached state.
     *
     * @param watchPaths - Paths being watched (directories, files, or globs).
     */
    scan(watchPaths) {
        this.repos.clear();
        const scannedDirs = new Set();
        for (const watchPath of watchPaths) {
            const scanDir = watchPathToScanDir(watchPath);
            if (!scanDir || scannedDirs.has(scanDir))
                continue;
            scannedDirs.add(scanDir);
            const repoRoot = findRepoRoot(scanDir);
            if (!repoRoot || this.repos.has(repoRoot))
                continue;
            this.repos.set(repoRoot, GitignoreFilter.#loadRepo(repoRoot));
        }
    }
    /**
     * Check whether a file path is ignored by any applicable `.gitignore`.
     *
     * @param filePath - File path to check (resolved to an absolute path).
     * @returns `true` if the file should be ignored.
     */
    isIgnored(filePath) {
        const absPath = resolve(filePath);
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeWithin(repo.root, absPath) === undefined) {
                continue;
            }
            for (const entry of repo.entries) {
                const relToEntry = GitignoreFilter.#relativeWithin(entry.dir, absPath);
                // Skip entries that do not contain the path, and the case where
                // the path *is* the entry directory: the `ignore` matcher
                // rejects an empty path.
                if (!relToEntry)
                    continue;
                if (entry.ig.ignores(toForwardSlash(relToEntry))) {
                    return true;
                }
            }
        }
        return false;
    }
    /**
     * Invalidate and re-parse a specific `.gitignore` file.
     * Call when a `.gitignore` file is added, changed, or removed.
     *
     * Every known repo containing the file is updated (nested repos share
     * entries for the same directory). If no known repo contains it but the
     * file sits inside a git repo, that repo is loaded in full.
     *
     * @param gitignorePath - Path to the `.gitignore` file that changed.
     */
    invalidate(gitignorePath) {
        const absPath = resolve(gitignorePath);
        const gitignoreDir = dirname(absPath);
        // `undefined` when the file was removed or cannot be read.
        const ig = GitignoreFilter.#tryParse(absPath);
        let handled = false;
        for (const repo of this.repos.values()) {
            if (GitignoreFilter.#relativeWithin(repo.root, gitignoreDir) === undefined) {
                continue;
            }
            handled = true;
            // Remove old entry for this directory
            repo.entries = repo.entries.filter((e) => e.dir !== gitignoreDir);
            if (ig !== undefined) {
                repo.entries.push({ dir: gitignoreDir, ig });
                // Re-sort deepest-first
                repo.entries.sort((a, b) => b.dir.length - a.dir.length);
            }
        }
        if (handled || ig === undefined)
            return;
        // Not in any known repo: pick up a repo that has not been scanned yet.
        const repoRoot = findRepoRoot(gitignoreDir);
        if (repoRoot) {
            this.repos.set(repoRoot, GitignoreFilter.#loadRepo(repoRoot));
        }
    }
}
1182
+
969
1183
  /**
970
1184
  * @module logger
971
1185
  * Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
@@ -2057,6 +2271,7 @@ class FileSystemWatcher {
2057
2271
  processor;
2058
2272
  logger;
2059
2273
  health;
2274
+ gitignoreFilter;
2060
2275
  watcher;
2061
2276
  /**
2062
2277
  * Create a new FileSystemWatcher.
@@ -2072,6 +2287,7 @@ class FileSystemWatcher {
2072
2287
  this.queue = queue;
2073
2288
  this.processor = processor;
2074
2289
  this.logger = logger;
2290
+ this.gitignoreFilter = options.gitignoreFilter;
2075
2291
  const healthOptions = {
2076
2292
  maxRetries: options.maxRetries,
2077
2293
  maxBackoffMs: options.maxBackoffMs,
@@ -2094,14 +2310,23 @@ class FileSystemWatcher {
2094
2310
  ignoreInitial: false,
2095
2311
  });
2096
2312
  this.watcher.on('add', (path) => {
2313
+ this.handleGitignoreChange(path);
2314
+ if (this.isGitignored(path))
2315
+ return;
2097
2316
  this.logger.debug({ path }, 'File added');
2098
2317
  this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
2099
2318
  });
2100
2319
  this.watcher.on('change', (path) => {
2320
+ this.handleGitignoreChange(path);
2321
+ if (this.isGitignored(path))
2322
+ return;
2101
2323
  this.logger.debug({ path }, 'File changed');
2102
2324
  this.queue.enqueue({ type: 'modify', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
2103
2325
  });
2104
2326
  this.watcher.on('unlink', (path) => {
2327
+ this.handleGitignoreChange(path);
2328
+ if (this.isGitignored(path))
2329
+ return;
2105
2330
  this.logger.debug({ path }, 'File removed');
2106
2331
  this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
2107
2332
  });
@@ -2128,6 +2353,29 @@ class FileSystemWatcher {
2128
2353
  get systemHealth() {
2129
2354
  return this.health;
2130
2355
  }
2356
+ /**
2357
+ * Check if a path is gitignored and should be skipped.
2358
+ */
2359
+ isGitignored(path) {
2360
+ if (!this.gitignoreFilter)
2361
+ return false;
2362
+ const ignored = this.gitignoreFilter.isIgnored(path);
2363
+ if (ignored) {
2364
+ this.logger.debug({ path }, 'Skipping gitignored file');
2365
+ }
2366
+ return ignored;
2367
+ }
2368
+ /**
2369
+ * If the changed file is a `.gitignore`, invalidate the filter cache.
2370
+ */
2371
+ handleGitignoreChange(path) {
2372
+ if (!this.gitignoreFilter)
2373
+ return;
2374
+ if (path.endsWith('.gitignore')) {
2375
+ this.logger.info({ path }, 'Gitignore file changed, refreshing filter');
2376
+ this.gitignoreFilter.invalidate(path);
2377
+ }
2378
+ }
2131
2379
  /**
2132
2380
  * Wrap a processing operation with health tracking.
2133
2381
  * On success, resets the failure counter.
@@ -2283,10 +2531,15 @@ class JeevesWatcher {
2283
2531
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
2284
2532
  });
2285
2533
  this.queue = queue;
2534
+ const respectGitignore = this.config.watch.respectGitignore ?? true;
2535
+ const gitignoreFilter = respectGitignore
2536
+ ? new GitignoreFilter(this.config.watch.paths)
2537
+ : undefined;
2286
2538
  const watcher = this.factories.createFileSystemWatcher(this.config.watch, queue, processor, logger, {
2287
2539
  maxRetries: this.config.maxRetries,
2288
2540
  maxBackoffMs: this.config.maxBackoffMs,
2289
2541
  onFatalError: this.runtimeOptions.onFatalError,
2542
+ gitignoreFilter,
2290
2543
  });
2291
2544
  this.watcher = watcher;
2292
2545
  const server = this.factories.createApiServer({