@karmaniverous/jeeves-watcher 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -189,7 +189,7 @@
189
189
  "description": "Named Qdrant filter patterns for skill-activated behaviors.",
190
190
  "allOf": [
191
191
  {
192
- "$ref": "#/definitions/__schema82"
192
+ "$ref": "#/definitions/__schema83"
193
193
  }
194
194
  ]
195
195
  },
@@ -197,7 +197,7 @@
197
197
  "description": "Search configuration including score thresholds and hybrid search.",
198
198
  "allOf": [
199
199
  {
200
- "$ref": "#/definitions/__schema83"
200
+ "$ref": "#/definitions/__schema84"
201
201
  }
202
202
  ]
203
203
  },
@@ -205,7 +205,7 @@
205
205
  "description": "Logging configuration.",
206
206
  "allOf": [
207
207
  {
208
- "$ref": "#/definitions/__schema84"
208
+ "$ref": "#/definitions/__schema85"
209
209
  }
210
210
  ]
211
211
  },
@@ -213,7 +213,7 @@
213
213
  "description": "Timeout in milliseconds for graceful shutdown.",
214
214
  "allOf": [
215
215
  {
216
- "$ref": "#/definitions/__schema87"
216
+ "$ref": "#/definitions/__schema88"
217
217
  }
218
218
  ]
219
219
  },
@@ -221,7 +221,7 @@
221
221
  "description": "Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.",
222
222
  "allOf": [
223
223
  {
224
- "$ref": "#/definitions/__schema88"
224
+ "$ref": "#/definitions/__schema89"
225
225
  }
226
226
  ]
227
227
  },
@@ -229,7 +229,7 @@
229
229
  "description": "Maximum backoff delay in milliseconds for system errors. Default: 60000.",
230
230
  "allOf": [
231
231
  {
232
- "$ref": "#/definitions/__schema89"
232
+ "$ref": "#/definitions/__schema90"
233
233
  }
234
234
  ]
235
235
  }
@@ -1046,17 +1046,31 @@
1046
1046
  "callbackUrl": {
1047
1047
  "type": "string",
1048
1048
  "format": "uri"
1049
+ },
1050
+ "concurrency": {
1051
+ "default": 50,
1052
+ "description": "Maximum concurrent file operations during reindex (default 50).",
1053
+ "allOf": [
1054
+ {
1055
+ "$ref": "#/definitions/__schema82"
1056
+ }
1057
+ ]
1049
1058
  }
1050
1059
  }
1051
1060
  },
1052
1061
  "__schema82": {
1062
+ "type": "integer",
1063
+ "minimum": 1,
1064
+ "maximum": 9007199254740991
1065
+ },
1066
+ "__schema83": {
1053
1067
  "type": "object",
1054
1068
  "propertyNames": {
1055
1069
  "type": "string"
1056
1070
  },
1057
1071
  "additionalProperties": {}
1058
1072
  },
1059
- "__schema83": {
1073
+ "__schema84": {
1060
1074
  "type": "object",
1061
1075
  "properties": {
1062
1076
  "scoreThresholds": {
@@ -1101,14 +1115,14 @@
1101
1115
  }
1102
1116
  }
1103
1117
  },
1104
- "__schema84": {
1118
+ "__schema85": {
1105
1119
  "type": "object",
1106
1120
  "properties": {
1107
1121
  "level": {
1108
1122
  "description": "Logging level (trace, debug, info, warn, error, fatal).",
1109
1123
  "allOf": [
1110
1124
  {
1111
- "$ref": "#/definitions/__schema85"
1125
+ "$ref": "#/definitions/__schema86"
1112
1126
  }
1113
1127
  ]
1114
1128
  },
@@ -1116,26 +1130,26 @@
1116
1130
  "description": "Path to log file (logs to stdout if omitted).",
1117
1131
  "allOf": [
1118
1132
  {
1119
- "$ref": "#/definitions/__schema86"
1133
+ "$ref": "#/definitions/__schema87"
1120
1134
  }
1121
1135
  ]
1122
1136
  }
1123
1137
  }
1124
1138
  },
1125
- "__schema85": {
1126
- "type": "string"
1127
- },
1128
1139
  "__schema86": {
1129
1140
  "type": "string"
1130
1141
  },
1131
1142
  "__schema87": {
1132
- "type": "number"
1143
+ "type": "string"
1133
1144
  },
1134
1145
  "__schema88": {
1135
1146
  "type": "number"
1136
1147
  },
1137
1148
  "__schema89": {
1138
1149
  "type": "number"
1150
+ },
1151
+ "__schema90": {
1152
+ "type": "number"
1139
1153
  }
1140
1154
  }
1141
1155
  }
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { createRequire } from 'node:module';
3
3
  import { Command } from '@commander-js/extra-typings';
4
+ import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
4
5
  import { readdir, stat, writeFile, rm, readFile, mkdir } from 'node:fs/promises';
5
6
  import { resolve, dirname, join, extname, basename, isAbsolute, relative } from 'node:path';
6
7
  import picomatch from 'picomatch';
@@ -17,7 +18,6 @@ import dayjs from 'dayjs';
17
18
  import { toMdast } from 'hast-util-to-mdast';
18
19
  import { fromADF } from 'mdast-util-from-adf';
19
20
  import { toMarkdown } from 'mdast-util-to-markdown';
20
- import { capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
21
21
  import rehypeParse from 'rehype-parse';
22
22
  import { unified } from 'unified';
23
23
  import yaml from 'js-yaml';
@@ -225,6 +225,8 @@ async function listFilesFromGlobs(patterns, ignored = []) {
225
225
  *
226
226
  * Shared helper for processing all files matching configured globs.
227
227
  */
228
+ /** Default concurrency limit for reindex operations. */
229
+ const DEFAULT_REINDEX_CONCURRENCY = 50;
228
230
  /**
229
231
  * Process all files from globs using the specified processor method.
230
232
  *
@@ -232,15 +234,17 @@ async function listFilesFromGlobs(patterns, ignored = []) {
232
234
  * @param ignoredPaths - The glob patterns to ignore.
233
235
  * @param processor - The document processor instance.
234
236
  * @param method - The processor method to call ('processFile' or 'processRulesUpdate').
237
+ * @param concurrency - Maximum concurrent file operations (default 50).
238
+ * @param callbacks - Optional progress tracking callbacks.
235
239
  * @returns The number of files processed.
236
240
  */
237
- async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
241
+ async function processAllFiles(watchPaths, ignoredPaths, processor, method, concurrency = DEFAULT_REINDEX_CONCURRENCY, callbacks) {
238
242
  const files = await listFilesFromGlobs(watchPaths, ignoredPaths);
239
- for (const file of files) {
240
- // Sequential on purpose to avoid surprising load.
241
- // Queue integration can come later.
243
+ callbacks?.onTotal?.(files.length);
244
+ await parallel(concurrency, files, async (file) => {
242
245
  await processor[method](file);
243
- }
246
+ callbacks?.onFileProcessed?.();
247
+ });
244
248
  return files.length;
245
249
  }
246
250
 
@@ -289,8 +293,8 @@ async function executeReindex(deps, scope) {
289
293
  // Reprocess only files with issues
290
294
  const issues = deps.issuesManager.getAll();
291
295
  const issuePaths = Object.keys(issues);
292
- filesProcessed = 0;
293
- for (const filePath of issuePaths) {
296
+ const concurrency = config.reindex?.concurrency ?? 50;
297
+ await parallel(concurrency, issuePaths, async (filePath) => {
294
298
  try {
295
299
  await processor.processFile(filePath);
296
300
  filesProcessed++;
@@ -299,11 +303,15 @@ async function executeReindex(deps, scope) {
299
303
  errors++;
300
304
  logger.warn({ filePath, err: normalizeError(error) }, 'Failed to reprocess issue file');
301
305
  }
302
- }
306
+ });
303
307
  }
304
308
  else {
305
309
  // Full reindex - process all watched files
306
- filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile');
310
+ const concurrency = config.reindex?.concurrency ?? 50;
311
+ filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile', concurrency, {
312
+ onTotal: (total) => reindexTracker?.setTotal(total),
313
+ onFileProcessed: () => reindexTracker?.incrementProcessed(),
314
+ });
307
315
  }
308
316
  const durationMs = Date.now() - startTime;
309
317
  logger.info({ scope, filesProcessed, durationMs }, `Reindex (${scope}) completed`);
@@ -2156,6 +2164,13 @@ const jeevesWatcherConfigSchema = z.object({
2156
2164
  .object({
2157
2165
  /** URL to call when reindex completes. */
2158
2166
  callbackUrl: z.url().optional(),
2167
+ /** Maximum concurrent file operations during reindex. */
2168
+ concurrency: z
2169
+ .number()
2170
+ .int()
2171
+ .min(1)
2172
+ .default(50)
2173
+ .describe('Maximum concurrent file operations during reindex (default 50).'),
2159
2174
  })
2160
2175
  .optional()
2161
2176
  .describe('Reindex configuration.'),
@@ -3209,7 +3224,7 @@ function createRebuildMetadataHandler(deps) {
3209
3224
  */
3210
3225
  function createReindexHandler(deps) {
3211
3226
  return wrapHandler(async (_request, reply) => {
3212
- const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile');
3227
+ const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile', deps.concurrency);
3213
3228
  return await reply.status(200).send({ ok: true, filesIndexed: count });
3214
3229
  }, deps.logger, 'Reindex');
3215
3230
  }
@@ -3544,17 +3559,31 @@ class ReindexTracker {
3544
3559
  _active = false;
3545
3560
  _scope;
3546
3561
  _startedAt;
3562
+ _filesProcessed = 0;
3563
+ _totalFiles = 0;
3547
3564
  /** Mark a reindex as started. */
3548
3565
  start(scope) {
3549
3566
  this._active = true;
3550
3567
  this._scope = scope;
3551
3568
  this._startedAt = new Date().toISOString();
3569
+ this._filesProcessed = 0;
3570
+ this._totalFiles = 0;
3571
+ }
3572
+ /** Set the total number of files to process. */
3573
+ setTotal(total) {
3574
+ this._totalFiles = total;
3575
+ }
3576
+ /** Increment the processed file count. */
3577
+ incrementProcessed() {
3578
+ this._filesProcessed++;
3552
3579
  }
3553
3580
  /** Mark the current reindex as complete. */
3554
3581
  complete() {
3555
3582
  this._active = false;
3556
3583
  this._scope = undefined;
3557
3584
  this._startedAt = undefined;
3585
+ this._filesProcessed = 0;
3586
+ this._totalFiles = 0;
3558
3587
  }
3559
3588
  /** Get current reindex status. */
3560
3589
  getStatus() {
@@ -3564,6 +3593,8 @@ class ReindexTracker {
3564
3593
  active: true,
3565
3594
  scope: this._scope,
3566
3595
  startedAt: this._startedAt,
3596
+ filesProcessed: this._filesProcessed,
3597
+ totalFiles: this._totalFiles,
3567
3598
  };
3568
3599
  }
3569
3600
  }
@@ -3623,7 +3654,12 @@ function createApiServer(options) {
3623
3654
  logger,
3624
3655
  hybridConfig,
3625
3656
  }));
3626
- app.post('/reindex', createReindexHandler({ watch: config.watch, processor, logger }));
3657
+ app.post('/reindex', createReindexHandler({
3658
+ watch: config.watch,
3659
+ processor,
3660
+ logger,
3661
+ concurrency: config.reindex?.concurrency ?? 50,
3662
+ }));
3627
3663
  app.post('/rebuild-metadata', createRebuildMetadataHandler({
3628
3664
  metadataDir: config.metadataDir,
3629
3665
  vectorStore,
package/dist/index.d.ts CHANGED
@@ -186,6 +186,7 @@ declare const jeevesWatcherConfigSchema: z.ZodObject<{
186
186
  }, z.core.$strip>>>;
187
187
  reindex: z.ZodOptional<z.ZodObject<{
188
188
  callbackUrl: z.ZodOptional<z.ZodURL>;
189
+ concurrency: z.ZodDefault<z.ZodNumber>;
189
190
  }, z.core.$strip>>;
190
191
  slots: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
191
192
  search: z.ZodOptional<z.ZodObject<{
@@ -1240,6 +1241,10 @@ interface ReindexStatus {
1240
1241
  scope?: string;
1241
1242
  /** ISO 8601 timestamp when the current reindex started (when {@link active} is true). */
1242
1243
  startedAt?: string;
1244
+ /** Number of files processed so far (when {@link active} is true). */
1245
+ filesProcessed?: number;
1246
+ /** Total number of files to process (when {@link active} is true). */
1247
+ totalFiles?: number;
1243
1248
  }
1244
1249
  /**
1245
1250
  * Tracks the state of reindex operations.
@@ -1248,8 +1253,14 @@ declare class ReindexTracker {
1248
1253
  private _active;
1249
1254
  private _scope?;
1250
1255
  private _startedAt?;
1256
+ private _filesProcessed;
1257
+ private _totalFiles;
1251
1258
  /** Mark a reindex as started. */
1252
1259
  start(scope: 'issues' | 'full'): void;
1260
+ /** Set the total number of files to process. */
1261
+ setTotal(total: number): void;
1262
+ /** Increment the processed file count. */
1263
+ incrementProcessed(): void;
1253
1264
  /** Mark the current reindex as complete. */
1254
1265
  complete(): void;
1255
1266
  /** Get current reindex status. */
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import dayjs from 'dayjs';
8
8
  import { toMdast } from 'hast-util-to-mdast';
9
9
  import { fromADF } from 'mdast-util-from-adf';
10
10
  import { toMarkdown } from 'mdast-util-to-markdown';
11
- import { capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
11
+ import { capitalize, title, camel, snake, dash, isEqual, get, parallel, omit } from 'radash';
12
12
  import rehypeParse from 'rehype-parse';
13
13
  import { unified } from 'unified';
14
14
  import yaml from 'js-yaml';
@@ -1317,6 +1317,8 @@ async function listFilesFromGlobs(patterns, ignored = []) {
1317
1317
  *
1318
1318
  * Shared helper for processing all files matching configured globs.
1319
1319
  */
1320
+ /** Default concurrency limit for reindex operations. */
1321
+ const DEFAULT_REINDEX_CONCURRENCY = 50;
1320
1322
  /**
1321
1323
  * Process all files from globs using the specified processor method.
1322
1324
  *
@@ -1324,15 +1326,17 @@ async function listFilesFromGlobs(patterns, ignored = []) {
1324
1326
  * @param ignoredPaths - The glob patterns to ignore.
1325
1327
  * @param processor - The document processor instance.
1326
1328
  * @param method - The processor method to call ('processFile' or 'processRulesUpdate').
1329
+ * @param concurrency - Maximum concurrent file operations (default 50).
1330
+ * @param callbacks - Optional progress tracking callbacks.
1327
1331
  * @returns The number of files processed.
1328
1332
  */
1329
- async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
1333
+ async function processAllFiles(watchPaths, ignoredPaths, processor, method, concurrency = DEFAULT_REINDEX_CONCURRENCY, callbacks) {
1330
1334
  const files = await listFilesFromGlobs(watchPaths, ignoredPaths);
1331
- for (const file of files) {
1332
- // Sequential on purpose to avoid surprising load.
1333
- // Queue integration can come later.
1335
+ callbacks?.onTotal?.(files.length);
1336
+ await parallel(concurrency, files, async (file) => {
1334
1337
  await processor[method](file);
1335
- }
1338
+ callbacks?.onFileProcessed?.();
1339
+ });
1336
1340
  return files.length;
1337
1341
  }
1338
1342
 
@@ -1381,8 +1385,8 @@ async function executeReindex(deps, scope) {
1381
1385
  // Reprocess only files with issues
1382
1386
  const issues = deps.issuesManager.getAll();
1383
1387
  const issuePaths = Object.keys(issues);
1384
- filesProcessed = 0;
1385
- for (const filePath of issuePaths) {
1388
+ const concurrency = config.reindex?.concurrency ?? 50;
1389
+ await parallel(concurrency, issuePaths, async (filePath) => {
1386
1390
  try {
1387
1391
  await processor.processFile(filePath);
1388
1392
  filesProcessed++;
@@ -1391,11 +1395,15 @@ async function executeReindex(deps, scope) {
1391
1395
  errors++;
1392
1396
  logger.warn({ filePath, err: normalizeError(error) }, 'Failed to reprocess issue file');
1393
1397
  }
1394
- }
1398
+ });
1395
1399
  }
1396
1400
  else {
1397
1401
  // Full reindex - process all watched files
1398
- filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile');
1402
+ const concurrency = config.reindex?.concurrency ?? 50;
1403
+ filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile', concurrency, {
1404
+ onTotal: (total) => reindexTracker?.setTotal(total),
1405
+ onFileProcessed: () => reindexTracker?.incrementProcessed(),
1406
+ });
1399
1407
  }
1400
1408
  const durationMs = Date.now() - startTime;
1401
1409
  logger.info({ scope, filesProcessed, durationMs }, `Reindex (${scope}) completed`);
@@ -1842,6 +1850,13 @@ const jeevesWatcherConfigSchema = z.object({
1842
1850
  .object({
1843
1851
  /** URL to call when reindex completes. */
1844
1852
  callbackUrl: z.url().optional(),
1853
+ /** Maximum concurrent file operations during reindex. */
1854
+ concurrency: z
1855
+ .number()
1856
+ .int()
1857
+ .min(1)
1858
+ .default(50)
1859
+ .describe('Maximum concurrent file operations during reindex (default 50).'),
1845
1860
  })
1846
1861
  .optional()
1847
1862
  .describe('Reindex configuration.'),
@@ -2895,7 +2910,7 @@ function createRebuildMetadataHandler(deps) {
2895
2910
  */
2896
2911
  function createReindexHandler(deps) {
2897
2912
  return wrapHandler(async (_request, reply) => {
2898
- const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile');
2913
+ const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile', deps.concurrency);
2899
2914
  return await reply.status(200).send({ ok: true, filesIndexed: count });
2900
2915
  }, deps.logger, 'Reindex');
2901
2916
  }
@@ -3230,17 +3245,31 @@ class ReindexTracker {
3230
3245
  _active = false;
3231
3246
  _scope;
3232
3247
  _startedAt;
3248
+ _filesProcessed = 0;
3249
+ _totalFiles = 0;
3233
3250
  /** Mark a reindex as started. */
3234
3251
  start(scope) {
3235
3252
  this._active = true;
3236
3253
  this._scope = scope;
3237
3254
  this._startedAt = new Date().toISOString();
3255
+ this._filesProcessed = 0;
3256
+ this._totalFiles = 0;
3257
+ }
3258
+ /** Set the total number of files to process. */
3259
+ setTotal(total) {
3260
+ this._totalFiles = total;
3261
+ }
3262
+ /** Increment the processed file count. */
3263
+ incrementProcessed() {
3264
+ this._filesProcessed++;
3238
3265
  }
3239
3266
  /** Mark the current reindex as complete. */
3240
3267
  complete() {
3241
3268
  this._active = false;
3242
3269
  this._scope = undefined;
3243
3270
  this._startedAt = undefined;
3271
+ this._filesProcessed = 0;
3272
+ this._totalFiles = 0;
3244
3273
  }
3245
3274
  /** Get current reindex status. */
3246
3275
  getStatus() {
@@ -3250,6 +3279,8 @@ class ReindexTracker {
3250
3279
  active: true,
3251
3280
  scope: this._scope,
3252
3281
  startedAt: this._startedAt,
3282
+ filesProcessed: this._filesProcessed,
3283
+ totalFiles: this._totalFiles,
3253
3284
  };
3254
3285
  }
3255
3286
  }
@@ -3309,7 +3340,12 @@ function createApiServer(options) {
3309
3340
  logger,
3310
3341
  hybridConfig,
3311
3342
  }));
3312
- app.post('/reindex', createReindexHandler({ watch: config.watch, processor, logger }));
3343
+ app.post('/reindex', createReindexHandler({
3344
+ watch: config.watch,
3345
+ processor,
3346
+ logger,
3347
+ concurrency: config.reindex?.concurrency ?? 50,
3348
+ }));
3313
3349
  app.post('/rebuild-metadata', createRebuildMetadataHandler({
3314
3350
  metadataDir: config.metadataDir,
3315
3351
  vectorStore,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@karmaniverous/jeeves-watcher",
3
- "version": "0.9.4",
3
+ "version": "0.9.5",
4
4
  "author": "Jason Williscroft",
5
5
  "description": "Filesystem watcher that keeps a Qdrant vector store in sync with document changes",
6
6
  "license": "BSD-3-Clause",