@karmaniverous/jeeves-watcher 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +28 -14
- package/dist/cli/jeeves-watcher/index.js +85 -21
- package/dist/index.d.ts +11 -0
- package/dist/index.js +85 -21
- package/package.json +1 -1
package/config.schema.json
CHANGED
|
@@ -189,7 +189,7 @@
|
|
|
189
189
|
"description": "Named Qdrant filter patterns for skill-activated behaviors.",
|
|
190
190
|
"allOf": [
|
|
191
191
|
{
|
|
192
|
-
"$ref": "#/definitions/__schema82"
|
|
192
|
+
"$ref": "#/definitions/__schema83"
|
|
193
193
|
}
|
|
194
194
|
]
|
|
195
195
|
},
|
|
@@ -197,7 +197,7 @@
|
|
|
197
197
|
"description": "Search configuration including score thresholds and hybrid search.",
|
|
198
198
|
"allOf": [
|
|
199
199
|
{
|
|
200
|
-
"$ref": "#/definitions/__schema83"
|
|
200
|
+
"$ref": "#/definitions/__schema84"
|
|
201
201
|
}
|
|
202
202
|
]
|
|
203
203
|
},
|
|
@@ -205,7 +205,7 @@
|
|
|
205
205
|
"description": "Logging configuration.",
|
|
206
206
|
"allOf": [
|
|
207
207
|
{
|
|
208
|
-
"$ref": "#/definitions/__schema84"
|
|
208
|
+
"$ref": "#/definitions/__schema85"
|
|
209
209
|
}
|
|
210
210
|
]
|
|
211
211
|
},
|
|
@@ -213,7 +213,7 @@
|
|
|
213
213
|
"description": "Timeout in milliseconds for graceful shutdown.",
|
|
214
214
|
"allOf": [
|
|
215
215
|
{
|
|
216
|
-
"$ref": "#/definitions/__schema87"
|
|
216
|
+
"$ref": "#/definitions/__schema88"
|
|
217
217
|
}
|
|
218
218
|
]
|
|
219
219
|
},
|
|
@@ -221,7 +221,7 @@
|
|
|
221
221
|
"description": "Maximum consecutive system-level failures before triggering fatal error. Default: Infinity.",
|
|
222
222
|
"allOf": [
|
|
223
223
|
{
|
|
224
|
-
"$ref": "#/definitions/__schema88"
|
|
224
|
+
"$ref": "#/definitions/__schema89"
|
|
225
225
|
}
|
|
226
226
|
]
|
|
227
227
|
},
|
|
@@ -229,7 +229,7 @@
|
|
|
229
229
|
"description": "Maximum backoff delay in milliseconds for system errors. Default: 60000.",
|
|
230
230
|
"allOf": [
|
|
231
231
|
{
|
|
232
|
-
"$ref": "#/definitions/__schema89"
|
|
232
|
+
"$ref": "#/definitions/__schema90"
|
|
233
233
|
}
|
|
234
234
|
]
|
|
235
235
|
}
|
|
@@ -1046,17 +1046,31 @@
|
|
|
1046
1046
|
"callbackUrl": {
|
|
1047
1047
|
"type": "string",
|
|
1048
1048
|
"format": "uri"
|
|
1049
|
+
},
|
|
1050
|
+
"concurrency": {
|
|
1051
|
+
"default": 50,
|
|
1052
|
+
"description": "Maximum concurrent file operations during reindex (default 50).",
|
|
1053
|
+
"allOf": [
|
|
1054
|
+
{
|
|
1055
|
+
"$ref": "#/definitions/__schema82"
|
|
1056
|
+
}
|
|
1057
|
+
]
|
|
1049
1058
|
}
|
|
1050
1059
|
}
|
|
1051
1060
|
},
|
|
1052
1061
|
"__schema82": {
|
|
1062
|
+
"type": "integer",
|
|
1063
|
+
"minimum": 1,
|
|
1064
|
+
"maximum": 9007199254740991
|
|
1065
|
+
},
|
|
1066
|
+
"__schema83": {
|
|
1053
1067
|
"type": "object",
|
|
1054
1068
|
"propertyNames": {
|
|
1055
1069
|
"type": "string"
|
|
1056
1070
|
},
|
|
1057
1071
|
"additionalProperties": {}
|
|
1058
1072
|
},
|
|
1059
|
-
"__schema83": {
|
|
1073
|
+
"__schema84": {
|
|
1060
1074
|
"type": "object",
|
|
1061
1075
|
"properties": {
|
|
1062
1076
|
"scoreThresholds": {
|
|
@@ -1101,14 +1115,14 @@
|
|
|
1101
1115
|
}
|
|
1102
1116
|
}
|
|
1103
1117
|
},
|
|
1104
|
-
"__schema84": {
|
|
1118
|
+
"__schema85": {
|
|
1105
1119
|
"type": "object",
|
|
1106
1120
|
"properties": {
|
|
1107
1121
|
"level": {
|
|
1108
1122
|
"description": "Logging level (trace, debug, info, warn, error, fatal).",
|
|
1109
1123
|
"allOf": [
|
|
1110
1124
|
{
|
|
1111
|
-
"$ref": "#/definitions/__schema85"
|
|
1125
|
+
"$ref": "#/definitions/__schema86"
|
|
1112
1126
|
}
|
|
1113
1127
|
]
|
|
1114
1128
|
},
|
|
@@ -1116,26 +1130,26 @@
|
|
|
1116
1130
|
"description": "Path to log file (logs to stdout if omitted).",
|
|
1117
1131
|
"allOf": [
|
|
1118
1132
|
{
|
|
1119
|
-
"$ref": "#/definitions/__schema86"
|
|
1133
|
+
"$ref": "#/definitions/__schema87"
|
|
1120
1134
|
}
|
|
1121
1135
|
]
|
|
1122
1136
|
}
|
|
1123
1137
|
}
|
|
1124
1138
|
},
|
|
1125
|
-
"__schema85": {
|
|
1126
|
-
"type": "string"
|
|
1127
|
-
},
|
|
1128
1139
|
"__schema86": {
|
|
1129
1140
|
"type": "string"
|
|
1130
1141
|
},
|
|
1131
1142
|
"__schema87": {
|
|
1132
|
-
"type": "number"
|
|
1143
|
+
"type": "string"
|
|
1133
1144
|
},
|
|
1134
1145
|
"__schema88": {
|
|
1135
1146
|
"type": "number"
|
|
1136
1147
|
},
|
|
1137
1148
|
"__schema89": {
|
|
1138
1149
|
"type": "number"
|
|
1150
|
+
},
|
|
1151
|
+
"__schema90": {
|
|
1152
|
+
"type": "number"
|
|
1139
1153
|
}
|
|
1140
1154
|
}
|
|
1141
1155
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { createRequire } from 'node:module';
|
|
3
3
|
import { Command } from '@commander-js/extra-typings';
|
|
4
|
+
import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
4
5
|
import { readdir, stat, writeFile, rm, readFile, mkdir } from 'node:fs/promises';
|
|
5
6
|
import { resolve, dirname, join, extname, basename, isAbsolute, relative } from 'node:path';
|
|
6
7
|
import picomatch from 'picomatch';
|
|
@@ -17,7 +18,6 @@ import dayjs from 'dayjs';
|
|
|
17
18
|
import { toMdast } from 'hast-util-to-mdast';
|
|
18
19
|
import { fromADF } from 'mdast-util-from-adf';
|
|
19
20
|
import { toMarkdown } from 'mdast-util-to-markdown';
|
|
20
|
-
import { capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
21
21
|
import rehypeParse from 'rehype-parse';
|
|
22
22
|
import { unified } from 'unified';
|
|
23
23
|
import yaml from 'js-yaml';
|
|
@@ -25,6 +25,7 @@ import { JSONPath } from 'jsonpath-plus';
|
|
|
25
25
|
import { createHash } from 'node:crypto';
|
|
26
26
|
import crypto from 'crypto';
|
|
27
27
|
import { cosmiconfig } from 'cosmiconfig';
|
|
28
|
+
import https from 'node:https';
|
|
28
29
|
import pino from 'pino';
|
|
29
30
|
import { v5 } from 'uuid';
|
|
30
31
|
import * as cheerio from 'cheerio';
|
|
@@ -224,6 +225,8 @@ async function listFilesFromGlobs(patterns, ignored = []) {
|
|
|
224
225
|
*
|
|
225
226
|
* Shared helper for processing all files matching configured globs.
|
|
226
227
|
*/
|
|
228
|
+
/** Default concurrency limit for reindex operations. */
|
|
229
|
+
const DEFAULT_REINDEX_CONCURRENCY = 50;
|
|
227
230
|
/**
|
|
228
231
|
* Process all files from globs using the specified processor method.
|
|
229
232
|
*
|
|
@@ -231,15 +234,17 @@ async function listFilesFromGlobs(patterns, ignored = []) {
|
|
|
231
234
|
* @param ignoredPaths - The glob patterns to ignore.
|
|
232
235
|
* @param processor - The document processor instance.
|
|
233
236
|
* @param method - The processor method to call ('processFile' or 'processRulesUpdate').
|
|
237
|
+
* @param concurrency - Maximum concurrent file operations (default 50).
|
|
238
|
+
* @param callbacks - Optional progress tracking callbacks.
|
|
234
239
|
* @returns The number of files processed.
|
|
235
240
|
*/
|
|
236
|
-
async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
|
|
241
|
+
async function processAllFiles(watchPaths, ignoredPaths, processor, method, concurrency = DEFAULT_REINDEX_CONCURRENCY, callbacks) {
|
|
237
242
|
const files = await listFilesFromGlobs(watchPaths, ignoredPaths);
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
// Queue integration can come later.
|
|
243
|
+
callbacks?.onTotal?.(files.length);
|
|
244
|
+
await parallel(concurrency, files, async (file) => {
|
|
241
245
|
await processor[method](file);
|
|
242
|
-
|
|
246
|
+
callbacks?.onFileProcessed?.();
|
|
247
|
+
});
|
|
243
248
|
return files.length;
|
|
244
249
|
}
|
|
245
250
|
|
|
@@ -288,8 +293,8 @@ async function executeReindex(deps, scope) {
|
|
|
288
293
|
// Reprocess only files with issues
|
|
289
294
|
const issues = deps.issuesManager.getAll();
|
|
290
295
|
const issuePaths = Object.keys(issues);
|
|
291
|
-
|
|
292
|
-
|
|
296
|
+
const concurrency = config.reindex?.concurrency ?? 50;
|
|
297
|
+
await parallel(concurrency, issuePaths, async (filePath) => {
|
|
293
298
|
try {
|
|
294
299
|
await processor.processFile(filePath);
|
|
295
300
|
filesProcessed++;
|
|
@@ -298,11 +303,15 @@ async function executeReindex(deps, scope) {
|
|
|
298
303
|
errors++;
|
|
299
304
|
logger.warn({ filePath, err: normalizeError(error) }, 'Failed to reprocess issue file');
|
|
300
305
|
}
|
|
301
|
-
}
|
|
306
|
+
});
|
|
302
307
|
}
|
|
303
308
|
else {
|
|
304
309
|
// Full reindex - process all watched files
|
|
305
|
-
|
|
310
|
+
const concurrency = config.reindex?.concurrency ?? 50;
|
|
311
|
+
filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile', concurrency, {
|
|
312
|
+
onTotal: (total) => reindexTracker?.setTotal(total),
|
|
313
|
+
onFileProcessed: () => reindexTracker?.incrementProcessed(),
|
|
314
|
+
});
|
|
306
315
|
}
|
|
307
316
|
const durationMs = Date.now() - startTime;
|
|
308
317
|
logger.info({ scope, filesProcessed, durationMs }, `Reindex (${scope}) completed`);
|
|
@@ -2155,6 +2164,13 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
2155
2164
|
.object({
|
|
2156
2165
|
/** URL to call when reindex completes. */
|
|
2157
2166
|
callbackUrl: z.url().optional(),
|
|
2167
|
+
/** Maximum concurrent file operations during reindex. */
|
|
2168
|
+
concurrency: z
|
|
2169
|
+
.number()
|
|
2170
|
+
.int()
|
|
2171
|
+
.min(1)
|
|
2172
|
+
.default(50)
|
|
2173
|
+
.describe('Maximum concurrent file operations during reindex (default 50).'),
|
|
2158
2174
|
})
|
|
2159
2175
|
.optional()
|
|
2160
2176
|
.describe('Reindex configuration.'),
|
|
@@ -3208,7 +3224,7 @@ function createRebuildMetadataHandler(deps) {
|
|
|
3208
3224
|
*/
|
|
3209
3225
|
function createReindexHandler(deps) {
|
|
3210
3226
|
return wrapHandler(async (_request, reply) => {
|
|
3211
|
-
const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile');
|
|
3227
|
+
const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile', deps.concurrency);
|
|
3212
3228
|
return await reply.status(200).send({ ok: true, filesIndexed: count });
|
|
3213
3229
|
}, deps.logger, 'Reindex');
|
|
3214
3230
|
}
|
|
@@ -3543,17 +3559,31 @@ class ReindexTracker {
|
|
|
3543
3559
|
_active = false;
|
|
3544
3560
|
_scope;
|
|
3545
3561
|
_startedAt;
|
|
3562
|
+
_filesProcessed = 0;
|
|
3563
|
+
_totalFiles = 0;
|
|
3546
3564
|
/** Mark a reindex as started. */
|
|
3547
3565
|
start(scope) {
|
|
3548
3566
|
this._active = true;
|
|
3549
3567
|
this._scope = scope;
|
|
3550
3568
|
this._startedAt = new Date().toISOString();
|
|
3569
|
+
this._filesProcessed = 0;
|
|
3570
|
+
this._totalFiles = 0;
|
|
3571
|
+
}
|
|
3572
|
+
/** Set the total number of files to process. */
|
|
3573
|
+
setTotal(total) {
|
|
3574
|
+
this._totalFiles = total;
|
|
3575
|
+
}
|
|
3576
|
+
/** Increment the processed file count. */
|
|
3577
|
+
incrementProcessed() {
|
|
3578
|
+
this._filesProcessed++;
|
|
3551
3579
|
}
|
|
3552
3580
|
/** Mark the current reindex as complete. */
|
|
3553
3581
|
complete() {
|
|
3554
3582
|
this._active = false;
|
|
3555
3583
|
this._scope = undefined;
|
|
3556
3584
|
this._startedAt = undefined;
|
|
3585
|
+
this._filesProcessed = 0;
|
|
3586
|
+
this._totalFiles = 0;
|
|
3557
3587
|
}
|
|
3558
3588
|
/** Get current reindex status. */
|
|
3559
3589
|
getStatus() {
|
|
@@ -3563,6 +3593,8 @@ class ReindexTracker {
|
|
|
3563
3593
|
active: true,
|
|
3564
3594
|
scope: this._scope,
|
|
3565
3595
|
startedAt: this._startedAt,
|
|
3596
|
+
filesProcessed: this._filesProcessed,
|
|
3597
|
+
totalFiles: this._totalFiles,
|
|
3566
3598
|
};
|
|
3567
3599
|
}
|
|
3568
3600
|
}
|
|
@@ -3622,7 +3654,12 @@ function createApiServer(options) {
|
|
|
3622
3654
|
logger,
|
|
3623
3655
|
hybridConfig,
|
|
3624
3656
|
}));
|
|
3625
|
-
app.post('/reindex', createReindexHandler({
|
|
3657
|
+
app.post('/reindex', createReindexHandler({
|
|
3658
|
+
watch: config.watch,
|
|
3659
|
+
processor,
|
|
3660
|
+
logger,
|
|
3661
|
+
concurrency: config.reindex?.concurrency ?? 50,
|
|
3662
|
+
}));
|
|
3626
3663
|
app.post('/rebuild-metadata', createRebuildMetadataHandler({
|
|
3627
3664
|
metadataDir: config.metadataDir,
|
|
3628
3665
|
vectorStore,
|
|
@@ -3897,8 +3934,40 @@ function getLogger(logger) {
|
|
|
3897
3934
|
/**
|
|
3898
3935
|
* @module embedding/geminiProvider
|
|
3899
3936
|
* Gemini embedding provider using the Google Generative AI REST API directly.
|
|
3937
|
+
* Uses node:https with a keep-alive agent for reliable performance in
|
|
3938
|
+
* long-running processes (avoids undici/fetch event-loop contention).
|
|
3900
3939
|
*/
|
|
3901
3940
|
const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta';
|
|
3941
|
+
/** Persistent HTTPS agent for connection reuse. */
|
|
3942
|
+
const agent = new https.Agent({ keepAlive: true });
|
|
3943
|
+
/** Make an HTTPS POST request using node:https (bypasses undici/fetch). */
|
|
3944
|
+
function httpsPost(url, body) {
|
|
3945
|
+
return new Promise((resolve, reject) => {
|
|
3946
|
+
const parsed = new URL(url);
|
|
3947
|
+
const req = https.request({
|
|
3948
|
+
hostname: parsed.hostname,
|
|
3949
|
+
path: parsed.pathname + parsed.search,
|
|
3950
|
+
method: 'POST',
|
|
3951
|
+
agent,
|
|
3952
|
+
headers: {
|
|
3953
|
+
'Content-Type': 'application/json',
|
|
3954
|
+
'Content-Length': Buffer.byteLength(body),
|
|
3955
|
+
},
|
|
3956
|
+
}, (res) => {
|
|
3957
|
+
const chunks = [];
|
|
3958
|
+
res.on('data', (chunk) => chunks.push(chunk));
|
|
3959
|
+
res.on('end', () => {
|
|
3960
|
+
resolve({
|
|
3961
|
+
status: res.statusCode ?? 0,
|
|
3962
|
+
body: Buffer.concat(chunks).toString('utf8'),
|
|
3963
|
+
});
|
|
3964
|
+
});
|
|
3965
|
+
});
|
|
3966
|
+
req.on('error', reject);
|
|
3967
|
+
req.write(body);
|
|
3968
|
+
req.end();
|
|
3969
|
+
});
|
|
3970
|
+
}
|
|
3902
3971
|
/**
|
|
3903
3972
|
* Create a Gemini embedding provider using the Google Generative AI REST API.
|
|
3904
3973
|
*
|
|
@@ -3927,16 +3996,11 @@ function createGeminiProvider(config, logger) {
|
|
|
3927
3996
|
model: `models/${model}`,
|
|
3928
3997
|
content: { parts: [{ text }] },
|
|
3929
3998
|
}));
|
|
3930
|
-
const response = await
|
|
3931
|
-
|
|
3932
|
-
|
|
3933
|
-
body: JSON.stringify({ requests }),
|
|
3934
|
-
});
|
|
3935
|
-
if (!response.ok) {
|
|
3936
|
-
const body = await response.text();
|
|
3937
|
-
throw new Error(`Gemini API error ${String(response.status)}: ${body}`);
|
|
3999
|
+
const response = await httpsPost(url, JSON.stringify({ requests }));
|
|
4000
|
+
if (response.status < 200 || response.status >= 300) {
|
|
4001
|
+
throw new Error(`Gemini API error ${String(response.status)}: ${response.body}`);
|
|
3938
4002
|
}
|
|
3939
|
-
const data = (
|
|
4003
|
+
const data = JSON.parse(response.body);
|
|
3940
4004
|
return data.embeddings.map((e) => e.values);
|
|
3941
4005
|
}, {
|
|
3942
4006
|
attempts: 5,
|
package/dist/index.d.ts
CHANGED
|
@@ -186,6 +186,7 @@ declare const jeevesWatcherConfigSchema: z.ZodObject<{
|
|
|
186
186
|
}, z.core.$strip>>>;
|
|
187
187
|
reindex: z.ZodOptional<z.ZodObject<{
|
|
188
188
|
callbackUrl: z.ZodOptional<z.ZodURL>;
|
|
189
|
+
concurrency: z.ZodDefault<z.ZodNumber>;
|
|
189
190
|
}, z.core.$strip>>;
|
|
190
191
|
slots: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
191
192
|
search: z.ZodOptional<z.ZodObject<{
|
|
@@ -1240,6 +1241,10 @@ interface ReindexStatus {
|
|
|
1240
1241
|
scope?: string;
|
|
1241
1242
|
/** ISO 8601 timestamp when the current reindex started (when {@link active} is true). */
|
|
1242
1243
|
startedAt?: string;
|
|
1244
|
+
/** Number of files processed so far (when {@link active} is true). */
|
|
1245
|
+
filesProcessed?: number;
|
|
1246
|
+
/** Total number of files to process (when {@link active} is true). */
|
|
1247
|
+
totalFiles?: number;
|
|
1243
1248
|
}
|
|
1244
1249
|
/**
|
|
1245
1250
|
* Tracks the state of reindex operations.
|
|
@@ -1248,8 +1253,14 @@ declare class ReindexTracker {
|
|
|
1248
1253
|
private _active;
|
|
1249
1254
|
private _scope?;
|
|
1250
1255
|
private _startedAt?;
|
|
1256
|
+
private _filesProcessed;
|
|
1257
|
+
private _totalFiles;
|
|
1251
1258
|
/** Mark a reindex as started. */
|
|
1252
1259
|
start(scope: 'issues' | 'full'): void;
|
|
1260
|
+
/** Set the total number of files to process. */
|
|
1261
|
+
setTotal(total: number): void;
|
|
1262
|
+
/** Increment the processed file count. */
|
|
1263
|
+
incrementProcessed(): void;
|
|
1253
1264
|
/** Mark the current reindex as complete. */
|
|
1254
1265
|
complete(): void;
|
|
1255
1266
|
/** Get current reindex status. */
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import dayjs from 'dayjs';
|
|
|
8
8
|
import { toMdast } from 'hast-util-to-mdast';
|
|
9
9
|
import { fromADF } from 'mdast-util-from-adf';
|
|
10
10
|
import { toMarkdown } from 'mdast-util-to-markdown';
|
|
11
|
-
import { capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
11
|
+
import { capitalize, title, camel, snake, dash, isEqual, get, parallel, omit } from 'radash';
|
|
12
12
|
import rehypeParse from 'rehype-parse';
|
|
13
13
|
import { unified } from 'unified';
|
|
14
14
|
import yaml from 'js-yaml';
|
|
@@ -22,6 +22,7 @@ import { createHash } from 'node:crypto';
|
|
|
22
22
|
import crypto from 'crypto';
|
|
23
23
|
import chokidar from 'chokidar';
|
|
24
24
|
import { cosmiconfig } from 'cosmiconfig';
|
|
25
|
+
import https from 'node:https';
|
|
25
26
|
import pino from 'pino';
|
|
26
27
|
import { v5 } from 'uuid';
|
|
27
28
|
import * as cheerio from 'cheerio';
|
|
@@ -1316,6 +1317,8 @@ async function listFilesFromGlobs(patterns, ignored = []) {
|
|
|
1316
1317
|
*
|
|
1317
1318
|
* Shared helper for processing all files matching configured globs.
|
|
1318
1319
|
*/
|
|
1320
|
+
/** Default concurrency limit for reindex operations. */
|
|
1321
|
+
const DEFAULT_REINDEX_CONCURRENCY = 50;
|
|
1319
1322
|
/**
|
|
1320
1323
|
* Process all files from globs using the specified processor method.
|
|
1321
1324
|
*
|
|
@@ -1323,15 +1326,17 @@ async function listFilesFromGlobs(patterns, ignored = []) {
|
|
|
1323
1326
|
* @param ignoredPaths - The glob patterns to ignore.
|
|
1324
1327
|
* @param processor - The document processor instance.
|
|
1325
1328
|
* @param method - The processor method to call ('processFile' or 'processRulesUpdate').
|
|
1329
|
+
* @param concurrency - Maximum concurrent file operations (default 50).
|
|
1330
|
+
* @param callbacks - Optional progress tracking callbacks.
|
|
1326
1331
|
* @returns The number of files processed.
|
|
1327
1332
|
*/
|
|
1328
|
-
async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
|
|
1333
|
+
async function processAllFiles(watchPaths, ignoredPaths, processor, method, concurrency = DEFAULT_REINDEX_CONCURRENCY, callbacks) {
|
|
1329
1334
|
const files = await listFilesFromGlobs(watchPaths, ignoredPaths);
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
// Queue integration can come later.
|
|
1335
|
+
callbacks?.onTotal?.(files.length);
|
|
1336
|
+
await parallel(concurrency, files, async (file) => {
|
|
1333
1337
|
await processor[method](file);
|
|
1334
|
-
|
|
1338
|
+
callbacks?.onFileProcessed?.();
|
|
1339
|
+
});
|
|
1335
1340
|
return files.length;
|
|
1336
1341
|
}
|
|
1337
1342
|
|
|
@@ -1380,8 +1385,8 @@ async function executeReindex(deps, scope) {
|
|
|
1380
1385
|
// Reprocess only files with issues
|
|
1381
1386
|
const issues = deps.issuesManager.getAll();
|
|
1382
1387
|
const issuePaths = Object.keys(issues);
|
|
1383
|
-
|
|
1384
|
-
|
|
1388
|
+
const concurrency = config.reindex?.concurrency ?? 50;
|
|
1389
|
+
await parallel(concurrency, issuePaths, async (filePath) => {
|
|
1385
1390
|
try {
|
|
1386
1391
|
await processor.processFile(filePath);
|
|
1387
1392
|
filesProcessed++;
|
|
@@ -1390,11 +1395,15 @@ async function executeReindex(deps, scope) {
|
|
|
1390
1395
|
errors++;
|
|
1391
1396
|
logger.warn({ filePath, err: normalizeError(error) }, 'Failed to reprocess issue file');
|
|
1392
1397
|
}
|
|
1393
|
-
}
|
|
1398
|
+
});
|
|
1394
1399
|
}
|
|
1395
1400
|
else {
|
|
1396
1401
|
// Full reindex - process all watched files
|
|
1397
|
-
|
|
1402
|
+
const concurrency = config.reindex?.concurrency ?? 50;
|
|
1403
|
+
filesProcessed = await processAllFiles(config.watch.paths, config.watch.ignored, processor, 'processFile', concurrency, {
|
|
1404
|
+
onTotal: (total) => reindexTracker?.setTotal(total),
|
|
1405
|
+
onFileProcessed: () => reindexTracker?.incrementProcessed(),
|
|
1406
|
+
});
|
|
1398
1407
|
}
|
|
1399
1408
|
const durationMs = Date.now() - startTime;
|
|
1400
1409
|
logger.info({ scope, filesProcessed, durationMs }, `Reindex (${scope}) completed`);
|
|
@@ -1841,6 +1850,13 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
1841
1850
|
.object({
|
|
1842
1851
|
/** URL to call when reindex completes. */
|
|
1843
1852
|
callbackUrl: z.url().optional(),
|
|
1853
|
+
/** Maximum concurrent file operations during reindex. */
|
|
1854
|
+
concurrency: z
|
|
1855
|
+
.number()
|
|
1856
|
+
.int()
|
|
1857
|
+
.min(1)
|
|
1858
|
+
.default(50)
|
|
1859
|
+
.describe('Maximum concurrent file operations during reindex (default 50).'),
|
|
1844
1860
|
})
|
|
1845
1861
|
.optional()
|
|
1846
1862
|
.describe('Reindex configuration.'),
|
|
@@ -2894,7 +2910,7 @@ function createRebuildMetadataHandler(deps) {
|
|
|
2894
2910
|
*/
|
|
2895
2911
|
function createReindexHandler(deps) {
|
|
2896
2912
|
return wrapHandler(async (_request, reply) => {
|
|
2897
|
-
const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile');
|
|
2913
|
+
const count = await processAllFiles(deps.watch.paths, deps.watch.ignored, deps.processor, 'processFile', deps.concurrency);
|
|
2898
2914
|
return await reply.status(200).send({ ok: true, filesIndexed: count });
|
|
2899
2915
|
}, deps.logger, 'Reindex');
|
|
2900
2916
|
}
|
|
@@ -3229,17 +3245,31 @@ class ReindexTracker {
|
|
|
3229
3245
|
_active = false;
|
|
3230
3246
|
_scope;
|
|
3231
3247
|
_startedAt;
|
|
3248
|
+
_filesProcessed = 0;
|
|
3249
|
+
_totalFiles = 0;
|
|
3232
3250
|
/** Mark a reindex as started. */
|
|
3233
3251
|
start(scope) {
|
|
3234
3252
|
this._active = true;
|
|
3235
3253
|
this._scope = scope;
|
|
3236
3254
|
this._startedAt = new Date().toISOString();
|
|
3255
|
+
this._filesProcessed = 0;
|
|
3256
|
+
this._totalFiles = 0;
|
|
3257
|
+
}
|
|
3258
|
+
/** Set the total number of files to process. */
|
|
3259
|
+
setTotal(total) {
|
|
3260
|
+
this._totalFiles = total;
|
|
3261
|
+
}
|
|
3262
|
+
/** Increment the processed file count. */
|
|
3263
|
+
incrementProcessed() {
|
|
3264
|
+
this._filesProcessed++;
|
|
3237
3265
|
}
|
|
3238
3266
|
/** Mark the current reindex as complete. */
|
|
3239
3267
|
complete() {
|
|
3240
3268
|
this._active = false;
|
|
3241
3269
|
this._scope = undefined;
|
|
3242
3270
|
this._startedAt = undefined;
|
|
3271
|
+
this._filesProcessed = 0;
|
|
3272
|
+
this._totalFiles = 0;
|
|
3243
3273
|
}
|
|
3244
3274
|
/** Get current reindex status. */
|
|
3245
3275
|
getStatus() {
|
|
@@ -3249,6 +3279,8 @@ class ReindexTracker {
|
|
|
3249
3279
|
active: true,
|
|
3250
3280
|
scope: this._scope,
|
|
3251
3281
|
startedAt: this._startedAt,
|
|
3282
|
+
filesProcessed: this._filesProcessed,
|
|
3283
|
+
totalFiles: this._totalFiles,
|
|
3252
3284
|
};
|
|
3253
3285
|
}
|
|
3254
3286
|
}
|
|
@@ -3308,7 +3340,12 @@ function createApiServer(options) {
|
|
|
3308
3340
|
logger,
|
|
3309
3341
|
hybridConfig,
|
|
3310
3342
|
}));
|
|
3311
|
-
app.post('/reindex', createReindexHandler({
|
|
3343
|
+
app.post('/reindex', createReindexHandler({
|
|
3344
|
+
watch: config.watch,
|
|
3345
|
+
processor,
|
|
3346
|
+
logger,
|
|
3347
|
+
concurrency: config.reindex?.concurrency ?? 50,
|
|
3348
|
+
}));
|
|
3312
3349
|
app.post('/rebuild-metadata', createRebuildMetadataHandler({
|
|
3313
3350
|
metadataDir: config.metadataDir,
|
|
3314
3351
|
vectorStore,
|
|
@@ -3873,8 +3910,40 @@ function getLogger(logger) {
|
|
|
3873
3910
|
/**
|
|
3874
3911
|
* @module embedding/geminiProvider
|
|
3875
3912
|
* Gemini embedding provider using the Google Generative AI REST API directly.
|
|
3913
|
+
* Uses node:https with a keep-alive agent for reliable performance in
|
|
3914
|
+
* long-running processes (avoids undici/fetch event-loop contention).
|
|
3876
3915
|
*/
|
|
3877
3916
|
const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta';
|
|
3917
|
+
/** Persistent HTTPS agent for connection reuse. */
|
|
3918
|
+
const agent = new https.Agent({ keepAlive: true });
|
|
3919
|
+
/** Make an HTTPS POST request using node:https (bypasses undici/fetch). */
|
|
3920
|
+
function httpsPost(url, body) {
|
|
3921
|
+
return new Promise((resolve, reject) => {
|
|
3922
|
+
const parsed = new URL(url);
|
|
3923
|
+
const req = https.request({
|
|
3924
|
+
hostname: parsed.hostname,
|
|
3925
|
+
path: parsed.pathname + parsed.search,
|
|
3926
|
+
method: 'POST',
|
|
3927
|
+
agent,
|
|
3928
|
+
headers: {
|
|
3929
|
+
'Content-Type': 'application/json',
|
|
3930
|
+
'Content-Length': Buffer.byteLength(body),
|
|
3931
|
+
},
|
|
3932
|
+
}, (res) => {
|
|
3933
|
+
const chunks = [];
|
|
3934
|
+
res.on('data', (chunk) => chunks.push(chunk));
|
|
3935
|
+
res.on('end', () => {
|
|
3936
|
+
resolve({
|
|
3937
|
+
status: res.statusCode ?? 0,
|
|
3938
|
+
body: Buffer.concat(chunks).toString('utf8'),
|
|
3939
|
+
});
|
|
3940
|
+
});
|
|
3941
|
+
});
|
|
3942
|
+
req.on('error', reject);
|
|
3943
|
+
req.write(body);
|
|
3944
|
+
req.end();
|
|
3945
|
+
});
|
|
3946
|
+
}
|
|
3878
3947
|
/**
|
|
3879
3948
|
* Create a Gemini embedding provider using the Google Generative AI REST API.
|
|
3880
3949
|
*
|
|
@@ -3903,16 +3972,11 @@ function createGeminiProvider(config, logger) {
|
|
|
3903
3972
|
model: `models/${model}`,
|
|
3904
3973
|
content: { parts: [{ text }] },
|
|
3905
3974
|
}));
|
|
3906
|
-
const response = await
|
|
3907
|
-
|
|
3908
|
-
|
|
3909
|
-
body: JSON.stringify({ requests }),
|
|
3910
|
-
});
|
|
3911
|
-
if (!response.ok) {
|
|
3912
|
-
const body = await response.text();
|
|
3913
|
-
throw new Error(`Gemini API error ${String(response.status)}: ${body}`);
|
|
3975
|
+
const response = await httpsPost(url, JSON.stringify({ requests }));
|
|
3976
|
+
if (response.status < 200 || response.status >= 300) {
|
|
3977
|
+
throw new Error(`Gemini API error ${String(response.status)}: ${response.body}`);
|
|
3914
3978
|
}
|
|
3915
|
-
const data = (
|
|
3979
|
+
const data = JSON.parse(response.body);
|
|
3916
3980
|
return data.embeddings.map((e) => e.values);
|
|
3917
3981
|
}, {
|
|
3918
3982
|
attempts: 5,
|
package/package.json
CHANGED