@bndynet/ragbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +765 -0
  2. package/README.zh-CN.md +774 -0
  3. package/dist/src/advanced.d.ts +13 -0
  4. package/dist/src/advanced.js +29 -0
  5. package/dist/src/cli.d.ts +2 -0
  6. package/dist/src/cli.js +1013 -0
  7. package/dist/src/config-file.d.ts +69 -0
  8. package/dist/src/config-file.js +246 -0
  9. package/dist/src/folder-index/config.d.ts +2 -0
  10. package/dist/src/folder-index/config.js +56 -0
  11. package/dist/src/folder-index/hash.d.ts +1 -0
  12. package/dist/src/folder-index/hash.js +14 -0
  13. package/dist/src/folder-index/indexer.d.ts +2 -0
  14. package/dist/src/folder-index/indexer.js +154 -0
  15. package/dist/src/folder-index/llm-client.d.ts +3 -0
  16. package/dist/src/folder-index/llm-client.js +45 -0
  17. package/dist/src/folder-index/manifest.d.ts +17 -0
  18. package/dist/src/folder-index/manifest.js +158 -0
  19. package/dist/src/folder-index/multi-query.d.ts +45 -0
  20. package/dist/src/folder-index/multi-query.js +109 -0
  21. package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
  22. package/dist/src/folder-index/pageindex-runner.js +218 -0
  23. package/dist/src/folder-index/path-utils.d.ts +5 -0
  24. package/dist/src/folder-index/path-utils.js +33 -0
  25. package/dist/src/folder-index/query.d.ts +19 -0
  26. package/dist/src/folder-index/query.js +597 -0
  27. package/dist/src/folder-index/queue.d.ts +1 -0
  28. package/dist/src/folder-index/queue.js +18 -0
  29. package/dist/src/folder-index/root-tree.d.ts +3 -0
  30. package/dist/src/folder-index/root-tree.js +82 -0
  31. package/dist/src/folder-index/scan.d.ts +14 -0
  32. package/dist/src/folder-index/scan.js +152 -0
  33. package/dist/src/folder-index/types.d.ts +368 -0
  34. package/dist/src/folder-index/types.js +2 -0
  35. package/dist/src/folder-index/watch.d.ts +17 -0
  36. package/dist/src/folder-index/watch.js +550 -0
  37. package/dist/src/index.d.ts +6 -0
  38. package/dist/src/index.js +45 -0
  39. package/dist/src/sdk.d.ts +101 -0
  40. package/dist/src/sdk.js +352 -0
  41. package/dist/src/serve.d.ts +64 -0
  42. package/dist/src/serve.js +466 -0
  43. package/dist/src/setup-pageindex.d.ts +30 -0
  44. package/dist/src/setup-pageindex.js +184 -0
  45. package/package.json +43 -0
@@ -0,0 +1,1013 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ var __importDefault = (this && this.__importDefault) || function (mod) {
4
+ return (mod && mod.__esModule) ? mod : { "default": mod };
5
+ };
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ const promises_1 = __importDefault(require("node:fs/promises"));
8
+ const node_path_1 = __importDefault(require("node:path"));
9
+ const commander_1 = require("commander");
10
+ const config_file_1 = require("./config-file");
11
+ const config_1 = require("./folder-index/config");
12
+ const indexer_1 = require("./folder-index/indexer");
13
+ const manifest_1 = require("./folder-index/manifest");
14
+ const multi_query_1 = require("./folder-index/multi-query");
15
+ const query_1 = require("./folder-index/query");
16
+ const watch_1 = require("./folder-index/watch");
17
+ const serve_1 = require("./serve");
18
+ const setup_pageindex_1 = require("./setup-pageindex");
19
+ const sdk_1 = require("./sdk");
20
/**
 * Parses the `--concurrency` option value.
 *
 * The whole value must be a decimal integer. A bare `Number.parseInt` would
 * silently truncate inputs such as "4x" or "1.5", so the text is validated
 * before it is converted.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed integer, always >= 1.
 * @throws {Error} When the value is not a positive integer.
 */
function parseConcurrency(value) {
  const text = String(value).trim();
  // Reject partially numeric input instead of letting parseInt stop at the first bad character.
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new Error("--concurrency must be a positive integer");
  }
  return parsed;
}
27
/**
 * Parses a CLI option value that must be an integer >= 0.
 *
 * The whole value must be a decimal integer. A bare `Number.parseInt` would
 * accept inputs such as "10ms" or "2.7" by truncating them, so the text is
 * validated before it is converted.
 *
 * @param {string} value - Raw option value from the command line.
 * @param {string} optionName - Option name used in the error message.
 * @returns {number} The parsed non-negative integer.
 * @throws {Error} When the value is not a non-negative integer.
 */
function parseNonNegativeInteger(value, optionName) {
  const text = String(value).trim();
  // Reject partially numeric input instead of letting parseInt stop at the first bad character.
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed < 0) {
    throw new Error(`${optionName} must be a non-negative integer`);
  }
  return parsed;
}
34
/**
 * Option parser for `--retry-attempts`; delegates to parseNonNegativeInteger.
 *
 * @param {string} value - Raw option value.
 * @returns {number} The parsed non-negative integer.
 */
function parseRetryAttempts(value) {
  const optionName = "--retry-attempts";
  return parseNonNegativeInteger(value, optionName);
}
37
/**
 * Option parser for `--retry-delay-ms`; delegates to parseNonNegativeInteger.
 *
 * @param {string} value - Raw option value.
 * @returns {number} The parsed non-negative integer.
 */
function parseRetryDelayMs(value) {
  const optionName = "--retry-delay-ms";
  return parseNonNegativeInteger(value, optionName);
}
40
/**
 * Option parser for `--debounce-ms`; delegates to parseNonNegativeInteger.
 *
 * @param {string} value - Raw option value.
 * @returns {number} The parsed non-negative integer.
 */
function parseDebounceMs(value) {
  const optionName = "--debounce-ms";
  return parseNonNegativeInteger(value, optionName);
}
43
/**
 * Parses the `--port` option value.
 *
 * The whole value must be a decimal integer in the range 0-65535. A bare
 * `Number.parseInt` would turn "80abc" into 80, so the text is validated
 * before it is converted.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed port number.
 * @throws {Error} When the value is not an integer between 0 and 65535.
 */
function parseServePort(value) {
  const text = String(value).trim();
  // Reject partially numeric input instead of letting parseInt stop at the first bad character.
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed < 0 || parsed > 65535) {
    throw new Error("--port must be an integer between 0 and 65535");
  }
  return parsed;
}
50
/**
 * Reports whether verbose logging is enabled through the environment.
 *
 * @returns {boolean} True when RAGBOX_VERBOSE or RAGBOX_E2E_VERBOSE equals "1".
 */
function isVerbose() {
  const { RAGBOX_VERBOSE, RAGBOX_E2E_VERBOSE } = process.env;
  return [RAGBOX_VERBOSE, RAGBOX_E2E_VERBOSE].includes("1");
}
53
/**
 * Writes a human-readable line to stderr for an index progress event.
 *
 * Nothing is printed unless isVerbose() returns true. Event types other than
 * "scan", "index-start", "index-done", "index-failed" and "write" are ignored.
 *
 * @param {object} event - Progress event; `event.type` selects the message.
 * @returns {void}
 */
function logProgress(event) {
  if (isVerbose()) {
    const { type } = event;
    if (type === "scan") {
      console.error(`[ragbox] scan root=${event.rootDir} output=${event.outputDir} total=${event.total} toIndex=${event.toIndex} unchanged=${event.unchanged} deleted=${event.deleted}`);
    } else if (type === "index-start") {
      console.error(`[ragbox] index start ${event.index}/${event.total} ${event.path}`);
    } else if (type === "index-done") {
      console.error(`[ragbox] index done ${event.index}/${event.total} ${event.path}`);
      // The summary line is only printed when the event carries one.
      if (event.summary) {
        console.error(`[ragbox] summary ${event.path}: ${event.summary}`);
      }
    } else if (type === "index-failed") {
      console.error(`[ragbox] index failed ${event.index}/${event.total} ${event.path}: ${event.error}`);
    } else if (type === "write") {
      console.error(`[ragbox] wrote manifest=${event.manifestPath}`);
      console.error(`[ragbox] wrote rootTree=${event.rootTreePath}`);
    }
  }
}
79
/**
 * Registers the LLM connection options (`--api-key`, `--base-url`, `--model`)
 * on a command by chaining its `option()` method.
 *
 * @param {object} command - Command object exposing a chainable `option(flags, description)`.
 * @returns {object} Whatever the last `option()` call returned.
 */
function addLlmOptions(command) {
  const llmFlags = [
    ["--api-key <key>", "OpenAI-compatible API key"],
    ["--base-url <url>", "OpenAI-compatible API base URL"],
    ["--model <model>", "LLM model"]
  ];
  return llmFlags.reduce((chained, [flags, description]) => chained.option(flags, description), command);
}
85
/**
 * Registers the `--source <name>` option on a command.
 *
 * @param {object} command - Command object exposing `option(flags, description)`.
 * @returns {object} The value returned by `command.option()`.
 */
function addProjectOptions(command) {
  const flags = "--source <name>";
  const description = "ragbox config source; query accepts comma-separated names";
  return command.option(flags, description);
}
88
/**
 * Walks from a command up through its `parent` chain and returns the first
 * truthy `config` option found.
 *
 * @param {object} command - Command exposing `opts()` and `parent`.
 * @returns {{config?: string}} `{ config }` when found, otherwise an empty object.
 */
function getGlobalOptions(command) {
  for (let node = command; node; node = node.parent) {
    const { config } = node.opts();
    if (config) {
      return { config };
    }
  }
  return {};
}
99
/**
 * Prints a value to stdout as JSON indented with two spaces.
 *
 * @param {*} value - Value to serialize.
 * @returns {void}
 */
function writeJson(value) {
  const pretty = JSON.stringify(value, null, 2);
  console.log(pretty);
}
102
/**
 * Prints a value to stdout as compact, single-line JSON.
 *
 * @param {*} value - Value to serialize.
 * @returns {void}
 */
function writeJsonLine(value) {
  const compact = JSON.stringify(value);
  console.log(compact);
}
105
/**
 * Prefixes every line of a string; LF and CRLF line breaks are both
 * normalised to LF in the result.
 *
 * @param {string} value - Text that may span several lines.
 * @param {string} prefix - Text prepended to each line.
 * @returns {string} The prefixed text.
 */
function indentMultiline(value, prefix) {
  const lines = value.split(/\r?\n/);
  // Joining with "\n" + prefix covers every line but the first, which gets the prefix up front.
  return `${prefix}${lines.join(`\n${prefix}`)}`;
}
111
/**
 * Extracts the document counters from an index result.
 *
 * @param {object} result - Index result; `total` is taken from `result.manifest.documents.length`.
 * @returns {object} Counters keyed total, ready, failed, added, modified, retryFailed, unchanged, deleted.
 */
function indexCounts(result) {
  const { manifest, ready, failed, added, modified, retryFailed, unchanged, deleted } = result;
  const total = manifest.documents.length;
  return { total, ready, failed, added, modified, retryFailed, unchanged, deleted };
}
123
/**
 * Lists the manifest documents whose status is "failed".
 *
 * @param {object} result - Index result containing `manifest.documents`.
 * @returns {Array<object>} One `{ path, absolutePath, indexPath, error }` entry per failed document.
 */
function indexFailures(result) {
  const failures = [];
  for (const document of result.manifest.documents) {
    if (document.status !== "failed") {
      continue;
    }
    const { path, absolutePath, indexPath, error } = document;
    failures.push({ path, absolutePath, indexPath, error });
  }
  return failures;
}
133
/**
 * Builds the JSON payload for the `index` command from an index result.
 *
 * @param {object} result - Index result (manifest, output paths, counters).
 * @returns {object} Payload with version 1, command "index", paths, counts and failures.
 */
function indexJsonOutput(result) {
  const { manifest, outputDir, manifestPath, rootTreePath } = result;
  return {
    version: 1,
    command: "index",
    rootDir: manifest.rootDir,
    outputDir,
    manifestPath,
    rootTreePath,
    generatedAt: manifest.generatedAt,
    counts: indexCounts(result),
    failures: indexFailures(result)
  };
}
146
/**
 * Prints a plain-text summary of an index run: the counters go to stdout and
 * any failed documents (with their indented error text) go to stderr.
 *
 * @param {string} folder - Folder label shown in the first line.
 * @param {object} result - Index result.
 * @returns {void}
 */
function printIndexResult(folder, result) {
  console.log(`Indexed ${folder}`);
  // Order matters: this is the order the counters are printed in.
  const counterNames = ["ready", "failed", "added", "modified", "retryFailed", "deleted", "unchanged"];
  for (const name of counterNames) {
    console.log(`${name}=${result[name]}`);
  }
  const failures = indexFailures(result);
  if (failures.length > 0) {
    console.error("Failed documents:");
    for (const { path, error } of failures) {
      console.error(`- ${path}`);
      if (error) {
        console.error(indentMultiline(error, " "));
      }
    }
  }
}
167
/**
 * Prints the outcome of `setup pageindex` as text lines on stdout. The
 * python, config and gitignore lines are printed only when the matching path
 * is present on the result.
 *
 * @param {object} result - Setup result with pageIndexDir, cliPath and optional pythonPath, configPath, gitignorePath, actions.
 * @returns {void}
 */
function printSetupPageIndexResult(result) {
  const { pageIndexDir, cliPath, pythonPath, configPath, gitignorePath } = result;
  console.log(`PageIndex ready: ${pageIndexDir}`);
  console.log(`cli=${cliPath}`);
  if (pythonPath) {
    console.log(`python=${pythonPath}`);
  }
  if (configPath) {
    console.log(`config=${configPath}`);
  }
  if (gitignorePath) {
    const verb = result.actions.updatedGitignore ? "updated" : "checked";
    console.log(`${verb} gitignore=${gitignorePath}`);
  }
}
180
/**
 * Emits an index progress event as one JSON line, wrapped in an envelope with
 * version, ISO timestamp and the type "index-progress".
 *
 * @param {object} event - Progress event to embed.
 * @returns {void}
 */
function logProgressAsJsonLine(event) {
  const timestamp = new Date().toISOString();
  const envelope = { version: 1, timestamp, type: "index-progress", event };
  writeJsonLine(envelope);
}
188
/**
 * Resolves the ragbox configuration for a command, using the global
 * `--config` value and the command's `--source` option.
 *
 * @param {object} command - Command used to look up the global options.
 * @param {object} commandOptions - Parsed options of the command; `source` is read.
 * @returns {Promise<{rootDir: *, options: *}>} The resolved root directory and PageIndex options.
 */
async function loadCommandConfig(command, commandOptions) {
  const { config: configPath } = getGlobalOptions(command);
  const { rootDir, pageIndexOptions } = await (0, config_file_1.resolveRagboxConfig)({
    configPath,
    source: commandOptions.source
  });
  return { rootDir, options: pageIndexOptions };
}
199
/**
 * Shallow-merges objects from left to right, ignoring properties whose value
 * is `undefined` so that an unset override never erases an earlier value.
 *
 * `null` and `undefined` sources are skipped; `Object.entries` would throw a
 * TypeError on them.
 *
 * @param {...(object|null|undefined)} values - Source objects, lowest priority first.
 * @returns {object} A new object holding the merged own enumerable properties.
 */
function mergeDefined(...values) {
  const merged = {};
  for (const value of values) {
    if (value === null || value === undefined) {
      continue;
    }
    for (const [key, nestedValue] of Object.entries(value)) {
      if (nestedValue !== undefined) {
        merged[key] = nestedValue;
      }
    }
  }
  return merged;
}
210
/**
 * Combines config-file options with CLI flags for the indexing and watch
 * commands. CLI flags win whenever they are defined.
 *
 * @param {object} configOptions - Options resolved from the config file.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {Function} [progress=logProgress] - Progress callback stored under `progress`.
 * @returns {object} The merged options object.
 */
function buildOptions(configOptions, commandOptions, progress = logProgress) {
  const baseOptions = { ...configOptions, progress };
  // NOTE(review): watchStaging is always a defined boolean here, so it overrides any
  // watchStaging value coming from configOptions — confirm that is intended.
  const stagingEnabled = commandOptions.staging ?? Boolean(commandOptions.stagingOutputDir);
  const cliOverrides = {
    apiKey: commandOptions.apiKey,
    baseUrl: commandOptions.baseUrl,
    concurrency: commandOptions.concurrency,
    cliPath: commandOptions.pageindexCli,
    model: commandOptions.model,
    outputDir: commandOptions.outputDir,
    pythonPath: commandOptions.pageindexPython,
    watchDebounceMs: commandOptions.debounceMs,
    watchHealthFile: commandOptions.healthFile,
    watchLockFile: commandOptions.lockFile,
    watchRetryAttempts: commandOptions.retryAttempts,
    watchRetryDelayMs: commandOptions.retryDelayMs,
    watchStaging: stagingEnabled,
    watchStagingOutputDir: commandOptions.stagingOutputDir,
    watchWebhookUrl: commandOptions.webhook
  };
  return mergeDefined(baseOptions, cliOverrides);
}
232
/**
 * Combines config-file options with the query-related CLI flags
 * (apiKey, baseUrl, model, trace). Defined CLI flags win.
 *
 * @param {object} configOptions - Options resolved from the config file.
 * @param {object} commandOptions - Parsed CLI options.
 * @returns {object} The merged options object.
 */
function buildQueryOptions(configOptions, commandOptions) {
  const { apiKey, baseUrl, model, trace } = commandOptions;
  const baseOptions = { ...configOptions };
  return mergeDefined(baseOptions, { apiKey, baseUrl, model, trace });
}
242
/**
 * Splits a comma-separated `--source` value into trimmed, non-empty names.
 *
 * @param {string|null|undefined} source - Raw option value.
 * @returns {string[]} Source names in input order; empty when nothing was given.
 */
function parseSourceNames(source) {
  const names = [];
  for (const piece of (source ?? "").split(",")) {
    const name = piece.trim();
    if (name) {
      names.push(name);
    }
  }
  return names;
}
248
/**
 * Returns the folder unchanged, or throws when it is falsy.
 *
 * @param {string|undefined} folder - Folder argument or configured rootDir.
 * @param {string} commandName - Command name shown in the error message.
 * @returns {string} The folder.
 * @throws {Error} When no folder is available.
 */
function requireFolder(folder, commandName) {
  if (folder) {
    return folder;
  }
  throw new Error(`Missing folder. Pass a folder argument or configure a source rootDir before running ragbox ${commandName}.`);
}
254
/**
 * Returns the query target unchanged, or throws when it is falsy.
 *
 * @param {string|undefined} target - Target argument or configured outputDir/rootDir.
 * @returns {string} The target.
 * @throws {Error} When no target is available.
 */
function requireTarget(target) {
  if (target) {
    return target;
  }
  throw new Error("Missing query target. Pass a target argument or configure a source with outputDir/rootDir.");
}
260
/**
 * Returns the question unchanged, or throws when it is falsy.
 *
 * @param {string|undefined} question - Question text.
 * @returns {string} The question.
 * @throws {Error} When the question is missing.
 */
function requireQuestion(question) {
  if (question) {
    return question;
  }
  throw new Error("Missing question.");
}
266
/**
 * Checks whether a path is accessible through `fs.promises.access`.
 *
 * @param {string} value - Path to probe.
 * @returns {Promise<boolean>} True when the access call succeeds; false on any error.
 */
async function pathExists(value) {
  let accessible = true;
  try {
    await promises_1.default.access(value);
  }
  catch {
    // Any failure is reported as "does not exist".
    accessible = false;
  }
  return accessible;
}
275
/**
 * Computes the absolute index output directory for a `start` target: the
 * explicit `options.outputDir` when set, otherwise PAGEINDEX_DIR under rootDir.
 *
 * @param {string} rootDir - Source root directory.
 * @param {object} options - Options object; `outputDir` is read.
 * @returns {string} Absolute output directory path.
 */
function startTargetOutputDir(rootDir, options) {
  const { default: pathModule } = node_path_1;
  const outputDir = options.outputDir ?? pathModule.join(rootDir, manifest_1.PAGEINDEX_DIR);
  return pathModule.resolve(outputDir);
}
278
/**
 * Resolves the query targets for a multi-source query.
 *
 * Source names come from `--source` (comma-separated) or, with
 * `--all-sources`, from every source listed in the config file. Each source
 * is resolved to its outputDir (preferred) or rootDir.
 *
 * @param {object} command - Command used to look up the global `--config` option.
 * @param {object} commandOptions - Parsed CLI options.
 * @returns {Promise<{answerOptions: object, targets: Array<object>}>} Targets plus the
 *   options of the first target (or CLI-only options when there are no targets).
 * @throws {Error} When `--source` and `--all-sources` are combined, when no source is
 *   configured for `--all-sources`, or when a source has neither outputDir nor rootDir.
 */
async function loadConfiguredQueryTargets(command, commandOptions) {
  const globalOptions = getGlobalOptions(command);
  if (commandOptions.allSources && commandOptions.source) {
    throw new Error("Use either --source or --all-sources, not both.");
  }
  let names = parseSourceNames(commandOptions.source);
  if (commandOptions.allSources) {
    const { config } = await (0, config_file_1.readRagboxConfig)(globalOptions.config);
    names = (0, config_file_1.listRagboxConfigSourceNames)(config);
    if (names.length === 0) {
      throw new Error("No configured sources found. Add docs or sources to ragbox.config.json.");
    }
  }
  const targets = [];
  for (const name of names) {
    const { pageIndexOptions, rootDir } = await (0, config_file_1.resolveRagboxConfig)({
      configPath: globalOptions.config,
      source: name
    });
    const target = pageIndexOptions.outputDir ?? rootDir;
    if (!target) {
      throw new Error(`Source does not define outputDir or rootDir: ${name}`);
    }
    targets.push({
      name,
      target,
      options: buildQueryOptions(pageIndexOptions, commandOptions)
    });
  }
  // The answer step uses the first source's options when there is one.
  const answerOptions = targets.length > 0 ? targets[0].options : buildQueryOptions({}, commandOptions);
  return { answerOptions, targets };
}
315
/**
 * Decides whether a single-argument query should fan out to every configured
 * source: exactly one positional argument was given, no `--source` names were
 * passed, the argument is not an existing path, and the config lists more
 * than one source.
 *
 * @param {object} command - Command used to look up the global `--config` option.
 * @param {string|undefined} target - First positional argument.
 * @param {string|undefined} question - Second positional argument.
 * @param {string[]} sourceNames - Names parsed from `--source`.
 * @returns {Promise<boolean>} True when all sources should be queried.
 */
async function shouldQueryAllSourcesByDefault(command, target, question, sourceNames) {
  const hasSingleArgument = Boolean(target) && !question;
  if (!hasSingleArgument || sourceNames.length > 0) {
    return false;
  }
  const targetIsPath = await pathExists(target);
  if (targetIsPath) {
    return false;
  }
  const { config: configOption } = getGlobalOptions(command);
  const { config } = await (0, config_file_1.readRagboxConfig)(configOption);
  const configuredNames = (0, config_file_1.listRagboxConfigSourceNames)(config);
  return configuredNames.length > 1;
}
326
/**
 * Resolves the index targets inspected by the diagnostic commands.
 *
 * Resolution order:
 *  1. an explicit `target` argument (CLI options only, no config lookup);
 *  2. named sources from `--source`, or all configured sources when
 *     `--all-sources` is set, or — if `allSourcesByDefault` is true and no
 *     names were given — all configured sources when more than one exists;
 *  3. the single configuration returned by loadCommandConfig.
 *
 * @param {object} command - Command used to look up the global `--config` option.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {string|undefined} target - Explicit target argument.
 * @param {boolean} allSourcesByDefault - Whether to fan out when several sources are configured.
 * @returns {Promise<Array<object>>} Target descriptors (`target`, `options`, optionally `source`).
 * @throws {Error} When `--source` and `--all-sources` are combined, or when no target can be resolved.
 */
async function loadDiagnosticTargets(command, commandOptions, target, allSourcesByDefault) {
  if (target) {
    const explicit = { target, options: buildQueryOptions({}, commandOptions) };
    return [explicit];
  }
  const globalOptions = getGlobalOptions(command);
  if (commandOptions.allSources && commandOptions.source) {
    throw new Error("Use either --source or --all-sources, not both.");
  }
  let names = parseSourceNames(commandOptions.source);
  const mayFanOut = commandOptions.allSources || (allSourcesByDefault && names.length === 0);
  if (mayFanOut) {
    const { config } = await (0, config_file_1.readRagboxConfig)(globalOptions.config);
    const configuredNames = (0, config_file_1.listRagboxConfigSourceNames)(config);
    if (commandOptions.allSources || configuredNames.length > 1) {
      names = configuredNames;
    }
  }
  if (names.length > 0) {
    const sourceTargets = [];
    for (const name of names) {
      const { pageIndexOptions, rootDir } = await (0, config_file_1.resolveRagboxConfig)({
        configPath: globalOptions.config,
        source: name
      });
      const sourceTarget = pageIndexOptions.outputDir ?? rootDir;
      if (!sourceTarget) {
        throw new Error(`Source does not define outputDir or rootDir: ${name}`);
      }
      sourceTargets.push({
        source: name,
        target: sourceTarget,
        options: buildQueryOptions(pageIndexOptions, commandOptions)
      });
    }
    return sourceTargets;
  }
  // No explicit target and no named sources: use the default configuration.
  const loaded = await loadCommandConfig(command, commandOptions);
  const fallbackTarget = loaded.options.outputDir ?? loaded.rootDir;
  return [
    {
      target: requireTarget(fallbackTarget),
      options: buildQueryOptions(loaded.options, commandOptions)
    }
  ];
}
375
/**
 * Resolves the folders that `ragbox start` should watch and serve.
 *
 * Resolution order:
 *  1. an explicit `folder` argument (cannot be combined with `--source` or `--all-sources`);
 *  2. named sources from `--source`, or all configured sources when
 *     `--all-sources` is set or when no name was given and the config lists
 *     more than one source;
 *  3. the single configuration returned by loadCommandConfig.
 *
 * Progress events are routed to JSON lines when `--jsonl` is set, otherwise
 * to the verbose stderr logger.
 *
 * @param {object} command - Command used to look up the global `--config` option.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {string|undefined} folder - Explicit folder argument.
 * @returns {Promise<Array<object>>} Descriptors with `rootDir`, `target`, `options` and optionally `source`.
 * @throws {Error} On conflicting flags or when no root folder can be resolved.
 */
async function loadStartTargets(command, commandOptions, folder) {
  const pickProgress = () => (commandOptions.jsonl ? logProgressAsJsonLine : logProgress);
  if (folder) {
    if (commandOptions.allSources || commandOptions.source) {
      throw new Error("A folder argument cannot be combined with --source or --all-sources.");
    }
    const loaded = await loadCommandConfig(command, commandOptions);
    const rootDir = node_path_1.default.resolve(folder);
    const options = buildOptions(loaded.options, commandOptions, pickProgress());
    return [{ rootDir, target: startTargetOutputDir(rootDir, options), options }];
  }
  const globalOptions = getGlobalOptions(command);
  if (commandOptions.allSources && commandOptions.source) {
    throw new Error("Use either --source or --all-sources, not both.");
  }
  let names = parseSourceNames(commandOptions.source);
  if (commandOptions.allSources || names.length === 0) {
    const { config } = await (0, config_file_1.readRagboxConfig)(globalOptions.config);
    const configuredNames = (0, config_file_1.listRagboxConfigSourceNames)(config);
    if (commandOptions.allSources || configuredNames.length > 1) {
      names = configuredNames;
    }
  }
  if (names.length > 0) {
    // A single --output-dir cannot be shared by several sources.
    if (names.length > 1 && commandOptions.outputDir) {
      throw new Error("--output-dir cannot be used when starting multiple sources.");
    }
    const sourceTargets = [];
    for (const name of names) {
      const resolved = await (0, config_file_1.resolveRagboxConfig)({
        configPath: globalOptions.config,
        source: name
      });
      const rootDir = requireFolder(resolved.rootDir, "start");
      const options = buildOptions(resolved.pageIndexOptions, commandOptions, pickProgress());
      sourceTargets.push({
        source: name,
        rootDir,
        target: startTargetOutputDir(rootDir, options),
        options
      });
    }
    return sourceTargets;
  }
  const loaded = await loadCommandConfig(command, commandOptions);
  const rootDir = requireFolder(loaded.rootDir, "start");
  const options = buildOptions(loaded.options, commandOptions, pickProgress());
  return [
    {
      source: commandOptions.source,
      rootDir,
      target: startTargetOutputDir(rootDir, options),
      options
    }
  ];
}
436
/**
 * Validates each target's index (one after another) and assembles the
 * `status` command payload.
 *
 * @param {Array<object>} targets - Target descriptors with `target` and optional `source`.
 * @returns {Promise<object>} Payload with version 1, command "status", overall `ok`
 *   (true when every target is ok) and the per-target results.
 */
async function buildStatusOutput(targets) {
  const results = [];
  for (const { source, target } of targets) {
    const { ok, inspect, errors, warnings } = await (0, sdk_1.validateIndex)(target);
    results.push({ source, target, ok, inspect, errors, warnings });
  }
  const allOk = results.every((entry) => entry.ok);
  return {
    version: 1,
    command: "status",
    ok: allOk,
    targets: results
  };
}
456
/**
 * Prints a `status` payload as text on stdout: one headline per target,
 * followed by its counts (when inspect data is present), errors and warnings.
 *
 * @param {object} status - Payload produced for the `status` command; `status.targets` is read.
 * @returns {void}
 */
function printStatusOutput(status) {
  for (const entry of status.targets) {
    const state = entry.ok ? "ok" : "error";
    const label = entry.source ? `${entry.source} ${entry.target}` : entry.target;
    console.log(`${state} ${label}`);
    const { inspect } = entry;
    if (inspect) {
      const { total, ready, failed } = inspect.counts;
      console.log(` documents=${total} ready=${ready} failed=${failed}`);
      console.log(` output=${inspect.outputDir}`);
      console.log(` generatedAt=${inspect.generatedAt}`);
    }
    for (const { code, message } of entry.errors) {
      console.log(` error ${code}: ${message}`);
    }
    for (const { code, message } of entry.warnings) {
      console.log(` warning ${code}: ${message}`);
    }
  }
}
474
/**
 * Builds a display label for a start target: "<source> <rootDir>" when the
 * target has a source name, otherwise just the root directory.
 *
 * @param {object} target - Descriptor with `rootDir` and optional `source`.
 * @returns {string} The label.
 */
function startTargetLabel(target) {
  const { source, rootDir } = target;
  if (source) {
    return `${source} ${rootDir}`;
  }
  return rootDir;
}
477
/**
 * Emits one JSON line for the `start` command: a version/timestamp/type
 * envelope with the extra fields spread in last (so they may override it).
 *
 * @param {string} type - Event type.
 * @param {object} [fields={}] - Additional properties for the line.
 * @returns {void}
 */
function writeStartJsonLine(type, fields = {}) {
  const timestamp = new Date().toISOString();
  const line = { version: 1, timestamp, type, ...fields };
  writeJsonLine(line);
}
485
/**
 * Prints a watch event as a text line, prefixed with "[source] " when a
 * source name is given. Failure and retry events go to stderr, everything
 * else to stdout; unknown event types print nothing.
 *
 * @param {object} event - Watch event; `event.type` selects the message.
 * @param {string|undefined} source - Optional source name for the prefix.
 * @returns {void}
 */
function printStartWatchEvent(event, source) {
  const prefix = source ? `[${source}] ` : "";
  const { type } = event;
  if (type === "watch-start") {
    console.log(`${prefix}watching ${event.rootDir}`);
  } else if (type === "watch-file-event") {
    console.log(`${prefix}${event.eventName}: ${event.path}`);
  } else if (type === "watch-index-start") {
    console.log(`${prefix}index ${event.reason} attempt=${event.attempt}/${event.maxAttempts}`);
  } else if (type === "watch-index-done") {
    const { ready, failed, added, modified, deleted, unchanged } = event.result;
    console.log(`${prefix}indexed ready=${ready} failed=${failed} added=${added} modified=${modified} deleted=${deleted} unchanged=${unchanged}`);
  } else if (type === "watch-index-failed") {
    console.error(`${prefix}index failed: ${event.error}`);
  } else if (type === "watch-index-retry") {
    console.error(`${prefix}index retry in ${event.delayMs}ms: ${event.error}`);
  } else if (type === "watch-output-promoted") {
    console.log(`${prefix}promoted staging output ${event.stagingOutputDir}`);
  } else if (type === "watch-stop") {
    console.log(`${prefix}watch stopped`);
  }
}
514
/**
 * Closes every watch handle and, when present, the serve handle. All closes
 * are started together and awaited with `Promise.allSettled`, so a rejected
 * close does not stop the others and is not rethrown.
 *
 * @param {Array<object>} watchHandles - Handles exposing `close()`.
 * @param {object|undefined} serveHandle - Optional handle exposing `close()`.
 * @returns {Promise<void>}
 */
async function closeStartHandles(watchHandles, serveHandle) {
  const pending = watchHandles.map((handle) => handle.close());
  if (serveHandle) {
    pending.push(serveHandle.close());
  }
  await Promise.allSettled(pending);
}
520
/**
 * Tells whether a command string looks like a filesystem path rather than a
 * bare executable name: it is absolute, starts with ".", or contains a
 * forward or backward slash.
 *
 * @param {string} value - Command string to classify.
 * @returns {boolean} True when the value looks like a path.
 */
function isPathLikeCommand(value) {
  if (node_path_1.default.isAbsolute(value)) {
    return true;
  }
  if (value.startsWith(".")) {
    return true;
  }
  return ["/", "\\"].some((separator) => value.includes(separator));
}
523
/**
 * Checks whether a path-like command exists on disk.
 *
 * @param {string|undefined} value - Command string.
 * @returns {Promise<boolean|undefined>} `undefined` when the value is empty or is not
 *   path-like (so existence is not checked); otherwise the result of pathExists.
 */
async function commandPathExists(value) {
  const checkable = Boolean(value) && isPathLikeCommand(value);
  if (!checkable) {
    return undefined;
  }
  const exists = await pathExists(value);
  return exists;
}
529
/**
 * Runs the `doctor` diagnostics and assembles the result payload.
 *
 * Checks are appended in this order: config, target (only when target
 * resolution throws), pageindex-cli, llm-model, llm-base-url, llm-api-key,
 * index-status. The runtime settings are loaded from the first resolved
 * target's options, or from CLI-only options when no target was resolved.
 *
 * @param {object} command - Command used to look up the global `--config` option.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {string|undefined} target - Explicit target argument.
 * @returns {Promise<object>} Payload with version 1, command "doctor", overall `ok`,
 *   the list of checks and the nested status payload.
 */
async function buildDoctorOutput(command, commandOptions, target) {
  const globalOptions = getGlobalOptions(command);
  const checks = [];
  const { configPath } = await (0, config_file_1.readRagboxConfig)(globalOptions.config);
  const configMessage = configPath
    ? `Loaded config: ${configPath}`
    : "No ragbox config found; using CLI flags, environment, and defaults.";
  checks.push({ name: "config", ok: true, message: configMessage, path: configPath });

  let targets = [];
  try {
    targets = await loadDiagnosticTargets(command, commandOptions, target, true);
  }
  catch (error) {
    // A failed target resolution becomes a failed check; the remaining checks still run.
    const message = error instanceof Error ? error.message : String(error);
    checks.push({ name: "target", ok: false, message });
  }

  const options = targets[0]?.options ?? buildQueryOptions({}, commandOptions);
  const runtime = (0, config_1.loadPageIndexConfig)(options);
  // cliExists is undefined when the CLI value is not path-like, so only an explicit false fails.
  const cliExists = await commandPathExists(runtime.cliPath);
  let cliMessage;
  if (!runtime.cliPath) {
    cliMessage = "PAGEINDEX_CLI or pageIndex.cli is not configured.";
  } else if (cliExists === false) {
    cliMessage = `PageIndex CLI does not exist: ${runtime.cliPath}`;
  } else {
    cliMessage = `PageIndex CLI configured: ${runtime.cliPath}`;
  }
  checks.push({
    name: "pageindex-cli",
    ok: Boolean(runtime.cliPath) && cliExists !== false,
    message: cliMessage,
    path: runtime.cliPath
  });
  checks.push({
    name: "llm-model",
    ok: Boolean(runtime.model),
    message: `LLM model: ${runtime.model}`
  });
  checks.push({
    name: "llm-base-url",
    ok: Boolean(runtime.baseUrl),
    message: `LLM base URL: ${runtime.baseUrl}`
  });
  checks.push({
    name: "llm-api-key",
    ok: Boolean(runtime.apiKey),
    message: runtime.apiKey ? "LLM API key is configured." : "OPENAI_API_KEY or llm.apiKey is not configured."
  });

  const status = await buildStatusOutput(targets);
  const checkedCount = status.targets.length;
  checks.push({
    name: "index-status",
    ok: status.ok,
    message: checkedCount > 0 ? `Checked ${status.targets.length} index target(s).` : "No index target was checked."
  });

  return {
    version: 1,
    command: "doctor",
    ok: checks.every((check) => check.ok),
    checks,
    status
  };
}
593
/**
 * Prints a `doctor` payload as text: one line per check, followed by the
 * nested status report.
 *
 * @param {object} doctor - Payload with `checks` and `status`.
 * @returns {void}
 */
function printDoctorOutput(doctor) {
  for (const { ok, name, message } of doctor.checks) {
    const state = ok ? "ok" : "error";
    console.log(`${state} ${name}: ${message}`);
  }
  printStatusOutput(doctor.status);
}
599
/**
 * Prints an inspect result as text on stdout: the index headline, its
 * directories and counters, then one "- status path" line per document.
 *
 * @param {object} result - Inspect result with target, rootDir, outputDir, generatedAt, counts, documents.
 * @param {string|undefined} source - Optional source name shown before the target.
 * @returns {void}
 */
function printInspectResult(result, source) {
  const label = source ? `${source} ${result.target}` : result.target;
  const headerLines = [
    `Index ${label}`,
    `rootDir=${result.rootDir}`,
    `outputDir=${result.outputDir}`,
    `generatedAt=${result.generatedAt}`,
    `documents=${result.counts.total}`,
    `ready=${result.counts.ready}`,
    `failed=${result.counts.failed}`
  ];
  for (const line of headerLines) {
    console.log(line);
  }
  for (const { status, path } of result.documents) {
    console.log(`- ${status} ${path}`);
  }
}
612
/**
 * Implements the `query` command.
 *
 * Multi-source mode is used when `--all-sources` is set, when `--source`
 * names more than one source, or when shouldQueryAllSourcesByDefault decides
 * so; in that mode the first positional argument is the question and a second
 * positional argument is rejected. Otherwise a single index is queried: with
 * one positional argument and a configured target, that argument is treated
 * as the question when a source was named or when it is not an existing path.
 *
 * The result is printed as JSON with `--json` or `--trace`, otherwise only
 * the answer text is printed.
 *
 * @param {string|undefined} target - First positional argument (target or question).
 * @param {string|undefined} question - Second positional argument.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {object} command - Command used for config lookup.
 * @returns {Promise<void>}
 * @throws {Error} When the target or question cannot be determined.
 */
async function runQueryAction(target, question, commandOptions, command) {
  const printResult = (result) => {
    if (commandOptions.json || commandOptions.trace) {
      writeJson(result);
      return;
    }
    console.log(result.answer);
  };

  const sourceNames = parseSourceNames(commandOptions.source);
  const implicitAllSources = await shouldQueryAllSourcesByDefault(command, target, question, sourceNames);
  const multiSource = commandOptions.allSources || sourceNames.length > 1 || implicitAllSources;
  if (multiSource) {
    if (question) {
      throw new Error("Multi-source query uses configured sources; pass only the question argument.");
    }
    const multiSourceOptions = implicitAllSources ? { ...commandOptions, allSources: true } : commandOptions;
    const loadedSources = await loadConfiguredQueryTargets(command, multiSourceOptions);
    // In multi-source mode the single positional argument carries the question.
    const multiResult = await (0, multi_query_1.queryMultipleIndexes)(loadedSources.targets, requireQuestion(target), loadedSources.answerOptions);
    printResult(multiResult);
    return;
  }

  const singleSourceOptions = sourceNames.length === 1 ? { ...commandOptions, source: sourceNames[0] } : commandOptions;
  const loaded = await loadCommandConfig(command, singleSourceOptions);
  const configuredTarget = loaded.options.outputDir ?? loaded.rootDir;
  let queryTarget = target;
  let queryQuestion = question;
  if (!queryQuestion && queryTarget && configuredTarget) {
    const singleArgIsQuestion = singleSourceOptions.source || !(await pathExists(queryTarget));
    if (singleArgIsQuestion) {
      queryQuestion = queryTarget;
      queryTarget = undefined;
    }
  }
  queryTarget ??= configuredTarget;
  const singleResult = await (0, query_1.queryFolder)(requireTarget(queryTarget), requireQuestion(queryQuestion), buildQueryOptions(loaded.options, singleSourceOptions));
  printResult(singleResult);
}
649
/**
 * Implements the `start` command: starts a watcher per target, waits for
 * every watcher's `ready` result, starts the serve endpoint, and then blocks
 * until SIGINT, SIGTERM or the server's "close" event.
 *
 * Each "watch-index-done" event triggers a serve reload. Reloads never run
 * concurrently: a request that arrives while one is running is remembered and
 * replayed once the current reload finishes.
 *
 * Output is JSON lines with `--jsonl`, otherwise plain text.
 *
 * @param {string|undefined} folder - Optional folder argument.
 * @param {object} commandOptions - Parsed CLI options.
 * @param {object} command - Command used for config lookup.
 * @returns {Promise<void>} Resolves after all handles have been closed.
 * @throws {Error} When target resolution, the initial index or serve startup fails;
 *   handles opened so far are closed before the error is rethrown.
 */
async function runStartAction(folder, commandOptions, command) {
  const globalOptions = getGlobalOptions(command);
  const targets = await loadStartTargets(command, commandOptions, folder);
  const watchHandles = [];
  let serveHandle;
  let reloadInFlight = false;
  let reloadQueued = false;

  async function reloadServe() {
    if (!serveHandle) {
      return;
    }
    if (reloadInFlight) {
      // Coalesce: remember that another reload is wanted and let the running loop pick it up.
      reloadQueued = true;
      return;
    }
    reloadInFlight = true;
    do {
      reloadQueued = false;
      try {
        const result = await serveHandle.reload();
        if (commandOptions.jsonl) {
          writeStartJsonLine("start-serve-reload", {
            indexes: result.indexes.map((index) => ({
              source: index.source,
              target: index.target,
              ok: index.ok
            }))
          });
        }
        else {
          console.log(`Reloaded serve index snapshot (${result.indexes.filter((index) => index.ok).length}/${result.indexes.length} ready)`);
        }
      }
      catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        if (commandOptions.jsonl) {
          writeStartJsonLine("start-serve-reload-failed", { error: message });
        }
        else {
          console.error(`Serve reload failed: ${message}`);
        }
      }
    } while (reloadQueued);
    reloadInFlight = false;
  }

  try {
    if (commandOptions.jsonl) {
      writeStartJsonLine("start", {
        sources: targets.map((target) => target.source).filter(Boolean),
        targets: targets.map((target) => target.target)
      });
    }
    else {
      const plural = targets.length === 1 ? "" : "s";
      console.log(`Starting ragbox for ${targets.length} source${plural}`);
    }

    // Watchers are started one at a time, in target order.
    for (const target of targets) {
      const watchProgress = (event) => {
        if (commandOptions.jsonl) {
          writeJsonLine(event);
        }
        else {
          printStartWatchEvent(event, target.source);
        }
        if (event.type === "watch-index-done") {
          // Fire-and-forget: reloadServe handles its own errors.
          void reloadServe();
        }
      };
      const handle = await (0, watch_1.startWatchFolder)(target.rootDir, {
        ...target.options,
        watchProgress
      });
      watchHandles.push(handle);
    }

    const readyResults = await Promise.all(watchHandles.map((handle) => handle.ready));
    const failedReady = readyResults.find((ready) => !ready.ok);
    if (failedReady) {
      throw new Error(`Initial index failed: ${failedReady.error}`);
    }

    const sourceNames = targets.map((target) => target.source).filter((source) => Boolean(source));
    const singleTarget = targets.length === 1 ? targets[0] : undefined;
    serveHandle = await (0, serve_1.startServe)({
      allSources: targets.length > 1 && sourceNames.length === 0,
      apiKey: commandOptions.apiKey,
      authToken: commandOptions.authToken,
      baseUrl: commandOptions.baseUrl,
      configPath: globalOptions.config,
      host: commandOptions.host,
      model: commandOptions.model,
      port: commandOptions.port,
      source: targets.length > 1 ? sourceNames : undefined,
      target: singleTarget ? singleTarget.target : undefined
    });
    if (commandOptions.jsonl) {
      const { url, host, port } = serveHandle;
      writeStartJsonLine("start-serve", { url, host, port });
    }
    else {
      console.log(`Serving ragbox at ${serveHandle.url}`);
    }

    // Block until a shutdown trigger fires; the handles are closed exactly once.
    await new Promise((resolve) => {
      let closing = false;
      const stop = () => {
        if (closing) {
          return;
        }
        closing = true;
        void closeStartHandles(watchHandles, serveHandle).finally(resolve);
      };
      process.once("SIGINT", stop);
      process.once("SIGTERM", stop);
      serveHandle?.server.once("close", stop);
    });
  }
  catch (error) {
    await closeStartHandles(watchHandles, serveHandle);
    throw error;
  }
}
769
/**
 * Builds the `ragbox` command-line interface and runs it against `process.argv`.
 *
 * Registers the subcommands `init`, `setup pageindex`, `index`, `inspect`,
 * `status`, `doctor`, `query`, `trace query`, `start`, `serve` and `watch`,
 * then awaits commander's `parseAsync`, so the returned promise settles only
 * after the selected action has finished.
 *
 * @returns {Promise<void>} Resolves when the invoked command completes. Errors
 *     raised by a command action are not handled here and reject the promise.
 */
async function main() {
    const program = new commander_1.Command();
    program
        .name("ragbox")
        .description("Index and query a Markdown/MDX folder with PageIndex")
        .version("0.1.0")
        .option("--config <path-or-name>", "ragbox config file path, or a name like prod for ragbox.config.prod.json");

    // init: write a default config file and report where it was created.
    program
        .command("init")
        .description("create a ragbox.config.json file")
        .option("--docs-dir <folder>", "default docs folder in the generated config", "./docs")
        .option("-f, --force", "overwrite an existing config file")
        .option("-o, --output <path>", "config file path")
        .option("--output-dir <folder>", "default index output directory in the generated config", "./.ragbox-index")
        .action(async (commandOptions) => {
            const configPath = await (0, config_file_1.writeDefaultRagboxConfig)({
                configPath: commandOptions.output,
                docsDir: commandOptions.docsDir,
                force: commandOptions.force,
                outputDir: commandOptions.outputDir
            });
            console.log(`Created ${configPath}`);
        });

    // setup pageindex: delegate to setupPageIndex and print its result.
    const setupCommand = program
        .command("setup")
        .description("setup local ragbox dependencies");
    setupCommand
        .command("pageindex")
        .description("clone PageIndex and configure ragbox to use it")
        .option("--dir <folder>", "PageIndex checkout directory", "./.ragbox/PageIndex")
        .option("--repo <url>", "PageIndex git repository", "https://github.com/VectifyAI/PageIndex.git")
        .option("--ref <ref>", "PageIndex branch, tag, or commit to checkout")
        .option("--python <path>", "Python executable used to create the PageIndex virtual environment", "python3")
        .option("--skip-install", "skip virtual environment creation and pip install")
        .option("--no-write-config", "do not create or update ragbox.config.json")
        .option("--no-gitignore", "do not add .ragbox/ to .gitignore")
        .option("--json", "print a stable JSON result")
        .action(async (commandOptions, command) => {
            const globalOptions = getGlobalOptions(command);
            const result = await (0, setup_pageindex_1.setupPageIndex)({
                configPath: globalOptions.config,
                dir: commandOptions.dir,
                // Negated flags: only an explicit `false` disables the step.
                gitignore: commandOptions.gitignore !== false,
                install: !commandOptions.skipInstall,
                python: commandOptions.python,
                ref: commandOptions.ref,
                repo: commandOptions.repo,
                writeConfig: commandOptions.writeConfig !== false
            });
            if (commandOptions.json) {
                writeJson(result);
                return;
            }
            printSetupPageIndexResult(result);
        });

    // index: index one folder; the positional argument wins over the configured root.
    addProjectOptions(addLlmOptions(program
        .command("index")
        .argument("[folder]", "folder to index")
        .option("-c, --concurrency <number>", "PageIndex concurrency", parseConcurrency)
        .option("--pageindex-cli <path>", "PageIndex script path")
        .option("-o, --output-dir <folder>", "folder for ragbox index files")
        .option("--pageindex-python <path>", "Python executable used to run PageIndex")
        .option("--json", "print a stable JSON result")))
        .action(async (folder, commandOptions, command) => {
            const loaded = await loadCommandConfig(command, commandOptions);
            const indexFolderPath = requireFolder(folder ?? loaded.rootDir, "index");
            const result = await (0, indexer_1.indexFolder)(indexFolderPath, buildOptions(loaded.options, commandOptions));
            if (commandOptions.json) {
                writeJson(indexJsonOutput(result));
                return;
            }
            printIndexResult(indexFolderPath, result);
        });

    // inspect: inspect each resolved target in turn, keeping the target order.
    addProjectOptions(program
        .command("inspect")
        .argument("[target]", "docs folder or ragbox output directory")
        .option("--all-sources", "inspect every configured source")
        .option("--json", "print a stable JSON result"))
        .action(async (target, commandOptions, command) => {
            const targets = await loadDiagnosticTargets(command, commandOptions, target, false);
            const results = [];
            for (const diagnosticTarget of targets) {
                results.push({
                    source: diagnosticTarget.source,
                    ...(await (0, sdk_1.inspectIndex)(diagnosticTarget.target))
                });
            }
            if (commandOptions.json) {
                // A single result is printed bare; several are wrapped in an envelope.
                writeJson(results.length === 1
                    ? results[0]
                    : {
                        version: 1,
                        command: "inspect",
                        indexes: results
                    });
                return;
            }
            for (const result of results) {
                printInspectResult(result, result.source);
            }
        });

    // status: build and print the status report for the resolved targets.
    addProjectOptions(program
        .command("status")
        .argument("[target]", "docs folder or ragbox output directory")
        .option("--all-sources", "check every configured source")
        .option("--json", "print a stable JSON result"))
        .action(async (target, commandOptions, command) => {
            const targets = await loadDiagnosticTargets(command, commandOptions, target, true);
            const status = await buildStatusOutput(targets);
            if (commandOptions.json) {
                writeJson(status);
                return;
            }
            printStatusOutput(status);
        });

    // doctor: build and print the doctor report.
    addProjectOptions(addLlmOptions(program
        .command("doctor")
        .argument("[target]", "docs folder or ragbox output directory")
        .option("--all-sources", "check every configured source")
        .option("--json", "print a stable JSON result")))
        .action(async (target, commandOptions, command) => {
            const doctor = await buildDoctorOutput(command, commandOptions, target);
            if (commandOptions.json) {
                writeJson(doctor);
                return;
            }
            printDoctorOutput(doctor);
        });

    // query: hand the parsed arguments and options to runQueryAction.
    addProjectOptions(addLlmOptions(program
        .command("query")
        .argument("[target]", "docs folder or ragbox output directory")
        .argument("[question]", "question to answer")
        .option("--all-sources", "query every configured source and synthesize one answer")
        .option("--json", "print a stable JSON result with selections and sources")
        .option("--trace", "include query trace diagnostics; implies JSON output")))
        .action(async (target, question, commandOptions, command) => {
            await runQueryAction(target, question, commandOptions, command);
        });

    // trace query: same as `query`, but `trace` and `json` are always forced on.
    const traceCommand = program
        .command("trace")
        .description("run diagnostic tracing commands");
    addProjectOptions(addLlmOptions(traceCommand
        .command("query")
        .argument("[target]", "docs folder or ragbox output directory")
        .argument("[question]", "question to answer")
        .option("--all-sources", "query every configured source and synthesize one answer")
        .option("--json", "print a stable JSON result with selections and sources")))
        .action(async (target, question, commandOptions, command) => {
            await runQueryAction(target, question, { ...commandOptions, trace: true, json: true }, command);
        });

    // start: hand the parsed arguments and options to runStartAction.
    addProjectOptions(addLlmOptions(program
        .command("start")
        .argument("[folder]", "folder to index, watch, and serve")
        .option("--all-sources", "start every configured source")
        .option("--auth-token <token>", "bearer token required for non-health endpoints")
        .option("-c, --concurrency <number>", "PageIndex concurrency", parseConcurrency)
        .option("--pageindex-cli <path>", "PageIndex script path")
        .option("-o, --output-dir <folder>", "folder for ragbox index files")
        .option("--pageindex-python <path>", "Python executable used to run PageIndex")
        .option("--debounce-ms <ms>", "watch change debounce in milliseconds", parseDebounceMs)
        .option("--health-file <path>", "write a watch health JSON file")
        .option("--host <host>", "host to bind", process.env.RAGBOX_SERVE_HOST ?? "127.0.0.1")
        .option("--jsonl", "print stable JSON Lines start, watch, and index progress events")
        .option("--lock-file <path>", "create an exclusive lock file while start is running")
        .option("--port <number>", "port to bind", parseServePort)
        .option("--retry-attempts <number>", "retry failed watch index runs", parseRetryAttempts)
        .option("--retry-delay-ms <ms>", "delay between watch retries in milliseconds", parseRetryDelayMs)
        .option("--staging", "index into a staging directory and promote it after a clean run")
        .option("--staging-output-dir <folder>", "staging directory used with --staging")
        .option("--webhook <url>", "POST watch events to a webhook URL")))
        .action(async (folder, commandOptions, command) => {
            await runStartAction(folder, commandOptions, command);
        });

    // serve: start the server, then stay alive until it closes or a signal arrives.
    addProjectOptions(addLlmOptions(program
        .command("serve")
        .argument("[target]", "docs folder or ragbox output directory")
        .option("--all-sources", "serve every configured source by default")
        .option("--auth-token <token>", "bearer token required for non-health endpoints")
        .option("--host <host>", "host to bind", process.env.RAGBOX_SERVE_HOST ?? "127.0.0.1")
        .option("--port <number>", "port to bind", parseServePort)))
        .action(async (target, commandOptions, command) => {
            const globalOptions = getGlobalOptions(command);
            const handle = await (0, serve_1.startServe)({
                allSources: commandOptions.allSources,
                apiKey: commandOptions.apiKey,
                authToken: commandOptions.authToken,
                baseUrl: commandOptions.baseUrl,
                configPath: globalOptions.config,
                host: commandOptions.host,
                model: commandOptions.model,
                port: commandOptions.port,
                source: commandOptions.source,
                target
            });
            console.log(`Serving ragbox at ${handle.url}`);
            await new Promise((resolve, reject) => {
                let closing = false;
                const detachSignals = () => {
                    process.off("SIGINT", stop);
                    process.off("SIGTERM", stop);
                };
                const stop = () => {
                    // SIGINT and SIGTERM may both arrive; close only once.
                    if (closing) {
                        return;
                    }
                    closing = true;
                    // Route a failed close into this promise so the error reaches
                    // the caller instead of becoming an unhandled rejection.
                    void handle.close().then(resolve, (error) => {
                        detachSignals();
                        reject(error);
                    });
                };
                process.once("SIGINT", stop);
                process.once("SIGTERM", stop);
                // The server may also close without a signal; resolve in that case too.
                handle.server.once("close", () => {
                    detachSignals();
                    resolve();
                });
            });
        });

    // watch: run watchFolder with progress reported as text or JSON Lines.
    addProjectOptions(addLlmOptions(program
        .command("watch")
        .argument("[folder]", "folder to watch")
        .option("-c, --concurrency <number>", "PageIndex concurrency", parseConcurrency)
        .option("--pageindex-cli <path>", "PageIndex script path")
        .option("-o, --output-dir <folder>", "folder for ragbox index files")
        .option("--pageindex-python <path>", "Python executable used to run PageIndex")
        .option("--debounce-ms <ms>", "watch change debounce in milliseconds", parseDebounceMs)
        .option("--health-file <path>", "write a watch health JSON file")
        .option("--jsonl", "print stable JSON Lines watch and index progress events")
        .option("--lock-file <path>", "create an exclusive lock file while watch is running")
        .option("--retry-attempts <number>", "retry failed watch index runs", parseRetryAttempts)
        .option("--retry-delay-ms <ms>", "delay between watch retries in milliseconds", parseRetryDelayMs)
        .option("--staging", "index into a staging directory and promote it after a clean run")
        .option("--staging-output-dir <folder>", "staging directory used with --staging")
        .option("--webhook <url>", "POST watch events to a webhook URL")))
        .action(async (folder, commandOptions, command) => {
            const loaded = await loadCommandConfig(command, commandOptions);
            const watchFolderPath = requireFolder(folder ?? loaded.rootDir, "watch");
            const options = buildOptions(loaded.options, commandOptions, commandOptions.jsonl ? logProgressAsJsonLine : logProgress);
            if (commandOptions.jsonl) {
                options.watchProgress = writeJsonLine;
            }
            await (0, watch_1.watchFolder)(watchFolderPath, options);
        });

    await program.parseAsync(process.argv);
}
1009
// Script entry point: run the CLI. On failure, write only the error's message
// (or the stringified value when it is not an Error) to stderr and set
// process.exitCode rather than calling process.exit().
main().catch((failure) => {
    console.error(failure instanceof Error ? failure.message : String(failure));
    process.exitCode = 1;
});