@bndynet/ragbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +765 -0
  2. package/README.zh-CN.md +774 -0
  3. package/dist/src/advanced.d.ts +13 -0
  4. package/dist/src/advanced.js +29 -0
  5. package/dist/src/cli.d.ts +2 -0
  6. package/dist/src/cli.js +1013 -0
  7. package/dist/src/config-file.d.ts +69 -0
  8. package/dist/src/config-file.js +246 -0
  9. package/dist/src/folder-index/config.d.ts +2 -0
  10. package/dist/src/folder-index/config.js +56 -0
  11. package/dist/src/folder-index/hash.d.ts +1 -0
  12. package/dist/src/folder-index/hash.js +14 -0
  13. package/dist/src/folder-index/indexer.d.ts +2 -0
  14. package/dist/src/folder-index/indexer.js +154 -0
  15. package/dist/src/folder-index/llm-client.d.ts +3 -0
  16. package/dist/src/folder-index/llm-client.js +45 -0
  17. package/dist/src/folder-index/manifest.d.ts +17 -0
  18. package/dist/src/folder-index/manifest.js +158 -0
  19. package/dist/src/folder-index/multi-query.d.ts +45 -0
  20. package/dist/src/folder-index/multi-query.js +109 -0
  21. package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
  22. package/dist/src/folder-index/pageindex-runner.js +218 -0
  23. package/dist/src/folder-index/path-utils.d.ts +5 -0
  24. package/dist/src/folder-index/path-utils.js +33 -0
  25. package/dist/src/folder-index/query.d.ts +19 -0
  26. package/dist/src/folder-index/query.js +597 -0
  27. package/dist/src/folder-index/queue.d.ts +1 -0
  28. package/dist/src/folder-index/queue.js +18 -0
  29. package/dist/src/folder-index/root-tree.d.ts +3 -0
  30. package/dist/src/folder-index/root-tree.js +82 -0
  31. package/dist/src/folder-index/scan.d.ts +14 -0
  32. package/dist/src/folder-index/scan.js +152 -0
  33. package/dist/src/folder-index/types.d.ts +368 -0
  34. package/dist/src/folder-index/types.js +2 -0
  35. package/dist/src/folder-index/watch.d.ts +17 -0
  36. package/dist/src/folder-index/watch.js +550 -0
  37. package/dist/src/index.d.ts +6 -0
  38. package/dist/src/index.js +45 -0
  39. package/dist/src/sdk.d.ts +101 -0
  40. package/dist/src/sdk.js +352 -0
  41. package/dist/src/serve.d.ts +64 -0
  42. package/dist/src/serve.js +466 -0
  43. package/dist/src/setup-pageindex.d.ts +30 -0
  44. package/dist/src/setup-pageindex.js +184 -0
  45. package/package.json +43 -0
@@ -0,0 +1,352 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.createIndex = createIndex;
7
+ exports.queryIndex = queryIndex;
8
+ exports.watchIndex = watchIndex;
9
+ exports.inspectIndex = inspectIndex;
10
+ exports.validateIndex = validateIndex;
11
+ const promises_1 = __importDefault(require("node:fs/promises"));
12
+ const node_path_1 = __importDefault(require("node:path"));
13
+ const config_file_1 = require("./config-file");
14
+ const indexer_1 = require("./folder-index/indexer");
15
+ const manifest_1 = require("./folder-index/manifest");
16
+ const query_1 = require("./folder-index/query");
17
+ const watch_1 = require("./folder-index/watch");
18
/**
 * Shallow-merges objects from left to right, ignoring properties whose value
 * is `undefined` so that an unset override never erases a value provided by
 * an earlier source.
 *
 * Sources that are `null` or `undefined` are skipped. Without that guard
 * `Object.entries` throws a TypeError on the first missing source.
 *
 * @param {...(object|null|undefined)} values Sources in increasing priority.
 * @returns {object} A new object; the sources themselves are not mutated.
 */
function mergeDefined(...values) {
    const merged = {};
    for (const value of values) {
        // A missing source contributes nothing rather than aborting the merge.
        if (value == null) {
            continue;
        }
        for (const [key, nestedValue] of Object.entries(value)) {
            // Only explicit values win; `undefined` means "not set here".
            if (nestedValue !== undefined) {
                merged[key] = nestedValue;
            }
        }
    }
    return merged;
}
29
/**
 * Builds the option bag handed to the folder-index functions.
 *
 * The configuration is resolved through `resolveRagboxConfig` using
 * `options.configPath` and `options.source`; its `pageIndexOptions` form the
 * base, and every SDK option that is not `undefined` overrides the matching
 * key (see `mergeDefined`).
 *
 * @param {object} [options] SDK-level create/query/watch options.
 * @returns {Promise<object>} The merged options object.
 */
async function toPageIndexOptions(options = {}) {
    const { pageIndexOptions } = await (0, config_file_1.resolveRagboxConfig)({
        configPath: options.configPath,
        source: options.source
    });
    // SDK option names on the right, folder-index option names on the left.
    const overrides = {
        apiKey: options.apiKey,
        baseUrl: options.baseUrl,
        cliPath: options.pageIndexCli,
        concurrency: options.concurrency,
        env: options.env,
        exclude: options.exclude,
        extraArgs: options.pageIndexExtraArgs,
        include: options.include,
        llmClient: options.llmClient,
        model: options.model,
        outputArg: options.pageIndexOutputArg,
        outputDir: options.outputDir,
        progress: options.onProgress,
        pythonPath: options.pageIndexPython,
        trace: options.trace,
        watchDebounceMs: options.debounceMs,
        watchHealthFile: options.healthFile,
        watchLockFile: options.lockFile,
        watchProgress: options.onEvent,
        watchRetryAttempts: options.retryAttempts,
        watchRetryDelayMs: options.retryDelayMs,
        watchStaging: options.staging,
        watchStagingOutputDir: options.stagingOutputDir,
        watchWebhookUrl: options.webhookUrl
    };
    return mergeDefined(pageIndexOptions, overrides);
}
64
/**
 * Produces the `counts` summary for either of two inputs.
 *
 * - An object with a `documents` property (a manifest): `total`, `ready` and
 *   `failed` are derived from the records; the change counters are all 0.
 * - Anything else (an indexing result): `total` comes from
 *   `result.manifest.documents` and the remaining counters are copied over.
 *
 * @param {object} result A manifest or an indexing result.
 * @returns {object} Counters: total, ready, failed, added, modified,
 *   retryFailed, unchanged, deleted.
 */
function toIndexCounts(result) {
    if (!("documents" in result)) {
        const total = result.manifest.documents.length;
        const { ready, failed, added, modified, retryFailed, unchanged, deleted } = result;
        return { total, ready, failed, added, modified, retryFailed, unchanged, deleted };
    }
    // Tally both statuses in a single pass over the records.
    const tally = result.documents.reduce((acc, record) => {
        if (record.status === "ready") {
            acc.ready += 1;
        }
        else if (record.status === "failed") {
            acc.failed += 1;
        }
        return acc;
    }, { ready: 0, failed: 0 });
    return {
        total: result.documents.length,
        ready: tally.ready,
        failed: tally.failed,
        added: 0,
        modified: 0,
        retryFailed: 0,
        unchanged: 0,
        deleted: 0
    };
}
90
/**
 * Reshapes an indexing result into the versioned object returned by the SDK.
 *
 * `rootDir` and `generatedAt` are taken from the manifest; paths, manifest and
 * root tree are passed through; `counts` is computed by `toIndexCounts`.
 *
 * @param {object} result Indexing result carrying `manifest`, `outputDir`,
 *   `manifestPath`, `rootTreePath` and `rootTree`.
 * @returns {object} The SDK result with `version: 1`.
 */
function toCreateIndexResult(result) {
    const { manifest, outputDir, manifestPath, rootTreePath, rootTree } = result;
    const { rootDir, generatedAt } = manifest;
    const counts = toIndexCounts(result);
    return {
        version: 1,
        rootDir,
        outputDir,
        manifestPath,
        rootTreePath,
        generatedAt,
        counts,
        manifest,
        rootTree
    };
}
103
/**
 * Reads a UTF-8 file and parses it as JSON without ever throwing.
 *
 * @param {string} filePath File to read.
 * @param {string} code Issue code to report when reading or parsing fails.
 * @param {string} label Human-readable name used at the start of the message.
 * @returns {Promise<object>} `{ ok: true, value }` on success, otherwise
 *   `{ ok: false, issue: { code, message, path } }`.
 */
async function readJson(filePath, code, label) {
    let value;
    try {
        const raw = await promises_1.default.readFile(filePath, "utf8");
        value = JSON.parse(raw);
    }
    catch (error) {
        // Non-Error throwables are stringified so the message is always text.
        const reason = error instanceof Error ? error.message : String(error);
        const issue = {
            code,
            message: `${label} is not readable JSON: ${reason}`,
            path: filePath
        };
        return { ok: false, issue };
    }
    return { ok: true, value };
}
122
/**
 * Reports whether `filePath` can be accessed.
 *
 * @param {string} filePath Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` on any
 *   failure (the reason is not distinguished).
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await promises_1.default.access(filePath);
    }
    catch {
        accessible = false;
    }
    return accessible;
}
131
/**
 * Locates the manifest and root-tree files for `target`.
 *
 * Two directories are probed in order: the resolved target itself, then its
 * `PAGEINDEX_DIR` child. The first one containing either file is used as the
 * output directory. When neither contains one, the resolved target is
 * returned so callers can report the files as missing.
 *
 * `rootDir` is the resolved target in every case.
 *
 * @param {string} target Path given by the caller.
 * @returns {Promise<object>} `{ target, rootDir, outputDir, manifestPath,
 *   rootTreePath }`.
 */
async function resolveIndexFiles(target) {
    const resolvedTarget = node_path_1.default.resolve(target);
    const describe = (outputDir) => ({
        target: resolvedTarget,
        rootDir: resolvedTarget,
        outputDir,
        manifestPath: node_path_1.default.join(outputDir, manifest_1.MANIFEST_FILE),
        rootTreePath: node_path_1.default.join(outputDir, manifest_1.ROOT_TREE_FILE)
    });
    const nestedDir = node_path_1.default.join(resolvedTarget, manifest_1.PAGEINDEX_DIR);
    for (const outputDir of [resolvedTarget, nestedDir]) {
        const files = describe(outputDir);
        // The root tree is only probed when the manifest is absent.
        if ((await pathExists(files.manifestPath)) || (await pathExists(files.rootTreePath))) {
            return files;
        }
    }
    return describe(resolvedTarget);
}
155
/**
 * Projects each manifest record onto the fields exposed in inspect results.
 *
 * @param {object} manifest Manifest whose `documents` is an array of records.
 * @returns {object[]} One summary per record with exactly the keys `docId`,
 *   `path`, `title`, `status`, `indexPath`, `summary`, `size` and `mtimeMs`;
 *   fields absent on a record come through as `undefined`.
 */
function documentSummaries(manifest) {
    const summarize = ({ docId, path, title, status, indexPath, summary, size, mtimeMs }) => ({
        docId,
        path,
        title,
        status,
        indexPath,
        summary,
        size,
        mtimeMs
    });
    return manifest.documents.map((record) => summarize(record));
}
167
/**
 * Gathers every node whose `type` is `"document"` from a root tree.
 *
 * Nodes are returned in depth-first pre-order: a node comes before its
 * descendants, and siblings keep their order. Children of document nodes are
 * traversed as well.
 *
 * @param {object} rootTree Tree node; `children` may be missing or null.
 * @returns {object[]} The matching node objects themselves, not copies.
 */
function collectDocumentNodes(rootTree) {
    const found = [];
    const pending = [rootTree];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === "document") {
            found.push(current);
        }
        // Push children in reverse so the first child is popped first.
        const children = [...(current.children ?? [])];
        for (let index = children.length - 1; index >= 0; index -= 1) {
            pending.push(children[index]);
        }
    }
    return found;
}
180
/**
 * Indexes `folder` and returns the SDK-shaped result.
 *
 * @param {string} folder Folder passed to `indexFolder`.
 * @param {object} [options] SDK options, converted by `toPageIndexOptions`.
 * @returns {Promise<object>} The result produced by `toCreateIndexResult`.
 */
async function createIndex(folder, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const indexed = await (0, indexer_1.indexFolder)(folder, pageIndexOptions);
    return toCreateIndexResult(indexed);
}
183
/**
 * Runs `question` against the index at `target`.
 *
 * @param {string} target Path passed to `queryFolder`.
 * @param {string} question The question to ask.
 * @param {object} [options] SDK options, converted by `toPageIndexOptions`.
 * @returns {Promise<*>} Whatever `queryFolder` resolves to, unchanged.
 */
async function queryIndex(target, question, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const answer = await (0, query_1.queryFolder)(target, question, pageIndexOptions);
    return answer;
}
186
/**
 * Starts watching `folder` and returns a handle for the watcher.
 *
 * The handle mirrors the one from `startWatchFolder`, except that a
 * successful `ready` outcome has its `result` converted with
 * `toCreateIndexResult`. A non-ok outcome is passed through untouched.
 *
 * @param {string} folder Folder passed to `startWatchFolder`.
 * @param {object} [options] SDK options, converted by `toPageIndexOptions`.
 * @returns {Promise<object>} `{ rootDir, outputDir, ready, closed, close }`.
 */
async function watchIndex(folder, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const handle = await (0, watch_1.startWatchFolder)(folder, pageIndexOptions);
    const ready = handle.ready.then((outcome) => (outcome.ok
        ? { ok: true, result: toCreateIndexResult(outcome.result) }
        : outcome));
    return {
        rootDir: handle.rootDir,
        outputDir: handle.outputDir,
        ready,
        closed: handle.closed,
        // Passed along as a bare function reference, exactly as received.
        close: handle.close
    };
}
204
/**
 * Loads the manifest of the index at `target` and summarizes it.
 *
 * Unlike `validateIndex`, this rejects on failure: when the location cannot
 * be resolved, the manifest cannot be read or parsed, or the root tree file
 * is not accessible.
 *
 * @param {string} target Path handed to `resolveQueryIndexLocation`.
 * @returns {Promise<object>} Inspect result with `version: 1`, the resolved
 *   paths, `generatedAt`, `counts` and per-document summaries.
 */
async function inspectIndex(target) {
    const location = await (0, query_1.resolveQueryIndexLocation)(target);
    const manifestText = await promises_1.default.readFile(location.manifestPath, "utf8");
    const manifest = JSON.parse(manifestText);
    // The root tree is only checked for accessibility; its content is not read.
    await promises_1.default.access(location.rootTreePath);
    const { rootDir, manifestPath, rootTreePath } = location;
    // Fall back to the PAGEINDEX_DIR child of rootDir when no outputDir is given.
    const outputDir = location.outputDir ?? node_path_1.default.join(rootDir, manifest_1.PAGEINDEX_DIR);
    return {
        version: 1,
        target: node_path_1.default.resolve(target),
        rootDir,
        outputDir,
        manifestPath,
        rootTreePath,
        generatedAt: manifest.generatedAt,
        counts: toIndexCounts(manifest),
        documents: documentSummaries(manifest)
    };
}
220
/**
 * Validates the index at `target` and reports every problem as data.
 *
 * Checks, in order:
 *  1. the manifest exists, is JSON, has `version === 1` and an array
 *     `documents`;
 *  2. the root tree exists, is JSON, and has `node_id` and `type` equal to
 *     `"root"`;
 *  3. every manifest record with status `"ready"` has its index file on disk;
 *  4. document nodes in the root tree and manifest records agree (every tree
 *     node is known, index paths match, every ready record is in the tree).
 *
 * Malformed content is reported through `errors` rather than thrown: a file
 * containing JSON `null`, a missing or non-array `documents`, and a
 * non-string `rootDir` no longer raise a TypeError part-way through.
 *
 * @param {string} target Index directory, or a folder containing one.
 * @returns {Promise<object>} `{ version: 1, target, ok, errors, warnings,
 *   inspect }`; `inspect` is only set when there are no errors.
 */
async function validateIndex(target) {
    const location = await resolveIndexFiles(target);
    const errors = [];
    const warnings = [];
    let manifest;
    let rootTree;
    if (!(await pathExists(location.manifestPath))) {
        errors.push({
            code: "missing_manifest",
            message: `Missing ${manifest_1.MANIFEST_FILE}`,
            path: location.manifestPath
        });
    }
    else {
        const result = await readJson(location.manifestPath, "invalid_manifest_json", "manifest");
        if (result.ok) {
            // JSON.parse may yield null; optional chaining keeps the checks
            // below from throwing on it.
            const parsed = result.value;
            if (parsed?.version !== 1) {
                errors.push({
                    code: "invalid_manifest_version",
                    message: `Unsupported manifest version: ${String(parsed?.version)}`,
                    path: location.manifestPath
                });
            }
            if (!Array.isArray(parsed?.documents)) {
                errors.push({
                    code: "invalid_manifest_documents",
                    message: "Manifest documents must be an array",
                    path: location.manifestPath
                });
            }
            manifest = parsed ?? undefined;
        }
        else {
            errors.push(result.issue);
        }
    }
    if (!(await pathExists(location.rootTreePath))) {
        errors.push({
            code: "missing_root_tree",
            message: `Missing ${manifest_1.ROOT_TREE_FILE}`,
            path: location.rootTreePath
        });
    }
    else {
        const result = await readJson(location.rootTreePath, "invalid_root_tree_json", "root tree");
        if (result.ok) {
            const parsed = result.value;
            if (parsed?.node_id !== "root" || parsed?.type !== "root") {
                errors.push({
                    code: "invalid_root_tree",
                    message: "Root tree must have node_id=root and type=root",
                    path: location.rootTreePath
                });
            }
            rootTree = parsed ?? undefined;
        }
        else {
            errors.push(result.issue);
        }
    }
    // The shape error is already recorded above; from here on a malformed
    // `documents` is treated as empty so the remaining checks can still run.
    const documents = manifest && Array.isArray(manifest.documents) ? manifest.documents : [];
    // path.resolve throws on non-string input, so only a non-empty string
    // rootDir from the manifest is honoured.
    const rootDir = manifest && typeof manifest.rootDir === "string" && manifest.rootDir !== ""
        ? node_path_1.default.resolve(manifest.rootDir)
        : location.rootDir;
    if (manifest) {
        for (const record of documents) {
            if (record.status !== "ready") {
                continue;
            }
            const indexPath = (0, manifest_1.resolveDocumentIndexPath)(rootDir, record.indexPath, location.outputDir);
            if (!(await pathExists(indexPath))) {
                errors.push({
                    code: "missing_document_index",
                    message: `Missing PageIndex JSON for ready document: ${record.path}`,
                    path: indexPath,
                    docId: record.docId
                });
            }
        }
    }
    if (manifest && rootTree) {
        const manifestByDocId = new Map(documents.map((record) => [record.docId, record]));
        const rootTreeDocumentNodes = collectDocumentNodes(rootTree);
        const rootTreeDocumentIds = new Set(rootTreeDocumentNodes.map((node) => node.node_id));
        for (const node of rootTreeDocumentNodes) {
            const record = manifestByDocId.get(node.node_id);
            if (!record) {
                errors.push({
                    code: "root_tree_unknown_document",
                    message: `Root tree references a document missing from manifest: ${node.node_id}`,
                    path: node.path,
                    docId: node.node_id
                });
                continue;
            }
            // A node without index_path is not compared against the manifest.
            if (node.index_path && node.index_path !== record.indexPath) {
                errors.push({
                    code: "root_tree_index_path_mismatch",
                    message: `Root tree index path differs from manifest for ${record.path}`,
                    path: node.path,
                    docId: record.docId
                });
            }
        }
        for (const record of documents) {
            if (record.status === "ready" && !rootTreeDocumentIds.has(record.docId)) {
                errors.push({
                    code: "manifest_document_missing_from_root_tree",
                    message: `Ready document is missing from root tree: ${record.path}`,
                    path: record.path,
                    docId: record.docId
                });
            }
        }
    }
    // The inspect summary is only meaningful for an index that passed every check.
    const inspect = errors.length === 0 && manifest
        ? {
            version: 1,
            target: location.target,
            rootDir,
            outputDir: location.outputDir,
            manifestPath: location.manifestPath,
            rootTreePath: location.rootTreePath,
            generatedAt: manifest.generatedAt,
            counts: toIndexCounts(manifest),
            documents: documentSummaries(manifest)
        }
        : undefined;
    return {
        version: 1,
        target: location.target,
        ok: errors.length === 0,
        errors,
        warnings,
        inspect
    };
}
@@ -0,0 +1,64 @@
1
+ import http from "node:http";
2
+ import { LlmClient } from "./folder-index/types";
3
+ import { InspectIndexResult, ValidateIndexResult } from "./sdk";
4
/**
 * Options accepted by `startServe`. Every field is optional.
 *
 * NOTE(review): the meaning and defaults of the individual fields are set by
 * the implementation, which is not part of this declaration file.
 */
export type ServeOptions = {
    allSources?: boolean;
    apiKey?: string;
    authToken?: string;
    baseUrl?: string;
    configPath?: string;
    env?: NodeJS.ProcessEnv;
    host?: string;
    llmClient?: LlmClient;
    model?: string;
    port?: number;
    /** A single source or a list of sources. */
    source?: string | Array<string>;
    target?: string;
};
18
/**
 * Handle resolved by `startServe`: the server's address, the underlying
 * `http.Server`, and two async controls.
 */
export type ServeHandle = {
    url: string;
    host: string;
    port: number;
    server: http.Server;
    /** Resolves to the list of indexes in the `ServeIndexesResult` shape. */
    reload: () => Promise<ServeIndexesResult>;
    /** Resolves with no value. */
    close: () => Promise<void>;
};
26
/**
 * Summary of one index: its target, whether it is ok, and the issues found.
 *
 * `counts` reuses the shape of `InspectIndexResult["counts"]`; `errors` and
 * `warnings` reuse the corresponding `ValidateIndexResult` fields.
 */
export type ServeIndexSummary = {
    source?: string;
    target: string;
    ok: boolean;
    generatedAt?: string;
    counts?: InspectIndexResult["counts"];
    errors: ValidateIndexResult["errors"];
    warnings: ValidateIndexResult["warnings"];
};
35
/** Versioned list of index summaries; also the value `ServeHandle.reload` resolves to. */
export type ServeIndexesResult = {
    version: 1;
    indexes: Array<ServeIndexSummary>;
};
39
/**
 * Versioned health report: an overall `ok` flag and `status`, uptime in
 * milliseconds, the time of the last reload, and index totals.
 */
export type ServeHealthResult = {
    version: 1;
    ok: boolean;
    status: "error" | "degraded" | "ready";
    uptimeMs: number;
    lastReloadAt: string;
    /** Index tallies: overall, ready and failed. */
    indexes: {
        total: number;
        ready: number;
        failed: number;
    };
};
51
/**
 * Versioned root description of the service: its name, current status, the
 * full health report, and the list of endpoints.
 */
export type ServeRootResult = {
    version: 1;
    name: "ragbox";
    status: ServeHealthResult["status"];
    ok: boolean;
    health: ServeHealthResult;
    /** One entry per endpoint: method, path, auth requirement, description. */
    endpoints: {
        method: "GET" | "POST";
        path: "/" | "/health" | "/indexes" | "/query" | "/reload";
        authRequired: boolean;
        description: string;
    }[];
};
64
/**
 * Starts the service.
 *
 * @param options Optional settings; see `ServeOptions`.
 * @returns A promise for the `ServeHandle` of the started service.
 */
export declare function startServe(options?: ServeOptions): Promise<ServeHandle>;