@bndynet/ragbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +765 -0
- package/README.zh-CN.md +774 -0
- package/dist/src/advanced.d.ts +13 -0
- package/dist/src/advanced.js +29 -0
- package/dist/src/cli.d.ts +2 -0
- package/dist/src/cli.js +1013 -0
- package/dist/src/config-file.d.ts +69 -0
- package/dist/src/config-file.js +246 -0
- package/dist/src/folder-index/config.d.ts +2 -0
- package/dist/src/folder-index/config.js +56 -0
- package/dist/src/folder-index/hash.d.ts +1 -0
- package/dist/src/folder-index/hash.js +14 -0
- package/dist/src/folder-index/indexer.d.ts +2 -0
- package/dist/src/folder-index/indexer.js +154 -0
- package/dist/src/folder-index/llm-client.d.ts +3 -0
- package/dist/src/folder-index/llm-client.js +45 -0
- package/dist/src/folder-index/manifest.d.ts +17 -0
- package/dist/src/folder-index/manifest.js +158 -0
- package/dist/src/folder-index/multi-query.d.ts +45 -0
- package/dist/src/folder-index/multi-query.js +109 -0
- package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
- package/dist/src/folder-index/pageindex-runner.js +218 -0
- package/dist/src/folder-index/path-utils.d.ts +5 -0
- package/dist/src/folder-index/path-utils.js +33 -0
- package/dist/src/folder-index/query.d.ts +19 -0
- package/dist/src/folder-index/query.js +597 -0
- package/dist/src/folder-index/queue.d.ts +1 -0
- package/dist/src/folder-index/queue.js +18 -0
- package/dist/src/folder-index/root-tree.d.ts +3 -0
- package/dist/src/folder-index/root-tree.js +82 -0
- package/dist/src/folder-index/scan.d.ts +14 -0
- package/dist/src/folder-index/scan.js +152 -0
- package/dist/src/folder-index/types.d.ts +368 -0
- package/dist/src/folder-index/types.js +2 -0
- package/dist/src/folder-index/watch.d.ts +17 -0
- package/dist/src/folder-index/watch.js +550 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.js +45 -0
- package/dist/src/sdk.d.ts +101 -0
- package/dist/src/sdk.js +352 -0
- package/dist/src/serve.d.ts +64 -0
- package/dist/src/serve.js +466 -0
- package/dist/src/setup-pageindex.d.ts +30 -0
- package/dist/src/setup-pageindex.js +184 -0
- package/package.json +43 -0
package/dist/src/sdk.js
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.createIndex = createIndex;
|
|
7
|
+
exports.queryIndex = queryIndex;
|
|
8
|
+
exports.watchIndex = watchIndex;
|
|
9
|
+
exports.inspectIndex = inspectIndex;
|
|
10
|
+
exports.validateIndex = validateIndex;
|
|
11
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
12
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
13
|
+
const config_file_1 = require("./config-file");
|
|
14
|
+
const indexer_1 = require("./folder-index/indexer");
|
|
15
|
+
const manifest_1 = require("./folder-index/manifest");
|
|
16
|
+
const query_1 = require("./folder-index/query");
|
|
17
|
+
const watch_1 = require("./folder-index/watch");
|
|
18
|
+
/**
 * Shallow-merges option objects from left to right, ignoring `undefined` values.
 *
 * Later sources win over earlier ones, but a property that is explicitly
 * `undefined` never overwrites a value supplied by an earlier source. Other
 * falsy values (`null`, `0`, `""`, `false`) are copied like any other value.
 * Sources that are themselves `null` or `undefined` are skipped, so a missing
 * option bag does not raise a `TypeError` from `Object.entries`.
 *
 * @param {...(object|null|undefined)} values Option bags, lowest precedence first.
 * @returns {object} A new object; none of the inputs are mutated.
 */
function mergeDefined(...values) {
    const merged = {};
    for (const value of values) {
        // Tolerate absent sources instead of crashing in Object.entries.
        if (value === undefined || value === null) {
            continue;
        }
        for (const [key, nestedValue] of Object.entries(value)) {
            if (nestedValue !== undefined) {
                merged[key] = nestedValue;
            }
        }
    }
    return merged;
}
|
|
29
|
+
/**
 * Translates SDK-level options into the option bag passed to the
 * folder-index functions.
 *
 * The configuration is resolved first (from `configPath` / `source`), then the
 * caller's explicit values are layered on top via `mergeDefined`, so any
 * caller value that is not `undefined` overrides the resolved one. Several
 * SDK names are mapped to different internal names here (for example
 * `pageIndexCli` -> `cliPath`, `onProgress` -> `progress`,
 * `debounceMs` -> `watchDebounceMs`).
 *
 * @param {object} [options] Options as accepted by the create/query/watch entry points.
 * @returns {Promise<object>} The merged option bag.
 */
async function toPageIndexOptions(options = {}) {
    const { configPath, source } = options;
    const resolved = await (0, config_file_1.resolveRagboxConfig)({ configPath, source });
    // Caller-supplied overrides, keyed by the internal option names.
    const overrides = {
        apiKey: options.apiKey,
        baseUrl: options.baseUrl,
        cliPath: options.pageIndexCli,
        concurrency: options.concurrency,
        env: options.env,
        exclude: options.exclude,
        extraArgs: options.pageIndexExtraArgs,
        include: options.include,
        llmClient: options.llmClient,
        model: options.model,
        outputArg: options.pageIndexOutputArg,
        outputDir: options.outputDir,
        progress: options.onProgress,
        pythonPath: options.pageIndexPython,
        trace: options.trace,
        watchDebounceMs: options.debounceMs,
        watchHealthFile: options.healthFile,
        watchLockFile: options.lockFile,
        watchProgress: options.onEvent,
        watchRetryAttempts: options.retryAttempts,
        watchRetryDelayMs: options.retryDelayMs,
        watchStaging: options.staging,
        watchStagingOutputDir: options.stagingOutputDir,
        watchWebhookUrl: options.webhookUrl
    };
    return mergeDefined(resolved.pageIndexOptions, overrides);
}
|
|
64
|
+
/**
 * Produces the `counts` summary for either an indexing result or a bare manifest.
 *
 * - An object without a `documents` property is treated as an indexing result:
 *   its own counters are copied and `total` is the length of
 *   `result.manifest.documents`.
 * - An object with a `documents` property is treated as a manifest: `ready` and
 *   `failed` are tallied from each record's `status`, and the change counters
 *   (`added`, `modified`, `retryFailed`, `unchanged`, `deleted`) are all 0.
 *
 * @param {object} result Indexing result or manifest.
 * @returns {object} Counts with keys total, ready, failed, added, modified,
 *   retryFailed, unchanged and deleted.
 */
function toIndexCounts(result) {
    if (!("documents" in result)) {
        return {
            total: result.manifest.documents.length,
            ready: result.ready,
            failed: result.failed,
            added: result.added,
            modified: result.modified,
            retryFailed: result.retryFailed,
            unchanged: result.unchanged,
            deleted: result.deleted
        };
    }
    let ready = 0;
    let failed = 0;
    result.documents.forEach((record) => {
        if (record.status === "ready") {
            ready += 1;
        }
        else if (record.status === "failed") {
            failed += 1;
        }
    });
    return {
        total: result.documents.length,
        ready,
        failed,
        added: 0,
        modified: 0,
        retryFailed: 0,
        unchanged: 0,
        deleted: 0
    };
}
|
|
90
|
+
/**
 * Reshapes an indexing result into the versioned object returned by the SDK.
 *
 * `rootDir` and `generatedAt` are lifted out of the manifest, `counts` is
 * derived with `toIndexCounts`, and the manifest and root tree are passed
 * through unchanged.
 *
 * @param {object} result Indexing result carrying manifest, outputDir,
 *   manifestPath, rootTreePath and rootTree.
 * @returns {object} The SDK result, tagged `version: 1`.
 */
function toCreateIndexResult(result) {
    const { manifest, outputDir, manifestPath, rootTreePath, rootTree } = result;
    return {
        version: 1,
        rootDir: manifest.rootDir,
        outputDir,
        manifestPath,
        rootTreePath,
        generatedAt: manifest.generatedAt,
        counts: toIndexCounts(result),
        manifest,
        rootTree
    };
}
|
|
103
|
+
/**
 * Reads a UTF-8 file and parses it as JSON without ever rejecting.
 *
 * @param {string} filePath File to read.
 * @param {string} code Issue code to report when reading or parsing fails.
 * @param {string} label Human-readable name of the file, used as the message prefix.
 * @returns {Promise<object>} `{ ok: true, value }` on success, otherwise
 *   `{ ok: false, issue: { code, message, path } }`.
 */
async function readJson(filePath, code, label) {
    let value;
    try {
        const text = await promises_1.default.readFile(filePath, "utf8");
        value = JSON.parse(text);
    }
    catch (error) {
        // Read failures and parse failures are reported the same way.
        const reason = error instanceof Error ? error.message : String(error);
        const issue = {
            code,
            message: `${label} is not readable JSON: ${reason}`,
            path: filePath
        };
        return { ok: false, issue };
    }
    return { ok: true, value };
}
|
|
122
|
+
/**
 * Reports whether `filePath` can be accessed.
 *
 * Any failure of the access check (not only "file not found") yields `false`.
 *
 * @param {string} filePath Path to probe.
 * @returns {Promise<boolean>} `true` when the access check succeeds.
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await promises_1.default.access(filePath);
    }
    catch {
        accessible = false;
    }
    return accessible;
}
|
|
131
|
+
/**
 * Locates the manifest and root-tree files for `target`.
 *
 * Two output directories are probed in order: the resolved target itself, then
 * its `PAGEINDEX_DIR` sub-directory. The first one containing either the
 * manifest or the root tree file wins. When neither does, the paths directly
 * under the resolved target are returned so callers can report them as missing.
 *
 * `rootDir` is always the resolved target, whichever directory matched.
 *
 * @param {string} target Directory to inspect.
 * @returns {Promise<object>} `{ target, rootDir, outputDir, manifestPath, rootTreePath }`.
 */
async function resolveIndexFiles(target) {
    const resolvedTarget = node_path_1.default.resolve(target);
    const describe = (outputDir) => ({
        target: resolvedTarget,
        rootDir: resolvedTarget,
        outputDir,
        manifestPath: node_path_1.default.join(outputDir, manifest_1.MANIFEST_FILE),
        rootTreePath: node_path_1.default.join(outputDir, manifest_1.ROOT_TREE_FILE)
    });
    const direct = describe(resolvedTarget);
    const nested = describe(node_path_1.default.join(resolvedTarget, manifest_1.PAGEINDEX_DIR));
    for (const candidate of [direct, nested]) {
        const hasManifest = await pathExists(candidate.manifestPath);
        // Only probe the root tree when the manifest is absent.
        if (hasManifest || (await pathExists(candidate.rootTreePath))) {
            return candidate;
        }
    }
    return direct;
}
|
|
155
|
+
/**
 * Projects each manifest document onto the fields exposed in inspect results.
 *
 * Only docId, path, title, status, indexPath, summary, size and mtimeMs are
 * kept; any other property on a record is dropped.
 *
 * @param {object} manifest Manifest whose `documents` array is summarized.
 * @returns {object[]} One summary per document, in manifest order.
 */
function documentSummaries(manifest) {
    const summarize = ({ docId, path, title, status, indexPath, summary, size, mtimeMs }) => ({
        docId,
        path,
        title,
        status,
        indexPath,
        summary,
        size,
        mtimeMs
    });
    return manifest.documents.map((record) => summarize(record));
}
|
|
167
|
+
/**
 * Gathers every node of type "document" from a root tree.
 *
 * The tree is walked depth-first in pre-order (a node before its children,
 * children in their listed order). Children of document nodes are visited too.
 * Nodes without `children` are treated as leaves.
 *
 * @param {object} rootTree Root node of the tree.
 * @returns {object[]} The matching node objects themselves, not copies.
 */
function collectDocumentNodes(rootTree) {
    const found = [];
    const pending = [rootTree];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === "document") {
            found.push(current);
        }
        const children = [...(current.children ?? [])];
        // Push in reverse so the first child is popped (visited) first.
        for (let index = children.length - 1; index >= 0; index -= 1) {
            pending.push(children[index]);
        }
    }
    return found;
}
|
|
180
|
+
/**
 * Runs `indexFolder` on `folder` and returns the result in SDK shape.
 *
 * @param {string} folder Folder handed to the indexer.
 * @param {object} [options] SDK options; converted with `toPageIndexOptions`.
 * @returns {Promise<object>} The indexing result as built by `toCreateIndexResult`.
 */
async function createIndex(folder, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const indexed = await (0, indexer_1.indexFolder)(folder, pageIndexOptions);
    return toCreateIndexResult(indexed);
}
|
|
183
|
+
/**
 * Forwards a question to `queryFolder` for the given target.
 *
 * @param {string} target Target handed to `queryFolder`.
 * @param {string} question Question to ask.
 * @param {object} [options] SDK options; converted with `toPageIndexOptions`.
 * @returns {Promise<object>} Whatever `queryFolder` resolves to, unmodified.
 */
async function queryIndex(target, question, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const answer = await (0, query_1.queryFolder)(target, question, pageIndexOptions);
    return answer;
}
|
|
186
|
+
/**
 * Starts `startWatchFolder` on `folder` and exposes its handle in SDK shape.
 *
 * The handle's `rootDir`, `outputDir`, `closed` and `close` are passed through
 * as-is. Its `ready` promise is wrapped so that a successful outcome carries
 * the result converted by `toCreateIndexResult`; an outcome with `ok` falsy is
 * forwarded untouched.
 *
 * @param {string} folder Folder handed to the watcher.
 * @param {object} [options] SDK options; converted with `toPageIndexOptions`.
 * @returns {Promise<object>} `{ rootDir, outputDir, ready, closed, close }`.
 */
async function watchIndex(folder, options = {}) {
    const pageIndexOptions = await toPageIndexOptions(options);
    const handle = await (0, watch_1.startWatchFolder)(folder, pageIndexOptions);
    const ready = handle.ready.then((outcome) => (outcome.ok
        ? { ok: true, result: toCreateIndexResult(outcome.result) }
        : outcome));
    return {
        rootDir: handle.rootDir,
        outputDir: handle.outputDir,
        ready,
        closed: handle.closed,
        close: handle.close
    };
}
|
|
204
|
+
/**
 * Loads the manifest of an existing index and summarizes it.
 *
 * The location comes from `resolveQueryIndexLocation`. Nothing is caught here:
 * the returned promise rejects if the manifest cannot be read or parsed, or if
 * the root tree file is not accessible. The root tree is only checked for
 * accessibility; its content is not read.
 *
 * @param {string} target Target handed to `resolveQueryIndexLocation`.
 * @returns {Promise<object>} `{ version: 1, target, rootDir, outputDir,
 *   manifestPath, rootTreePath, generatedAt, counts, documents }`.
 */
async function inspectIndex(target) {
    const location = await (0, query_1.resolveQueryIndexLocation)(target);
    const { rootDir, manifestPath, rootTreePath } = location;
    const manifestText = await promises_1.default.readFile(manifestPath, "utf8");
    const manifest = JSON.parse(manifestText);
    await promises_1.default.access(rootTreePath);
    // Fall back to the PAGEINDEX_DIR folder under rootDir when the location has no outputDir.
    const outputDir = location.outputDir ?? node_path_1.default.join(rootDir, manifest_1.PAGEINDEX_DIR);
    return {
        version: 1,
        target: node_path_1.default.resolve(target),
        rootDir,
        outputDir,
        manifestPath,
        rootTreePath,
        generatedAt: manifest.generatedAt,
        counts: toIndexCounts(manifest),
        documents: documentSummaries(manifest)
    };
}
|
|
220
|
+
/**
 * Checks an on-disk index for structural problems and reports them as issues.
 *
 * Checks, in order (every finding is appended to `errors`):
 *   1. the manifest file exists, is readable JSON, has `version === 1`, and its
 *      `documents` is an array whose entries are objects;
 *   2. the root tree file exists, is readable JSON, and has
 *      `node_id === "root"` and `type === "root"`;
 *   3. every manifest document with status "ready" has its index file on disk;
 *   4. document nodes of the root tree and "ready" manifest documents reference
 *      each other and agree on the index path.
 *
 * Content that parses as JSON but has the wrong shape (a `null` manifest or
 * root tree, a missing or non-array `documents`, non-object document entries,
 * a non-string `rootDir`) is reported or tolerated rather than surfacing as a
 * `TypeError` part-way through validation.
 *
 * @param {string} target Directory handed to `resolveIndexFiles`.
 * @returns {Promise<object>} `{ version: 1, target, ok, errors, warnings, inspect }`.
 *   `ok` is true when `errors` is empty, and `inspect` is only populated in that
 *   case. Nothing in this function adds to `warnings`, so it is always empty.
 */
async function validateIndex(target) {
    const location = await resolveIndexFiles(target);
    const errors = [];
    const warnings = [];
    let manifest;
    let rootTree;
    // Manifest document entries that are safe to dereference. Stays empty when
    // the manifest is missing or `documents` is not an array.
    const documents = [];
    if (!(await pathExists(location.manifestPath))) {
        errors.push({
            code: "missing_manifest",
            message: `Missing ${manifest_1.MANIFEST_FILE}`,
            path: location.manifestPath
        });
    }
    else {
        const result = await readJson(location.manifestPath, "invalid_manifest_json", "manifest");
        if (result.ok) {
            manifest = result.value;
            // Optional chaining: valid JSON may still be `null` or a primitive.
            if (manifest?.version !== 1) {
                errors.push({
                    code: "invalid_manifest_version",
                    message: `Unsupported manifest version: ${String(manifest?.version)}`,
                    path: location.manifestPath
                });
            }
            if (!Array.isArray(manifest?.documents)) {
                errors.push({
                    code: "invalid_manifest_documents",
                    message: "Manifest documents must be an array",
                    path: location.manifestPath
                });
            }
            else {
                for (const [index, record] of manifest.documents.entries()) {
                    if (record !== null && typeof record === "object") {
                        documents.push(record);
                    }
                    else {
                        errors.push({
                            code: "invalid_manifest_documents",
                            message: `Manifest document at index ${index} must be an object`,
                            path: location.manifestPath
                        });
                    }
                }
            }
        }
        else {
            errors.push(result.issue);
        }
    }
    if (!(await pathExists(location.rootTreePath))) {
        errors.push({
            code: "missing_root_tree",
            message: `Missing ${manifest_1.ROOT_TREE_FILE}`,
            path: location.rootTreePath
        });
    }
    else {
        const result = await readJson(location.rootTreePath, "invalid_root_tree_json", "root tree");
        if (result.ok) {
            rootTree = result.value;
            // A `null` root tree fails this check instead of throwing.
            if (rootTree?.node_id !== "root" || rootTree?.type !== "root") {
                errors.push({
                    code: "invalid_root_tree",
                    message: "Root tree must have node_id=root and type=root",
                    path: location.rootTreePath
                });
            }
        }
        else {
            errors.push(result.issue);
        }
    }
    // Prefer the manifest's own rootDir, but only when it is a usable string;
    // path.resolve throws on non-string arguments.
    const rootDir = typeof manifest?.rootDir === "string" && manifest.rootDir !== ""
        ? node_path_1.default.resolve(manifest.rootDir)
        : location.rootDir;
    // Checked one at a time so errors keep manifest order.
    for (const record of documents) {
        if (record.status !== "ready") {
            continue;
        }
        const indexPath = (0, manifest_1.resolveDocumentIndexPath)(rootDir, record.indexPath, location.outputDir);
        if (!(await pathExists(indexPath))) {
            errors.push({
                code: "missing_document_index",
                message: `Missing PageIndex JSON for ready document: ${record.path}`,
                path: indexPath,
                docId: record.docId
            });
        }
    }
    if (manifest && rootTree) {
        const manifestByDocId = new Map(documents.map((record) => [record.docId, record]));
        const rootTreeDocumentNodes = collectDocumentNodes(rootTree);
        const rootTreeDocumentIds = new Set(rootTreeDocumentNodes.map((node) => node.node_id));
        for (const node of rootTreeDocumentNodes) {
            const record = manifestByDocId.get(node.node_id);
            if (!record) {
                errors.push({
                    code: "root_tree_unknown_document",
                    message: `Root tree references a document missing from manifest: ${node.node_id}`,
                    path: node.path,
                    docId: node.node_id
                });
                continue;
            }
            // A node without index_path is not compared against the manifest.
            if (node.index_path && node.index_path !== record.indexPath) {
                errors.push({
                    code: "root_tree_index_path_mismatch",
                    message: `Root tree index path differs from manifest for ${record.path}`,
                    path: node.path,
                    docId: record.docId
                });
            }
        }
        for (const record of documents) {
            if (record.status === "ready" && !rootTreeDocumentIds.has(record.docId)) {
                errors.push({
                    code: "manifest_document_missing_from_root_tree",
                    message: `Ready document is missing from root tree: ${record.path}`,
                    path: record.path,
                    docId: record.docId
                });
            }
        }
    }
    // With no errors the manifest is known to be a version-1 object whose
    // documents are all objects, so the summaries below cannot throw on shape.
    const inspect = errors.length === 0 && manifest
        ? {
            version: 1,
            target: location.target,
            rootDir,
            outputDir: location.outputDir,
            manifestPath: location.manifestPath,
            rootTreePath: location.rootTreePath,
            generatedAt: manifest.generatedAt,
            counts: toIndexCounts(manifest),
            documents: documentSummaries(manifest)
        }
        : undefined;
    return {
        version: 1,
        target: location.target,
        ok: errors.length === 0,
        errors,
        warnings,
        inspect
    };
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import http from "node:http";
|
|
2
|
+
import { LlmClient } from "./folder-index/types";
|
|
3
|
+
import { InspectIndexResult, ValidateIndexResult } from "./sdk";
|
|
4
|
+
/**
 * Options bag accepted by `startServe`; every field is optional.
 *
 * NOTE(review): only the type declaration is visible here, so the field notes
 * below are inferred from names and types — confirm against the serve
 * implementation before relying on them.
 */
export type ServeOptions = {
    // presumably serves every configured source rather than a single one — TODO confirm
    allSources?: boolean;
    apiKey?: string;
    // presumably the token clients must present to endpoints flagged `authRequired` — TODO confirm
    authToken?: string;
    baseUrl?: string;
    configPath?: string;
    env?: NodeJS.ProcessEnv;
    host?: string;
    llmClient?: LlmClient;
    model?: string;
    port?: number;
    // Accepts either a single source or a list of sources.
    source?: string | string[];
    target?: string;
};
|
|
18
|
+
/**
 * Handle that `startServe` resolves to.
 *
 * Exposes the underlying `node:http` server together with `reload`, which
 * resolves to a `ServeIndexesResult`, and `close`, which resolves with no value.
 */
export type ServeHandle = {
    url: string;
    host: string;
    port: number;
    server: http.Server;
    reload: () => Promise<ServeIndexesResult>;
    close: () => Promise<void>;
};
|
|
26
|
+
/**
 * Per-index entry of `ServeIndexesResult.indexes`.
 *
 * `counts` reuses the shape of `InspectIndexResult["counts"]`; `errors` and
 * `warnings` reuse the issue lists of `ValidateIndexResult`. `source`,
 * `generatedAt` and `counts` may be absent.
 */
export type ServeIndexSummary = {
    source?: string;
    target: string;
    ok: boolean;
    generatedAt?: string;
    counts?: InspectIndexResult["counts"];
    errors: ValidateIndexResult["errors"];
    warnings: ValidateIndexResult["warnings"];
};
|
|
35
|
+
/**
 * Versioned list of index summaries; this is what `ServeHandle.reload`
 * resolves to. `version` is the literal `1`.
 */
export type ServeIndexesResult = {
    version: 1;
    indexes: ServeIndexSummary[];
};
|
|
39
|
+
/**
 * Versioned health report with an overall `status` of "ready", "degraded" or
 * "error" and aggregate index counters.
 *
 * NOTE(review): how `status` is derived from the counters is not visible in
 * this declaration.
 */
export type ServeHealthResult = {
    version: 1;
    ok: boolean;
    status: "ready" | "degraded" | "error";
    // presumably milliseconds since the server started — TODO confirm
    uptimeMs: number;
    // presumably a timestamp string; the format is not visible here — TODO confirm
    lastReloadAt: string;
    indexes: {
        total: number;
        ready: number;
        failed: number;
    };
};
|
|
51
|
+
/**
 * Versioned service descriptor: the fixed name "ragbox", the current health,
 * and a listing of endpoints.
 *
 * `status` has the same type as `ServeHealthResult["status"]`. Each endpoint
 * entry names its HTTP method, one of the five known paths, and whether
 * authentication is required.
 *
 * NOTE(review): presumably the payload served at `/` — confirm against the
 * serve implementation.
 */
export type ServeRootResult = {
    version: 1;
    name: "ragbox";
    status: ServeHealthResult["status"];
    ok: boolean;
    health: ServeHealthResult;
    endpoints: Array<{
        method: "GET" | "POST";
        path: "/" | "/health" | "/indexes" | "/query" | "/reload";
        authRequired: boolean;
        description: string;
    }>;
};
|
|
64
|
+
/**
 * Starts the server described by `options` and resolves to its `ServeHandle`.
 *
 * @param options Optional settings; see `ServeOptions`.
 * @returns A promise for the handle exposing the `http.Server`, `reload` and `close`.
 */
export declare function startServe(options?: ServeOptions): Promise<ServeHandle>;
|