@bndynet/ragbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +765 -0
- package/README.zh-CN.md +774 -0
- package/dist/src/advanced.d.ts +13 -0
- package/dist/src/advanced.js +29 -0
- package/dist/src/cli.d.ts +2 -0
- package/dist/src/cli.js +1013 -0
- package/dist/src/config-file.d.ts +69 -0
- package/dist/src/config-file.js +246 -0
- package/dist/src/folder-index/config.d.ts +2 -0
- package/dist/src/folder-index/config.js +56 -0
- package/dist/src/folder-index/hash.d.ts +1 -0
- package/dist/src/folder-index/hash.js +14 -0
- package/dist/src/folder-index/indexer.d.ts +2 -0
- package/dist/src/folder-index/indexer.js +154 -0
- package/dist/src/folder-index/llm-client.d.ts +3 -0
- package/dist/src/folder-index/llm-client.js +45 -0
- package/dist/src/folder-index/manifest.d.ts +17 -0
- package/dist/src/folder-index/manifest.js +158 -0
- package/dist/src/folder-index/multi-query.d.ts +45 -0
- package/dist/src/folder-index/multi-query.js +109 -0
- package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
- package/dist/src/folder-index/pageindex-runner.js +218 -0
- package/dist/src/folder-index/path-utils.d.ts +5 -0
- package/dist/src/folder-index/path-utils.js +33 -0
- package/dist/src/folder-index/query.d.ts +19 -0
- package/dist/src/folder-index/query.js +597 -0
- package/dist/src/folder-index/queue.d.ts +1 -0
- package/dist/src/folder-index/queue.js +18 -0
- package/dist/src/folder-index/root-tree.d.ts +3 -0
- package/dist/src/folder-index/root-tree.js +82 -0
- package/dist/src/folder-index/scan.d.ts +14 -0
- package/dist/src/folder-index/scan.js +152 -0
- package/dist/src/folder-index/types.d.ts +368 -0
- package/dist/src/folder-index/types.js +2 -0
- package/dist/src/folder-index/watch.d.ts +17 -0
- package/dist/src/folder-index/watch.js +550 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.js +45 -0
- package/dist/src/sdk.d.ts +101 -0
- package/dist/src/sdk.js +352 -0
- package/dist/src/serve.d.ts +64 -0
- package/dist/src/serve.js +466 -0
- package/dist/src/setup-pageindex.d.ts +30 -0
- package/dist/src/setup-pageindex.js +184 -0
- package/package.json +43 -0
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.startServe = startServe;
|
|
7
|
+
const node_http_1 = __importDefault(require("node:http"));
|
|
8
|
+
const node_url_1 = require("node:url");
|
|
9
|
+
const config_file_1 = require("./config-file");
|
|
10
|
+
const multi_query_1 = require("./folder-index/multi-query");
|
|
11
|
+
const query_1 = require("./folder-index/query");
|
|
12
|
+
const sdk_1 = require("./sdk");
|
|
13
|
+
const DEFAULT_HOST = "127.0.0.1";
|
|
14
|
+
const DEFAULT_PORT = 8787;
|
|
15
|
+
const MAX_JSON_BODY_BYTES = 1024 * 1024;
|
|
16
|
+
/**
 * Error that carries the HTTP status and the machine-readable code the
 * request handler reports back to the client.
 */
class ServeHttpError extends Error {
    /** HTTP status code to respond with. */
    status;
    /** Short identifier placed in the JSON error payload. */
    code;
    /**
     * @param {number} status - HTTP status code for the response.
     * @param {string} code - Error identifier, e.g. "invalid_request".
     * @param {string} message - Human-readable description.
     */
    constructor(status, code, message) {
        super(message);
        Object.assign(this, { name: "ServeHttpError", status, code });
    }
}
|
|
26
|
+
/**
 * Shallow-merges the given objects left to right, skipping any property whose
 * value is `undefined` so it cannot overwrite an earlier defined value.
 *
 * @param {...object} values - Objects to merge; later ones win.
 * @returns {object} A new object containing the merged properties.
 */
function mergeDefined(...values) {
    return values.reduce((accumulator, candidate) => {
        Object.entries(candidate).forEach(([property, entry]) => {
            if (entry === undefined) {
                return;
            }
            accumulator[property] = entry;
        });
        return accumulator;
    }, {});
}
|
|
37
|
+
/**
 * Parses a TCP port given as text.
 *
 * The text must consist solely of decimal digits (surrounding whitespace is
 * tolerated). `Number.parseInt` alone would silently accept values such as
 * "8080abc" or "80.5", so the shape is validated before conversion.
 *
 * @param {string | undefined} value - Raw port text, e.g. from an env variable.
 * @param {number} fallback - Returned when `value` is empty or missing.
 * @returns {number} A port in the range 0-65535 (0 is accepted, as before).
 * @throws {Error} When `value` is not a whole number within that range.
 */
function parsePositivePort(value, fallback) {
    if (!value) {
        return fallback;
    }
    const text = String(value).trim();
    if (!/^\d+$/.test(text)) {
        throw new Error(`Invalid serve port: ${value}`);
    }
    const parsed = Number.parseInt(text, 10);
    if (parsed > 65535) {
        throw new Error(`Invalid serve port: ${value}`);
    }
    return parsed;
}
|
|
47
|
+
/**
 * Normalizes a source selection into a list of trimmed, non-empty names.
 *
 * @param {string | string[] | undefined | null} source - Either an array of
 *   names or a comma-separated string; nullish is treated as an empty string.
 * @returns {string[]} The cleaned source names.
 */
function parseSourceNames(source) {
    const rawNames = Array.isArray(source) ? source : (source ?? "").split(",");
    const trimmedNames = rawNames.map((entry) => entry.trim());
    return trimmedNames.filter(Boolean);
}
|
|
56
|
+
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for plain-object-like values.
 */
function isJsonObject(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
59
|
+
/**
 * Builds per-query options by layering serve-level overrides on top of the
 * options resolved from configuration. Overrides that are `undefined` do not
 * replace configured values (see `mergeDefined`).
 *
 * @param {object | undefined} configOptions - Options resolved from config.
 * @param {object} options - Serve options supplying the overrides.
 * @param {boolean | undefined} trace - Trace flag for this query.
 * @returns {object} The merged query options.
 */
function queryOptionsFromServeOptions(configOptions, options, trace) {
    const configured = { ...configOptions };
    const { apiKey, baseUrl, env, llmClient, model } = options;
    const overrides = { apiKey, baseUrl, env, llmClient, model, trace };
    return mergeDefined(configured, overrides);
}
|
|
71
|
+
/**
 * Resolves the configuration without selecting a source and converts its
 * page-index options into query options.
 *
 * @param {object} options - Serve options; `configPath` locates the config.
 * @param {boolean | undefined} trace - Trace flag for this query.
 * @returns {Promise<object>} Query options for an ad-hoc target.
 */
async function loadBaseQueryOptions(options, trace) {
    const { configPath } = options;
    const { pageIndexOptions } = await (0, config_file_1.resolveRagboxConfig)({ configPath });
    return queryOptionsFromServeOptions(pageIndexOptions, options, trace);
}
|
|
77
|
+
/**
 * Works out which index targets a query (or the server default) should use.
 *
 * Resolution order:
 *   1. an explicit `request.target`;
 *   2. all configured sources, when "all sources" is in effect;
 *   3. the named sources from the request, else from the serve options;
 *   4. every configured source when nothing was named and no serve-level
 *      target exists;
 *   5. the serve-level `options.target`.
 *
 * A source named explicitly in the request takes precedence over a
 * server-level `allSources` default; previously the server default silently
 * replaced the requested source list with every configured source.
 *
 * @param {object} options - Serve options (configPath, source, allSources, target, ...).
 * @param {object} [request] - Per-request selection: target, source, allSources, trace.
 * @returns {Promise<Array<{source?: string, target: string, options: object}>>}
 * @throws {ServeHttpError} 400 when no target can be determined or a source
 *   defines neither outputDir nor rootDir.
 */
async function resolveTargets(options, request = {}) {
    if (request.target) {
        return [
            {
                target: request.target,
                options: await loadBaseQueryOptions(options, request.trace)
            }
        ];
    }
    // Only treat the request as naming sources when it yields at least one name.
    const requestNamesSources = request.source !== undefined
        && request.source !== null
        && parseSourceNames(request.source).length > 0;
    const allSources = request.allSources ?? (requestNamesSources ? false : options.allSources);
    let sourceNames = parseSourceNames(request.source ?? options.source);
    if (allSources) {
        const { config } = await (0, config_file_1.readRagboxConfig)(options.configPath);
        sourceNames = (0, config_file_1.listRagboxConfigSourceNames)(config);
        if (sourceNames.length === 0) {
            throw new ServeHttpError(400, "invalid_request", "No configured sources found. Add docs or sources to ragbox.config.json.");
        }
    }
    if (sourceNames.length === 0 && !options.target) {
        // Nothing was named and there is no fallback target: use whatever is configured.
        const { config } = await (0, config_file_1.readRagboxConfig)(options.configPath);
        const configuredSourceNames = (0, config_file_1.listRagboxConfigSourceNames)(config);
        if (configuredSourceNames.length > 0) {
            sourceNames = configuredSourceNames;
        }
    }
    if (sourceNames.length > 0) {
        const targets = [];
        for (const sourceName of sourceNames) {
            const resolved = await (0, config_file_1.resolveRagboxConfig)({
                configPath: options.configPath,
                source: sourceName
            });
            const target = resolved.pageIndexOptions.outputDir ?? resolved.rootDir;
            if (!target) {
                throw new ServeHttpError(400, "invalid_request", `Source does not define outputDir or rootDir: ${sourceName}`);
            }
            targets.push({
                source: sourceName,
                target,
                options: queryOptionsFromServeOptions(resolved.pageIndexOptions, options, request.trace)
            });
        }
        return targets;
    }
    if (options.target) {
        return [
            {
                target: options.target,
                options: await loadBaseQueryOptions(options, request.trace)
            }
        ];
    }
    throw new ServeHttpError(400, "invalid_request", "Missing query target. Pass a target, --source, --all-sources, or configure sources.");
}
|
|
131
|
+
/**
 * Validates every target in order and collects the results into a versioned
 * snapshot.
 *
 * @param {Array<{source?: string, target: string}>} targets - Targets to validate.
 * @returns {Promise<{version: 1, indexes: object[]}>} One entry per target
 *   with its validation outcome.
 */
async function buildIndexes(targets) {
    const snapshot = { version: 1, indexes: [] };
    for (const { source, target } of targets) {
        const { ok, inspect, errors, warnings } = await (0, sdk_1.validateIndex)(target);
        snapshot.indexes.push({
            source,
            target,
            ok,
            generatedAt: inspect?.generatedAt,
            counts: inspect?.counts,
            errors,
            warnings
        });
    }
    return snapshot;
}
|
|
150
|
+
/**
 * Summarizes an index snapshot into a health report.
 *
 * Status is "ready" when there is at least one index and none failed,
 * "degraded" when some but not all are usable, and "error" otherwise
 * (including when there are no indexes at all).
 *
 * @param {number} startedAt - Server start time in epoch milliseconds.
 * @param {string} lastReloadAt - Timestamp of the last snapshot reload.
 * @param {{indexes: Array<{ok: boolean}>}} indexes - Snapshot to summarize.
 * @returns {object} Versioned health payload.
 */
function healthFromIndexes(startedAt, lastReloadAt, indexes) {
    const total = indexes.indexes.length;
    let ready = 0;
    for (const entry of indexes.indexes) {
        if (entry.ok) {
            ready += 1;
        }
    }
    const failed = total - ready;
    const ok = total > 0 && failed === 0;
    let status = "error";
    if (ok) {
        status = "ready";
    }
    else if (ready > 0) {
        status = "degraded";
    }
    return {
        version: 1,
        ok,
        status,
        uptimeMs: Date.now() - startedAt,
        lastReloadAt,
        indexes: { total, ready, failed }
    };
}
|
|
168
|
+
/**
 * Builds the payload served at "/": service identity, current health, and
 * the list of endpoints with their auth requirements.
 *
 * @param {{status: string, ok: boolean}} health - Current health report.
 * @param {boolean} authRequired - Whether protected endpoints need a token.
 * @returns {object} Versioned root payload.
 */
function rootFromHealth(health, authRequired) {
    const endpoint = (method, path, needsAuth, description) => ({
        method,
        path,
        authRequired: needsAuth,
        description
    });
    const endpoints = [
        endpoint("GET", "/", false, "Service entrypoint and endpoint list."),
        endpoint("GET", "/health", false, "Readiness and index health summary."),
        endpoint("GET", "/indexes", authRequired, "Validated index snapshot."),
        endpoint("POST", "/query", authRequired, "Ask questions about the configured knowledge sources."),
        endpoint("POST", "/reload", authRequired, "Reload configured index snapshots.")
    ];
    const { status, ok } = health;
    return {
        version: 1,
        name: "ragbox",
        status,
        ok,
        health,
        endpoints
    };
}
|
|
209
|
+
/**
 * Sends a pretty-printed JSON response terminated by a newline.
 *
 * @param {object} response - HTTP response to write to.
 * @param {number} status - HTTP status code.
 * @param {unknown} value - Value to serialize as the body.
 */
function writeJson(response, status, value) {
    const headers = { "Content-Type": "application/json; charset=utf-8" };
    response.writeHead(status, headers);
    // Serialize after the head is written, matching the established call order.
    const payload = JSON.stringify(value, null, 2);
    response.end(payload + "\n");
}
|
|
215
|
+
/**
 * Sends a versioned JSON error envelope.
 *
 * @param {object} response - HTTP response to write to.
 * @param {number} status - HTTP status code.
 * @param {string} code - Machine-readable error identifier.
 * @param {string} message - Human-readable description.
 */
function writeError(response, status, code, message) {
    const error = { code, message };
    writeJson(response, status, { version: 1, error });
}
|
|
224
|
+
/**
 * Responds with a 405 error for an unsupported HTTP method.
 *
 * @param {object} response - HTTP response to write to.
 */
function methodNotAllowed(response) {
    const status = 405;
    writeError(response, status, "method_not_allowed", "Method not allowed.");
}
|
|
227
|
+
/**
 * Responds with a 404 error for an unknown route.
 *
 * @param {object} response - HTTP response to write to.
 */
function notFound(response) {
    const status = 404;
    writeError(response, status, "not_found", "Route not found.");
}
|
|
230
|
+
/**
 * Reads a request body and parses it as a JSON object.
 *
 * Chunks are kept as raw bytes and decoded once at the end. Decoding each
 * chunk separately corrupts any multi-byte UTF-8 character that straddles a
 * chunk boundary.
 *
 * @param {object} request - Incoming request stream (emits data/end/error).
 * @returns {Promise<object>} The parsed object; an empty or whitespace-only
 *   body yields `{}`.
 * @throws {ServeHttpError} 400 when the body exceeds MAX_JSON_BODY_BYTES, is
 *   not valid JSON, or is not a JSON object. Stream errors reject as-is.
 */
function readJsonBody(request) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        let bytes = 0;
        let overflowed = false;
        request.on("data", (chunk) => {
            if (overflowed) {
                // Already rejected; do not keep buffering late chunks.
                return;
            }
            const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
            bytes += buffer.length;
            if (bytes > MAX_JSON_BODY_BYTES) {
                overflowed = true;
                chunks.length = 0;
                reject(new ServeHttpError(400, "invalid_request", "JSON body is too large."));
                // NOTE(review): destroying the request presumably tears down the
                // socket, so the 400 may never reach the client — confirm.
                request.destroy();
                return;
            }
            chunks.push(buffer);
        });
        request.on("end", () => {
            if (overflowed) {
                return;
            }
            try {
                const body = Buffer.concat(chunks, bytes).toString("utf8");
                const parsed = body.trim() ? JSON.parse(body) : {};
                if (!isJsonObject(parsed)) {
                    reject(new ServeHttpError(400, "invalid_request", "Expected a JSON object."));
                    return;
                }
                resolve(parsed);
            }
            catch (error) {
                reject(new ServeHttpError(400, "invalid_request", `Invalid JSON body: ${error instanceof Error ? error.message : String(error)}`));
            }
        });
        request.on("error", reject);
    });
}
|
|
259
|
+
/**
 * Returns the request's Authorization header, taking the first entry when
 * the header is presented as an array.
 *
 * @param {object} request - Incoming request with a `headers` map.
 * @returns {string | undefined} The header value, if any.
 */
function authorizationHeader(request) {
    const { authorization } = request.headers;
    if (Array.isArray(authorization)) {
        return authorization[0];
    }
    return authorization;
}
|
|
263
|
+
/**
 * Enforces bearer-token authentication when a token is configured.
 *
 * The presented header and the expected value are hashed to fixed-length
 * digests and compared with `crypto.timingSafeEqual`, so the comparison time
 * does not depend on how many leading characters match. A plain `!==`
 * comparison does not give that guarantee.
 *
 * @param {object} request - Incoming request.
 * @param {string | undefined} authToken - Expected token; falsy disables auth.
 * @throws {ServeHttpError} 401 when the header is missing or does not match.
 */
function assertAuthorized(request, authToken) {
    if (!authToken) {
        return;
    }
    // Loaded here so this block does not depend on the file's top-level imports.
    const { createHash, timingSafeEqual } = require("node:crypto");
    const digest = (text) => createHash("sha256").update(text, "utf8").digest();
    const presented = authorizationHeader(request);
    const expected = `Bearer ${authToken}`;
    const matches = typeof presented === "string"
        && timingSafeEqual(digest(presented), digest(expected));
    if (!matches) {
        throw new ServeHttpError(401, "unauthorized", "Missing or invalid bearer token.");
    }
}
|
|
271
|
+
/**
 * Validates the `source` field of a query request.
 *
 * @param {unknown} value - Raw field value from the request body.
 * @returns {string | string[] | undefined} The value unchanged when it is a
 *   string, an array of strings, or absent.
 * @throws {ServeHttpError} 400 for any other shape.
 */
function requestSource(value) {
    if (value === undefined) {
        return undefined;
    }
    const isString = (item) => typeof item === "string";
    const acceptable = isString(value) || (Array.isArray(value) && value.every(isString));
    if (!acceptable) {
        throw new ServeHttpError(400, "invalid_request", "source must be a string or string array.");
    }
    return value;
}
|
|
283
|
+
/**
 * Validates the `target` field of a query request.
 *
 * @param {unknown} value - Raw field value from the request body.
 * @returns {string | undefined} The value unchanged when it is absent or a
 *   string containing non-whitespace characters.
 * @throws {ServeHttpError} 400 for any other value.
 */
function requestTarget(value) {
    if (value === undefined) {
        return undefined;
    }
    const usable = typeof value === "string" && value.trim().length > 0;
    if (!usable) {
        throw new ServeHttpError(400, "invalid_request", "target must be a non-empty string.");
    }
    return value;
}
|
|
292
|
+
/**
 * Validates an optional boolean field of a query request.
 *
 * @param {unknown} value - Raw field value from the request body.
 * @param {string} name - Field name used in the error message.
 * @returns {boolean | undefined} The value unchanged when absent or boolean.
 * @throws {ServeHttpError} 400 for any other type.
 */
function requestBoolean(value, name) {
    switch (typeof value) {
        case "undefined":
            return undefined;
        case "boolean":
            return value;
        default:
            throw new ServeHttpError(400, "invalid_request", `${name} must be a boolean.`);
    }
}
|
|
301
|
+
/**
 * Maps a thrown value to the HTTP status, error code, and message to report.
 *
 * - `ServeHttpError` keeps its own status and code.
 * - A `QueryStageError` from the "select-documents", "select-nodes", or
 *   "answer" stage, or any message matching the LLM failure pattern, is
 *   reported as 502 "upstream_error".
 * - Everything else is reported as 500 "internal_error".
 *
 * @param {unknown} error - The thrown value.
 * @returns {{status: number, code: string, message: string}}
 */
function statusForThrownError(error) {
    if (error instanceof ServeHttpError) {
        const { status, code, message: ownMessage } = error;
        return { status, code, message: ownMessage };
    }
    const message = error instanceof Error ? error.message : String(error);
    const upstreamStages = ["select-documents", "select-nodes", "answer"];
    const failedInUpstreamStage = error instanceof query_1.QueryStageError
        && upstreamStages.includes(error.stage);
    const upstream = failedInUpstreamStage
        || /LLM request failed|OPENAI_API_KEY|chat completions/i.test(message);
    return {
        status: upstream ? 502 : 500,
        code: upstream ? "upstream_error" : "internal_error",
        message
    };
}
|
|
331
|
+
/**
 * Runs a question against the resolved targets: a single target goes through
 * the folder query, several targets go through the multi-index query.
 *
 * @param {Array<{source?: string, target: string, options?: object}>} targets
 * @param {string} question - The question to ask.
 * @param {object} options - Serve options, used to load base query options
 *   when the first target carries none.
 * @returns {Promise<object>} The query result.
 * @throws {ServeHttpError} 400 when `targets` is empty.
 */
async function queryTargets(targets, question, options) {
    switch (targets.length) {
        case 0:
            throw new ServeHttpError(400, "invalid_request", "At least one query source is required.");
        case 1: {
            const only = targets[0];
            return await (0, query_1.queryFolder)(only.target, question, only.options);
        }
        default:
            break;
    }
    const multiTargets = targets.map(({ source, target, options: targetOptions }) => ({
        name: source ?? target,
        target,
        options: targetOptions
    }));
    const sharedOptions = targets[0].options ?? await loadBaseQueryOptions(options);
    return await (0, multi_query_1.queryMultipleIndexes)(multiTargets, question, sharedOptions);
}
|
|
345
|
+
/**
 * Starts the HTTP server that exposes health, index, reload, and query
 * endpoints.
 *
 * Host, port, and auth token come from `options`, then from the
 * RAGBOX_SERVE_HOST / RAGBOX_SERVE_PORT / RAGBOX_SERVE_TOKEN environment
 * variables, then from the built-in defaults. "/" and "/health" are always
 * public; every other route (including unknown ones) is checked against the
 * bearer token first when one is configured.
 *
 * Changes from the previous implementation:
 * - the returned `url` brackets IPv6 addresses so it is a valid URL;
 * - `reload` only commits new targets once their indexes were built, so a
 *   failed reload leaves the previous snapshot fully intact;
 * - the error handler does not attempt a second response once headers were
 *   already sent.
 *
 * @param {object} [options] - Serve options (host, port, authToken, env,
 *   configPath, source, allSources, target, and query overrides).
 * @returns {Promise<{url: string, host: string, port: number, server: object,
 *   reload: Function, close: Function}>} Handle for the running server.
 * @throws {Error} When targets cannot be resolved or the server cannot listen.
 */
async function startServe(options = {}) {
    const env = options.env ?? process.env;
    const host = options.host ?? env.RAGBOX_SERVE_HOST ?? DEFAULT_HOST;
    const port = options.port ?? parsePositivePort(env.RAGBOX_SERVE_PORT, DEFAULT_PORT);
    const authToken = options.authToken ?? env.RAGBOX_SERVE_TOKEN;
    const serverOptions = {
        ...options,
        authToken,
        env,
        host,
        port
    };
    const startedAt = Date.now();
    let defaultTargets = await resolveTargets(serverOptions);
    let lastReloadAt = new Date().toISOString();
    let indexes = await buildIndexes(defaultTargets);
    // IPv6 literals must be wrapped in brackets inside a URL authority.
    const urlHost = (name) => (name.includes(":") ? `[${name}]` : name);
    async function reload() {
        // Build everything first, then commit, so a failure changes nothing.
        const nextTargets = await resolveTargets(serverOptions);
        const nextIndexes = await buildIndexes(nextTargets);
        defaultTargets = nextTargets;
        indexes = nextIndexes;
        lastReloadAt = new Date().toISOString();
        return indexes;
    }
    const server = node_http_1.default.createServer((request, response) => {
        void (async () => {
            const requestUrl = new node_url_1.URL(request.url ?? "/", `http://${request.headers.host ?? `${urlHost(host)}:${port}`}`);
            // Trailing slashes are ignored; an empty path means the root route.
            const route = requestUrl.pathname.replace(/\/+$/, "") || "/";
            if (route === "/") {
                if (request.method !== "GET") {
                    methodNotAllowed(response);
                    return;
                }
                const health = healthFromIndexes(startedAt, lastReloadAt, indexes);
                writeJson(response, 200, rootFromHealth(health, Boolean(authToken)));
                return;
            }
            if (route === "/health") {
                if (request.method !== "GET") {
                    methodNotAllowed(response);
                    return;
                }
                const health = healthFromIndexes(startedAt, lastReloadAt, indexes);
                writeJson(response, health.ok ? 200 : 503, health);
                return;
            }
            // Everything below this point requires authorization.
            assertAuthorized(request, authToken);
            if (route === "/indexes") {
                if (request.method !== "GET") {
                    methodNotAllowed(response);
                    return;
                }
                writeJson(response, 200, indexes);
                return;
            }
            if (route === "/reload") {
                if (request.method !== "POST") {
                    methodNotAllowed(response);
                    return;
                }
                writeJson(response, 200, await reload());
                return;
            }
            if (route === "/query") {
                if (request.method !== "POST") {
                    methodNotAllowed(response);
                    return;
                }
                const body = await readJsonBody(request);
                const question = typeof body.question === "string" && body.question.trim() ? body.question : undefined;
                if (!question) {
                    throw new ServeHttpError(400, "invalid_request", "question must be a non-empty string.");
                }
                const target = requestTarget(body.target);
                const source = requestSource(body.source);
                const allSources = requestBoolean(body.allSources, "allSources");
                const trace = requestBoolean(body.trace, "trace");
                if (target && (source || allSources)) {
                    throw new ServeHttpError(400, "invalid_request", "target cannot be combined with source or allSources.");
                }
                // Without an explicit selection, reuse the targets resolved at startup/reload.
                const targets = target || source || allSources
                    ? await resolveTargets(serverOptions, { allSources, source, target, trace })
                    : defaultTargets.map((resolvedTarget) => ({
                        ...resolvedTarget,
                        options: queryOptionsFromServeOptions(resolvedTarget.options, serverOptions, trace)
                    }));
                writeJson(response, 200, await queryTargets(targets, question, serverOptions));
                return;
            }
            notFound(response);
        })().catch((error) => {
            if (response.headersSent) {
                // A response is already underway; just make sure it is finished.
                if (!response.writableEnded) {
                    response.end();
                }
                return;
            }
            const result = statusForThrownError(error);
            writeError(response, result.status, result.code, result.message);
        });
    });
    await new Promise((resolve, reject) => {
        server.once("error", reject);
        server.listen(port, host, () => {
            server.off("error", reject);
            resolve();
        });
    });
    const address = server.address();
    const resolvedHost = address.address === "::" ? "localhost" : address.address;
    const resolvedPort = address.port;
    return {
        url: `http://${urlHost(resolvedHost)}:${resolvedPort}`,
        host: resolvedHost,
        port: resolvedPort,
        server,
        reload,
        close: async () => {
            await new Promise((resolve, reject) => {
                server.close((error) => {
                    if (error) {
                        reject(error);
                        return;
                    }
                    resolve();
                });
            });
        }
    };
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Options accepted by `setupPageIndex`. Every field is optional. */
export type SetupPageIndexOptions = {
    /** Config file location handed to the config writer. */
    configPath?: string;
    /** Base directory for relative paths. */
    cwd?: string;
    /** Directory of the PageIndex checkout, resolved against `cwd`. */
    dir?: string;
    /** Whether to make sure the project's .gitignore has the ragbox entry. */
    gitignore?: boolean;
    /** Whether to create a virtual environment and install Python dependencies. */
    install?: boolean;
    /** Python executable used to create the virtual environment. */
    python?: string;
    /** Git ref to check out after cloning. */
    ref?: string;
    /** Git repository to clone PageIndex from. */
    repo?: string;
    /** Whether to write the resulting paths into the config file. */
    writeConfig?: boolean;
};
|
|
12
|
+
/** Outcome reported by `setupPageIndex`. */
export type SetupPageIndexResult = {
    /** Payload schema version. */
    version: 1;
    /** Identifies the command that produced this result. */
    command: "setup pageindex";
    /** Resolved directory of the PageIndex checkout. */
    pageIndexDir: string;
    /** Path to the PageIndex entry script inside `pageIndexDir`. */
    cliPath: string;
    /** Python executable inside the virtual environment, when dependencies were installed. */
    pythonPath?: string;
    /** Virtual environment directory, when dependencies were installed. */
    venvDir?: string;
    /** Config file that was written, when config writing was enabled. */
    configPath?: string;
    /** The .gitignore that was checked, when gitignore handling was enabled. */
    gitignorePath?: string;
    /** What the setup run actually did. */
    actions: {
        /** Ref that was checked out after cloning, if one was requested. */
        checkedOutRef?: string;
        /** Whether the repository was cloned during this run. */
        cloned: boolean;
        /** Whether dependency installation was performed. */
        installedDependencies: boolean;
        /** Whether an existing checkout was reused instead of cloning. */
        reusedExisting: boolean;
        /** Whether .gitignore was modified. */
        updatedGitignore: boolean;
        /** Whether a config file was written. */
        wroteConfig: boolean;
    };
};
|
|
30
|
+
/**
 * Prepares a local PageIndex checkout and reports what was done.
 *
 * @param options - Optional overrides for paths, repository, and which steps run.
 * @returns A promise for the setup result.
 */
export declare function setupPageIndex(options?: SetupPageIndexOptions): Promise<SetupPageIndexResult>;
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.setupPageIndex = setupPageIndex;
|
|
7
|
+
const node_child_process_1 = require("node:child_process");
|
|
8
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
9
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
10
|
+
const config_file_1 = require("./config-file");
|
|
11
|
+
const MAX_CAPTURED_OUTPUT = 32 * 1024;
|
|
12
|
+
const DEFAULT_PAGEINDEX_REPO = "https://github.com/VectifyAI/PageIndex.git";
|
|
13
|
+
const DEFAULT_PAGEINDEX_DIR = ".ragbox/PageIndex";
|
|
14
|
+
const DEFAULT_PAGEINDEX_VENV_DIR = ".ragbox/pageindex-venv";
|
|
15
|
+
const DEFAULT_PYTHON = "python3";
|
|
16
|
+
const GITIGNORE_ENTRY = ".ragbox/";
|
|
17
|
+
/**
 * Appends a chunk of process output to the captured text, keeping only the
 * most recent MAX_CAPTURED_OUTPUT characters.
 *
 * @param {string} current - Text captured so far.
 * @param {Buffer} chunk - Newly received output.
 * @returns {string} The updated, possibly truncated, capture.
 */
function appendCapturedOutput(current, chunk) {
    const combined = current + chunk.toString("utf8");
    if (combined.length <= MAX_CAPTURED_OUTPUT) {
        return combined;
    }
    return combined.slice(-MAX_CAPTURED_OUTPUT);
}
|
|
21
|
+
/**
 * Creates an Error for a failed command, appending whatever output was
 * captured on stdout and stderr.
 *
 * @param {string} message - Summary of the failure.
 * @param {string} stdout - Captured standard output.
 * @param {string} stderr - Captured standard error.
 * @returns {Error} Error whose message includes the non-empty output sections.
 */
function commandFailure(message, stdout, stderr) {
    const sections = [];
    const out = stdout.trim();
    const err = stderr.trim();
    if (out) {
        sections.push(`STDOUT:\n${out}`);
    }
    if (err) {
        sections.push(`STDERR:\n${err}`);
    }
    if (sections.length === 0) {
        return new Error(message);
    }
    return new Error(`${message}\n${sections.join("\n")}`);
}
|
|
28
|
+
/**
 * Runs a command to completion, capturing its output for error reporting.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments for the executable.
 * @param {object} options - `cwd` and `env` for the child (env is layered
 *   over the current process environment), `failureMessage` for a non-zero
 *   exit, and `missingMessage` for an executable that cannot be found.
 * @returns {Promise<void>} Resolves when the command exits with code 0.
 * @throws {Error} On spawn errors or a non-zero exit code.
 */
async function runCommand(command, args, options) {
    await new Promise((resolve, reject) => {
        const captured = { stdout: "", stderr: "" };
        const child = (0, node_child_process_1.spawn)(command, args, {
            cwd: options.cwd,
            env: { ...process.env, ...options.env },
            stdio: ["ignore", "pipe", "pipe"]
        });
        for (const streamName of ["stdout", "stderr"]) {
            child[streamName].on("data", (chunk) => {
                captured[streamName] = appendCapturedOutput(captured[streamName], chunk);
            });
        }
        child.on("error", (error) => {
            if (error.code === "ENOENT") {
                reject(new Error(options.missingMessage));
                return;
            }
            reject(error);
        });
        child.on("close", (code) => {
            if (code !== 0) {
                reject(commandFailure(`${options.failureMessage} (exit code ${code ?? "unknown"})`, captured.stdout, captured.stderr));
                return;
            }
            resolve();
        });
    });
}
|
|
58
|
+
/**
 * Reports whether a path can be accessed.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when access succeeds, `false` on any failure.
 */
async function pathExists(filePath) {
    return promises_1.default.access(filePath).then(() => true, () => false);
}
|
|
67
|
+
/**
 * Reports whether the directory contains a regular file named
 * "run_pageindex.py".
 *
 * @param {string} pageIndexDir - Directory to inspect.
 * @returns {Promise<boolean>} `false` when the file is missing, is not a
 *   regular file, or cannot be inspected.
 */
async function hasPageIndexEntrypoint(pageIndexDir) {
    const entrypoint = node_path_1.default.join(pageIndexDir, "run_pageindex.py");
    return promises_1.default.stat(entrypoint).then((info) => info.isFile(), () => false);
}
|
|
76
|
+
/**
 * Returns the location of the Python executable inside a virtual environment
 * for the current platform.
 *
 * @param {string} venvDir - Virtual environment directory.
 * @returns {string} "Scripts/python.exe" under `venvDir` on win32,
 *   "bin/python" elsewhere.
 */
function venvPythonPath(venvDir) {
    const segments = process.platform === "win32" ? ["Scripts", "python.exe"] : ["bin", "python"];
    return node_path_1.default.join(venvDir, ...segments);
}
|
|
79
|
+
/**
 * Makes sure the .gitignore in `cwd` lists GITIGNORE_ENTRY, creating the
 * file when it does not exist.
 *
 * @param {string} cwd - Directory containing (or to contain) the .gitignore.
 * @returns {Promise<{gitignorePath: string, updated: boolean}>} The file path
 *   and whether it was written.
 * @throws Any read error other than the file being absent, and write errors.
 */
async function ensureGitignoreEntry(cwd) {
    const gitignorePath = node_path_1.default.join(cwd, ".gitignore");
    const current = await promises_1.default.readFile(gitignorePath, "utf8").catch((error) => {
        // A missing file is treated as empty content; anything else is fatal.
        if (error.code === "ENOENT") {
            return "";
        }
        throw error;
    });
    const alreadyListed = current
        .split(/\r?\n/)
        .some((line) => line.trim() === GITIGNORE_ENTRY);
    if (alreadyListed) {
        return { gitignorePath, updated: false };
    }
    // Start the entry on its own line when the file lacks a trailing newline.
    const needsNewline = current !== "" && !current.endsWith("\n");
    const nextContent = `${current}${needsNewline ? "\n" : ""}${GITIGNORE_ENTRY}\n`;
    await promises_1.default.writeFile(gitignorePath, nextContent, "utf8");
    return { gitignorePath, updated: true };
}
|
|
99
|
+
/**
 * Makes sure a PageIndex checkout exists at `pageIndexDir`.
 *
 * An existing directory is reused as-is when it contains the entry script;
 * no clone or checkout is performed in that case. Otherwise the repository
 * is cloned and, if given, `ref` is checked out.
 *
 * @param {string} pageIndexDir - Destination directory.
 * @param {string} repo - Repository to clone.
 * @param {string} [ref] - Ref to check out after cloning.
 * @param {object} [env] - Extra environment variables for git.
 * @returns {Promise<{checkedOutRef?: string, cloned: boolean, reusedExisting: boolean}>}
 * @throws {Error} When the directory exists without the entry script, when
 *   git fails, or when the clone lacks the entry script.
 */
async function ensurePageIndexSource(pageIndexDir, repo, ref, env) {
    const directoryExists = await pathExists(pageIndexDir);
    if (directoryExists) {
        const usable = await hasPageIndexEntrypoint(pageIndexDir);
        if (usable) {
            return { cloned: false, reusedExisting: true };
        }
        throw new Error(`PageIndex directory already exists but run_pageindex.py was not found: ${pageIndexDir}. Pass --dir to use another location or remove the existing directory.`);
    }
    const parentDir = node_path_1.default.dirname(pageIndexDir);
    await promises_1.default.mkdir(parentDir, { recursive: true });
    const cloneMessages = {
        env,
        failureMessage: `Failed to clone PageIndex from ${repo}`,
        missingMessage: "git is required to install PageIndex. Install git or pass --dir pointing to an existing PageIndex checkout."
    };
    await runCommand("git", ["clone", repo, pageIndexDir], cloneMessages);
    if (ref) {
        const checkoutMessages = {
            env,
            failureMessage: `Failed to checkout PageIndex ref ${ref}`,
            missingMessage: "git is required to checkout a PageIndex ref."
        };
        await runCommand("git", ["-C", pageIndexDir, "checkout", ref], checkoutMessages);
    }
    const cloneIsUsable = await hasPageIndexEntrypoint(pageIndexDir);
    if (!cloneIsUsable) {
        throw new Error(`PageIndex repo does not contain run_pageindex.py: ${pageIndexDir}`);
    }
    return { checkedOutRef: ref, cloned: true, reusedExisting: false };
}
|
|
124
|
+
/**
 * Creates a virtual environment and installs the PageIndex requirements
 * into it.
 *
 * @param {string} pageIndexDir - PageIndex checkout containing requirements.txt.
 * @param {string} venvDir - Directory for the virtual environment.
 * @param {string} python - Python executable used to create the environment.
 * @param {object} [env] - Extra environment variables for the commands.
 * @returns {Promise<string>} Path to the Python executable inside the environment.
 * @throws {Error} When requirements.txt is missing or a command fails.
 */
async function installPageIndexDependencies(pageIndexDir, venvDir, python, env) {
    const requirementsPath = node_path_1.default.join(pageIndexDir, "requirements.txt");
    const requirementsPresent = await pathExists(requirementsPath);
    if (!requirementsPresent) {
        throw new Error(`PageIndex requirements.txt was not found: ${requirementsPath}`);
    }
    const venvParent = node_path_1.default.dirname(venvDir);
    await promises_1.default.mkdir(venvParent, { recursive: true });
    const createVenv = {
        env,
        failureMessage: `Failed to create PageIndex virtual environment at ${venvDir}`,
        missingMessage: `Python executable was not found: ${python}`
    };
    await runCommand(python, ["-m", "venv", venvDir], createVenv);
    const pythonPath = venvPythonPath(venvDir);
    const installRequirements = {
        env,
        failureMessage: "Failed to install PageIndex Python dependencies",
        missingMessage: `Virtual environment Python was not found after creation: ${pythonPath}`
    };
    await runCommand(pythonPath, ["-m", "pip", "install", "--upgrade", "-r", requirementsPath], installRequirements);
    return pythonPath;
}
|
|
143
|
+
/**
 * Prepares a local PageIndex installation: obtains the source, optionally
 * installs its Python dependencies into a virtual environment, optionally
 * records the resulting paths in the config file, and optionally adds the
 * ragbox entry to .gitignore.
 *
 * Installation, config writing, and .gitignore handling are all enabled
 * unless explicitly set otherwise.
 *
 * @param {object} [options] - cwd, dir, repo, ref, python, configPath, and
 *   the install / writeConfig / gitignore switches.
 * @returns {Promise<object>} Versioned result describing paths and actions taken.
 * @throws {Error} When any of the enabled steps fails.
 */
async function setupPageIndex(options = {}) {
    const {
        dir = DEFAULT_PAGEINDEX_DIR,
        repo = DEFAULT_PAGEINDEX_REPO,
        python = DEFAULT_PYTHON
    } = {
        dir: options.dir ?? undefined,
        repo: options.repo ?? undefined,
        python: options.python ?? undefined
    };
    const install = options.install ?? true;
    const writeConfig = options.writeConfig ?? true;
    const updateGitignore = options.gitignore ?? true;
    const cwd = node_path_1.default.resolve(options.cwd ?? process.cwd());
    const pageIndexDir = node_path_1.default.resolve(cwd, dir);
    const venvDir = node_path_1.default.resolve(cwd, DEFAULT_PAGEINDEX_VENV_DIR);
    const source = await ensurePageIndexSource(pageIndexDir, repo, options.ref);
    const cliPath = node_path_1.default.join(pageIndexDir, "run_pageindex.py");
    let pythonPath;
    if (install) {
        pythonPath = await installPageIndexDependencies(pageIndexDir, venvDir, python);
    }
    let configPath;
    if (writeConfig) {
        configPath = await (0, config_file_1.writePageIndexSetupConfig)({
            cliPath,
            configPath: options.configPath,
            cwd,
            pythonPath
        });
    }
    let gitignore;
    if (updateGitignore) {
        gitignore = await ensureGitignoreEntry(cwd);
    }
    const actions = {
        checkedOutRef: source.checkedOutRef,
        cloned: source.cloned,
        installedDependencies: install,
        reusedExisting: source.reusedExisting,
        updatedGitignore: gitignore?.updated ?? false,
        wroteConfig: Boolean(configPath)
    };
    return {
        version: 1,
        command: "setup pageindex",
        pageIndexDir,
        cliPath,
        pythonPath,
        venvDir: install ? venvDir : undefined,
        configPath,
        gitignorePath: gitignore?.gitignorePath,
        actions
    };
}
|