@bndynet/ragbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +765 -0
  2. package/README.zh-CN.md +774 -0
  3. package/dist/src/advanced.d.ts +13 -0
  4. package/dist/src/advanced.js +29 -0
  5. package/dist/src/cli.d.ts +2 -0
  6. package/dist/src/cli.js +1013 -0
  7. package/dist/src/config-file.d.ts +69 -0
  8. package/dist/src/config-file.js +246 -0
  9. package/dist/src/folder-index/config.d.ts +2 -0
  10. package/dist/src/folder-index/config.js +56 -0
  11. package/dist/src/folder-index/hash.d.ts +1 -0
  12. package/dist/src/folder-index/hash.js +14 -0
  13. package/dist/src/folder-index/indexer.d.ts +2 -0
  14. package/dist/src/folder-index/indexer.js +154 -0
  15. package/dist/src/folder-index/llm-client.d.ts +3 -0
  16. package/dist/src/folder-index/llm-client.js +45 -0
  17. package/dist/src/folder-index/manifest.d.ts +17 -0
  18. package/dist/src/folder-index/manifest.js +158 -0
  19. package/dist/src/folder-index/multi-query.d.ts +45 -0
  20. package/dist/src/folder-index/multi-query.js +109 -0
  21. package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
  22. package/dist/src/folder-index/pageindex-runner.js +218 -0
  23. package/dist/src/folder-index/path-utils.d.ts +5 -0
  24. package/dist/src/folder-index/path-utils.js +33 -0
  25. package/dist/src/folder-index/query.d.ts +19 -0
  26. package/dist/src/folder-index/query.js +597 -0
  27. package/dist/src/folder-index/queue.d.ts +1 -0
  28. package/dist/src/folder-index/queue.js +18 -0
  29. package/dist/src/folder-index/root-tree.d.ts +3 -0
  30. package/dist/src/folder-index/root-tree.js +82 -0
  31. package/dist/src/folder-index/scan.d.ts +14 -0
  32. package/dist/src/folder-index/scan.js +152 -0
  33. package/dist/src/folder-index/types.d.ts +368 -0
  34. package/dist/src/folder-index/types.js +2 -0
  35. package/dist/src/folder-index/watch.d.ts +17 -0
  36. package/dist/src/folder-index/watch.js +550 -0
  37. package/dist/src/index.d.ts +6 -0
  38. package/dist/src/index.js +45 -0
  39. package/dist/src/sdk.d.ts +101 -0
  40. package/dist/src/sdk.js +352 -0
  41. package/dist/src/serve.d.ts +64 -0
  42. package/dist/src/serve.js +466 -0
  43. package/dist/src/setup-pageindex.d.ts +30 -0
  44. package/dist/src/setup-pageindex.js +184 -0
  45. package/package.json +43 -0
@@ -0,0 +1,466 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.startServe = startServe;
7
const node_crypto_1 = require("node:crypto");
const node_http_1 = __importDefault(require("node:http"));
const node_url_1 = require("node:url");
const config_file_1 = require("./config-file");
const multi_query_1 = require("./folder-index/multi-query");
const query_1 = require("./folder-index/query");
const sdk_1 = require("./sdk");
13
+ const DEFAULT_HOST = "127.0.0.1";
14
+ const DEFAULT_PORT = 8787;
15
+ const MAX_JSON_BODY_BYTES = 1024 * 1024;
16
/**
 * Error that carries the HTTP status and machine-readable error code to send
 * back to the client.
 */
class ServeHttpError extends Error {
    status;
    code;
    /**
     * @param {number} status HTTP status code for the response.
     * @param {string} code Short machine-readable error identifier.
     * @param {string} message Human-readable description.
     */
    constructor(status, code, message) {
        super(message);
        Object.assign(this, { name: "ServeHttpError", status, code });
    }
}
26
/**
 * Shallow-merge the given objects left to right, ignoring properties whose
 * value is `undefined` so that an unset override never erases an earlier value.
 *
 * @param {...object} values Objects to merge; later objects win.
 * @returns {object} A new object holding every defined property.
 */
function mergeDefined(...values) {
    const result = {};
    values
        .flatMap((value) => Object.entries(value))
        .filter(([, entryValue]) => entryValue !== undefined)
        .forEach(([entryKey, entryValue]) => {
            result[entryKey] = entryValue;
        });
    return result;
}
37
/**
 * Parse a TCP port from its textual form (for example an environment variable).
 *
 * Only plain decimal digits are accepted. `Number.parseInt` on its own stops at
 * the first non-digit, so values such as "8080abc" or "12.5" would otherwise be
 * silently accepted as 8080 and 12.
 *
 * @param {string | undefined} value Raw port text; empty/undefined selects the fallback.
 * @param {number} fallback Port returned when `value` is not provided.
 * @returns {number} A port in the range 0-65535 (0 is allowed).
 * @throws {Error} When the value is not an integer in that range.
 */
function parsePositivePort(value, fallback) {
    if (!value) {
        return fallback;
    }
    const text = String(value).trim();
    if (!/^\d+$/.test(text)) {
        throw new Error(`Invalid serve port: ${value}`);
    }
    const parsed = Number.parseInt(text, 10);
    if (parsed > 65535) {
        throw new Error(`Invalid serve port: ${value}`);
    }
    return parsed;
}
47
/**
 * Normalize a source selection into a list of trimmed, non-empty names.
 *
 * @param {string | string[] | null | undefined} source Array of names or a
 *   comma-separated string; a missing value yields an empty list.
 * @returns {string[]} The cleaned names in their original order.
 */
function parseSourceNames(source) {
    const rawNames = Array.isArray(source) ? source : (source ?? "").split(",");
    const cleaned = [];
    for (const rawName of rawNames) {
        const trimmed = rawName.trim();
        if (trimmed) {
            cleaned.push(trimmed);
        }
    }
    return cleaned;
}
56
/**
 * Tell whether a parsed JSON value is an object (not null and not an array).
 *
 * @param {unknown} value Value to inspect.
 * @returns {boolean} True only for non-null, non-array objects.
 */
function isJsonObject(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
59
/**
 * Build the options for one query by layering the serve-level overrides on top
 * of the options resolved from configuration.
 *
 * @param {object | undefined} configOptions Options resolved from the config file.
 * @param {object} options Serve options; only apiKey, baseUrl, env, llmClient
 *   and model are taken from it.
 * @param {boolean | undefined} trace Per-request trace flag.
 * @returns {object} Merged options; undefined overrides leave config values intact.
 */
function queryOptionsFromServeOptions(configOptions, options, trace) {
    // Copy first so a missing configOptions becomes an empty object.
    const fromConfig = { ...configOptions };
    const { apiKey, baseUrl, env, llmClient, model } = options;
    const overrides = { apiKey, baseUrl, env, llmClient, model, trace };
    return mergeDefined(fromConfig, overrides);
}
71
/**
 * Resolve the ragbox configuration (without selecting a source) and turn it
 * into query options for the current serve options.
 *
 * @param {object} options Serve options; `configPath` selects the config file.
 * @param {boolean | undefined} trace Per-request trace flag.
 * @returns {Promise<object>} Query options merged from config and serve options.
 */
async function loadBaseQueryOptions(options, trace) {
    const { resolveRagboxConfig } = config_file_1;
    const { configPath } = options;
    const resolvedConfig = await resolveRagboxConfig({ configPath });
    return queryOptionsFromServeOptions(resolvedConfig.pageIndexOptions, options, trace);
}
77
/**
 * Decide which index targets a query (or the server default) should use.
 *
 * Precedence:
 *   1. an explicit `request.target`;
 *   2. every configured source when all-sources is requested;
 *   3. the named sources from the request or the serve options;
 *   4. every configured source, when nothing was named and no default target exists;
 *   5. the serve-level `options.target`.
 *
 * @param {object} options Serve options (configPath, source, allSources, target, ...).
 * @param {object} [request] Per-request overrides (target, source, allSources, trace).
 * @returns {Promise<object[]>} Targets, each with `target`, `options` and,
 *   for configured sources, `source`.
 * @throws {ServeHttpError} 400 when no target can be determined.
 */
async function resolveTargets(options, request = {}) {
    const { trace } = request;
    const singleTarget = async (target) => [
        {
            target,
            options: await loadBaseQueryOptions(options, trace)
        }
    ];
    const configuredSourceNames = async () => {
        const { config } = await (0, config_file_1.readRagboxConfig)(options.configPath);
        return (0, config_file_1.listRagboxConfigSourceNames)(config);
    };
    if (request.target) {
        return singleTarget(request.target);
    }
    const useAllSources = request.allSources ?? options.allSources;
    let names = parseSourceNames(request.source ?? options.source);
    if (useAllSources) {
        names = await configuredSourceNames();
        if (names.length === 0) {
            throw new ServeHttpError(400, "invalid_request", "No configured sources found. Add docs or sources to ragbox.config.json.");
        }
    }
    if (names.length === 0 && !options.target) {
        // Nothing was named and there is no default target: fall back to config.
        const fromConfig = await configuredSourceNames();
        if (fromConfig.length > 0) {
            names = fromConfig;
        }
    }
    if (names.length > 0) {
        const resolvedTargets = [];
        for (const name of names) {
            const resolved = await (0, config_file_1.resolveRagboxConfig)({
                configPath: options.configPath,
                source: name
            });
            const { pageIndexOptions } = resolved;
            const target = pageIndexOptions.outputDir ?? resolved.rootDir;
            if (!target) {
                throw new ServeHttpError(400, "invalid_request", `Source does not define outputDir or rootDir: ${name}`);
            }
            resolvedTargets.push({
                source: name,
                target,
                options: queryOptionsFromServeOptions(pageIndexOptions, options, trace)
            });
        }
        return resolvedTargets;
    }
    if (options.target) {
        return singleTarget(options.target);
    }
    throw new ServeHttpError(400, "invalid_request", "Missing query target. Pass a target, --source, --all-sources, or configure sources.");
}
131
/**
 * Validate each target in turn and collect the results into a snapshot.
 *
 * @param {object[]} targets Resolved targets (`source` optional, `target` required).
 * @returns {Promise<{version: 1, indexes: object[]}>} One entry per target, in
 *   input order, with its validation outcome.
 */
async function buildIndexes(targets) {
    const snapshot = { version: 1, indexes: [] };
    for (const { source, target } of targets) {
        const report = await (0, sdk_1.validateIndex)(target);
        const details = report.inspect;
        snapshot.indexes.push({
            source,
            target,
            ok: report.ok,
            generatedAt: details?.generatedAt,
            counts: details?.counts,
            errors: report.errors,
            warnings: report.warnings
        });
    }
    return snapshot;
}
150
/**
 * Summarize an index snapshot as a health report.
 *
 * Status is "ready" when at least one index exists and all are valid,
 * "degraded" when only some are valid, and "error" otherwise (including when
 * there are no indexes at all).
 *
 * @param {number} startedAt Server start time in epoch milliseconds.
 * @param {string} lastReloadAt Timestamp of the last snapshot reload.
 * @param {{indexes: {ok: boolean}[]}} indexes Current index snapshot.
 * @returns {object} Health payload with counts and uptime.
 */
function healthFromIndexes(startedAt, lastReloadAt, indexes) {
    const entries = indexes.indexes;
    const total = entries.length;
    let ready = 0;
    for (const entry of entries) {
        if (entry.ok) {
            ready += 1;
        }
    }
    const failed = total - ready;
    const ok = total > 0 && failed === 0;
    let status = "error";
    if (ok) {
        status = "ready";
    }
    else if (ready > 0) {
        status = "degraded";
    }
    return {
        version: 1,
        ok,
        status,
        uptimeMs: Date.now() - startedAt,
        lastReloadAt,
        indexes: { total, ready, failed }
    };
}
168
/**
 * Build the payload for the service root: overall status plus the list of
 * endpoints and whether each one requires authentication.
 *
 * @param {object} health Health report (its `status` and `ok` are surfaced).
 * @param {boolean} authRequired Whether the protected endpoints need a token.
 * @returns {object} Root document describing the service.
 */
function rootFromHealth(health, authRequired) {
    // [method, path, requires auth, description]
    const endpointTable = [
        ["GET", "/", false, "Service entrypoint and endpoint list."],
        ["GET", "/health", false, "Readiness and index health summary."],
        ["GET", "/indexes", authRequired, "Validated index snapshot."],
        ["POST", "/query", authRequired, "Ask questions about the configured knowledge sources."],
        ["POST", "/reload", authRequired, "Reload configured index snapshots."]
    ];
    const endpoints = endpointTable.map(([method, path, needsAuth, description]) => ({
        method,
        path,
        authRequired: needsAuth,
        description
    }));
    const { status, ok } = health;
    return {
        version: 1,
        name: "ragbox",
        status,
        ok,
        health,
        endpoints
    };
}
209
/**
 * Send a pretty-printed JSON response terminated by a newline.
 *
 * @param {object} response HTTP response to write to.
 * @param {number} status HTTP status code.
 * @param {unknown} value Value to serialize as the body.
 */
function writeJson(response, status, value) {
    const headers = { "Content-Type": "application/json; charset=utf-8" };
    response.writeHead(status, headers);
    const serialized = JSON.stringify(value, null, 2);
    response.end(`${serialized}\n`);
}
215
/**
 * Send a JSON error envelope of the form `{ version, error: { code, message } }`.
 *
 * @param {object} response HTTP response to write to.
 * @param {number} status HTTP status code.
 * @param {string} code Machine-readable error code.
 * @param {string} message Human-readable description.
 */
function writeError(response, status, code, message) {
    const error = { code, message };
    const envelope = { version: 1, error };
    writeJson(response, status, envelope);
}
224
/**
 * Reply with a 405 error for a route that does not accept the request method.
 *
 * @param {object} response HTTP response to write to.
 */
function methodNotAllowed(response) {
    const status = 405;
    const code = "method_not_allowed";
    writeError(response, status, code, "Method not allowed.");
}
227
/**
 * Reply with a 404 error for an unknown route.
 *
 * @param {object} response HTTP response to write to.
 */
function notFound(response) {
    const status = 404;
    const code = "not_found";
    writeError(response, status, code, "Route not found.");
}
230
/**
 * Read a request body and parse it as a JSON object.
 *
 * Raw chunks are buffered and decoded once at the end: decoding each chunk on
 * its own corrupts any multi-byte UTF-8 character that happens to be split
 * across two chunks.
 *
 * @param {object} request Incoming HTTP request (readable stream).
 * @returns {Promise<object>} The parsed object; an empty or whitespace-only
 *   body yields `{}`.
 * @throws {ServeHttpError} 400 when the body exceeds MAX_JSON_BODY_BYTES, is
 *   not valid JSON, or is not a JSON object. Stream errors reject as-is.
 */
function readJsonBody(request) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        let bytes = 0;
        let tooLarge = false;
        request.on("data", (chunk) => {
            if (tooLarge) {
                // Already rejected; ignore anything still in flight.
                return;
            }
            const buffer = typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk;
            bytes += buffer.length;
            if (bytes > MAX_JSON_BODY_BYTES) {
                tooLarge = true;
                chunks.length = 0;
                reject(new ServeHttpError(400, "invalid_request", "JSON body is too large."));
                request.destroy();
                return;
            }
            chunks.push(buffer);
        });
        request.on("end", () => {
            if (tooLarge) {
                return;
            }
            try {
                const body = Buffer.concat(chunks).toString("utf8");
                const parsed = body.trim() ? JSON.parse(body) : {};
                if (!isJsonObject(parsed)) {
                    reject(new ServeHttpError(400, "invalid_request", "Expected a JSON object."));
                    return;
                }
                resolve(parsed);
            }
            catch (error) {
                reject(new ServeHttpError(400, "invalid_request", `Invalid JSON body: ${error instanceof Error ? error.message : String(error)}`));
            }
        });
        request.on("error", reject);
    });
}
259
/**
 * Read the Authorization header of a request, taking the first value when the
 * header is presented as an array.
 *
 * @param {{headers: object}} request Incoming HTTP request.
 * @returns {string | undefined} The header value, if any.
 */
function authorizationHeader(request) {
    const { authorization } = request.headers;
    if (Array.isArray(authorization)) {
        return authorization[0];
    }
    return authorization;
}
263
/**
 * Enforce bearer-token authentication when a token is configured.
 *
 * The presented header is compared with `crypto.timingSafeEqual` rather than
 * `!==` so the comparison time does not reveal how much of the secret matched.
 * `timingSafeEqual` requires equal-length inputs, hence the length check first.
 *
 * @param {{headers: object}} request Incoming HTTP request.
 * @param {string | undefined} authToken Expected token; when empty or
 *   undefined, authentication is disabled and every request is accepted.
 * @throws {ServeHttpError} 401 when the header is missing or does not match.
 */
function assertAuthorized(request, authToken) {
    if (!authToken) {
        return;
    }
    const expected = Buffer.from(`Bearer ${authToken}`, "utf8");
    const provided = Buffer.from(authorizationHeader(request) ?? "", "utf8");
    const matches = provided.length === expected.length &&
        node_crypto_1.timingSafeEqual(provided, expected);
    if (!matches) {
        throw new ServeHttpError(401, "unauthorized", "Missing or invalid bearer token.");
    }
}
271
/**
 * Validate the `source` field of a query request.
 *
 * @param {unknown} value Raw field value from the request body.
 * @returns {string | string[] | undefined} The value unchanged when it is
 *   undefined, a string, or an array made only of strings.
 * @throws {ServeHttpError} 400 for any other value.
 */
function requestSource(value) {
    if (value === undefined) {
        return undefined;
    }
    const isString = (candidate) => typeof candidate === "string";
    const isStringArray = Array.isArray(value) && value.every((item) => isString(item));
    if (isString(value) || isStringArray) {
        return value;
    }
    throw new ServeHttpError(400, "invalid_request", "source must be a string or string array.");
}
283
/**
 * Validate the `target` field of a query request.
 *
 * @param {unknown} value Raw field value from the request body.
 * @returns {string | undefined} The original string (untrimmed) when it has
 *   non-whitespace content, or undefined when the field is absent.
 * @throws {ServeHttpError} 400 for blank strings and non-string values.
 */
function requestTarget(value) {
    if (value === undefined) {
        return undefined;
    }
    const hasContent = typeof value === "string" && value.trim() !== "";
    if (!hasContent) {
        throw new ServeHttpError(400, "invalid_request", "target must be a non-empty string.");
    }
    return value;
}
292
/**
 * Validate an optional boolean field of a query request.
 *
 * @param {unknown} value Raw field value from the request body.
 * @param {string} name Field name used in the error message.
 * @returns {boolean | undefined} The boolean, or undefined when absent.
 * @throws {ServeHttpError} 400 when the value is present but not a boolean.
 */
function requestBoolean(value, name) {
    switch (typeof value) {
        case "undefined":
            return undefined;
        case "boolean":
            return value;
        default:
            throw new ServeHttpError(400, "invalid_request", `${name} must be a boolean.`);
    }
}
301
/**
 * Map a thrown value to the HTTP status, error code and message to report.
 *
 * - ServeHttpError: reported exactly as raised.
 * - QueryStageError from the "select-documents", "select-nodes" or "answer"
 *   stage, or any message matching the LLM failure pattern: 502 upstream_error.
 * - Everything else: 500 internal_error.
 *
 * @param {unknown} error The value that was thrown.
 * @returns {{status: number, code: string, message: string}} Response details.
 */
function statusForThrownError(error) {
    if (error instanceof ServeHttpError) {
        const { status, code, message: httpMessage } = error;
        return { status, code, message: httpMessage };
    }
    const message = error instanceof Error ? error.message : String(error);
    const upstreamStages = ["select-documents", "select-nodes", "answer"];
    const failedInUpstreamStage = error instanceof query_1.QueryStageError && upstreamStages.includes(error.stage);
    const isUpstream = failedInUpstreamStage ||
        /LLM request failed|OPENAI_API_KEY|chat completions/i.test(message);
    if (isUpstream) {
        return { status: 502, code: "upstream_error", message };
    }
    return { status: 500, code: "internal_error", message };
}
331
/**
 * Run a question against the resolved targets.
 *
 * A single target is queried directly; several targets are queried together,
 * each named by its source (or by its target path when it has no source).
 *
 * @param {object[]} targets Resolved targets with `target`, `options` and optional `source`.
 * @param {string} question Question to ask.
 * @param {object} options Serve options, used to load shared options when the
 *   first target carries none.
 * @returns {Promise<object>} The query result.
 * @throws {ServeHttpError} 400 when `targets` is empty.
 */
async function queryTargets(targets, question, options) {
    if (targets.length === 0) {
        throw new ServeHttpError(400, "invalid_request", "At least one query source is required.");
    }
    const first = targets[0];
    if (targets.length === 1) {
        return await (0, query_1.queryFolder)(first.target, question, first.options);
    }
    const namedTargets = targets.map(({ source, target, options: targetOptions }) => ({
        name: source ?? target,
        target,
        options: targetOptions
    }));
    const sharedOptions = first.options ?? await loadBaseQueryOptions(options);
    return await (0, multi_query_1.queryMultipleIndexes)(namedTargets, question, sharedOptions);
}
345
/**
 * Start the ragbox HTTP service.
 *
 * Routes:
 *   GET  /         service description and endpoint list (no auth)
 *   GET  /health   health summary; 200 when healthy, 503 otherwise (no auth)
 *   GET  /indexes  current validated index snapshot
 *   POST /reload   re-resolve the default targets and re-validate them
 *   POST /query    answer a question against the default or requested targets
 *
 * Host, port and auth token come from `options` first, then from the
 * RAGBOX_SERVE_HOST / RAGBOX_SERVE_PORT / RAGBOX_SERVE_TOKEN environment
 * variables, then from the built-in defaults.
 *
 * @param {object} [options] Serve options (env, host, port, authToken,
 *   configPath, source, allSources, target, and LLM overrides).
 * @returns {Promise<{url: string, host: string, port: number, server: object,
 *   reload: Function, close: Function}>} Handle on the listening server.
 * @throws When the default targets cannot be resolved or the server cannot listen.
 */
async function startServe(options = {}) {
    const env = options.env ?? process.env;
    const host = options.host ?? env.RAGBOX_SERVE_HOST ?? DEFAULT_HOST;
    const port = options.port ?? parsePositivePort(env.RAGBOX_SERVE_PORT, DEFAULT_PORT);
    const authToken = options.authToken ?? env.RAGBOX_SERVE_TOKEN;
    const serverOptions = {
        ...options,
        authToken,
        env,
        host,
        port
    };
    const startedAt = Date.now();
    let defaultTargets = await resolveTargets(serverOptions);
    let lastReloadAt = new Date().toISOString();
    let indexes = await buildIndexes(defaultTargets);
    async function reload() {
        defaultTargets = await resolveTargets(serverOptions);
        indexes = await buildIndexes(defaultTargets);
        lastReloadAt = new Date().toISOString();
        return indexes;
    }
    async function handleRequest(request, response) {
        // Only the pathname is used, so a fixed base is enough. Deriving the
        // base from the client-supplied Host header (or from an unbracketed
        // IPv6 bind address) makes URL parsing throw on malformed values.
        const requestUrl = new node_url_1.URL(request.url ?? "/", "http://localhost");
        const route = requestUrl.pathname.replace(/\/+$/, "") || "/";
        if (route === "/") {
            if (request.method !== "GET") {
                methodNotAllowed(response);
                return;
            }
            const health = healthFromIndexes(startedAt, lastReloadAt, indexes);
            writeJson(response, 200, rootFromHealth(health, Boolean(authToken)));
            return;
        }
        if (route === "/health") {
            if (request.method !== "GET") {
                methodNotAllowed(response);
                return;
            }
            const health = healthFromIndexes(startedAt, lastReloadAt, indexes);
            writeJson(response, health.ok ? 200 : 503, health);
            return;
        }
        // Everything below this point requires the bearer token (when configured).
        assertAuthorized(request, authToken);
        if (route === "/indexes") {
            if (request.method !== "GET") {
                methodNotAllowed(response);
                return;
            }
            writeJson(response, 200, indexes);
            return;
        }
        if (route === "/reload") {
            if (request.method !== "POST") {
                methodNotAllowed(response);
                return;
            }
            writeJson(response, 200, await reload());
            return;
        }
        if (route === "/query") {
            if (request.method !== "POST") {
                methodNotAllowed(response);
                return;
            }
            const body = await readJsonBody(request);
            const question = typeof body.question === "string" && body.question.trim() ? body.question : undefined;
            if (!question) {
                throw new ServeHttpError(400, "invalid_request", "question must be a non-empty string.");
            }
            const target = requestTarget(body.target);
            const source = requestSource(body.source);
            const allSources = requestBoolean(body.allSources, "allSources");
            const trace = requestBoolean(body.trace, "trace");
            if (target && (source || allSources)) {
                throw new ServeHttpError(400, "invalid_request", "target cannot be combined with source or allSources.");
            }
            const targets = target || source || allSources
                ? await resolveTargets(serverOptions, { allSources, source, target, trace })
                : defaultTargets.map((resolvedTarget) => ({
                    ...resolvedTarget,
                    options: queryOptionsFromServeOptions(resolvedTarget.options, serverOptions, trace)
                }));
            writeJson(response, 200, await queryTargets(targets, question, serverOptions));
            return;
        }
        notFound(response);
    }
    const server = node_http_1.default.createServer((request, response) => {
        void handleRequest(request, response).catch((error) => {
            if (response.headersSent) {
                // A response is already under way; writing another head would
                // throw here and surface as an unhandled rejection.
                if (!response.writableEnded) {
                    response.end();
                }
                return;
            }
            const result = statusForThrownError(error);
            writeError(response, result.status, result.code, result.message);
        });
    });
    await new Promise((resolve, reject) => {
        server.once("error", reject);
        server.listen(port, host, () => {
            server.off("error", reject);
            resolve();
        });
    });
    const address = server.address();
    const resolvedHost = address.address === "::" ? "localhost" : address.address;
    const resolvedPort = address.port;
    // IPv6 literals must be bracketed inside a URL; `host` stays unbracketed.
    const urlHost = resolvedHost.includes(":") ? `[${resolvedHost}]` : resolvedHost;
    return {
        url: `http://${urlHost}:${resolvedPort}`,
        host: resolvedHost,
        port: resolvedPort,
        server,
        reload,
        close: async () => {
            await new Promise((resolve, reject) => {
                server.close((error) => {
                    if (error) {
                        reject(error);
                        return;
                    }
                    resolve();
                });
            });
        }
    };
}
@@ -0,0 +1,30 @@
1
/** Options accepted by `setupPageIndex`. Every field is optional. */
export type SetupPageIndexOptions = {
    /** Config file location, forwarded to the config writer when `writeConfig` is enabled. */
    configPath?: string;
    /** Base directory for all relative paths; defaults to the process working directory. */
    cwd?: string;
    /** PageIndex checkout directory, resolved against `cwd`; defaults to ".ragbox/PageIndex". */
    dir?: string;
    /** Whether to add ".ragbox/" to the .gitignore in `cwd`; defaults to true. */
    gitignore?: boolean;
    /** Whether to create a virtual environment and install the Python requirements; defaults to true. */
    install?: boolean;
    /** Python executable used to create the virtual environment; defaults to "python3". */
    python?: string;
    /** Git ref to check out after cloning; not applied when an existing checkout is reused. */
    ref?: string;
    /** Git repository to clone; defaults to the VectifyAI PageIndex repository. */
    repo?: string;
    /** Whether to write the setup result to the ragbox config; defaults to true. */
    writeConfig?: boolean;
};
12
/** Report returned by `setupPageIndex`. */
export type SetupPageIndexResult = {
    version: 1;
    command: "setup pageindex";
    /** Absolute path of the PageIndex checkout. */
    pageIndexDir: string;
    /** Path of `run_pageindex.py` inside the checkout. */
    cliPath: string;
    /** Python interpreter inside the virtual environment; set only when dependencies were installed. */
    pythonPath?: string;
    /** Virtual environment directory; set only when dependencies were installed. */
    venvDir?: string;
    /** Config path reported by the config writer; set only when the config was written. */
    configPath?: string;
    /** Path of the .gitignore that was checked; set only when the gitignore step ran. */
    gitignorePath?: string;
    /** What the setup run actually did. */
    actions: {
        /** Ref requested for checkout; present only when the repository was cloned. */
        checkedOutRef?: string;
        cloned: boolean;
        installedDependencies: boolean;
        reusedExisting: boolean;
        updatedGitignore: boolean;
        wroteConfig: boolean;
    };
};
30
/**
 * Prepare a local PageIndex checkout: clone it (or reuse an existing one),
 * optionally install its Python dependencies into a virtual environment,
 * optionally write the ragbox config, and optionally update .gitignore.
 */
export declare function setupPageIndex(options?: SetupPageIndexOptions): Promise<SetupPageIndexResult>;
@@ -0,0 +1,184 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.setupPageIndex = setupPageIndex;
7
+ const node_child_process_1 = require("node:child_process");
8
+ const promises_1 = __importDefault(require("node:fs/promises"));
9
+ const node_path_1 = __importDefault(require("node:path"));
10
+ const config_file_1 = require("./config-file");
11
+ const MAX_CAPTURED_OUTPUT = 32 * 1024;
12
+ const DEFAULT_PAGEINDEX_REPO = "https://github.com/VectifyAI/PageIndex.git";
13
+ const DEFAULT_PAGEINDEX_DIR = ".ragbox/PageIndex";
14
+ const DEFAULT_PAGEINDEX_VENV_DIR = ".ragbox/pageindex-venv";
15
+ const DEFAULT_PYTHON = "python3";
16
+ const GITIGNORE_ENTRY = ".ragbox/";
17
/**
 * Append a chunk of process output to the captured text, keeping only the
 * most recent MAX_CAPTURED_OUTPUT characters.
 *
 * @param {string} current Text captured so far.
 * @param {Buffer} chunk Newly received output.
 * @returns {string} The updated (possibly truncated) capture.
 */
function appendCapturedOutput(current, chunk) {
    const combined = current + chunk.toString("utf8");
    if (combined.length <= MAX_CAPTURED_OUTPUT) {
        return combined;
    }
    return combined.slice(-MAX_CAPTURED_OUTPUT);
}
21
/**
 * Build the error for a failed command, appending whatever output was captured.
 *
 * @param {string} message Summary of the failure.
 * @param {string} stdout Captured standard output.
 * @param {string} stderr Captured standard error.
 * @returns {Error} Error whose message includes the non-empty output sections.
 */
function commandFailure(message, stdout, stderr) {
    const sections = [];
    const out = stdout.trim();
    if (out) {
        sections.push(`STDOUT:\n${out}`);
    }
    const err = stderr.trim();
    if (err) {
        sections.push(`STDERR:\n${err}`);
    }
    if (sections.length === 0) {
        return new Error(message);
    }
    return new Error(`${message}\n${sections.join("\n")}`);
}
28
/**
 * Run a command to completion, capturing its output for error reporting.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Command arguments.
 * @param {object} options `cwd`, extra `env` entries (layered over the process
 *   environment), `failureMessage` for non-zero exits and `missingMessage` for
 *   a missing executable.
 * @returns {Promise<void>} Resolves when the command exits with code 0.
 * @throws {Error} On spawn failure or a non-zero exit; the latter includes the
 *   captured stdout/stderr.
 */
async function runCommand(command, args, options) {
    await new Promise((resolve, reject) => {
        const captured = { stdout: "", stderr: "" };
        const spawnOptions = {
            cwd: options.cwd,
            env: {
                ...process.env,
                ...options.env
            },
            stdio: ["ignore", "pipe", "pipe"]
        };
        const child = (0, node_child_process_1.spawn)(command, args, spawnOptions);
        for (const streamName of ["stdout", "stderr"]) {
            child[streamName].on("data", (chunk) => {
                captured[streamName] = appendCapturedOutput(captured[streamName], chunk);
            });
        }
        child.on("error", (error) => {
            // ENOENT means the executable itself could not be found.
            if (error.code === "ENOENT") {
                reject(new Error(options.missingMessage));
                return;
            }
            reject(error);
        });
        child.on("close", (code) => {
            if (code !== 0) {
                const summary = `${options.failureMessage} (exit code ${code ?? "unknown"})`;
                reject(commandFailure(summary, captured.stdout, captured.stderr));
                return;
            }
            resolve();
        });
    });
}
58
/**
 * Check whether a path can be accessed.
 *
 * @param {string} filePath Path to probe.
 * @returns {Promise<boolean>} True when the path is accessible; any failure yields false.
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await promises_1.default.access(filePath);
    }
    catch {
        accessible = false;
    }
    return accessible;
}
67
/**
 * Check whether a directory contains the PageIndex entrypoint script.
 *
 * @param {string} pageIndexDir Directory expected to hold `run_pageindex.py`.
 * @returns {Promise<boolean>} True when that path exists and is a regular
 *   file; any failure yields false.
 */
async function hasPageIndexEntrypoint(pageIndexDir) {
    const { join } = node_path_1.default;
    try {
        const entrypoint = join(pageIndexDir, "run_pageindex.py");
        const details = await promises_1.default.stat(entrypoint);
        return details.isFile();
    }
    catch {
        return false;
    }
}
76
/**
 * Locate the Python interpreter inside a virtual environment.
 *
 * @param {string} venvDir Virtual environment directory.
 * @returns {string} `Scripts/python.exe` under it on Windows, `bin/python` elsewhere.
 */
function venvPythonPath(venvDir) {
    const segments = process.platform === "win32" ? ["Scripts", "python.exe"] : ["bin", "python"];
    return node_path_1.default.join(venvDir, ...segments);
}
79
/**
 * Make sure the .gitignore in `cwd` lists the ragbox working directory.
 *
 * A missing .gitignore is treated as empty and gets created. The file is left
 * untouched when one of its lines, once trimmed, already equals the entry.
 *
 * @param {string} cwd Directory holding the .gitignore.
 * @returns {Promise<{gitignorePath: string, updated: boolean}>} The file path
 *   and whether it was written.
 * @throws Any read error other than the file not existing.
 */
async function ensureGitignoreEntry(cwd) {
    const gitignorePath = node_path_1.default.join(cwd, ".gitignore");
    let existing = "";
    try {
        existing = await promises_1.default.readFile(gitignorePath, "utf8");
    }
    catch (error) {
        if (error.code !== "ENOENT") {
            throw error;
        }
    }
    const alreadyListed = existing
        .split(/\r?\n/)
        .map((line) => line.trim())
        .includes(GITIGNORE_ENTRY);
    if (alreadyListed) {
        return { gitignorePath, updated: false };
    }
    // Start the entry on its own line when the file lacks a trailing newline.
    const needsNewline = existing !== "" && !existing.endsWith("\n");
    const prefix = needsNewline ? `${existing}\n` : existing;
    await promises_1.default.writeFile(gitignorePath, `${prefix}${GITIGNORE_ENTRY}\n`, "utf8");
    return { gitignorePath, updated: true };
}
99
/**
 * Make sure a PageIndex checkout exists at `pageIndexDir`.
 *
 * An existing directory is reused as-is when it contains the entrypoint (no
 * clone, and `ref` is not applied). Otherwise the repository is cloned and,
 * when `ref` is given, that ref is checked out.
 *
 * @param {string} pageIndexDir Checkout location.
 * @param {string} repo Repository to clone.
 * @param {string | undefined} ref Optional ref to check out after cloning.
 * @param {object | undefined} env Extra environment variables for git.
 * @returns {Promise<{cloned: boolean, reusedExisting: boolean, checkedOutRef?: string}>}
 * @throws {Error} When the directory exists without the entrypoint, when git
 *   fails, or when the cloned repository lacks the entrypoint.
 */
async function ensurePageIndexSource(pageIndexDir, repo, ref, env) {
    const directoryExists = await pathExists(pageIndexDir);
    if (directoryExists) {
        const usable = await hasPageIndexEntrypoint(pageIndexDir);
        if (usable) {
            return { cloned: false, reusedExisting: true };
        }
        throw new Error(`PageIndex directory already exists but run_pageindex.py was not found: ${pageIndexDir}. Pass --dir to use another location or remove the existing directory.`);
    }
    const parentDir = node_path_1.default.dirname(pageIndexDir);
    await promises_1.default.mkdir(parentDir, { recursive: true });
    const cloneArgs = ["clone", repo, pageIndexDir];
    await runCommand("git", cloneArgs, {
        env,
        failureMessage: `Failed to clone PageIndex from ${repo}`,
        missingMessage: "git is required to install PageIndex. Install git or pass --dir pointing to an existing PageIndex checkout."
    });
    if (ref) {
        const checkoutArgs = ["-C", pageIndexDir, "checkout", ref];
        await runCommand("git", checkoutArgs, {
            env,
            failureMessage: `Failed to checkout PageIndex ref ${ref}`,
            missingMessage: "git is required to checkout a PageIndex ref."
        });
    }
    const cloneHasEntrypoint = await hasPageIndexEntrypoint(pageIndexDir);
    if (!cloneHasEntrypoint) {
        throw new Error(`PageIndex repo does not contain run_pageindex.py: ${pageIndexDir}`);
    }
    return { checkedOutRef: ref, cloned: true, reusedExisting: false };
}
124
/**
 * Create a virtual environment and install the PageIndex requirements into it.
 *
 * @param {string} pageIndexDir Checkout containing `requirements.txt`.
 * @param {string} venvDir Directory for the virtual environment.
 * @param {string} python Python executable used to create the environment.
 * @param {object | undefined} env Extra environment variables for the commands.
 * @returns {Promise<string>} Path of the interpreter inside the environment.
 * @throws {Error} When `requirements.txt` is missing or a command fails.
 */
async function installPageIndexDependencies(pageIndexDir, venvDir, python, env) {
    const requirementsPath = node_path_1.default.join(pageIndexDir, "requirements.txt");
    const hasRequirements = await pathExists(requirementsPath);
    if (!hasRequirements) {
        throw new Error(`PageIndex requirements.txt was not found: ${requirementsPath}`);
    }
    const venvParent = node_path_1.default.dirname(venvDir);
    await promises_1.default.mkdir(venvParent, { recursive: true });
    const createVenvArgs = ["-m", "venv", venvDir];
    await runCommand(python, createVenvArgs, {
        env,
        failureMessage: `Failed to create PageIndex virtual environment at ${venvDir}`,
        missingMessage: `Python executable was not found: ${python}`
    });
    // From here on use the interpreter that lives inside the new environment.
    const pythonPath = venvPythonPath(venvDir);
    const pipArgs = ["-m", "pip", "install", "--upgrade", "-r", requirementsPath];
    await runCommand(pythonPath, pipArgs, {
        env,
        failureMessage: "Failed to install PageIndex Python dependencies",
        missingMessage: `Virtual environment Python was not found after creation: ${pythonPath}`
    });
    return pythonPath;
}
143
/**
 * Set up PageIndex for a project.
 *
 * Steps, in order: obtain the checkout (clone or reuse), install the Python
 * dependencies (unless `install` is false), write the ragbox config (unless
 * `writeConfig` is false), and add the ragbox directory to .gitignore (unless
 * `gitignore` is false).
 *
 * @param {object} [options] See `SetupPageIndexOptions`.
 * @returns {Promise<object>} Report of the resolved paths and the actions taken.
 */
async function setupPageIndex(options = {}) {
    const path = node_path_1.default;
    const cwd = path.resolve(options.cwd ?? process.cwd());
    const pageIndexDir = path.resolve(cwd, options.dir ?? DEFAULT_PAGEINDEX_DIR);
    const venvDir = path.resolve(cwd, DEFAULT_PAGEINDEX_VENV_DIR);
    const shouldInstall = options.install ?? true;
    const shouldWriteConfig = options.writeConfig ?? true;
    const shouldUpdateGitignore = options.gitignore ?? true;
    const source = await ensurePageIndexSource(pageIndexDir, options.repo ?? DEFAULT_PAGEINDEX_REPO, options.ref);
    const cliPath = path.join(pageIndexDir, "run_pageindex.py");
    let pythonPath;
    if (shouldInstall) {
        pythonPath = await installPageIndexDependencies(pageIndexDir, venvDir, options.python ?? DEFAULT_PYTHON);
    }
    let configPath;
    if (shouldWriteConfig) {
        configPath = await (0, config_file_1.writePageIndexSetupConfig)({
            cliPath,
            configPath: options.configPath,
            cwd,
            pythonPath
        });
    }
    let gitignore;
    if (shouldUpdateGitignore) {
        gitignore = await ensureGitignoreEntry(cwd);
    }
    const actions = {
        checkedOutRef: source.checkedOutRef,
        cloned: source.cloned,
        installedDependencies: shouldInstall,
        reusedExisting: source.reusedExisting,
        updatedGitignore: gitignore?.updated ?? false,
        wroteConfig: Boolean(configPath)
    };
    return {
        version: 1,
        command: "setup pageindex",
        pageIndexDir,
        cliPath,
        pythonPath,
        venvDir: shouldInstall ? venvDir : undefined,
        configPath,
        gitignorePath: gitignore?.gitignorePath,
        actions
    };
}