@bndynet/ragbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +765 -0
- package/README.zh-CN.md +774 -0
- package/dist/src/advanced.d.ts +13 -0
- package/dist/src/advanced.js +29 -0
- package/dist/src/cli.d.ts +2 -0
- package/dist/src/cli.js +1013 -0
- package/dist/src/config-file.d.ts +69 -0
- package/dist/src/config-file.js +246 -0
- package/dist/src/folder-index/config.d.ts +2 -0
- package/dist/src/folder-index/config.js +56 -0
- package/dist/src/folder-index/hash.d.ts +1 -0
- package/dist/src/folder-index/hash.js +14 -0
- package/dist/src/folder-index/indexer.d.ts +2 -0
- package/dist/src/folder-index/indexer.js +154 -0
- package/dist/src/folder-index/llm-client.d.ts +3 -0
- package/dist/src/folder-index/llm-client.js +45 -0
- package/dist/src/folder-index/manifest.d.ts +17 -0
- package/dist/src/folder-index/manifest.js +158 -0
- package/dist/src/folder-index/multi-query.d.ts +45 -0
- package/dist/src/folder-index/multi-query.js +109 -0
- package/dist/src/folder-index/pageindex-runner.d.ts +3 -0
- package/dist/src/folder-index/pageindex-runner.js +218 -0
- package/dist/src/folder-index/path-utils.d.ts +5 -0
- package/dist/src/folder-index/path-utils.js +33 -0
- package/dist/src/folder-index/query.d.ts +19 -0
- package/dist/src/folder-index/query.js +597 -0
- package/dist/src/folder-index/queue.d.ts +1 -0
- package/dist/src/folder-index/queue.js +18 -0
- package/dist/src/folder-index/root-tree.d.ts +3 -0
- package/dist/src/folder-index/root-tree.js +82 -0
- package/dist/src/folder-index/scan.d.ts +14 -0
- package/dist/src/folder-index/scan.js +152 -0
- package/dist/src/folder-index/types.d.ts +368 -0
- package/dist/src/folder-index/types.js +2 -0
- package/dist/src/folder-index/watch.d.ts +17 -0
- package/dist/src/folder-index/watch.js +550 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.js +45 -0
- package/dist/src/sdk.d.ts +101 -0
- package/dist/src/sdk.js +352 -0
- package/dist/src/serve.d.ts +64 -0
- package/dist/src/serve.js +466 -0
- package/dist/src/setup-pageindex.d.ts +30 -0
- package/dist/src/setup-pageindex.js +184 -0
- package/package.json +43 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { PageIndexOptions } from "./folder-index/types";
|
|
2
|
+
export declare const RAGBOX_CONFIG_FILE = "ragbox.config.json";
|
|
3
|
+
/** PageIndex runner settings as they appear in the config file. */
export type RagboxPageIndexConfig = {
    /** PageIndex CLI script. Path-like values are resolved against the config file's directory. */
    cli?: string;
    concurrency?: number;
    extraArgs?: string[];
    outputArg?: string;
    /** Python interpreter. Resolved with the same rule as `cli`; bare command names are kept as-is. */
    python?: string;
};
/** LLM connection settings; copied unchanged into the resolved options. */
export type RagboxLlmConfig = {
    apiKey?: string;
    baseUrl?: string;
    model?: string;
};
/** Indexing scope and output location. */
export type RagboxIndexConfig = {
    exclude?: string[];
    include?: string[];
    /** Resolved against the config file's directory unless absolute. */
    outputDir?: string;
};
/**
 * One documentation source. Index fields may be given directly on the source
 * or under `index`; the nested `index` object is merged last.
 */
export type RagboxConfigSource = RagboxIndexConfig & {
    index?: RagboxIndexConfig;
    llm?: RagboxLlmConfig;
    pageIndex?: RagboxPageIndexConfig;
    /** Source root; resolved against the config file's directory unless absolute. */
    rootDir: string;
};
/** Root shape of `ragbox.config.json`. */
export type RagboxConfig = {
    /** Only version 1 is accepted when the file is read. */
    version: 1;
    /** Source addressed by the name "docs"; it is the default when present. */
    docs?: RagboxConfigSource;
    index?: RagboxIndexConfig;
    llm?: RagboxLlmConfig;
    pageIndex?: RagboxPageIndexConfig;
    /** Additional sources keyed by name. */
    sources?: Record<string, RagboxConfigSource>;
};
|
|
34
|
+
/** Result of `resolveRagboxConfig`. */
export type ResolvedRagboxConfig = {
    /** Parsed config; absent when no config file was found. */
    config?: RagboxConfig;
    /** Directory of the config file, or the resolved `cwd` when none was found. */
    configDir: string;
    configPath?: string;
    /** Global and per-source pageIndex / llm / index settings merged into runner options. */
    pageIndexOptions: PageIndexOptions;
    /** Root directory of the selected source, resolved against `configDir`. */
    rootDir?: string;
    sourceName?: string;
};
/** Inputs for `resolveRagboxConfig`. */
export type ResolveRagboxConfigOptions = {
    /**
     * Either a file path, or a bare config name (no ".json" suffix, no path
     * separators) that maps to `ragbox.config.<name>.json`.
     */
    configPath?: string;
    /** Defaults to `process.cwd()`. */
    cwd?: string;
    /** Source name to select; resolution throws when it is not in the config. */
    source?: string;
};
/** Inputs for `writeDefaultRagboxConfig`. */
export type WriteDefaultRagboxConfigOptions = {
    /** Resolved against `cwd`; defaults to `ragbox.config.json`. */
    configPath?: string;
    cwd?: string;
    /** Defaults to "./docs". */
    docsDir?: string;
    /** Overwrite an existing file instead of throwing. */
    force?: boolean;
    /** Defaults to "./.ragbox-index". */
    outputDir?: string;
};
/** Inputs for `writePageIndexSetupConfig`. */
export type WritePageIndexSetupConfigOptions = {
    /** Stored as `pageIndex.cli`. */
    cliPath: string;
    configPath?: string;
    cwd?: string;
    /** Stored as `pageIndex.python`; when omitted, any existing `pageIndex.python` is removed. */
    pythonPath?: string;
};
|
|
60
|
+
/** Builds the starter config object (version 1 with a single `docs` source). */
export declare function createDefaultRagboxConfig(options?: Pick<WriteDefaultRagboxConfigOptions, "docsDir" | "outputDir">): RagboxConfig;
/** Lists "docs" (when configured) followed by the keys of `sources`, without duplicates. */
export declare function listRagboxConfigSourceNames(config: RagboxConfig | undefined): string[];
/**
 * Writes the starter config and resolves to the path written.
 * Rejects when the file already exists and `force` is not set.
 */
export declare function writeDefaultRagboxConfig(options?: WriteDefaultRagboxConfigOptions): Promise<string>;
/** Stores the PageIndex CLI / Python locations in the config file and resolves to its path. */
export declare function writePageIndexSetupConfig(options: WritePageIndexSetupConfigOptions): Promise<string>;
/**
 * Locates and parses the config file. When no file is found, only `configDir`
 * (the resolved `cwd`) is returned. Rejects when `version` is not 1.
 */
export declare function readRagboxConfig(configPath?: string, cwd?: string): Promise<{
    config?: RagboxConfig;
    configPath?: string;
    configDir: string;
}>;
/** Reads the config, selects a source and merges global and per-source settings. */
export declare function resolveRagboxConfig(options?: ResolveRagboxConfigOptions): Promise<ResolvedRagboxConfig>;
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.RAGBOX_CONFIG_FILE = void 0;
|
|
7
|
+
exports.createDefaultRagboxConfig = createDefaultRagboxConfig;
|
|
8
|
+
exports.listRagboxConfigSourceNames = listRagboxConfigSourceNames;
|
|
9
|
+
exports.writeDefaultRagboxConfig = writeDefaultRagboxConfig;
|
|
10
|
+
exports.writePageIndexSetupConfig = writePageIndexSetupConfig;
|
|
11
|
+
exports.readRagboxConfig = readRagboxConfig;
|
|
12
|
+
exports.resolveRagboxConfig = resolveRagboxConfig;
|
|
13
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
14
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
15
|
+
exports.RAGBOX_CONFIG_FILE = "ragbox.config.json";
|
|
16
|
+
// Default include/exclude glob patterns.
// NOTE(review): neither constant is referenced by any other line of this module
// as shown here — confirm whether they are still needed.
const DEFAULT_INCLUDE = ["**/*.md", "**/*.mdx"];
const DEFAULT_EXCLUDE = ["node_modules/**", ".git/**", ".pageindex/**", "dist/**", "build/**"];
|
|
18
|
+
/**
 * Reports whether `filePath` is accessible.
 * Resolves to `true` when `fs.access` succeeds and to `false` on any failure.
 */
async function pathExists(filePath) {
    let accessible = true;
    try {
        await promises_1.default.access(filePath);
    }
    catch {
        accessible = false;
    }
    return accessible;
}
|
|
27
|
+
/**
 * Maps an optional config name to a file name:
 * `ragbox.config.<name>.json` for a name, otherwise the default config file name.
 */
function configFileName(name) {
    if (name) {
        return `ragbox.config.${name}.json`;
    }
    return exports.RAGBOX_CONFIG_FILE;
}
|
|
30
|
+
/**
 * True when `value` is a bare config name rather than a file path:
 * it does not end in ".json" and contains neither "/" nor "\".
 */
function looksLikeConfigName(value) {
    if (value.endsWith(".json")) {
        return false;
    }
    return !/[\\/]/.test(value);
}
|
|
33
|
+
/**
 * Searches `cwd` and each ancestor directory for the config file of the given
 * (optional) name. Resolves to the first match, or `undefined` once the
 * filesystem root has been checked.
 */
async function findConfigPath(cwd, name) {
    const fileName = configFileName(name);
    let dir = node_path_1.default.resolve(cwd);
    let previousDir;
    do {
        const candidate = node_path_1.default.join(dir, fileName);
        if (await pathExists(candidate)) {
            return candidate;
        }
        previousDir = dir;
        dir = node_path_1.default.dirname(dir);
        // dirname() of the root returns the root itself, which ends the walk.
    } while (dir !== previousDir);
    return undefined;
}
|
|
47
|
+
/**
 * Turns a user-supplied config reference into a path.
 * - Empty input resolves to `undefined`.
 * - A file path is resolved against `cwd`.
 * - A bare config name is searched upward from `cwd`, falling back to
 *   `<cwd>/ragbox.config.<name>.json` when no file is found.
 */
async function resolveConfigPath(configPath, cwd) {
    if (!configPath) {
        return undefined;
    }
    if (!looksLikeConfigName(configPath)) {
        return node_path_1.default.resolve(cwd, configPath);
    }
    const discovered = await findConfigPath(cwd, configPath);
    return discovered ?? node_path_1.default.resolve(cwd, configFileName(configPath));
}
|
|
56
|
+
/**
 * Resolves a path taken from the config file.
 * Absolute paths are returned unchanged, relative ones are resolved against
 * `configDir`, and empty input yields `undefined`.
 */
function resolveConfigRelativePath(configDir, value) {
    if (!value) {
        return undefined;
    }
    if (node_path_1.default.isAbsolute(value)) {
        return value;
    }
    return node_path_1.default.resolve(configDir, value);
}
|
|
62
|
+
/**
 * Resolves a command reference taken from the config file.
 * Values that look like relative paths (leading "." or containing a path
 * separator) are resolved against `configDir`. Empty values, absolute paths
 * and bare command names are returned unchanged.
 */
function resolveConfigCommandPath(configDir, value) {
    const isRelativePath = Boolean(value)
        && !node_path_1.default.isAbsolute(value)
        && (value.startsWith(".") || /[\\/]/.test(value));
    return isRelativePath ? node_path_1.default.resolve(configDir, value) : value;
}
|
|
71
|
+
/** Converts platform path separators to "/" so stored paths look the same on every OS. */
function normalizeConfigRelativePath(value) {
    return value.split(node_path_1.default.sep).join("/");
}
/**
 * Converts a command path into the form stored in the config file.
 *
 * A path inside `configDir` is stored as "./relative/path". A path equal to
 * `configDir`, or outside it, is stored as a "/"-separated absolute path.
 *
 * @param configDir - Directory that contains the config file.
 * @param cwd - Base directory for a relative `value`.
 * @param value - Absolute path, or path relative to `cwd`.
 * @returns The path string to store in the config file.
 */
function toConfigRelativeCommandPath(configDir, cwd, value) {
    const absolutePath = node_path_1.default.isAbsolute(value) ? value : node_path_1.default.resolve(cwd, value);
    const relativePath = node_path_1.default.relative(configDir, absolutePath);
    // Only a leading ".." path segment leaves configDir; a name such as "..tools" does not.
    const escapesConfigDir = relativePath === ".." || relativePath.startsWith(`..${node_path_1.default.sep}`);
    // path.relative() returns an absolute path when there is no relative route
    // (for example a different drive on Windows); that must not get a "./" prefix.
    if (!relativePath || escapesConfigDir || node_path_1.default.isAbsolute(relativePath)) {
        return normalizeConfigRelativePath(absolutePath);
    }
    const normalized = normalizeConfigRelativePath(relativePath);
    return normalized.startsWith("./") ? normalized : `./${normalized}`;
}
|
|
82
|
+
/**
 * Merges config layers left to right; later layers win.
 * Missing layers and properties whose value is `undefined` are skipped, so an
 * unset field in a later layer never erases a value from an earlier one.
 */
function mergeDefinedConfigLayers(configs) {
    const layers = configs
        .filter(Boolean)
        .map((config) => Object.fromEntries(Object.entries(config).filter(([, value]) => value !== undefined)));
    return Object.assign({}, ...layers);
}
/** Merges PageIndex config layers; later layers override earlier ones. */
function mergePageIndexConfig(...configs) {
    return mergeDefinedConfigLayers(configs);
}
/** Merges LLM config layers; later layers override earlier ones. */
function mergeLlmConfig(...configs) {
    return mergeDefinedConfigLayers(configs);
}
/** Merges index config layers; later layers override earlier ones. */
function mergeIndexConfig(...configs) {
    return mergeDefinedConfigLayers(configs);
}
|
|
91
|
+
/**
 * Translates a PageIndex config section into runner options.
 * `cli` and `python` become `cliPath` and `pythonPath`, resolved against
 * `configDir` when path-like; the other fields are copied unchanged.
 */
function pageIndexConfigToOptions(configDir, config) {
    const { cli, concurrency, extraArgs, outputArg, python } = config;
    const cliPath = resolveConfigCommandPath(configDir, cli);
    const pythonPath = resolveConfigCommandPath(configDir, python);
    return { cliPath, concurrency, extraArgs, outputArg, pythonPath };
}
|
|
100
|
+
/** Picks the LLM settings (`apiKey`, `baseUrl`, `model`) out of a config section. */
function llmConfigToOptions(config) {
    const { apiKey, baseUrl, model } = config;
    return { apiKey, baseUrl, model };
}
|
|
107
|
+
/**
 * Translates an index config section into runner options.
 * `outputDir` is resolved against `configDir`; `exclude` and `include` are
 * copied unchanged.
 */
function indexConfigToOptions(configDir, config) {
    const { exclude, include, outputDir: configuredOutputDir } = config;
    return {
        exclude,
        include,
        outputDir: resolveConfigRelativePath(configDir, configuredOutputDir)
    };
}
|
|
114
|
+
/**
 * Builds the starter configuration: version 1, a placeholder PageIndex CLI
 * path, OpenAI defaults, and one `docs` source.
 *
 * @param options - Optional `docsDir` (default "./docs") and `outputDir`
 *   (default "./.ragbox-index").
 */
function createDefaultRagboxConfig(options = {}) {
    const pageIndex = {
        cli: "/path/to/PageIndex/run_pageindex.py"
    };
    const llm = {
        baseUrl: "https://api.openai.com/v1",
        model: "gpt-4o-mini"
    };
    const docs = {
        rootDir: options.docsDir ?? "./docs",
        outputDir: options.outputDir ?? "./.ragbox-index"
    };
    // Key order is kept because this object is serialised to the config file.
    return { version: 1, pageIndex, llm, docs };
}
|
|
132
|
+
/**
 * Chooses which source to use.
 * Order: the explicitly requested name, then "docs" when the config has a
 * `docs` source, then the single key of `sources` when there is exactly one.
 * Returns `undefined` when none of these applies.
 */
function inferSourceName(config, requestedSource) {
    if (requestedSource) {
        return requestedSource;
    }
    if (config?.docs) {
        return "docs";
    }
    const [firstName, ...otherNames] = Object.keys(config?.sources ?? {});
    // With zero sources firstName is undefined; with two or more the choice is ambiguous.
    return otherNames.length === 0 ? firstName : undefined;
}
|
|
142
|
+
/**
 * Looks up a source by name.
 * "docs" maps to `config.docs` when it is set; any other name, or "docs" when
 * `config.docs` is absent, is looked up in `config.sources`.
 *
 * @returns The source object, or `undefined` when the name is empty or unknown.
 */
function findSource(config, sourceName) {
    if (!sourceName) {
        return undefined;
    }
    if (sourceName === "docs" && config?.docs) {
        return config.docs;
    }
    const sources = config?.sources;
    // Own properties only: names such as "toString" or "constructor" must not
    // resolve to members inherited from Object.prototype.
    if (!sources || !Object.prototype.hasOwnProperty.call(sources, sourceName)) {
        return undefined;
    }
    return sources[sourceName];
}
|
|
151
|
+
/**
 * Lists every source name in the config: "docs" first when `config.docs` is
 * set, then the keys of `config.sources`, without duplicates.
 * Returns an empty array when there is no config.
 */
function listRagboxConfigSourceNames(config) {
    if (!config) {
        return [];
    }
    const declaredNames = Object.keys(config.sources ?? {});
    const candidates = config.docs ? ["docs", ...declaredNames] : declaredNames;
    // The Set removes a duplicate "docs" while preserving first-seen order.
    return [...new Set(candidates)];
}
|
|
164
|
+
/**
 * Writes the starter config file and resolves to its absolute path.
 *
 * @param options - `cwd` (default `process.cwd()`), `configPath` (resolved
 *   against `cwd`, default `ragbox.config.json`), `docsDir`, `outputDir`, and
 *   `force` to overwrite an existing file.
 * @throws Error when the file already exists and `force` is not set.
 */
async function writeDefaultRagboxConfig(options = {}) {
    const baseDir = node_path_1.default.resolve(options.cwd ?? process.cwd());
    const configPath = node_path_1.default.resolve(baseDir, options.configPath ?? exports.RAGBOX_CONFIG_FILE);
    if (!options.force) {
        const alreadyThere = await pathExists(configPath);
        if (alreadyThere) {
            throw new Error(`Config file already exists: ${configPath}`);
        }
    }
    await promises_1.default.mkdir(node_path_1.default.dirname(configPath), { recursive: true });
    const defaults = createDefaultRagboxConfig({ docsDir: options.docsDir, outputDir: options.outputDir });
    const serialized = `${JSON.stringify(defaults, null, 2)}\n`;
    await promises_1.default.writeFile(configPath, serialized, "utf8");
    return configPath;
}
|
|
174
|
+
/**
 * Records the PageIndex CLI location, and optionally the Python interpreter,
 * in the ragbox config file.
 *
 * The target file is the explicit `options.configPath`, else the nearest
 * `ragbox.config.json` found walking up from `cwd`, else
 * `<cwd>/ragbox.config.json`. An existing file is parsed and updated in place;
 * otherwise the default config is used as the starting point.
 *
 * @param options - `cliPath` is required; `configPath`, `cwd` and `pythonPath` are optional.
 * @returns The path of the config file that was written.
 * @throws Error when an existing config file has a `version` other than 1.
 */
async function writePageIndexSetupConfig(options) {
    const cwd = node_path_1.default.resolve(options.cwd ?? process.cwd());
    const configPath = (await resolveConfigPath(options.configPath, cwd)) ?? (await findConfigPath(cwd)) ?? node_path_1.default.resolve(cwd, exports.RAGBOX_CONFIG_FILE);
    const configDir = node_path_1.default.dirname(configPath);
    let config = createDefaultRagboxConfig();
    if (await pathExists(configPath)) {
        // Keep whatever the user already configured; only pageIndex is modified below.
        config = JSON.parse(await promises_1.default.readFile(configPath, "utf8"));
        if (config.version !== 1) {
            throw new Error(`Unsupported ragbox config version in ${configPath}: ${String(config.version)}`);
        }
    }
    const pageIndex = {
        ...(config.pageIndex ?? {}),
        // Stored relative to the config file when the CLI lives inside its directory.
        cli: toConfigRelativeCommandPath(configDir, cwd, options.cliPath)
    };
    if (options.pythonPath) {
        pageIndex.python = toConfigRelativeCommandPath(configDir, cwd, options.pythonPath);
    }
    else {
        // No interpreter supplied: drop any previously stored one.
        delete pageIndex.python;
    }
    config.pageIndex = pageIndex;
    await promises_1.default.mkdir(node_path_1.default.dirname(configPath), { recursive: true });
    await promises_1.default.writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");
    return configPath;
}
|
|
200
|
+
/**
 * Locates and parses the ragbox config file.
 *
 * @param configPath - Optional file path or bare config name. When omitted,
 *   `ragbox.config.json` is searched upward from `cwd`.
 * @param cwd - Base directory for resolution and discovery (default `process.cwd()`).
 * @returns `{ config, configDir, configPath }` for a file that was read, or
 *   only `{ configDir }` (the resolved `cwd`) when no config file was found.
 * @throws SyntaxError when the file is not valid JSON; the message names the file.
 * @throws Error when the parsed document's `version` is not 1.
 */
async function readRagboxConfig(configPath, cwd = process.cwd()) {
    const resolvedConfigPath = (await resolveConfigPath(configPath, cwd)) ?? (await findConfigPath(cwd));
    if (!resolvedConfigPath) {
        return {
            configDir: node_path_1.default.resolve(cwd)
        };
    }
    const rawText = await promises_1.default.readFile(resolvedConfigPath, "utf8");
    let config;
    try {
        config = JSON.parse(rawText);
    }
    catch (error) {
        // Re-throw with the file path so the user knows which config is broken.
        const reason = error instanceof Error ? error.message : String(error);
        throw new SyntaxError(`Invalid JSON in ragbox config ${resolvedConfigPath}: ${reason}`);
    }
    // Optional chaining: a JSON document of `null` must fail the version check
    // with a clear message instead of a TypeError.
    if (config?.version !== 1) {
        throw new Error(`Unsupported ragbox config version in ${resolvedConfigPath}: ${String(config?.version)}`);
    }
    return {
        config,
        configDir: node_path_1.default.dirname(resolvedConfigPath),
        configPath: resolvedConfigPath
    };
}
|
|
217
|
+
/**
 * Reads the config file, selects a source, and merges global and per-source
 * settings into the options used by the indexer.
 *
 * Precedence, lowest to highest:
 * - pageIndex / llm: global section, then the source's section.
 * - index: global `index`, then `exclude` / `include` / `outputDir` set
 *   directly on the source, then the source's own `index` section.
 *
 * @param options - Optional `cwd`, `configPath` and `source`.
 * @returns The parsed config, its location, the merged options, the selected
 *   source's resolved `rootDir`, and the selected source name.
 * @throws Error when `options.source` names a source that is not in the config.
 */
async function resolveRagboxConfig(options = {}) {
    const cwd = options.cwd ?? process.cwd();
    const { config, configDir, configPath } = await readRagboxConfig(options.configPath, cwd);
    const sourceName = inferSourceName(config, options.source);
    const source = findSource(config, sourceName);
    if (options.source && !source) {
        throw new Error(`Source not found in ragbox config: ${options.source}`);
    }
    const pageIndexConfig = mergePageIndexConfig(config?.pageIndex, source?.pageIndex);
    const llmConfig = mergeLlmConfig(config?.llm, source?.llm);
    // Include only the index fields the source actually sets. A key that is
    // present with value `undefined` would overwrite the global `index` value
    // when the layers are merged.
    const sourceLevelIndex = source
        ? Object.fromEntries(Object.entries({
            exclude: source.exclude,
            include: source.include,
            outputDir: source.outputDir
        }).filter(([, value]) => value !== undefined))
        : undefined;
    const indexConfig = mergeIndexConfig(config?.index, sourceLevelIndex, source?.index);
    return {
        config,
        configDir,
        configPath,
        pageIndexOptions: {
            ...pageIndexConfigToOptions(configDir, pageIndexConfig),
            ...llmConfigToOptions(llmConfig),
            ...indexConfigToOptions(configDir, indexConfig)
        },
        rootDir: resolveConfigRelativePath(configDir, source?.rootDir),
        sourceName
    };
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.loadPageIndexConfig = loadPageIndexConfig;
|
|
4
|
+
/**
 * Parses a base-10 integer that must be greater than zero.
 * Returns `fallback` for empty input, unparseable input, or values <= 0.
 */
function parsePositiveInt(value, fallback) {
    const parsed = value ? Number.parseInt(value, 10) : Number.NaN;
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}
|
|
11
|
+
/**
 * Parses a base-10 integer that must be zero or greater.
 * Returns `fallback` for empty input, unparseable input, or negative values.
 */
function parseNonNegativeInt(value, fallback) {
    if (value) {
        const parsed = Number.parseInt(value, 10);
        if (Number.isFinite(parsed) && parsed >= 0) {
            return parsed;
        }
    }
    return fallback;
}
|
|
18
|
+
/**
 * Interprets a textual flag. "1", "true", "yes" and "on" (any letter case) are
 * true and every other non-empty value is false. Empty input returns `fallback`.
 */
function parseBoolean(value, fallback = false) {
    if (!value) {
        return fallback;
    }
    switch (value.toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        default:
            return false;
    }
}
|
|
24
|
+
/**
 * Splits a whitespace-separated argument string into tokens.
 * Returns `undefined` for missing or blank input. Quoting is not interpreted.
 */
function parseExtraArgs(value) {
    const tokens = (value ?? "").split(/\s+/).filter((token) => token.length > 0);
    return tokens.length > 0 ? tokens : undefined;
}
|
|
28
|
+
/**
 * Builds the effective PageIndex configuration.
 *
 * For each setting the explicit override wins, then the environment variable
 * (where one exists), then the built-in default (where one exists).
 *
 * @param overrides - Explicit settings. `overrides.env` replaces `process.env`
 *   as the source of environment variables.
 * @returns The resolved settings plus the `env` object that was consulted.
 */
function loadPageIndexConfig(overrides = {}) {
    const env = overrides.env ?? process.env;
    return {
        pythonPath: overrides.pythonPath ?? env.PAGEINDEX_PYTHON ?? "python3",
        cliPath: overrides.cliPath ?? env.PAGEINDEX_CLI,
        model: overrides.model ?? env.PAGEINDEX_MODEL ?? env.LLM_MODEL ?? "gpt-4o-mini",
        baseUrl: overrides.baseUrl ?? env.OPENAI_BASE_URL ?? "https://api.openai.com/v1",
        apiKey: overrides.apiKey ?? env.OPENAI_API_KEY,
        // An explicit override is used as given; only the env value is validated as a positive integer.
        concurrency: overrides.concurrency ?? parsePositiveInt(env.PAGEINDEX_CONCURRENCY, 1),
        // include/exclude/progress/trace/watchProgress have no environment fallback.
        exclude: overrides.exclude,
        include: overrides.include,
        outputDir: overrides.outputDir ?? env.RAGBOX_OUTPUT_DIR,
        outputArg: overrides.outputArg ?? env.PAGEINDEX_OUTPUT_ARG,
        extraArgs: overrides.extraArgs ?? parseExtraArgs(env.PAGEINDEX_EXTRA_ARGS),
        progress: overrides.progress,
        trace: overrides.trace,
        watchDebounceMs: overrides.watchDebounceMs ?? parseNonNegativeInt(env.RAGBOX_WATCH_DEBOUNCE_MS, 500),
        watchHealthFile: overrides.watchHealthFile ?? env.RAGBOX_WATCH_HEALTH_FILE,
        watchLockFile: overrides.watchLockFile ?? env.RAGBOX_WATCH_LOCK_FILE,
        watchProgress: overrides.watchProgress,
        watchRetryAttempts: overrides.watchRetryAttempts ?? parseNonNegativeInt(env.RAGBOX_WATCH_RETRY_ATTEMPTS, 0),
        watchRetryDelayMs: overrides.watchRetryDelayMs ?? parseNonNegativeInt(env.RAGBOX_WATCH_RETRY_DELAY_MS, 1000),
        // Without an explicit override, staging is on when the env flag is truthy
        // or when a staging output directory is configured by either route.
        watchStaging: overrides.watchStaging ??
            (parseBoolean(env.RAGBOX_WATCH_STAGING) || Boolean(overrides.watchStagingOutputDir ?? env.RAGBOX_WATCH_STAGING_OUTPUT_DIR)),
        watchStagingOutputDir: overrides.watchStagingOutputDir ?? env.RAGBOX_WATCH_STAGING_OUTPUT_DIR,
        watchWebhookUrl: overrides.watchWebhookUrl ?? env.RAGBOX_WATCH_WEBHOOK_URL,
        env
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/** Resolves to the SHA-256 digest of the file's contents, formatted as `sha256:<hex>`. */
export declare function hashFile(filePath: string): Promise<string>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.hashFile = hashFile;
|
|
4
|
+
const node_crypto_1 = require("node:crypto");
|
|
5
|
+
const node_fs_1 = require("node:fs");
|
|
6
|
+
/**
 * Streams the file at `filePath` through SHA-256.
 *
 * @returns `sha256:` followed by the hex digest.
 * Rejects with the stream error when the file cannot be read.
 */
async function hashFile(filePath) {
    const digest = (0, node_crypto_1.createHash)("sha256");
    const source = (0, node_fs_1.createReadStream)(filePath);
    // Async iteration yields each chunk and throws on a stream error.
    for await (const chunk of source) {
        digest.update(chunk);
    }
    return `sha256:${digest.digest("hex")}`;
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.indexFolder = indexFolder;
|
|
7
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
+
const config_1 = require("./config");
|
|
10
|
+
const manifest_1 = require("./manifest");
|
|
11
|
+
const pageindex_runner_1 = require("./pageindex-runner");
|
|
12
|
+
const queue_1 = require("./queue");
|
|
13
|
+
const root_tree_1 = require("./root-tree");
|
|
14
|
+
const scan_1 = require("./scan");
|
|
15
|
+
const path_utils_1 = require("./path-utils");
|
|
16
|
+
/** Returns `error.message` for Error instances and `String(error)` for anything else. */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
19
|
+
/**
 * Delivers a progress event to `options.progress` when one is configured.
 * Any error raised while doing so is swallowed on purpose.
 */
function reportProgress(options, event) {
    try {
        if (options.progress != null) {
            options.progress(event);
        }
    }
    catch {
        // Progress reporting must never change indexing behavior.
    }
}
|
|
27
|
+
/**
 * Returns the entries of `files` whose index file is missing, or whose index
 * file's mtime is more than 1 ms older than the entry's own `mtimeMs`.
 * Files are checked one at a time, in order. Any `stat` failure other than
 * ENOENT is rethrown.
 */
async function findStaleIndexFiles(rootDir, files, outputDir) {
    const stale = [];
    for (const candidate of files) {
        let isStale;
        try {
            const indexFile = (0, manifest_1.resolveDocumentIndexPath)(rootDir, candidate.indexPath, outputDir);
            const indexStat = await promises_1.default.stat(indexFile);
            // 1 ms of slack before the index counts as older than its source.
            isStale = indexStat.mtimeMs < candidate.mtimeMs - 1;
        }
        catch (error) {
            if (error.code !== "ENOENT") {
                throw error;
            }
            // A missing index file always needs to be rebuilt.
            isStale = true;
        }
        if (isStale) {
            stale.push(candidate);
        }
    }
    return stale;
}
|
|
46
|
+
/**
 * Indexes the Markdown files under `folder` and rewrites the manifest,
 * root tree and file-state files in the output directory.
 *
 * Only files reported by the manifest diff as needing indexing, plus unchanged
 * files whose index file is missing or stale, are passed to PageIndex. A
 * failure on one file is recorded in its manifest entry and does not abort the run.
 *
 * @param folder - Folder to index; resolved to an absolute path.
 * @param options - Overrides passed to `loadPageIndexConfig`.
 * @returns The new manifest and root tree, their paths, and per-category counts.
 */
async function indexFolder(folder, options = {}) {
    const rootDir = node_path_1.default.resolve(folder);
    const config = (0, config_1.loadPageIndexConfig)(options);
    const outputDir = (0, manifest_1.resolvePageIndexDir)(rootDir, config.outputDir);
    const manifestPath = node_path_1.default.join(outputDir, "manifest.json");
    const rootTreePath = node_path_1.default.join(outputDir, "root-tree.json");
    // When the output directory is nested inside the root, keep the scan out of it.
    const excludedDirs = (0, path_utils_1.isStrictSubPath)(rootDir, outputDir) ? [outputDir] : [];
    const previousManifest = await (0, manifest_1.readManifest)(rootDir, config.outputDir);
    const scannedFiles = await (0, scan_1.scanMarkdownFiles)(rootDir, {
        exclude: config.exclude,
        excludedDirs,
        include: config.include
    });
    const diff = (0, manifest_1.diffManifest)(previousManifest, scannedFiles);
    // Files the diff calls unchanged still need re-indexing when their index file is missing or stale.
    const staleIndexFiles = await findStaleIndexFiles(rootDir, diff.unchanged, config.outputDir);
    const staleIndexPaths = new Set(staleIndexFiles.map((file) => file.path));
    const unchanged = diff.unchanged.filter((file) => !staleIndexPaths.has(file.path));
    const toIndex = [...diff.toIndex, ...staleIndexFiles];
    const previousByPath = new Map(previousManifest.documents.map((record) => [record.path, record]));
    reportProgress(config, {
        type: "scan",
        rootDir,
        outputDir,
        total: scannedFiles.length,
        toIndex: toIndex.length,
        unchanged: unchanged.length,
        deleted: diff.deleted.length
    });
    await promises_1.default.mkdir(node_path_1.default.join(outputDir, manifest_1.INDEXES_DIR), { recursive: true });
    await (0, manifest_1.removeDeletedIndexFiles)(rootDir, diff.deleted, config.outputDir);
    const indexedRecords = await (0, queue_1.runWithConcurrency)(toIndex, config.concurrency, async (scannedFile, index) => {
        const absoluteOutputPath = (0, manifest_1.resolveDocumentIndexPath)(rootDir, scannedFile.indexPath, config.outputDir);
        // Progress events use 1-based positions.
        const progressIndex = index + 1;
        const progressTotal = toIndex.length;
        reportProgress(config, { type: "index-start", path: scannedFile.path, index: progressIndex, total: progressTotal });
        try {
            await (0, pageindex_runner_1.runPageIndex)(scannedFile.absolutePath, absoluteOutputPath, config);
            const summary = await (0, pageindex_runner_1.readPageIndexSummary)(absoluteOutputPath);
            reportProgress(config, {
                type: "index-done",
                path: scannedFile.path,
                index: progressIndex,
                total: progressTotal,
                summary
            });
            return (0, manifest_1.recordFromScannedFile)(scannedFile, { status: "ready", summary });
        }
        catch (error) {
            // Record the failure but keep the previous summary, if there was one.
            const previous = previousByPath.get(scannedFile.path);
            reportProgress(config, {
                type: "index-failed",
                path: scannedFile.path,
                index: progressIndex,
                total: progressTotal,
                error: errorMessage(error)
            });
            return (0, manifest_1.recordFromScannedFile)(scannedFile, {
                status: "failed",
                summary: previous?.summary,
                error: errorMessage(error)
            });
        }
    });
    const indexedByPath = new Map(indexedRecords.map((record) => [record.path, record]));
    const documents = [];
    for (const scannedFile of scannedFiles) {
        const indexedRecord = indexedByPath.get(scannedFile.path);
        if (indexedRecord) {
            documents.push(indexedRecord);
            continue;
        }
        // Not re-indexed in this run: carry over the previous record's outcome.
        // Anything that was not previously "ready" is recorded as "failed".
        const previous = previousByPath.get(scannedFile.path);
        documents.push((0, manifest_1.recordFromScannedFile)(scannedFile, {
            status: previous?.status === "ready" ? "ready" : "failed",
            summary: previous?.summary,
            error: previous?.status === "failed" ? previous.error : undefined
        }));
    }
    documents.sort((left, right) => left.path.localeCompare(right.path));
    const manifest = {
        version: 1,
        rootDir: (0, path_utils_1.normalizeAbsolutePath)(rootDir),
        generatedAt: new Date().toISOString(),
        documents
    };
    const rootTree = (0, root_tree_1.generateRootTree)(manifest);
    await (0, manifest_1.writeManifest)(rootDir, manifest, config.outputDir);
    await (0, root_tree_1.writeRootTree)(rootDir, rootTree, config.outputDir);
    await (0, manifest_1.writeFileState)(rootDir, manifest, config.outputDir);
    reportProgress(config, {
        type: "write",
        manifestPath,
        rootTreePath
    });
    return {
        manifest,
        rootTree,
        outputDir,
        manifestPath,
        rootTreePath,
        added: diff.added.length,
        // Files re-indexed only because their index file was stale are counted as modified.
        modified: diff.modified.length + staleIndexFiles.length,
        retryFailed: diff.retryFailed.length,
        unchanged: unchanged.length,
        deleted: diff.deleted.length,
        failed: documents.filter((record) => record.status === "failed").length,
        ready: documents.filter((record) => record.status === "ready").length
    };
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.chatCompletionsUrl = chatCompletionsUrl;
|
|
4
|
+
exports.chatCompletion = chatCompletion;
|
|
5
|
+
const config_1 = require("./config");
|
|
6
|
+
/**
 * Builds the chat-completions endpoint URL from a base URL.
 *
 * All trailing slashes are removed first. "/chat/completions" is appended
 * unless the base URL already ends with it.
 *
 * @param baseUrl - API base URL, for example "https://api.openai.com/v1".
 * @returns The endpoint URL without a trailing slash.
 */
function chatCompletionsUrl(baseUrl) {
    // Strip every trailing slash, not just one, so "…/v1//" cannot produce "…/v1//chat/completions".
    const trimmed = baseUrl.replace(/\/+$/, "");
    return trimmed.endsWith("/chat/completions") ? trimmed : `${trimmed}/chat/completions`;
}
|
|
10
|
+
/**
 * Sends `messages` to the configured chat model and resolves to the first
 * choice's message content.
 *
 * When `options.llmClient` is supplied, the request is delegated to it and no
 * HTTP call is made. Otherwise the request is POSTed to the chat-completions
 * endpoint with temperature 0.
 *
 * @throws Error when no API key is configured, when the HTTP response is not
 *   OK (status and body are included), or when the response has no content.
 */
async function chatCompletion(messages, options = {}) {
    const { model, apiKey, baseUrl } = (0, config_1.loadPageIndexConfig)(options);
    const request = { messages, model, temperature: 0 };
    const injectedClient = options.llmClient;
    if (injectedClient) {
        return await injectedClient.chatCompletion(request);
    }
    if (!apiKey) {
        throw new Error("OPENAI_API_KEY is required for query");
    }
    const endpoint = chatCompletionsUrl(baseUrl);
    const headers = {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
    };
    const body = JSON.stringify({
        model: request.model,
        messages: request.messages,
        temperature: request.temperature
    });
    const response = await fetch(endpoint, { method: "POST", headers, body });
    if (!response.ok) {
        const failureBody = await response.text();
        throw new Error(`LLM request failed with ${response.status}: ${failureBody}`);
    }
    const payload = (await response.json());
    const content = payload.choices?.[0]?.message?.content;
    if (!content) {
        throw new Error("LLM response did not contain message content");
    }
    return content;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { DocumentRecord, Manifest, ManifestDiff, ScannedFile } from "./types";
|
|
2
|
+
// NOTE(review): the implementation of this module is not part of this view.
// The comments below are based on the declared signatures and on how
// indexFolder uses them — confirm against manifest.js.

export declare const PAGEINDEX_DIR = ".pageindex";
/** Subdirectory of the output directory that indexFolder creates before indexing. */
export declare const INDEXES_DIR = "indexes";
export declare const MANIFEST_FILE = "manifest.json";
export declare const ROOT_TREE_FILE = "root-tree.json";
export declare const FILE_STATE_FILE: string;
export declare function createEmptyManifest(rootDir: string): Manifest;
/** Returns the output directory for `rootDir`, taking an optional configured `outputDir` into account. */
export declare function resolvePageIndexDir(rootDir: string, outputDir?: string): string;
export declare function getPageIndexPath(rootDir: string, relativePath: string, outputDir?: string): string;
/** Maps a document's `indexPath` to the location of its index file. */
export declare function resolveDocumentIndexPath(rootDir: string, indexPath: string, outputDir?: string): string;
/** Loads the previously written manifest; presumably falls back to an empty one when none exists — verify. */
export declare function readManifest(rootDir: string, outputDir?: string): Promise<Manifest>;
/** Compares the previous manifest with the current scan result. */
export declare function diffManifest(previous: Manifest, scannedFiles: ScannedFile[]): ManifestDiff;
/** Builds a manifest record for a scanned file, with optional field overrides such as `status`, `summary` and `error`. */
export declare function recordFromScannedFile(scannedFile: ScannedFile, fields?: Partial<DocumentRecord>): DocumentRecord;
export declare function atomicWriteJson(filePath: string, value: unknown): Promise<void>;
export declare function writeManifest(rootDir: string, manifest: Manifest, outputDir?: string): Promise<void>;
export declare function writeFileState(rootDir: string, manifest: Manifest, outputDir?: string): Promise<void>;
/** Removes the index files that belong to documents reported as deleted. */
export declare function removeDeletedIndexFiles(rootDir: string, deletedRecords: DocumentRecord[], outputDir?: string): Promise<void>;
|