pageindex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +265 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +2178 -0
- package/dist/index.cjs +2282 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2209 -0
- package/dist/markdown.d.ts +73 -0
- package/dist/markdown.d.ts.map +1 -0
- package/dist/ocr.d.ts +57 -0
- package/dist/ocr.d.ts.map +1 -0
- package/dist/ocr.js +6161 -0
- package/dist/openai.d.ts +58 -0
- package/dist/openai.d.ts.map +1 -0
- package/dist/pageindex.d.ts +64 -0
- package/dist/pageindex.d.ts.map +1 -0
- package/dist/pdf.d.ts +47 -0
- package/dist/pdf.d.ts.map +1 -0
- package/dist/prompts.d.ts +69 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/toc.d.ts +77 -0
- package/dist/toc.d.ts.map +1 -0
- package/dist/tree.d.ts +78 -0
- package/dist/tree.d.ts.map +1 -0
- package/dist/types.d.ts +83 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils.d.ts +100 -0
- package/dist/utils.d.ts.map +1 -0
- package/package.json +81 -0
- package/src/cli.ts +272 -0
- package/src/index.ts +82 -0
- package/src/markdown.ts +600 -0
- package/src/ocr.ts +303 -0
- package/src/openai.ts +313 -0
- package/src/pageindex.ts +313 -0
- package/src/pdf-poppler.d.ts +42 -0
- package/src/pdf.ts +165 -0
- package/src/prompts.ts +350 -0
- package/src/toc.ts +468 -0
- package/src/tree.ts +531 -0
- package/src/types.ts +93 -0
- package/src/utils.ts +613 -0
package/package.json
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pageindex",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Vectorless, reasoning-based RAG for document understanding. Multi-runtime (Node.js, Bun, Deno). PDF and Markdown support with OCR.",
|
|
5
|
+
"module": "dist/index.js",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"bin": {
|
|
17
|
+
"pageindex": "dist/cli.js"
|
|
18
|
+
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "npm run build:esm && npm run build:cjs && npm run build:cli && npm run build:types",
|
|
21
|
+
"build:esm": "esbuild src/index.ts --bundle --format=esm --outfile=dist/index.js --platform=node --external:openai --external:pdf-parse --external:pdf-poppler",
|
|
22
|
+
"build:cjs": "esbuild src/index.ts --bundle --format=cjs --outfile=dist/index.cjs --platform=node --external:openai --external:pdf-parse --external:pdf-poppler",
|
|
23
|
+
"build:cli": "esbuild src/cli.ts --bundle --format=esm --outfile=dist/cli.js --platform=node --external:openai --external:pdf-parse --external:pdf-poppler --banner:js='#!/usr/bin/env node'",
|
|
24
|
+
"build:types": "tsc --declaration --emitDeclarationOnly --outDir dist",
|
|
25
|
+
"typecheck": "tsc --noEmit",
|
|
26
|
+
"test": "bun test",
|
|
27
|
+
"test:node": "node --test tests/*.test.js",
|
|
28
|
+
"benchmark": "bun run benchmarks/benchmark.ts",
|
|
29
|
+
"prepublishOnly": "npm run build"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"pageindex",
|
|
33
|
+
"rag",
|
|
34
|
+
"retrieval",
|
|
35
|
+
"document",
|
|
36
|
+
"pdf",
|
|
37
|
+
"markdown",
|
|
38
|
+
"reasoning",
|
|
39
|
+
"vectorless",
|
|
40
|
+
"llm",
|
|
41
|
+
"openai",
|
|
42
|
+
"tree-index",
|
|
43
|
+
"information-retrieval",
|
|
44
|
+
"ai",
|
|
45
|
+
"ocr",
|
|
46
|
+
"nodejs",
|
|
47
|
+
"bun",
|
|
48
|
+
"deno",
|
|
49
|
+
"multi-runtime"
|
|
50
|
+
],
|
|
51
|
+
"author": "Antonio Oliveira <antonio@oakoliver.com> (https://oakoliver.com)",
|
|
52
|
+
"license": "MIT",
|
|
53
|
+
"repository": {
|
|
54
|
+
"type": "git",
|
|
55
|
+
"url": "git+https://github.com/oakoliver/pageindex.git"
|
|
56
|
+
},
|
|
57
|
+
"homepage": "https://github.com/oakoliver/pageindex#readme",
|
|
58
|
+
"bugs": {
|
|
59
|
+
"url": "https://github.com/oakoliver/pageindex/issues"
|
|
60
|
+
},
|
|
61
|
+
"engines": {
|
|
62
|
+
"node": ">=18.0.0"
|
|
63
|
+
},
|
|
64
|
+
"files": [
|
|
65
|
+
"dist",
|
|
66
|
+
"src",
|
|
67
|
+
"README.md",
|
|
68
|
+
"LICENSE"
|
|
69
|
+
],
|
|
70
|
+
"dependencies": {
|
|
71
|
+
"openai": "^6.29.0",
|
|
72
|
+
"pdf-parse": "^2.4.5",
|
|
73
|
+
"pdf-poppler": "^0.2.3"
|
|
74
|
+
},
|
|
75
|
+
"devDependencies": {
|
|
76
|
+
"@types/node": "^20.0.0",
|
|
77
|
+
"@types/pdf-parse": "^1.1.5",
|
|
78
|
+
"esbuild": "^0.27.4",
|
|
79
|
+
"typescript": "^5.0.0"
|
|
80
|
+
}
|
|
81
|
+
}
|
package/src/cli.ts
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* pageindex CLI
|
|
4
|
+
* Command-line interface for processing PDFs and Markdown documents
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { parseArgs } from "util";
|
|
8
|
+
import { PageIndex } from "./pageindex";
|
|
9
|
+
import { mdToTree } from "./markdown";
|
|
10
|
+
import * as path from "path";
|
|
11
|
+
import * as fs from "fs";
|
|
12
|
+
import * as fsp from "fs/promises";
|
|
13
|
+
|
|
14
|
+
/**
 * Fully resolved command-line options for the pageindex CLI.
 *
 * Every flag that has a default is non-optional here; only the input paths,
 * the output path and the custom base URL may be absent.
 */
interface CliArgs {
  // --- Input / output -------------------------------------------------------
  /** Path given with `--pdf`, if any. */
  pdf?: string;
  /** Path given with `--md`, if any. */
  md?: string;
  /** Destination given with `--output` / `-o`, if any. */
  output?: string;

  // --- Model / endpoint -----------------------------------------------------
  /** Value of `--model`. */
  model: string;
  /** Whether `--lmstudio` was passed. */
  lmstudio: boolean;
  /** Whether `--ollama` was passed. */
  ollama: boolean;
  /** Value of `--base-url`, if any. */
  baseUrl?: string;

  // --- PDF options ----------------------------------------------------------
  /** Value of `--toc-check-pages`. */
  tocCheckPages: number;
  /** Value of `--max-pages-per-node`. */
  maxPagesPerNode: number;
  /** Value of `--max-tokens-per-node`. */
  maxTokensPerNode: number;

  // --- Output shaping -------------------------------------------------------
  /** Controlled by `--add-node-id` / `--no-node-id`. */
  addNodeId: boolean;
  /** Controlled by `--add-node-summary` / `--no-node-summary`. */
  addNodeSummary: boolean;
  /** Whether `--add-doc-description` was passed. */
  addDocDescription: boolean;
  /** Whether `--add-node-text` was passed. */
  addNodeText: boolean;

  // --- Markdown options -----------------------------------------------------
  /** Whether `--thinning` was passed. */
  thinning: boolean;
  /** Value of `--thinning-threshold`. */
  thinningThreshold: number;
  /** Value of `--summary-token-threshold`. */
  summaryTokenThreshold: number;

  // --- OCR options ----------------------------------------------------------
  /** Whether `--ocr` was passed. */
  ocr: boolean;
  /** Value of `--ocr-model`. */
  ocrModel: string;
  /** Value of `--ocr-prompt-type`. */
  ocrPromptType: "text" | "formula" | "table";
  /** Value of `--image-dpi`. */
  imageDpi: number;

  /** Whether `--help` / `-h` was passed. */
  help: boolean;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Print the CLI usage text to stdout.
 *
 * The command is shown as `pageindex`, matching the `bin` entry the package
 * installs; the previous text advertised `bun-pageindex`, which is not a
 * command this package provides.
 */
function printHelp(): void {
  console.log(`
pageindex - Vectorless, reasoning-based RAG for document understanding

USAGE:
  pageindex --pdf <path>           Process a PDF file
  pageindex --md <path>            Process a Markdown file

OPTIONS:
  --pdf <path>                     Path to PDF file
  --md <path>                      Path to Markdown file
  --output, -o <path>              Output file path (default: ./results/<name>_structure.json)

MODEL OPTIONS:
  --model <name>                   Model to use (default: gpt-4o-2024-11-20)
  --lmstudio                       Use LM Studio (localhost:1234)
  --ollama                         Use Ollama (localhost:11434)
  --base-url <url>                 Custom OpenAI-compatible API URL

PDF OPTIONS:
  --toc-check-pages <n>            Pages to check for TOC (default: 20)
  --max-pages-per-node <n>         Max pages per node (default: 10)
  --max-tokens-per-node <n>        Max tokens per node (default: 20000)

OCR OPTIONS (for scanned PDFs):
  --ocr                            Enable OCR mode for scanned PDFs
  --ocr-model <name>               OCR model (default: mlx-community/GLM-OCR-bf16)
  --ocr-prompt-type <type>         OCR prompt: text, formula, table (default: text)
  --image-dpi <n>                  Image DPI for OCR (default: 150)

MARKDOWN OPTIONS:
  --thinning                       Apply tree thinning
  --thinning-threshold <n>         Min tokens for thinning (default: 5000)
  --summary-token-threshold <n>    Token threshold for summaries (default: 200)

OUTPUT OPTIONS:
  --add-node-id                    Add node IDs (default: true)
  --no-node-id                     Don't add node IDs
  --add-node-summary               Add node summaries (default: true)
  --no-node-summary                Don't add node summaries
  --add-doc-description            Add document description
  --add-node-text                  Include raw text in output

  --help, -h                       Show this help message

EXAMPLES:
  pageindex --pdf document.pdf
  pageindex --md README.md --add-doc-description
  pageindex --pdf paper.pdf --lmstudio --model llama3
  pageindex --pdf report.pdf --base-url http://localhost:8080/v1
  pageindex --pdf scanned.pdf --ocr --lmstudio --model qwen/qwen3-vl-30b
`);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Parse command-line flags into a typed {@link CliArgs} object.
 *
 * Numeric flags must be plain non-negative integers, and `--ocr-prompt-type`
 * must be one of `text`, `formula` or `table`; anything else is rejected up
 * front instead of flowing through as `NaN` or an unchecked string.
 *
 * @param argv - Arguments to parse; defaults to the current process arguments
 *   with the runtime and script path removed.
 * @returns The resolved options with defaults applied.
 * @throws Error when a numeric flag is not a non-negative integer or the OCR
 *   prompt type is not recognised. `parseArgs` itself also throws on unknown
 *   flags.
 */
function parseCliArgs(argv: string[] = process.argv.slice(2)): CliArgs {
  const ocrPromptTypes = ["text", "formula", "table"] as const;

  // Strictly converts a numeric flag; an absent or empty value keeps the default.
  const toCount = (flag: string, raw: string | undefined, fallback: number): number => {
    if (raw === undefined || raw === "") {
      return fallback;
    }
    const digits = raw.trim();
    const parsed = Number(digits);
    if (!/^\d+$/.test(digits) || !Number.isSafeInteger(parsed)) {
      throw new Error(
        `Invalid value for --${flag}: expected a non-negative integer, got "${raw}"`
      );
    }
    return parsed;
  };

  const { values } = parseArgs({
    args: argv,
    options: {
      pdf: { type: "string" },
      md: { type: "string" },
      model: { type: "string", default: "gpt-4o-2024-11-20" },
      "toc-check-pages": { type: "string", default: "20" },
      "max-pages-per-node": { type: "string", default: "10" },
      "max-tokens-per-node": { type: "string", default: "20000" },
      "add-node-id": { type: "boolean", default: true },
      "no-node-id": { type: "boolean", default: false },
      "add-node-summary": { type: "boolean", default: true },
      "no-node-summary": { type: "boolean", default: false },
      "add-doc-description": { type: "boolean", default: false },
      "add-node-text": { type: "boolean", default: false },
      thinning: { type: "boolean", default: false },
      "thinning-threshold": { type: "string", default: "5000" },
      "summary-token-threshold": { type: "string", default: "200" },
      output: { type: "string", short: "o" },
      lmstudio: { type: "boolean", default: false },
      ollama: { type: "boolean", default: false },
      "base-url": { type: "string" },
      // OCR options
      ocr: { type: "boolean", default: false },
      "ocr-model": { type: "string", default: "mlx-community/GLM-OCR-bf16" },
      "ocr-prompt-type": { type: "string", default: "text" },
      "image-dpi": { type: "string", default: "150" },
      help: { type: "boolean", short: "h", default: false },
    },
    // Positional arguments are accepted but not used by the CLI.
    allowPositionals: true,
  });

  // `find` narrows the free-form string to the literal union without a cast.
  const requestedPromptType = values["ocr-prompt-type"] || "text";
  const ocrPromptType = ocrPromptTypes.find((candidate) => candidate === requestedPromptType);
  if (ocrPromptType === undefined) {
    throw new Error(
      `Invalid value for --ocr-prompt-type: expected one of ${ocrPromptTypes.join(", ")}, got "${requestedPromptType}"`
    );
  }

  return {
    pdf: values.pdf,
    md: values.md,
    model: values.model || "gpt-4o-2024-11-20",
    tocCheckPages: toCount("toc-check-pages", values["toc-check-pages"], 20),
    maxPagesPerNode: toCount("max-pages-per-node", values["max-pages-per-node"], 10),
    maxTokensPerNode: toCount("max-tokens-per-node", values["max-tokens-per-node"], 20000),
    // The explicit --no-* flags win over their --add-* counterparts.
    addNodeId: values["no-node-id"] ? false : (values["add-node-id"] ?? true),
    addNodeSummary: values["no-node-summary"] ? false : (values["add-node-summary"] ?? true),
    addDocDescription: values["add-doc-description"] ?? false,
    addNodeText: values["add-node-text"] ?? false,
    thinning: values.thinning ?? false,
    thinningThreshold: toCount("thinning-threshold", values["thinning-threshold"], 5000),
    summaryTokenThreshold: toCount(
      "summary-token-threshold",
      values["summary-token-threshold"],
      200
    ),
    output: values.output,
    lmstudio: values.lmstudio ?? false,
    ollama: values.ollama ?? false,
    baseUrl: values["base-url"],
    // OCR options
    ocr: values.ocr ?? false,
    ocrModel: values["ocr-model"] || "mlx-community/GLM-OCR-bf16",
    ocrPromptType,
    imageDpi: toCount("image-dpi", values["image-dpi"], 150),
    help: values.help ?? false,
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Print each message to stderr, then terminate the process with exit code 1.
 */
function exitWithError(...messages: string[]): never {
  for (const message of messages) {
    console.error(message);
  }
  process.exit(1);
}

/**
 * CLI entry point: validate the requested input, build the tree structure for
 * the PDF or Markdown file, and write it to disk as pretty-printed JSON.
 *
 * The output directory is created only after the input has been validated, and
 * it is the directory of the actual output path. A custom `--output` location
 * therefore works even when its parent directory does not exist yet, and
 * `./results` is no longer created when it is not going to be used.
 */
async function main(): Promise<void> {
  const args = parseCliArgs();

  if (args.help) {
    printHelp();
    process.exit(0);
  }

  // Exactly one of --pdf / --md is required.
  const inputPath = args.pdf || args.md;
  if (!inputPath) {
    exitWithError(
      "Error: Either --pdf or --md must be specified",
      "Use --help for usage information"
    );
  }
  if (args.pdf && args.md) {
    exitWithError("Error: Only one of --pdf or --md can be specified");
  }

  // Validate the input before touching the filesystem or calling any model.
  const lowerCasedPath = inputPath.toLowerCase();
  if (args.pdf) {
    if (!lowerCasedPath.endsWith(".pdf")) {
      exitWithError("Error: PDF file must have .pdf extension");
    }
    if (!fs.existsSync(inputPath)) {
      exitWithError(`Error: PDF file not found: ${inputPath}`);
    }
  } else {
    if (!lowerCasedPath.endsWith(".md") && !lowerCasedPath.endsWith(".markdown")) {
      exitWithError("Error: Markdown file must have .md or .markdown extension");
    }
    if (!fs.existsSync(inputPath)) {
      exitWithError(`Error: Markdown file not found: ${inputPath}`);
    }
  }

  // Determine the output path and make sure its directory exists up front, so
  // a bad destination fails before the (potentially slow) indexing run.
  const inputName = path.basename(inputPath, path.extname(inputPath));
  const outputPath = args.output || path.join("./results", `${inputName}_structure.json`);
  await fsp.mkdir(path.dirname(outputPath), { recursive: true });

  let result: unknown;

  if (args.pdf) {
    console.log(`Processing PDF: ${inputPath}`);
    if (args.ocr) {
      console.log(`[OCR Mode] Using OCR model: ${args.ocrModel}`);
    }

    const pageIndex = new PageIndex({
      model: args.model,
      tocCheckPageNum: args.tocCheckPages,
      maxPageNumEachNode: args.maxPagesPerNode,
      maxTokenNumEachNode: args.maxTokensPerNode,
      addNodeId: args.addNodeId,
      addNodeSummary: args.addNodeSummary,
      addDocDescription: args.addDocDescription,
      addNodeText: args.addNodeText,
      // OCR options
      extractionMode: args.ocr ? "ocr" : "text",
      ocrModel: args.ocrModel,
      ocrPromptType: args.ocrPromptType,
      imageDpi: args.imageDpi,
    });

    // Endpoint selection; the first matching flag wins.
    if (args.lmstudio) {
      pageIndex.useLMStudio();
    } else if (args.ollama) {
      pageIndex.useOllama();
    } else if (args.baseUrl) {
      pageIndex.setBaseUrl(args.baseUrl);
    }

    result = await pageIndex.fromPdf(inputPath);
  } else {
    console.log(`Processing Markdown: ${inputPath}`);

    // NOTE(review): --lmstudio / --ollama / --base-url are not forwarded on
    // this path; confirm whether mdToTree can accept an endpoint configuration.
    result = await mdToTree(inputPath, {
      model: args.model,
      addNodeId: args.addNodeId,
      addNodeSummary: args.addNodeSummary,
      addDocDescription: args.addDocDescription,
      addNodeText: args.addNodeText,
      thinning: args.thinning,
      thinningThreshold: args.thinningThreshold,
      summaryTokenThreshold: args.summaryTokenThreshold,
    });
  }

  console.log("Parsing done, saving to file...");

  await fsp.writeFile(outputPath, JSON.stringify(result, null, 2));
  console.log(`Tree structure saved to: ${outputPath}`);
}
|
|
267
|
+
|
|
268
|
+
// Run
|
|
269
|
+
// Start the CLI. Any rejection is reported on stderr and turned into exit
// code 1. The reason is narrowed first because a rejection is not guaranteed
// to be an Error instance (reading `.message` on it could print `undefined`
// or throw inside this handler).
main().catch((error: unknown) => {
  const detail = error instanceof Error ? error.message : String(error);
  console.error("Error:", detail);
  process.exit(1);
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pageindex
|
|
3
|
+
* Vectorless, reasoning-based RAG for document understanding (Node.js, Bun, Deno)
|
|
4
|
+
*
|
|
5
|
+
* @author Antonio Oliveira <antonio@oakoliver.com> (https://oakoliver.com)
|
|
6
|
+
* @license MIT
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Main API exports
|
|
10
|
+
export {
|
|
11
|
+
PageIndex,
|
|
12
|
+
createPageIndex,
|
|
13
|
+
indexPdf,
|
|
14
|
+
indexPdfWithLMStudio,
|
|
15
|
+
indexPdfWithOcr,
|
|
16
|
+
indexPdfWithLMStudioOcr,
|
|
17
|
+
} from "./pageindex";
|
|
18
|
+
|
|
19
|
+
// Types
|
|
20
|
+
export type {
|
|
21
|
+
PageIndexOptions,
|
|
22
|
+
MarkdownOptions,
|
|
23
|
+
TreeNode,
|
|
24
|
+
PageIndexResult,
|
|
25
|
+
TocItem,
|
|
26
|
+
PageContent,
|
|
27
|
+
TocCheckResult,
|
|
28
|
+
ExtractionMode,
|
|
29
|
+
OcrPromptType,
|
|
30
|
+
} from "./types";
|
|
31
|
+
|
|
32
|
+
// PDF utilities
|
|
33
|
+
export { parsePdf, getPdfName, type PdfInfo, type PdfPage } from "./pdf";
|
|
34
|
+
|
|
35
|
+
// OCR utilities
|
|
36
|
+
export {
|
|
37
|
+
pdfToImages,
|
|
38
|
+
pdfBufferToImages,
|
|
39
|
+
ocrImage,
|
|
40
|
+
ocrImages,
|
|
41
|
+
parsePdfWithOcr,
|
|
42
|
+
getPdfInfo,
|
|
43
|
+
type OcrOptions,
|
|
44
|
+
} from "./ocr";
|
|
45
|
+
|
|
46
|
+
// OpenAI utilities
|
|
47
|
+
export {
|
|
48
|
+
chatGPT,
|
|
49
|
+
chatGPTWithFinishReason,
|
|
50
|
+
chatGPTBatch,
|
|
51
|
+
getLMStudioConfig,
|
|
52
|
+
getOllamaConfig,
|
|
53
|
+
type ClientConfig,
|
|
54
|
+
type ChatOptions,
|
|
55
|
+
type ChatResult,
|
|
56
|
+
} from "./openai";
|
|
57
|
+
|
|
58
|
+
// Tree utilities
|
|
59
|
+
export {
|
|
60
|
+
writeNodeId,
|
|
61
|
+
getNodes,
|
|
62
|
+
structureToList,
|
|
63
|
+
getLeafNodes,
|
|
64
|
+
isLeafNode,
|
|
65
|
+
listToTree,
|
|
66
|
+
postProcessing,
|
|
67
|
+
printToc,
|
|
68
|
+
countTokens,
|
|
69
|
+
extractJson,
|
|
70
|
+
formatStructure,
|
|
71
|
+
} from "./utils";
|
|
72
|
+
|
|
73
|
+
// Markdown processing
|
|
74
|
+
export {
|
|
75
|
+
mdToTree,
|
|
76
|
+
markdownToTree,
|
|
77
|
+
extractNodesFromMarkdown,
|
|
78
|
+
extractNodeTextContent,
|
|
79
|
+
buildTreeFromNodes,
|
|
80
|
+
treeThinningForIndex,
|
|
81
|
+
printTocMd,
|
|
82
|
+
} from "./markdown";
|