edgeparse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
// Bundler-emitted shim that gives this ES module chunk a `require`-like callable.
// - If a global `require` exists when the chunk is evaluated, that function is used directly.
// - Otherwise, if `Proxy` exists, the fallback function is wrapped in a Proxy whose `get`
//   trap re-checks for `require` on every property read.
// - Otherwise the bare fallback function is used.
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
+ }) : x)(function(x) {
4
// Fallback body: delegate to `require` if it has appeared since evaluation, else fail loudly.
+ if (typeof require !== "undefined") return require.apply(this, arguments);
5
+ throw Error('Dynamic require of "' + x + '" is not supported');
6
+ });
7
+
8
+ // src/index.ts
9
/**
 * Load the prebuilt native addon that matches the running platform.
 *
 * The lookup key is `${process.platform}-${process.arch}`. For a supported key
 * the addon is requested first under the unscoped name that this package's
 * `optionalDependencies` and the per-platform manifests publish
 * (`edgeparse-<target>`), and then under the scoped name
 * `@edgeparse/pdf-<target>`, so either install layout works.
 *
 * @returns {object} The native binding module.
 * @throws {Error} If the platform is unsupported, or if no candidate package
 *   can be loaded (the first load failure is attached as `cause`).
 */
function loadNative() {
  const targets = {
    "linux-x64": "linux-x64-gnu",
    "linux-arm64": "linux-arm64-gnu",
    "darwin-x64": "darwin-x64",
    "darwin-arm64": "darwin-arm64",
    "win32-x64": "win32-x64-msvc"
  };
  const key = `${process.platform}-${process.arch}`;
  // Own-property check so inherited keys can never be mistaken for a target.
  const target = Object.hasOwn(targets, key) ? targets[key] : void 0;
  if (!target) throw new Error(`edgeparse: unsupported platform: ${key}`);
  const candidates = [`edgeparse-${target}`, `@edgeparse/pdf-${target}`];
  const failures = [];
  for (const pkg of candidates) {
    try {
      return __require(pkg);
    } catch (err) {
      // Remember the failure and fall through to the next candidate name.
      failures.push(err);
    }
  }
  const first = failures[0];
  const detail = first instanceof Error ? first.message : String(first);
  throw new Error(
    `edgeparse: failed to load native addon for ${key} (tried ${candidates.join(", ")}): ${detail}`,
    { cause: first }
  );
}
22
// Module-level cache for the native binding; stays unset until first use.
let native;

/**
 * Return the native binding, loading it through `loadNative()` the first time
 * (and again on later calls only while the cached value is still falsy).
 */
function getNative() {
  if (native) return native;
  native = loadNative();
  return native;
}
29
/**
 * Run the native `convert` on `inputPath`.
 *
 * When `options` is truthy its camelCase fields are copied into a new object
 * using the snake_case keys the native call is given; otherwise `undefined`
 * is passed in place of the options object.
 */
function convert(inputPath, options) {
  const binding = getNative();
  let nativeOptions;
  if (options) {
    const { format, pages, password, readingOrder, tableMethod, imageOutput } = options;
    nativeOptions = {
      format,
      pages,
      password,
      reading_order: readingOrder,
      table_method: tableMethod,
      image_output: imageOutput
    };
  }
  return binding.convert(inputPath, nativeOptions);
}
40
/** Return whatever the native binding's `version()` reports. */
function version() {
  const binding = getNative();
  return binding.version();
}
43
+
44
+ export {
45
+ convert,
46
+ version
47
+ };
package/dist/cli.d.mts ADDED
@@ -0,0 +1 @@
1
+ #!/usr/bin/env node
package/dist/cli.d.ts ADDED
@@ -0,0 +1 @@
1
+ #!/usr/bin/env node
package/dist/cli.js ADDED
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+
4
+ // src/cli.ts
5
+ var import_node_util = require("util");
6
+ var import_node_fs = require("fs");
7
+
8
+ // src/index.ts
9
+ function loadNative() {
10
+ const platforms = {
11
+ "linux-x64": "@edgeparse/pdf-linux-x64-gnu",
12
+ "linux-arm64": "@edgeparse/pdf-linux-arm64-gnu",
13
+ "darwin-x64": "@edgeparse/pdf-darwin-x64",
14
+ "darwin-arm64": "@edgeparse/pdf-darwin-arm64",
15
+ "win32-x64": "@edgeparse/pdf-win32-x64-msvc"
16
+ };
17
+ const key = `${process.platform}-${process.arch}`;
18
+ const pkg = platforms[key];
19
+ if (!pkg) throw new Error(`edgeparse: unsupported platform: ${key}`);
20
+ return require(pkg);
21
+ }
22
// Module-level cache for the native binding; stays unset until first use.
let native;

/**
 * Return the native binding, loading it through `loadNative()` the first time
 * (and again on later calls only while the cached value is still falsy).
 */
function getNative() {
  if (native) return native;
  native = loadNative();
  return native;
}
29
/**
 * Run the native `convert` on `inputPath2`.
 *
 * When `options` is truthy its camelCase fields are copied into a new object
 * using the snake_case keys the native call is given; otherwise `undefined`
 * is passed in place of the options object.
 */
function convert(inputPath2, options) {
  const binding = getNative();
  let nativeOptions;
  if (options) {
    const { format, pages, password, readingOrder, tableMethod, imageOutput } = options;
    nativeOptions = {
      format,
      pages,
      password,
      reading_order: readingOrder,
      table_method: tableMethod,
      image_output: imageOutput
    };
  }
  return binding.convert(inputPath2, nativeOptions);
}
40
/** Return whatever the native binding's `version()` reports. */
function version() {
  const binding = getNative();
  return binding.version();
}
43
+
44
+ // src/cli.ts
45
/**
 * CLI entry point.
 *
 * Parses argv, then either prints the version, prints usage, or converts the
 * first positional argument and writes the result to `--output` or stdout.
 *
 * Argument parsing and the `--version` lookup run inside the same `try` as the
 * conversion, so an unknown flag or a native-addon load failure is reported as
 * a one-line `edgeparse: <message>` on stderr with exit code 1 instead of an
 * uncaught exception. Usage shown because the input file is missing is an
 * error and goes to stderr; usage requested with `--help` goes to stdout.
 */
var usage = `Usage: edgeparse [options] <input.pdf>

Options:
-f, --format <fmt> Output format: markdown, json, html, text (default: markdown)
-p, --pages <range> Page range, e.g. "1,3,5-7"
--password <pw> Password for encrypted PDFs
--reading-order <algo> Reading order: xycut (default) or off
--table-method <m> Table method: default or cluster
--image-output <mode> Image output: off (default), embedded, external
-o, --output <path> Output file path (default: stdout)
-v, --version Show version
-h, --help Show this help
`;
var values;
var positionals;
var inputPath;
try {
  ({ values, positionals } = (0, import_node_util.parseArgs)({
    allowPositionals: true,
    options: {
      format: { type: "string", short: "f", default: "markdown" },
      pages: { type: "string", short: "p" },
      password: { type: "string" },
      "reading-order": { type: "string", default: "xycut" },
      "table-method": { type: "string", default: "default" },
      "image-output": { type: "string", default: "off" },
      output: { type: "string", short: "o" },
      version: { type: "boolean", short: "v" },
      help: { type: "boolean", short: "h" }
    }
  }));
  if (values.version) {
    console.log(`edgeparse ${version()}`);
    process.exit(0);
  }
  if (values.help) {
    console.log(usage);
    process.exit(0);
  }
  if (positionals.length === 0) {
    // Missing input is a usage error: keep stdout clean for piped output.
    console.error(usage);
    process.exit(1);
  }
  inputPath = positionals[0];
  const result = convert(inputPath, {
    format: values.format,
    pages: values.pages,
    password: values.password,
    readingOrder: values["reading-order"],
    tableMethod: values["table-method"],
    imageOutput: values["image-output"]
  });
  if (values.output) {
    (0, import_node_fs.writeFileSync)(values.output, result, "utf-8");
  } else {
    process.stdout.write(result);
  }
} catch (err) {
  const msg = err instanceof Error ? err.message : String(err);
  console.error(`edgeparse: ${msg}`);
  process.exit(1);
}
package/dist/cli.mjs ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ convert,
4
+ version
5
+ } from "./chunk-2PGOFCEV.mjs";
6
+
7
+ // src/cli.ts
8
+ import { parseArgs } from "util";
9
+ import { writeFileSync } from "fs";
10
/**
 * CLI entry point.
 *
 * Parses argv, then either prints the version, prints usage, or converts the
 * first positional argument and writes the result to `--output` or stdout.
 *
 * Argument parsing and the `--version` lookup run inside the same `try` as the
 * conversion, so an unknown flag or a native-addon load failure is reported as
 * a one-line `edgeparse: <message>` on stderr with exit code 1 instead of an
 * uncaught exception. Usage shown because the input file is missing is an
 * error and goes to stderr; usage requested with `--help` goes to stdout.
 */
var usage = `Usage: edgeparse [options] <input.pdf>

Options:
-f, --format <fmt> Output format: markdown, json, html, text (default: markdown)
-p, --pages <range> Page range, e.g. "1,3,5-7"
--password <pw> Password for encrypted PDFs
--reading-order <algo> Reading order: xycut (default) or off
--table-method <m> Table method: default or cluster
--image-output <mode> Image output: off (default), embedded, external
-o, --output <path> Output file path (default: stdout)
-v, --version Show version
-h, --help Show this help
`;
var values;
var positionals;
var inputPath;
try {
  ({ values, positionals } = parseArgs({
    allowPositionals: true,
    options: {
      format: { type: "string", short: "f", default: "markdown" },
      pages: { type: "string", short: "p" },
      password: { type: "string" },
      "reading-order": { type: "string", default: "xycut" },
      "table-method": { type: "string", default: "default" },
      "image-output": { type: "string", default: "off" },
      output: { type: "string", short: "o" },
      version: { type: "boolean", short: "v" },
      help: { type: "boolean", short: "h" }
    }
  }));
  if (values.version) {
    console.log(`edgeparse ${version()}`);
    process.exit(0);
  }
  if (values.help) {
    console.log(usage);
    process.exit(0);
  }
  if (positionals.length === 0) {
    // Missing input is a usage error: keep stdout clean for piped output.
    console.error(usage);
    process.exit(1);
  }
  inputPath = positionals[0];
  const result = convert(inputPath, {
    format: values.format,
    pages: values.pages,
    password: values.password,
    readingOrder: values["reading-order"],
    tableMethod: values["table-method"],
    imageOutput: values["image-output"]
  });
  if (values.output) {
    writeFileSync(values.output, result, "utf-8");
  } else {
    process.stdout.write(result);
  }
} catch (err) {
  const msg = err instanceof Error ? err.message : String(err);
  console.error(`edgeparse: ${msg}`);
  process.exit(1);
}
@@ -0,0 +1,29 @@
1
/**
 * Options accepted by `convert()`. Every field is optional. The JavaScript
 * wrapper copies each one into the object handed to the native addon, passing
 * `readingOrder`, `tableMethod` and `imageOutput` under the keys
 * `reading_order`, `table_method` and `image_output`.
 */
+ interface ConvertOptions {
2
+ /** Output format. Valid values: "markdown", "json", "html", "text". Default: "markdown". */
3
+ format?: string;
4
+ /** Page range string, e.g. "1,3,5-7". */
5
+ pages?: string;
6
+ /** Password for encrypted PDFs. */
7
+ password?: string;
8
+ /** Reading order algorithm: "xycut" (default) or "off". */
9
+ readingOrder?: string;
10
+ /** Table detection method: "default" or "cluster". */
11
+ tableMethod?: string;
12
+ /** Image output mode: "off" (default), "embedded", or "external". */
13
+ imageOutput?: string;
14
+ }
15
+
16
+ /**
17
+ * Convert a PDF file and return the extracted content as a string.
18
+ *
+ * The native addon is loaded on the first call that needs it.
+ *
19
+ * @param inputPath - Path to the PDF file.
20
+ * @param options - Conversion options. When omitted, no options object is passed to the native addon.
21
+ * @returns The extracted content as a string.
+ * @throws Error if the current platform/architecture pair has no prebuilt native addon.
22
+ */
23
+ declare function convert(inputPath: string, options?: ConvertOptions): string;
24
+ /**
25
+ * Return the edgeparse version string, as reported by the native addon.
+ *
+ * @throws Error if the current platform/architecture pair has no prebuilt native addon.
26
+ */
27
+ declare function version(): string;
28
+
29
+ export { type ConvertOptions, convert, version };
@@ -0,0 +1,29 @@
1
/**
 * Options accepted by `convert()`. Every field is optional. The JavaScript
 * wrapper copies each one into the object handed to the native addon, passing
 * `readingOrder`, `tableMethod` and `imageOutput` under the keys
 * `reading_order`, `table_method` and `image_output`.
 */
+ interface ConvertOptions {
2
+ /** Output format. Valid values: "markdown", "json", "html", "text". Default: "markdown". */
3
+ format?: string;
4
+ /** Page range string, e.g. "1,3,5-7". */
5
+ pages?: string;
6
+ /** Password for encrypted PDFs. */
7
+ password?: string;
8
+ /** Reading order algorithm: "xycut" (default) or "off". */
9
+ readingOrder?: string;
10
+ /** Table detection method: "default" or "cluster". */
11
+ tableMethod?: string;
12
+ /** Image output mode: "off" (default), "embedded", or "external". */
13
+ imageOutput?: string;
14
+ }
15
+
16
+ /**
17
+ * Convert a PDF file and return the extracted content as a string.
18
+ *
+ * The native addon is loaded on the first call that needs it.
+ *
19
+ * @param inputPath - Path to the PDF file.
20
+ * @param options - Conversion options. When omitted, no options object is passed to the native addon.
21
+ * @returns The extracted content as a string.
+ * @throws Error if the current platform/architecture pair has no prebuilt native addon.
22
+ */
23
+ declare function convert(inputPath: string, options?: ConvertOptions): string;
24
+ /**
25
+ * Return the edgeparse version string, as reported by the native addon.
+ *
+ * @throws Error if the current platform/architecture pair has no prebuilt native addon.
26
+ */
27
+ declare function version(): string;
28
+
29
+ export { type ConvertOptions, convert, version };
package/dist/index.js ADDED
@@ -0,0 +1,65 @@
1
+ "use strict";
2
// Bundler-emitted CommonJS interop helpers. Short aliases for the Object
// reflection functions used below.
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
// __export: define each entry of `all` on `target` as an enumerable getter,
// so the exported value is read lazily at access time.
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
// __copyProps: for an object or function `from`, forward every own property
// name that `to` does not already have (and that is not `except`) through a
// getter on `to`, keeping the source property's enumerability.
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
// __toCommonJS: build a fresh object flagged `__esModule: true` and forward
// the properties of `mod` onto it.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
// Export table for src/index.ts: `convert` and `version` are registered as
// getters, so referencing them here before their declarations is safe.
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ convert: () => convert,
24
+ version: () => version
25
+ });
26
// Publish the table as this module's CommonJS exports.
+ module.exports = __toCommonJS(index_exports);
27
+ function loadNative() {
28
+ const platforms = {
29
+ "linux-x64": "@edgeparse/pdf-linux-x64-gnu",
30
+ "linux-arm64": "@edgeparse/pdf-linux-arm64-gnu",
31
+ "darwin-x64": "@edgeparse/pdf-darwin-x64",
32
+ "darwin-arm64": "@edgeparse/pdf-darwin-arm64",
33
+ "win32-x64": "@edgeparse/pdf-win32-x64-msvc"
34
+ };
35
+ const key = `${process.platform}-${process.arch}`;
36
+ const pkg = platforms[key];
37
+ if (!pkg) throw new Error(`edgeparse: unsupported platform: ${key}`);
38
+ return require(pkg);
39
+ }
40
// Module-level cache for the native binding; stays unset until first use.
let native;

/**
 * Return the native binding, loading it through `loadNative()` the first time
 * (and again on later calls only while the cached value is still falsy).
 */
function getNative() {
  if (native) return native;
  native = loadNative();
  return native;
}
47
/**
 * Run the native `convert` on `inputPath`.
 *
 * When `options` is truthy its camelCase fields are copied into a new object
 * using the snake_case keys the native call is given; otherwise `undefined`
 * is passed in place of the options object.
 */
function convert(inputPath, options) {
  const binding = getNative();
  let nativeOptions;
  if (options) {
    const { format, pages, password, readingOrder, tableMethod, imageOutput } = options;
    nativeOptions = {
      format,
      pages,
      password,
      reading_order: readingOrder,
      table_method: tableMethod,
      image_output: imageOutput
    };
  }
  return binding.convert(inputPath, nativeOptions);
}
58
/** Return whatever the native binding's `version()` reports. */
function version() {
  const binding = getNative();
  return binding.version();
}
61
+ // Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard means this assignment never executes; the statement is
// present only so the export names appear literally in the source text.
62
+ 0 && (module.exports = {
63
+ convert,
64
+ version
65
+ });
package/dist/index.mjs ADDED
@@ -0,0 +1,8 @@
1
+ import {
2
+ convert,
3
+ version
4
+ } from "./chunk-2PGOFCEV.mjs";
5
+ export {
6
+ convert,
7
+ version
8
+ };
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "edgeparse-darwin-arm64",
3
+ "version": "0.1.0",
4
+ "os": ["darwin"],
5
+ "cpu": ["arm64"],
6
+ "main": "edgeparse-node.darwin-arm64.node",
7
+ "files": ["edgeparse-node.darwin-arm64.node"],
8
+ "description": "edgeparse native addon for darwin-arm64",
9
+ "license": "Apache-2.0",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
13
+ "directory": "sdks/node/npm/darwin-arm64"
14
+ }
15
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "edgeparse-darwin-x64",
3
+ "version": "0.1.0",
4
+ "os": ["darwin"],
5
+ "cpu": ["x64"],
6
+ "main": "edgeparse-node.darwin-x64.node",
7
+ "files": ["edgeparse-node.darwin-x64.node"],
8
+ "description": "edgeparse native addon for darwin-x64",
9
+ "license": "Apache-2.0",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
13
+ "directory": "sdks/node/npm/darwin-x64"
14
+ }
15
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "edgeparse-linux-arm64-gnu",
3
+ "version": "0.1.0",
4
+ "os": ["linux"],
5
+ "cpu": ["arm64"],
6
+ "main": "edgeparse-node.linux-arm64-gnu.node",
7
+ "files": ["edgeparse-node.linux-arm64-gnu.node"],
8
+ "description": "edgeparse native addon for linux-arm64-gnu",
9
+ "license": "Apache-2.0",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
13
+ "directory": "sdks/node/npm/linux-arm64-gnu"
14
+ }
15
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "edgeparse-linux-x64-gnu",
3
+ "version": "0.1.0",
4
+ "os": ["linux"],
5
+ "cpu": ["x64"],
6
+ "main": "edgeparse-node.linux-x64-gnu.node",
7
+ "files": ["edgeparse-node.linux-x64-gnu.node"],
8
+ "description": "edgeparse native addon for linux-x64-gnu",
9
+ "license": "Apache-2.0",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
13
+ "directory": "sdks/node/npm/linux-x64-gnu"
14
+ }
15
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "edgeparse-win32-x64-msvc",
3
+ "version": "0.1.0",
4
+ "os": ["win32"],
5
+ "cpu": ["x64"],
6
+ "main": "edgeparse-node.win32-x64-msvc.node",
7
+ "files": ["edgeparse-node.win32-x64-msvc.node"],
8
+ "description": "edgeparse native addon for win32-x64-msvc",
9
+ "license": "Apache-2.0",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
13
+ "directory": "sdks/node/npm/win32-x64-msvc"
14
+ }
15
+ }
package/package.json ADDED
@@ -0,0 +1,60 @@
1
+ {
2
+ "name": "edgeparse",
3
+ "version": "0.1.0",
4
+ "description": "High-performance PDF extraction — Rust engine, Node.js interface",
5
+ "main": "./dist/index.cjs",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "require": "./dist/index.cjs",
12
+ "types": "./dist/index.d.ts"
13
+ }
14
+ },
15
+ "bin": {
16
+ "edgeparse": "./dist/cli.cjs"
17
+ },
18
+ "files": [
19
+ "dist/",
20
+ "npm/",
21
+ "README.md"
22
+ ],
23
+ "optionalDependencies": {
24
+ "edgeparse-darwin-arm64": "0.1.0",
25
+ "edgeparse-darwin-x64": "0.1.0",
26
+ "edgeparse-linux-arm64-gnu": "0.1.0",
27
+ "edgeparse-linux-x64-gnu": "0.1.0",
28
+ "edgeparse-win32-x64-msvc": "0.1.0"
29
+ },
30
+ "engines": {
31
+ "node": ">=18"
32
+ },
33
+ "license": "Apache-2.0",
34
+ "repository": {
35
+ "type": "git",
36
+ "url": "https://github.com/opendataloader-project/edgeparse.git",
37
+ "directory": "sdks/node"
38
+ },
39
+ "keywords": [
40
+ "pdf",
41
+ "extraction",
42
+ "markdown",
43
+ "json",
44
+ "rag",
45
+ "llm",
46
+ "rust"
47
+ ],
48
+ "scripts": {
49
+ "build:native": "cargo build --manifest-path ../../crates/edgeparse-node/Cargo.toml --release",
50
+ "build:ts": "npx tsup src/index.ts src/cli.ts --format cjs,esm --dts",
51
+ "build": "npm run build:native && npm run build:ts",
52
+ "test": "npx vitest run"
53
+ },
54
+ "devDependencies": {
55
+ "@types/node": "^25.5.0",
56
+ "tsup": "^8.0.0",
57
+ "typescript": "^5.0.0",
58
+ "vitest": "^3.0.0"
59
+ }
60
+ }