edgeparse 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,112 @@
1
+ # edgeparse
2
+
3
+ > High-performance PDF extraction for Node.js — Rust engine, JavaScript/TypeScript interface.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/edgeparse.svg)](https://www.npmjs.com/package/edgeparse)
6
+ [![License: Apache-2.0](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://github.com/raphaelmansuy/edgeparse/blob/main/LICENSE)
7
+ [![GitHub](https://img.shields.io/badge/GitHub-raphaelmansuy%2Fedgeparse-181717?logo=github)](https://github.com/raphaelmansuy/edgeparse)
8
+
9
+ EdgeParse converts PDF documents to Markdown, JSON, HTML, or plain text. It is powered by a native Rust engine (via N-API) with pre-built binaries — no compilation required.
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ npm install edgeparse
15
+ # or
16
+ pnpm add edgeparse
17
+ # or
18
+ yarn add edgeparse
19
+ ```
20
+
21
+ Pre-built binaries are available for:
22
+
23
+ | Platform | Architecture |
24
+ |---|---|
25
+ | macOS | x64, arm64 (Apple Silicon) |
26
+ | Linux | x64-gnu, arm64-gnu |
27
+ | Windows | x64-msvc |
28
+
29
+ ## Quick Start
30
+
31
+ ```typescript
32
+ import { convert } from 'edgeparse';
33
+
34
+ // Convert a PDF to Markdown
35
+ const markdown = convert('report.pdf');
36
+ console.log(markdown);
37
+
38
+ // Convert to JSON
39
+ const json = convert('report.pdf', { format: 'json' });
40
+
41
+ // Convert specific pages to HTML
42
+ const html = convert('report.pdf', {
43
+ format: 'html',
44
+ pages: [0, 1, 2], // the first three pages (page numbers are 0-indexed)
45
+ });
46
+
47
+ // Password-protected PDF
48
+ const text = convert('secure.pdf', {
49
+ format: 'markdown',
50
+ password: 'secret',
51
+ });
52
+ ```
53
+
54
+ ## API
55
+
56
+ ### `convert(inputPath, options?): string`
57
+
58
+ Converts a PDF file and returns the content as a string.
59
+
60
+ | Parameter | Type | Description |
61
+ |---|---|---|
62
+ | `inputPath` | `string` | Absolute or relative path to the PDF file |
63
+ | `options.format` | `'markdown' \| 'json' \| 'html' \| 'text'` | Output format (default: `'markdown'`) |
64
+ | `options.pages` | `number[]` | Zero-indexed page numbers to extract (default: all) |
65
+ | `options.password` | `string` | Password for encrypted PDFs |
66
+ | `options.readingOrder` | `'xycut' \| 'default'` | Reading order algorithm (default: `'xycut'`) |
67
+ | `options.tableMethod` | `'border' \| 'cluster'` | Table detection method (default: `'border'`) |
68
+ | `options.imageOutput` | `'embedded' \| 'external' \| 'none'` | Image handling (default: `'none'`) |
69
+
70
+ ### `version(): string`
71
+
72
+ Returns the edgeparse engine version string.
73
+
74
+ ```typescript
75
+ import { version } from 'edgeparse';
76
+ console.log(version()); // e.g. "0.1.1"
77
+ ```
78
+
79
+ ## CLI
80
+
81
+ The package also ships an `edgeparse` CLI binary:
82
+
83
+ ```bash
84
+ npx edgeparse document.pdf
85
+ npx edgeparse document.pdf --format json
86
+ npx edgeparse document.pdf --format html --output output/
87
+ ```
88
+
89
+ ## TypeScript
90
+
91
+ Full TypeScript support is included — no `@types` package needed.
92
+
93
+ ```typescript
94
+ import { convert, version } from 'edgeparse';
95
+ import type { ConvertOptions } from 'edgeparse';
96
+ ```
97
+
98
+ ## Performance
99
+
100
+ EdgeParse consistently processes **40+ pages/second** on a modern machine — dramatically faster than Python-based alternatives — and achieves **88%+ extraction accuracy** on diverse real-world PDFs.
101
+
102
+ ## Links
103
+
104
+ - [GitHub](https://github.com/raphaelmansuy/edgeparse)
105
+ - [Documentation](https://edgeparse.com)
106
+ - [PyPI (Python)](https://pypi.org/project/edgeparse/)
107
+ - [crates.io (Rust CLI)](https://crates.io/crates/edgeparse-cli)
108
+ - [crates.io (Rust Core)](https://crates.io/crates/edgeparse-core)
109
+
110
+ ## License
111
+
112
+ Apache-2.0 — see [LICENSE](https://github.com/raphaelmansuy/edgeparse/blob/main/LICENSE).
@@ -8,11 +8,11 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
8
8
  // src/index.ts
9
9
  function loadNative() {
10
10
  const platforms = {
11
- "linux-x64": "@edgeparse/pdf-linux-x64-gnu",
12
- "linux-arm64": "@edgeparse/pdf-linux-arm64-gnu",
13
- "darwin-x64": "@edgeparse/pdf-darwin-x64",
14
- "darwin-arm64": "@edgeparse/pdf-darwin-arm64",
15
- "win32-x64": "@edgeparse/pdf-win32-x64-msvc"
11
+ "linux-x64": "edgeparse-linux-x64-gnu",
12
+ "linux-arm64": "edgeparse-linux-arm64-gnu",
13
+ "darwin-x64": "edgeparse-darwin-x64",
14
+ "darwin-arm64": "edgeparse-darwin-arm64",
15
+ "win32-x64": "edgeparse-win32-x64-msvc"
16
16
  };
17
17
  const key = `${process.platform}-${process.arch}`;
18
18
  const pkg = platforms[key];
package/dist/cli.js CHANGED
@@ -8,11 +8,11 @@ var import_node_fs = require("fs");
8
8
  // src/index.ts
9
9
  function loadNative() {
10
10
  const platforms = {
11
- "linux-x64": "@edgeparse/pdf-linux-x64-gnu",
12
- "linux-arm64": "@edgeparse/pdf-linux-arm64-gnu",
13
- "darwin-x64": "@edgeparse/pdf-darwin-x64",
14
- "darwin-arm64": "@edgeparse/pdf-darwin-arm64",
15
- "win32-x64": "@edgeparse/pdf-win32-x64-msvc"
11
+ "linux-x64": "edgeparse-linux-x64-gnu",
12
+ "linux-arm64": "edgeparse-linux-arm64-gnu",
13
+ "darwin-x64": "edgeparse-darwin-x64",
14
+ "darwin-arm64": "edgeparse-darwin-arm64",
15
+ "win32-x64": "edgeparse-win32-x64-msvc"
16
16
  };
17
17
  const key = `${process.platform}-${process.arch}`;
18
18
  const pkg = platforms[key];
package/dist/cli.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  convert,
4
4
  version
5
- } from "./chunk-2PGOFCEV.mjs";
5
+ } from "./chunk-2XS3GOSO.mjs";
6
6
 
7
7
  // src/cli.ts
8
8
  import { parseArgs } from "util";
package/dist/index.js CHANGED
@@ -26,11 +26,11 @@ __export(index_exports, {
26
26
  module.exports = __toCommonJS(index_exports);
27
27
  function loadNative() {
28
28
  const platforms = {
29
- "linux-x64": "@edgeparse/pdf-linux-x64-gnu",
30
- "linux-arm64": "@edgeparse/pdf-linux-arm64-gnu",
31
- "darwin-x64": "@edgeparse/pdf-darwin-x64",
32
- "darwin-arm64": "@edgeparse/pdf-darwin-arm64",
33
- "win32-x64": "@edgeparse/pdf-win32-x64-msvc"
29
+ "linux-x64": "edgeparse-linux-x64-gnu",
30
+ "linux-arm64": "edgeparse-linux-arm64-gnu",
31
+ "darwin-x64": "edgeparse-darwin-x64",
32
+ "darwin-arm64": "edgeparse-darwin-arm64",
33
+ "win32-x64": "edgeparse-win32-x64-msvc"
34
34
  };
35
35
  const key = `${process.platform}-${process.arch}`;
36
36
  const pkg = platforms[key];
package/dist/index.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  convert,
3
3
  version
4
- } from "./chunk-2PGOFCEV.mjs";
4
+ } from "./chunk-2XS3GOSO.mjs";
5
5
  export {
6
6
  convert,
7
7
  version
@@ -1,15 +1,22 @@
1
1
  {
2
2
  "name": "edgeparse-darwin-arm64",
3
- "version": "0.1.0",
4
- "os": ["darwin"],
5
- "cpu": ["arm64"],
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "darwin"
6
+ ],
7
+ "cpu": [
8
+ "arm64"
9
+ ],
6
10
  "main": "edgeparse-node.darwin-arm64.node",
7
- "files": ["edgeparse-node.darwin-arm64.node"],
11
+ "files": [
12
+ "edgeparse-node.darwin-arm64.node"
13
+ ],
8
14
  "description": "edgeparse native addon for darwin-arm64",
9
15
  "license": "Apache-2.0",
10
16
  "repository": {
11
17
  "type": "git",
12
- "url": "https://github.com/opendataloader-project/edgeparse.git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
13
19
  "directory": "sdks/node/npm/darwin-arm64"
14
- }
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
15
22
  }
@@ -1,15 +1,22 @@
1
1
  {
2
2
  "name": "edgeparse-darwin-x64",
3
- "version": "0.1.0",
4
- "os": ["darwin"],
5
- "cpu": ["x64"],
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "darwin"
6
+ ],
7
+ "cpu": [
8
+ "x64"
9
+ ],
6
10
  "main": "edgeparse-node.darwin-x64.node",
7
- "files": ["edgeparse-node.darwin-x64.node"],
11
+ "files": [
12
+ "edgeparse-node.darwin-x64.node"
13
+ ],
8
14
  "description": "edgeparse native addon for darwin-x64",
9
15
  "license": "Apache-2.0",
10
16
  "repository": {
11
17
  "type": "git",
12
- "url": "https://github.com/opendataloader-project/edgeparse.git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
13
19
  "directory": "sdks/node/npm/darwin-x64"
14
- }
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
15
22
  }
@@ -1,15 +1,22 @@
1
1
  {
2
2
  "name": "edgeparse-linux-arm64-gnu",
3
- "version": "0.1.0",
4
- "os": ["linux"],
5
- "cpu": ["arm64"],
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "linux"
6
+ ],
7
+ "cpu": [
8
+ "arm64"
9
+ ],
6
10
  "main": "edgeparse-node.linux-arm64-gnu.node",
7
- "files": ["edgeparse-node.linux-arm64-gnu.node"],
11
+ "files": [
12
+ "edgeparse-node.linux-arm64-gnu.node"
13
+ ],
8
14
  "description": "edgeparse native addon for linux-arm64-gnu",
9
15
  "license": "Apache-2.0",
10
16
  "repository": {
11
17
  "type": "git",
12
- "url": "https://github.com/opendataloader-project/edgeparse.git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
13
19
  "directory": "sdks/node/npm/linux-arm64-gnu"
14
- }
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
15
22
  }
@@ -1,15 +1,22 @@
1
1
  {
2
2
  "name": "edgeparse-linux-x64-gnu",
3
- "version": "0.1.0",
4
- "os": ["linux"],
5
- "cpu": ["x64"],
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "linux"
6
+ ],
7
+ "cpu": [
8
+ "x64"
9
+ ],
6
10
  "main": "edgeparse-node.linux-x64-gnu.node",
7
- "files": ["edgeparse-node.linux-x64-gnu.node"],
11
+ "files": [
12
+ "edgeparse-node.linux-x64-gnu.node"
13
+ ],
8
14
  "description": "edgeparse native addon for linux-x64-gnu",
9
15
  "license": "Apache-2.0",
10
16
  "repository": {
11
17
  "type": "git",
12
- "url": "https://github.com/opendataloader-project/edgeparse.git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
13
19
  "directory": "sdks/node/npm/linux-x64-gnu"
14
- }
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
15
22
  }
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "edgeparse-win32-x64-msvc",
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "win32"
6
+ ],
7
+ "cpu": [
8
+ "x64"
9
+ ],
10
+ "main": "edgeparse-node.win32-x64-msvc.node",
11
+ "files": [
12
+ "edgeparse-node.win32-x64-msvc.node"
13
+ ],
14
+ "description": "edgeparse native addon for win32-x64-msvc",
15
+ "license": "Apache-2.0",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
19
+ "directory": "sdks/node/npm/win32-x64-msvc"
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
22
+ }
@@ -1,15 +1,22 @@
1
1
  {
2
2
  "name": "edgeparse-win32-x64-msvc",
3
- "version": "0.1.0",
4
- "os": ["win32"],
5
- "cpu": ["x64"],
3
+ "version": "0.2.0",
4
+ "os": [
5
+ "win32"
6
+ ],
7
+ "cpu": [
8
+ "x64"
9
+ ],
6
10
  "main": "edgeparse-node.win32-x64-msvc.node",
7
- "files": ["edgeparse-node.win32-x64-msvc.node"],
11
+ "files": [
12
+ "edgeparse-node.win32-x64-msvc.node"
13
+ ],
8
14
  "description": "edgeparse native addon for win32-x64-msvc",
9
15
  "license": "Apache-2.0",
10
16
  "repository": {
11
17
  "type": "git",
12
- "url": "https://github.com/opendataloader-project/edgeparse.git",
18
+ "url": "https://github.com/raphaelmansuy/edgeparse.git",
13
19
  "directory": "sdks/node/npm/win32-x64-msvc"
14
- }
20
+ },
21
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
15
22
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "edgeparse",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "High-performance PDF extraction — Rust engine, Node.js interface",
5
5
  "main": "./dist/index.cjs",
6
6
  "module": "./dist/index.js",
@@ -13,7 +13,7 @@
13
13
  }
14
14
  },
15
15
  "bin": {
16
- "edgeparse": "./dist/cli.cjs"
16
+ "edgeparse": "dist/cli.js"
17
17
  },
18
18
  "files": [
19
19
  "dist/",
@@ -21,19 +21,20 @@
21
21
  "README.md"
22
22
  ],
23
23
  "optionalDependencies": {
24
- "edgeparse-darwin-arm64": "0.1.0",
25
- "edgeparse-darwin-x64": "0.1.0",
26
- "edgeparse-linux-arm64-gnu": "0.1.0",
27
- "edgeparse-linux-x64-gnu": "0.1.0",
28
- "edgeparse-win32-x64-msvc": "0.1.0"
24
+ "edgeparse-darwin-arm64": "0.2.0",
25
+ "edgeparse-darwin-x64": "0.2.0",
26
+ "edgeparse-linux-arm64-gnu": "0.2.0",
27
+ "edgeparse-linux-x64-gnu": "0.2.0",
28
+ "edgeparse-win32-x64-msvc": "0.2.0"
29
29
  },
30
30
  "engines": {
31
31
  "node": ">=18"
32
32
  },
33
33
  "license": "Apache-2.0",
34
+ "homepage": "https://github.com/raphaelmansuy/edgeparse#readme",
34
35
  "repository": {
35
36
  "type": "git",
36
- "url": "https://github.com/opendataloader-project/edgeparse.git",
37
+ "url": "git+https://github.com/raphaelmansuy/edgeparse.git",
37
38
  "directory": "sdks/node"
38
39
  },
39
40
  "keywords": [