edgeparse 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -0
- package/dist/chunk-2XS3GOSO.mjs +47 -0
- package/dist/cli.js +5 -5
- package/dist/cli.mjs +1 -1
- package/dist/index.js +5 -5
- package/dist/index.mjs +1 -1
- package/npm/darwin-arm64/edgeparse-node.darwin-arm64.node +0 -0
- package/npm/darwin-arm64/package.json +13 -6
- package/npm/darwin-x64/package.json +13 -6
- package/npm/linux-arm64-gnu/package.json +13 -6
- package/npm/linux-x64-gnu/package.json +13 -6
- package/npm/win32-x64-msvc/package.json +13 -6
- package/package.json +9 -8
package/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# edgeparse
|
|
2
|
+
|
|
3
|
+
> High-performance PDF extraction for Node.js — Rust engine, JavaScript/TypeScript interface.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/edgeparse)
|
|
6
|
+
[License: Apache-2.0](https://github.com/raphaelmansuy/edgeparse/blob/main/LICENSE)
|
|
7
|
+
[GitHub repository](https://github.com/raphaelmansuy/edgeparse)
|
|
8
|
+
|
|
9
|
+
EdgeParse converts PDF documents to Markdown, JSON, HTML, or plain text. It is powered by a native Rust engine (via N-API) with pre-built binaries — no compilation required.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install edgeparse
|
|
15
|
+
# or
|
|
16
|
+
pnpm add edgeparse
|
|
17
|
+
# or
|
|
18
|
+
yarn add edgeparse
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Pre-built binaries are available for:
|
|
22
|
+
|
|
23
|
+
| Platform | Architecture |
|
|
24
|
+
|---|---|
|
|
25
|
+
| macOS | x64, arm64 (Apple Silicon) |
|
|
26
|
+
| Linux | x64-gnu, arm64-gnu |
|
|
27
|
+
| Windows | x64-msvc |
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import { convert } from 'edgeparse';
|
|
33
|
+
|
|
34
|
+
// Convert a PDF to Markdown
|
|
35
|
+
const markdown = convert('report.pdf');
|
|
36
|
+
console.log(markdown);
|
|
37
|
+
|
|
38
|
+
// Convert to JSON
|
|
39
|
+
const json = convert('report.pdf', { format: 'json' });
|
|
40
|
+
|
|
41
|
+
// Convert specific pages to HTML
|
|
42
|
+
const html = convert('report.pdf', {
|
|
43
|
+
format: 'html',
|
|
44
|
+
pages: [0, 1, 2], // pages 1–3 (0-indexed)
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
// Password-protected PDF
|
|
48
|
+
const text = convert('secure.pdf', {
|
|
49
|
+
format: 'markdown',
|
|
50
|
+
password: 'secret',
|
|
51
|
+
});
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## API
|
|
55
|
+
|
|
56
|
+
### `convert(inputPath, options?): string`
|
|
57
|
+
|
|
58
|
+
Converts a PDF file and returns the content as a string.
|
|
59
|
+
|
|
60
|
+
| Parameter | Type | Description |
|
|
61
|
+
|---|---|---|
|
|
62
|
+
| `inputPath` | `string` | Absolute or relative path to the PDF file |
|
|
63
|
+
| `options.format` | `'markdown' \| 'json' \| 'html' \| 'text'` | Output format (default: `'markdown'`) |
|
|
64
|
+
| `options.pages` | `number[]` | Zero-indexed page numbers to extract (default: all) |
|
|
65
|
+
| `options.password` | `string` | Password for encrypted PDFs |
|
|
66
|
+
| `options.readingOrder` | `'xycut' \| 'default'` | Reading order algorithm (default: `'xycut'`) |
|
|
67
|
+
| `options.tableMethod` | `'border' \| 'cluster'` | Table detection method (default: `'border'`) |
|
|
68
|
+
| `options.imageOutput` | `'embedded' \| 'external' \| 'none'` | Image handling (default: `'none'`) |
|
|
69
|
+
|
|
70
|
+
### `version(): string`
|
|
71
|
+
|
|
72
|
+
Returns the edgeparse engine version string.
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { version } from 'edgeparse';
|
|
76
|
+
console.log(version()); // e.g. "0.1.1"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## CLI
|
|
80
|
+
|
|
81
|
+
The package also ships an `edgeparse` CLI binary:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
npx edgeparse document.pdf
|
|
85
|
+
npx edgeparse document.pdf --format json
|
|
86
|
+
npx edgeparse document.pdf --format html --output output/
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## TypeScript
|
|
90
|
+
|
|
91
|
+
Full TypeScript support is included — no `@types` package needed.
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
import { convert, version } from 'edgeparse';
|
|
95
|
+
import type { ConvertOptions } from 'edgeparse';
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Performance
|
|
99
|
+
|
|
100
|
+
EdgeParse consistently processes **40+ pages/second** on a modern machine and achieves **88%+ extraction accuracy** on diverse real-world PDFs — dramatically faster than Python-based alternatives.
|
|
101
|
+
|
|
102
|
+
## Links
|
|
103
|
+
|
|
104
|
+
- [GitHub](https://github.com/raphaelmansuy/edgeparse)
|
|
105
|
+
- [Documentation](https://edgeparse.com)
|
|
106
|
+
- [PyPI (Python)](https://pypi.org/project/edgeparse/)
|
|
107
|
+
- [crates.io (Rust CLI)](https://crates.io/crates/edgeparse-cli)
|
|
108
|
+
- [crates.io (Rust Core)](https://crates.io/crates/edgeparse-core)
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
Apache-2.0 — see [LICENSE](https://github.com/raphaelmansuy/edgeparse/blob/main/LICENSE).
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
// src/index.ts
|
|
9
|
+
function loadNative() {
|
|
10
|
+
const platforms = {
|
|
11
|
+
"linux-x64": "edgeparse-linux-x64-gnu",
|
|
12
|
+
"linux-arm64": "edgeparse-linux-arm64-gnu",
|
|
13
|
+
"darwin-x64": "edgeparse-darwin-x64",
|
|
14
|
+
"darwin-arm64": "edgeparse-darwin-arm64",
|
|
15
|
+
"win32-x64": "edgeparse-win32-x64-msvc"
|
|
16
|
+
};
|
|
17
|
+
const key = `${process.platform}-${process.arch}`;
|
|
18
|
+
const pkg = platforms[key];
|
|
19
|
+
if (!pkg) throw new Error(`edgeparse: unsupported platform: ${key}`);
|
|
20
|
+
return __require(pkg);
|
|
21
|
+
}
|
|
22
|
+
var native;
|
|
23
|
+
function getNative() {
|
|
24
|
+
if (!native) {
|
|
25
|
+
native = loadNative();
|
|
26
|
+
}
|
|
27
|
+
return native;
|
|
28
|
+
}
|
|
29
|
+
function convert(inputPath, options) {
|
|
30
|
+
const n = getNative();
|
|
31
|
+
return n.convert(inputPath, options ? {
|
|
32
|
+
format: options.format,
|
|
33
|
+
pages: options.pages,
|
|
34
|
+
password: options.password,
|
|
35
|
+
reading_order: options.readingOrder,
|
|
36
|
+
table_method: options.tableMethod,
|
|
37
|
+
image_output: options.imageOutput
|
|
38
|
+
} : void 0);
|
|
39
|
+
}
|
|
40
|
+
function version() {
|
|
41
|
+
return getNative().version();
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export {
|
|
45
|
+
convert,
|
|
46
|
+
version
|
|
47
|
+
};
|
package/dist/cli.js
CHANGED
|
@@ -8,11 +8,11 @@ var import_node_fs = require("fs");
|
|
|
8
8
|
// src/index.ts
|
|
9
9
|
function loadNative() {
|
|
10
10
|
const platforms = {
|
|
11
|
-
"linux-x64": "
|
|
12
|
-
"linux-arm64": "
|
|
13
|
-
"darwin-x64": "
|
|
14
|
-
"darwin-arm64": "
|
|
15
|
-
"win32-x64": "
|
|
11
|
+
"linux-x64": "edgeparse-linux-x64-gnu",
|
|
12
|
+
"linux-arm64": "edgeparse-linux-arm64-gnu",
|
|
13
|
+
"darwin-x64": "edgeparse-darwin-x64",
|
|
14
|
+
"darwin-arm64": "edgeparse-darwin-arm64",
|
|
15
|
+
"win32-x64": "edgeparse-win32-x64-msvc"
|
|
16
16
|
};
|
|
17
17
|
const key = `${process.platform}-${process.arch}`;
|
|
18
18
|
const pkg = platforms[key];
|
package/dist/cli.mjs
CHANGED
package/dist/index.js
CHANGED
|
@@ -26,11 +26,11 @@ __export(index_exports, {
|
|
|
26
26
|
module.exports = __toCommonJS(index_exports);
|
|
27
27
|
function loadNative() {
|
|
28
28
|
const platforms = {
|
|
29
|
-
"linux-x64": "
|
|
30
|
-
"linux-arm64": "
|
|
31
|
-
"darwin-x64": "
|
|
32
|
-
"darwin-arm64": "
|
|
33
|
-
"win32-x64": "
|
|
29
|
+
"linux-x64": "edgeparse-linux-x64-gnu",
|
|
30
|
+
"linux-arm64": "edgeparse-linux-arm64-gnu",
|
|
31
|
+
"darwin-x64": "edgeparse-darwin-x64",
|
|
32
|
+
"darwin-arm64": "edgeparse-darwin-arm64",
|
|
33
|
+
"win32-x64": "edgeparse-win32-x64-msvc"
|
|
34
34
|
};
|
|
35
35
|
const key = `${process.platform}-${process.arch}`;
|
|
36
36
|
const pkg = platforms[key];
|
package/dist/index.mjs
CHANGED
|
Binary file
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse-darwin-arm64",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"os": [
|
|
5
|
-
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"os": [
|
|
5
|
+
"darwin"
|
|
6
|
+
],
|
|
7
|
+
"cpu": [
|
|
8
|
+
"arm64"
|
|
9
|
+
],
|
|
6
10
|
"main": "edgeparse-node.darwin-arm64.node",
|
|
7
|
-
"files": [
|
|
11
|
+
"files": [
|
|
12
|
+
"edgeparse-node.darwin-arm64.node"
|
|
13
|
+
],
|
|
8
14
|
"description": "edgeparse native addon for darwin-arm64",
|
|
9
15
|
"license": "Apache-2.0",
|
|
10
16
|
"repository": {
|
|
11
17
|
"type": "git",
|
|
12
|
-
"url": "https://github.com/
|
|
18
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
13
19
|
"directory": "sdks/node/npm/darwin-arm64"
|
|
14
|
-
}
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
|
|
15
22
|
}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse-darwin-x64",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"os": [
|
|
5
|
-
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"os": [
|
|
5
|
+
"darwin"
|
|
6
|
+
],
|
|
7
|
+
"cpu": [
|
|
8
|
+
"x64"
|
|
9
|
+
],
|
|
6
10
|
"main": "edgeparse-node.darwin-x64.node",
|
|
7
|
-
"files": [
|
|
11
|
+
"files": [
|
|
12
|
+
"edgeparse-node.darwin-x64.node"
|
|
13
|
+
],
|
|
8
14
|
"description": "edgeparse native addon for darwin-x64",
|
|
9
15
|
"license": "Apache-2.0",
|
|
10
16
|
"repository": {
|
|
11
17
|
"type": "git",
|
|
12
|
-
"url": "https://github.com/
|
|
18
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
13
19
|
"directory": "sdks/node/npm/darwin-x64"
|
|
14
|
-
}
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
|
|
15
22
|
}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse-linux-arm64-gnu",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"os": [
|
|
5
|
-
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"os": [
|
|
5
|
+
"linux"
|
|
6
|
+
],
|
|
7
|
+
"cpu": [
|
|
8
|
+
"arm64"
|
|
9
|
+
],
|
|
6
10
|
"main": "edgeparse-node.linux-arm64-gnu.node",
|
|
7
|
-
"files": [
|
|
11
|
+
"files": [
|
|
12
|
+
"edgeparse-node.linux-arm64-gnu.node"
|
|
13
|
+
],
|
|
8
14
|
"description": "edgeparse native addon for linux-arm64-gnu",
|
|
9
15
|
"license": "Apache-2.0",
|
|
10
16
|
"repository": {
|
|
11
17
|
"type": "git",
|
|
12
|
-
"url": "https://github.com/
|
|
18
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
13
19
|
"directory": "sdks/node/npm/linux-arm64-gnu"
|
|
14
|
-
}
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
|
|
15
22
|
}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse-linux-x64-gnu",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"os": [
|
|
5
|
-
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"os": [
|
|
5
|
+
"linux"
|
|
6
|
+
],
|
|
7
|
+
"cpu": [
|
|
8
|
+
"x64"
|
|
9
|
+
],
|
|
6
10
|
"main": "edgeparse-node.linux-x64-gnu.node",
|
|
7
|
-
"files": [
|
|
11
|
+
"files": [
|
|
12
|
+
"edgeparse-node.linux-x64-gnu.node"
|
|
13
|
+
],
|
|
8
14
|
"description": "edgeparse native addon for linux-x64-gnu",
|
|
9
15
|
"license": "Apache-2.0",
|
|
10
16
|
"repository": {
|
|
11
17
|
"type": "git",
|
|
12
|
-
"url": "https://github.com/
|
|
18
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
13
19
|
"directory": "sdks/node/npm/linux-x64-gnu"
|
|
14
|
-
}
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
|
|
15
22
|
}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse-win32-x64-msvc",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"os": [
|
|
5
|
-
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"os": [
|
|
5
|
+
"win32"
|
|
6
|
+
],
|
|
7
|
+
"cpu": [
|
|
8
|
+
"x64"
|
|
9
|
+
],
|
|
6
10
|
"main": "edgeparse-node.win32-x64-msvc.node",
|
|
7
|
-
"files": [
|
|
11
|
+
"files": [
|
|
12
|
+
"edgeparse-node.win32-x64-msvc.node"
|
|
13
|
+
],
|
|
8
14
|
"description": "edgeparse native addon for win32-x64-msvc",
|
|
9
15
|
"license": "Apache-2.0",
|
|
10
16
|
"repository": {
|
|
11
17
|
"type": "git",
|
|
12
|
-
"url": "https://github.com/
|
|
18
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
13
19
|
"directory": "sdks/node/npm/win32-x64-msvc"
|
|
14
|
-
}
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme"
|
|
15
22
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "edgeparse",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "High-performance PDF extraction — Rust engine, Node.js interface",
|
|
5
5
|
"main": "./dist/index.cjs",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
}
|
|
14
14
|
},
|
|
15
15
|
"bin": {
|
|
16
|
-
"edgeparse": "./dist/cli.
|
|
16
|
+
"edgeparse": "./dist/cli.js"
|
|
17
17
|
},
|
|
18
18
|
"files": [
|
|
19
19
|
"dist/",
|
|
@@ -21,19 +21,20 @@
|
|
|
21
21
|
"README.md"
|
|
22
22
|
],
|
|
23
23
|
"optionalDependencies": {
|
|
24
|
-
"edgeparse-darwin-arm64": "0.1.
|
|
25
|
-
"edgeparse-darwin-x64": "0.1.
|
|
26
|
-
"edgeparse-linux-arm64-gnu": "0.1.
|
|
27
|
-
"edgeparse-linux-x64-gnu": "0.1.
|
|
28
|
-
"edgeparse-win32-x64-msvc": "0.1.
|
|
24
|
+
"edgeparse-darwin-arm64": "0.1.1",
|
|
25
|
+
"edgeparse-darwin-x64": "0.1.1",
|
|
26
|
+
"edgeparse-linux-arm64-gnu": "0.1.1",
|
|
27
|
+
"edgeparse-linux-x64-gnu": "0.1.1",
|
|
28
|
+
"edgeparse-win32-x64-msvc": "0.1.1"
|
|
29
29
|
},
|
|
30
30
|
"engines": {
|
|
31
31
|
"node": ">=18"
|
|
32
32
|
},
|
|
33
33
|
"license": "Apache-2.0",
|
|
34
|
+
"homepage": "https://github.com/raphaelmansuy/edgeparse#readme",
|
|
34
35
|
"repository": {
|
|
35
36
|
"type": "git",
|
|
36
|
-
"url": "https://github.com/
|
|
37
|
+
"url": "https://github.com/raphaelmansuy/edgeparse.git",
|
|
37
38
|
"directory": "sdks/node"
|
|
38
39
|
},
|
|
39
40
|
"keywords": [
|