afpp 2.0.0-beta.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -31
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -8,57 +8,101 @@
|
|
|
8
8
|

|
|
9
9
|

|
|
10
10
|
|
|
11
|
-
Another f\*cking
|
|
11
|
+
Another f\*cking PDF parser. Because parsing PDFs in Node.js should be easy. Live long and parse PDFs. 🖖
|
|
12
12
|
|
|
13
13
|
## Why?
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
There are plenty of PDF-related packages for Node.js. They work… until they don’t.
|
|
16
|
+
|
|
17
|
+
Afpp was built to solve the headaches I ran into while trying to parse PDFs in Node.js:
|
|
18
|
+
|
|
19
|
+
- 📦 Do I need a package with 30+ MB just to read a PDF?
|
|
20
|
+
- 🧵 Why is the event loop blocked?
|
|
21
|
+
- 👃 Is that a memory leak I smell?
|
|
22
|
+
- 🐌 Should reading a PDF really be this performance-heavy?
|
|
23
|
+
- 🐛 Why is everything so buggy?
|
|
24
|
+
- 🎨 Why does it complain about the lack of a canvas in Node.js?
|
|
25
|
+
- 🧱 Why does canvas require native C++/Python dependencies to build?
|
|
26
|
+
- 🪟 Why does it complain about the missing window object?
|
|
27
|
+
- 🪄 Why do I need ImageMagick for this?!
|
|
28
|
+
- 👻 What the hell is Ghostscript, and why does it keep failing?
|
|
29
|
+
- ❓ Where’s the TypeScript support?
|
|
30
|
+
- 🧓 Why are the dependencies older than my dev career?
|
|
31
|
+
- 🔐 Why does everything work… until I try an encrypted PDF?
|
|
32
|
+
- 🕯️ Why does every OS need its own special setup ritual?
|
|
16
33
|
|
|
17
|
-
|
|
34
|
+
## Prerequisites
|
|
18
35
|
|
|
19
|
-
-
|
|
20
|
-
- blocking event loop
|
|
21
|
-
- performance issues
|
|
22
|
-
- buggy as shit
|
|
23
|
-
- not working in esm/commonjs
|
|
24
|
-
- old pdfjs-dist as peer dependency
|
|
25
|
-
- no typescript support
|
|
26
|
-
- parsing of encrypted pdf files (password needed)
|
|
36
|
+
- Node.js >= v22.14.0
|
|
27
37
|
|
|
28
|
-
|
|
38
|
+
## 📦 Installation
|
|
29
39
|
|
|
30
|
-
|
|
40
|
+
You can install `afpp` via npm, Yarn, or pnpm.
|
|
41
|
+
|
|
42
|
+
### npm
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npm install afpp
|
|
46
|
+
```
|
|
31
47
|
|
|
32
|
-
|
|
48
|
+
### Yarn
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
yarn add afpp
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### pnpm
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pnpm add afpp
|
|
58
|
+
```
|
|
33
59
|
|
|
34
60
|
## Getting started
|
|
35
61
|
|
|
36
|
-
`
|
|
62
|
+
The `afpp` library makes it simple to extract text or images from PDF files in Node.js. Whether your PDF is stored locally, hosted online, or encrypted, `afpp` provides an easy-to-use API to handle it all. All functions share common parameters and accept a string path, a buffer, or a URL object.
|
|
37
63
|
|
|
38
|
-
|
|
64
|
+
### Get text from path
|
|
39
65
|
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
|
|
66
|
+
```ts
|
|
67
|
+
import { readFile } from 'fs/promises';
|
|
68
|
+
import path from 'path';
|
|
43
69
|
|
|
44
|
-
|
|
70
|
+
import { pdf2string } from 'afpp';
|
|
71
|
+
|
|
72
|
+
(async function main() {
|
|
73
|
+
const pathToFile = path.join('..', 'test', 'example.pdf');
|
|
74
|
+
const input = await readFile(pathToFile);
|
|
75
|
+
const data = await pdf2string(input);
|
|
45
76
|
|
|
46
|
-
(
|
|
47
|
-
const pdfString = await pdf2string(pathToFile);
|
|
48
|
-
console.log(pdfString);
|
|
77
|
+
console.log('Extracted text:', data); // ['page 1 content', 'page 2 content', ...]
|
|
49
78
|
})();
|
|
50
79
|
```
|
|
51
80
|
|
|
52
|
-
|
|
81
|
+
### Get image from URL
|
|
53
82
|
|
|
54
|
-
```
|
|
55
|
-
import {
|
|
56
|
-
|
|
83
|
+
```ts
|
|
84
|
+
import { pdf2image } from 'afpp';
|
|
85
|
+
|
|
86
|
+
(async function main() {
|
|
87
|
+
const url = new URL('https://pdfobject.com/pdf/sample.pdf');
|
|
88
|
+
const arrayOfImages = await pdf2image(url);
|
|
89
|
+
|
|
90
|
+
console.log(arrayOfImages); // [imageBuffer, imageBuffer, ...]
|
|
91
|
+
})();
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Parse pdf buffer
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
import { parsePdf } from 'afpp';
|
|
57
98
|
|
|
58
|
-
|
|
99
|
+
(async function main() {
|
|
100
|
+
// Download PDF from URL
|
|
101
|
+
const response = await fetch('https://pdfobject.com/pdf/sample.pdf');
|
|
102
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
59
103
|
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
console.log(
|
|
104
|
+
// Parse the PDF buffer
|
|
105
|
+
const result = await parsePdf(buffer, {}, (content) => content);
|
|
106
|
+
console.log('Parsed PDF:', result);
|
|
63
107
|
})();
|
|
64
108
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "afpp",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "another f*cking pdf parser",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
"url": "git+ssh://git@github.com/l2ysho/afpp.git"
|
|
26
26
|
},
|
|
27
27
|
"engines": {
|
|
28
|
-
"node": "
|
|
29
|
-
"npm": "10.9.2"
|
|
28
|
+
"node": ">=22.14.0",
|
|
29
|
+
"npm": ">=10.9.2"
|
|
30
30
|
},
|
|
31
31
|
"keywords": [
|
|
32
32
|
"pdf",
|