afpp 1.8.0-beta.3 → 2.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/parsePdf.d.ts +6 -0
- package/dist/parsePdf.js +6 -4
- package/dist/parsePdf.js.map +1 -1
- package/package.json +4 -2
package/dist/parsePdf.d.ts
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
import { Canvas } from '@napi-rs/canvas';
|
|
2
|
+
export interface CanvasFactory {
|
|
3
|
+
createCanvas: (width: number, height: number) => Canvas;
|
|
4
|
+
destroyCanvas: (canvas: Canvas) => void;
|
|
5
|
+
resetCanvas: (canvas: Canvas, width: number, height: number) => void;
|
|
6
|
+
}
|
|
1
7
|
export type PageProcessor<T> = (content: Buffer | string, pageNumber: number, pageCount: number) => Promise<T> | T;
|
|
2
8
|
type ImageEncoding = 'avif' | 'jpeg' | 'png' | 'webp';
|
|
3
9
|
interface ParseOptions {
|
package/dist/parsePdf.js
CHANGED
|
@@ -5,21 +5,22 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.parsePdf = void 0;
|
|
7
7
|
const promises_1 = require("node:fs/promises");
|
|
8
|
-
const canvas_1 = require("@napi-rs/canvas");
|
|
9
8
|
const p_limit_1 = __importDefault(require("p-limit"));
|
|
10
9
|
const pdf_mjs_1 = require("pdfjs-dist/legacy/build/pdf.mjs");
|
|
11
|
-
const processPdfPage = async (page, pageNumber, pageCount, scale, encoding, callback) => {
|
|
10
|
+
const processPdfPage = async (page, canvasFactory, pageNumber, pageCount, scale, encoding, callback) => {
|
|
12
11
|
const textContent = await page.getTextContent({
|
|
13
12
|
includeMarkedContent: false,
|
|
14
13
|
});
|
|
15
14
|
const items = textContent.items;
|
|
16
15
|
if (items.length === 0) {
|
|
17
16
|
const viewport = page.getViewport({ scale });
|
|
18
|
-
const canvas = (0, canvas_1.createCanvas)(viewport.width, viewport.height);
|
|
17
|
+
const canvas = canvasFactory.createCanvas(viewport.width, viewport.height);
|
|
18
|
+
// console.log(createCanvas, canvas);
|
|
19
19
|
const context = canvas.getContext('2d');
|
|
20
20
|
await page.render({ canvasContext: context, viewport }).promise;
|
|
21
21
|
//@ts-expect-error this should be fixed in release
|
|
22
22
|
const imageBuffer = await canvas.encode(encoding);
|
|
23
|
+
canvasFactory.destroyCanvas(canvas);
|
|
23
24
|
return callback(imageBuffer, pageNumber, pageCount);
|
|
24
25
|
}
|
|
25
26
|
const pageText = items.map((item) => item.str || '').join(' ');
|
|
@@ -35,7 +36,8 @@ const parsePdfFileBuffer = async (options, scale, concurrency, encoding, callbac
|
|
|
35
36
|
const pageNum = i + 1;
|
|
36
37
|
return limit(async () => {
|
|
37
38
|
const page = await pdfDocument.getPage(pageNum);
|
|
38
|
-
const result = await processPdfPage(page, pageNum, numPages, scale, encoding, callback);
|
|
39
|
+
const canvasFactory = pdfDocument.canvasFactory;
|
|
40
|
+
const result = await processPdfPage(page, canvasFactory, pageNum, numPages, scale, encoding, callback);
|
|
39
41
|
results[i] = result;
|
|
40
42
|
});
|
|
41
43
|
});
|
package/dist/parsePdf.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parsePdf.js","sourceRoot":"","sources":["../src/parsePdf.ts"],"names":[],"mappings":";;;;;;AAAA,+CAA4C;
|
|
1
|
+
{"version":3,"file":"parsePdf.js","sourceRoot":"","sources":["../src/parsePdf.ts"],"names":[],"mappings":";;;;;;AAAA,+CAA4C;AAG5C,sDAA6B;AAC7B,6DAA8D;AA0C9D,MAAM,cAAc,GAAG,KAAK,EAC1B,IAAkB,EAClB,aAA4B,EAC5B,UAAkB,EAClB,SAAiB,EACjB,KAAa,EACb,QAAuB,EACvB,QAA0B,EACd,EAAE;IACd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;QAC5C,oBAAoB,EAAE,KAAK;KAC5B,CAAC,CAAC;IACH,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;IAE9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QAE7C,MAAM,MAAM,GAAG,aAAa,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC3E,qCAAqC;QACrC,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;QAChE,kDAAkD;QAClD,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAClD,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QACpC,OAAO,QAAQ,CAAC,WAAW,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC;IACtD,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/D,OAAO,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC;AACnD,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,KAAK,EAC9B,OAA+B,EAC/B,KAAa,EACb,WAAmB,EACnB,QAAuB,EACvB,QAA0B,EACZ,EAAE;IAChB,MAAM,KAAK,GAAG,IAAA,iBAAM,EAAC,WAAW,CAAC,CAAC;IAClC,MAAM,WAAW,GAAG,IAAA,qBAAW,EAAC,EAAE,GAAG,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAC9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,OAAO,GAAQ,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC1D,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;QACtB,OAAO,KAAK,CAAC,KAAK,IAAI,EAAE;YACtB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAChD,MAAM,aAAa,GAAG,WAAW,CAAC,aAA8B,CAAC;YAEjE,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,IAAI,EACJ,aAAa,EACb,OAAO,EACP,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,QAAQ,CACT,CAAC;YACF,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC,GAAG,CAA
C,SAAS,CAAC,CAAC;IAC7B,OAAO,OAAO,CAAC;AACjB,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;GAmBG;AAEI,MAAM,QAAQ,GAAG,KAAK,EAC3B,KAAyC,EACzC,OAAqB,EACrB,QAA0B,EACZ,EAAE;IAChB,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,QAAQ,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;IACnC,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAC7C,MAAM,QAAQ,GAAG,OAAO,CAAC,aAAa,IAAI,KAAK,CAAC;IAEhD,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxD,MAAM,IAAI,KAAK,CAAC,uCAAuC,QAAQ,GAAG,CAAC,CAAC;IACtE,CAAC;IAED,MAAM,WAAW,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAEnC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,CAAC,CAAC;QACzC,OAAO,kBAAkB,CACvB,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,GAAG,WAAW,EAAE,EACpD,KAAK,EACL,WAAW,EACX,QAAQ,EACR,QAAQ,CACT,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,kBAAkB,CACvB,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,GAAG,WAAW,EAAE,EAC/C,KAAK,EACL,WAAW,EACX,QAAQ,EACR,QAAQ,CACT,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CACvB,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,WAAW,EAAE,EAC/B,KAAK,EACL,WAAW,EACX,QAAQ,EACR,QAAQ,CACT,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CACvB,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,WAAW,EAAE,EAC9B,KAAK,EACL,WAAW,EACX,QAAQ,EACR,QAAQ,CACT,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AA7DW,QAAA,QAAQ,YA6DnB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "afpp",
|
|
3
|
-
"version": "1.8.0-beta.3",
|
|
3
|
+
"version": "2.0.0-beta.1",
|
|
4
4
|
"description": "another f*cking pdf parser",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -69,8 +69,10 @@
|
|
|
69
69
|
"typescript-eslint": "8.29.0"
|
|
70
70
|
},
|
|
71
71
|
"dependencies": {
|
|
72
|
-
"@napi-rs/canvas": "0.1.69",
|
|
73
72
|
"p-limit": "6.2.0",
|
|
74
73
|
"pdfjs-dist": "5.1.91"
|
|
74
|
+
},
|
|
75
|
+
"peerDependencies": {
|
|
76
|
+
"@napi-rs/canvas": "0.1.69"
|
|
75
77
|
}
|
|
76
78
|
}
|