afpp 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # afpp
2
2
 
3
+ ![Version](https://img.shields.io/github/v/release/l2ysho/afpp)
4
+ [![codecov](https://codecov.io/github/l2ysho/afpp/graph/badge.svg?token=2PE32I4M9K)](https://codecov.io/github/l2ysho/afpp)
5
+ ![Node](https://img.shields.io/badge/node-%3E%3D%2018.x-brightgreen.svg)
6
+ ![npm Downloads](https://img.shields.io/npm/dt/afpp.svg)
7
+ ![Repo Size](https://img.shields.io/github/repo-size/l2ysho/afpp)
8
+ ![Last Commit](https://img.shields.io/github/last-commit/l2ysho/afpp.svg)
9
+
3
10
  Another f\*cking pdf parser. (alpha)
4
11
 
5
12
  ## Why?
package/dist/index.js CHANGED
@@ -1,6 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.pdf2string = exports.pdf2image = void 0;
3
+ exports.pdf2string = exports.pdf2image = exports.parsePdf = void 0;
4
+ var parsePdf_1 = require("#afpp/src/parsePdf");
5
+ Object.defineProperty(exports, "parsePdf", { enumerable: true, get: function () { return parsePdf_1.parsePdf; } });
4
6
  var pdf2image_1 = require("#afpp/src/pdf2image");
5
7
  Object.defineProperty(exports, "pdf2image", { enumerable: true, get: function () { return pdf2image_1.pdf2image; } });
6
8
  var pdf2string_1 = require("#afpp/src/pdf2string");
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,iDAAgD;AAAvC,sGAAA,SAAS,OAAA;AAClB,mDAAkD;AAAzC,wGAAA,UAAU,OAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,+CAA8C;AAArC,oGAAA,QAAQ,OAAA;AACjB,iDAAgD;AAAvC,sGAAA,SAAS,OAAA;AAClB,mDAAkD;AAAzC,wGAAA,UAAU,OAAA"}
@@ -0,0 +1,79 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.parsePdf = void 0;
4
+ /* eslint-disable no-underscore-dangle */
5
+ const promises_1 = require("node:fs/promises");
6
+ const canvas_1 = require("canvas");
7
+ const defaultParsePdfCallback = (content) => content; // identity callback: returns each page's content unchanged
8
+ const parsePdfFileBuffer = async (options, callback = defaultParsePdfCallback) => import('pdfjs-dist/legacy/build/pdf.mjs').then(async (pdfjsLib) => { // loads the document described by options and maps every page through callback
9
+ const loadingTask = pdfjsLib.getDocument({
10
+ ...options,
11
+ verbosity: 0, // listed after the spread, so it overrides any verbosity supplied in options
12
+ });
13
+ const pdfDocument = await loadingTask.promise;
14
+ const { numPages } = pdfDocument;
15
+ const pageContents = Array.from({ length: numPages }, () => null); // one slot per page, written by index so results stay in page order
16
+ const pagePromises = [];
17
+ for (let pageNum = 1; pageNum <= numPages; pageNum += 1) { // page numbers passed to getPage start at 1
18
+ pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
19
+ const textContent = await page.getTextContent({
20
+ includeMarkedContent: false,
21
+ });
22
+ const items = textContent.items;
23
+ if (items.length === 0) { // no text items: render the page to a canvas and hand the image buffer to callback
24
+ const viewport = page.getViewport({ scale: 1.0 });
25
+ const canvas = (0, canvas_1.createCanvas)(viewport.width, viewport.height);
26
+ const context = canvas.getContext('2d');
27
+ await page.render({ canvasContext: context, viewport }).promise;
28
+ const imageBuffer = canvas.toBuffer();
29
+ pageContents[pageNum - 1] = callback(imageBuffer); // callback's return value is stored as-is (not awaited)
30
+ }
31
+ else {
32
+ const pageText = items.map((item) => item.str || '').join(' '); // items with a falsy str contribute an empty string
33
+ pageContents[pageNum - 1] = callback(pageText);
34
+ }
35
+ }));
36
+ }
37
+ await Promise.all(pagePromises); // wait for every page before returning the filled array
38
+ return pageContents;
39
+ });
40
+ /**
41
+ * Parses a PDF from various input formats (Buffer, Uint8Array, string path, or URL). Each page is passed to the callback as a string (text content) or, when the page has no text items, as a buffer (rendered image content); the callback results are returned as an array in page order.
42
+ *
43
+ * @async
44
+ * @function parsePdf
45
+ *
46
+ * @param {Buffer|Uint8Array|string|URL} input - The PDF source, which can be a file path, URL, Buffer, or Uint8Array.
47
+ * @param {Object} [options] - Optional parsing options for customizing the PDF parsing process.
48
+ * @param {string} [options.password] - The password for encrypted PDF files, if required.
49
+ * @param {function} [callback] - Callback applied to the content of each page to add another layer of processing. The default callback returns the page content without any added processing.
50
+ *
51
+ * @since — v1.0.0
52
+ *
53
+ * @returns {Promise<Array>} - A promise that resolves to an array holding one callback result per page.
54
+ *
55
+ * @throws {Error} Throws an error if the callback is not a function or if the input type is invalid.
56
+ */
57
+ const parsePdf = async (input, options, callback = defaultParsePdfCallback) => {
58
+ if (typeof callback !== 'function') {
59
+ throw new Error(`Invalid callback type: ${typeof callback}`);
60
+ }
61
+ if (typeof input === 'string') { // a string input is read from disk as a file path
62
+ const fileBuffer = await (0, promises_1.readFile)(input, {});
63
+ const data = new Uint8Array(fileBuffer);
64
+ return parsePdfFileBuffer({ data, ...options }, callback);
65
+ }
66
+ if (Buffer.isBuffer(input)) { // checked before the Uint8Array branch; the Buffer is copied into a new Uint8Array
67
+ const data = new Uint8Array(input);
68
+ return parsePdfFileBuffer({ data, ...options }, callback);
69
+ }
70
+ if (input instanceof Uint8Array) {
71
+ return parsePdfFileBuffer({ data: input, ...options }, callback);
72
+ }
73
+ if (input instanceof URL) {
74
+ return parsePdfFileBuffer({ url: input, ...options }, callback);
75
+ }
76
+ throw new Error(`Invalid source type: ${typeof input}`);
77
+ };
78
+ exports.parsePdf = parsePdf;
79
+ //# sourceMappingURL=parsePdf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parsePdf.js","sourceRoot":"","sources":["../src/parsePdf.ts"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,+CAA4C;AAE5C,mCAAsC;AAStC,MAAM,uBAAuB,GAAsC,CAAC,OAAO,EAAE,EAAE,CAC7E,OAAO,CAAC;AAEV,MAAM,kBAAkB,GAAG,KAAK,EAC9B,OAA+B,EAC/B,WAAgC,uBAA8C,EAC9E,EAAE,CACF,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC;KACb,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAQ,KAAK,CAAC,IAAI,CAClC,EAAE,MAAM,EAAE,QAAQ,EAAE,EACpB,GAAG,EAAE,CAAC,IAAoB,CAC3B,CAAC;IACF,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;gBAC5C,oBAAoB,EAAE,KAAK;aAC5B,CAAC,CAAC;YACH,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;gBAClD,MAAM,MAAM,GAAG,IAAA,qBAAY,EAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAExC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;gBAChE,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACtC,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC;YACpD,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACjD,CAAC;QACH,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;;GAgBG;AAEI,MAAM,QAAQ,GAAG,KAAK,EAC3B,KAAyC,EACzC,OAAsB,EACtB,WAAgC,uBAA8C,EAC9E,EAAE;IACF,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,QAAQ,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,O
AAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AAxBW,QAAA,QAAQ,YAwBnB"}
package/dist/pdf2image.js CHANGED
@@ -10,11 +10,6 @@ const parsePdfFileBuffer = async (options) => import('pdfjs-dist/legacy/build/pd
10
10
  verbosity: 0, // TODO enable for debug
11
11
  });
12
12
  const pdfDocument = await loadingTask.promise;
13
- // get a canvas factory method from pdfjs-dist
14
- const { canvasFactory } = pdfDocument._transport;
15
- if (!canvasFactory) {
16
- throw new Error('Get canvas error, check current node version');
17
- }
18
13
  const { numPages } = pdfDocument;
19
14
  const pageContents = new Array(numPages).fill(Buffer.from(''));
20
15
  const pagePromises = [];
@@ -32,7 +27,7 @@ const parsePdfFileBuffer = async (options) => import('pdfjs-dist/legacy/build/pd
32
27
  return pageContents;
33
28
  });
34
29
  /**
35
- * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to a string.
30
+ * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to an array of image buffers.
36
31
  *
37
32
  * @async
38
33
  * @function pdf2image
@@ -1 +1 @@
1
- {"version":3,"file":"pdf2image.js","sourceRoot":"","sources":["../src/pdf2image.ts"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,+CAA4C;AAE5C,mCAAsC;AAKtC,MAAM,kBAAkB,GAAG,KAAK,EAAE,OAA+B,EAAE,EAAE,CACnE,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC,EAAE,wBAAwB;KACvC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAC9C,8CAA8C;IAC9C,MAAM,EAAE,aAAa,EAAE,GAAG,WAAW,CAAC,UAErC,CAAC;IAEF,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAS,QAAQ,CAAC,CAAC,IAAI,CAC7D,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAChB,CAAC;IACF,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAClD,MAAM,MAAM,GAAG,IAAA,qBAAY,EAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YAExC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;YAChE,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC;QAC1C,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;GAeG;AACI,MAAM,SAAS,GAAG,KAAK,EAC5B,KAAyC,EACzC,OAAsB,EACtB,EAAE;IACF,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAA
E,KAAK,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AApBW,QAAA,SAAS,aAoBpB"}
1
+ {"version":3,"file":"pdf2image.js","sourceRoot":"","sources":["../src/pdf2image.ts"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,+CAA4C;AAE5C,mCAAsC;AAItC,MAAM,kBAAkB,GAAG,KAAK,EAAE,OAA+B,EAAE,EAAE,CACnE,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC,EAAE,wBAAwB;KACvC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAS,QAAQ,CAAC,CAAC,IAAI,CAC7D,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAChB,CAAC;IACF,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAClD,MAAM,MAAM,GAAG,IAAA,qBAAY,EAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YAExC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;YAChE,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC;QAC1C,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;GAeG;AACI,MAAM,SAAS,GAAG,KAAK,EAC5B,KAAyC,EACzC,OAAsB,EACtB,EAAE;IACF,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,CA
AC,CAAC;IACxD,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AApBW,QAAA,SAAS,aAoBpB"}
@@ -1,2 +1,3 @@
1
+ export { parsePdf } from '#afpp/src/parsePdf';
1
2
  export { pdf2image } from '#afpp/src/pdf2image';
2
3
  export { pdf2string } from '#afpp/src/pdf2string';
@@ -0,0 +1,23 @@
1
+ type ParsePdfCallback<T> = (content: Buffer | string) => T;
2
+ type ParseOptions = {
3
+ password?: string;
4
+ };
5
+ /**
6
+ * Parses a PDF from various input formats (Buffer, Uint8Array, string path, or URL). Each page is passed to the callback as a string (text content) or a buffer (image content); the callback results are returned as an array.
7
+ *
8
+ * @async
9
+ * @function parsePdf
10
+ *
11
+ * @param {Buffer|Uint8Array|string|URL} input - The PDF source, which can be a file path, URL, Buffer, or Uint8Array.
12
+ * @param {Object} [options] - Optional parsing options for customizing the PDF parsing process.
13
+ * @param {string} [options.password] - The password for encrypted PDF files, if required.
14
+ * @param {function} [callback] - Callback applied to the content of each page to add another layer of processing. The default callback returns the page content without any added processing.
15
+ *
16
+ * @since — v1.0.0
17
+ *
18
+ * @returns {Promise<T[]>} - A promise that resolves to an array of callback results.
19
+ *
20
+ * @throws {Error} Throws an error if the input type is invalid.
21
+ */
22
+ export declare const parsePdf: <T>(input: Buffer | URL | Uint8Array | string, options?: ParseOptions, callback?: ParsePdfCallback<T>) => Promise<T[]>;
23
+ export {};
@@ -2,7 +2,7 @@ type ParseOptions = {
2
2
  password?: string;
3
3
  };
4
4
  /**
5
- * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to a string.
5
+ * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to an array of image buffers.
6
6
  *
7
7
  * @async
8
8
  * @function pdf2image
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "afpp",
3
- "version": "1.3.0",
3
+ "version": "1.4.1",
4
4
  "description": "another f*cking pdf parser",
5
5
  "types": "./dist/types/index.d.ts",
6
6
  "main": "./dist/index.js",
@@ -15,7 +15,7 @@
15
15
  "prepare": "husky",
16
16
  "test": "NODE_ENV=test npx tsx --test --test-reporter @voxpelli/node-test-pretty-reporter test/*.test.ts",
17
17
  "pretest:coverage": "rm -rf coverage",
18
- "test:coverage": "NODE_V8_COVERAGE=coverage NODE_ENV=test npx tsx --test --experimental-test-coverage --test-reporter lcov --test-reporter-destination=coverage/lcov.info test/*.test.ts",
18
+ "test:coverage": "c8 --reporter=lcov npm test",
19
19
  "typecheck": "tsc -p tsconfig.json --noEmit"
20
20
  },
21
21
  "repository": {
@@ -46,6 +46,7 @@
46
46
  "@typescript-eslint/eslint-plugin": "7.18.0",
47
47
  "@typescript-eslint/parser": "7.18.0",
48
48
  "@voxpelli/node-test-pretty-reporter": "1.1.2",
49
+ "c8": "10.1.2",
49
50
  "commitizen": "4.3.0",
50
51
  "cz-conventional-changelog": "3.3.0",
51
52
  "eslint": "8.56.0",
@@ -61,11 +62,11 @@
61
62
  "husky": "9.1.6",
62
63
  "lint-staged": "15.2.10",
63
64
  "semantic-release": "24.1.1",
64
- "tsx": "4.19.1",
65
- "typescript": "5.6.2"
65
+ "tsx": "4.19.2",
66
+ "typescript": "5.7.3"
66
67
  },
67
68
  "dependencies": {
68
- "canvas": "2.11.2",
69
+ "canvas": "3.1.0",
69
70
  "pdfjs-dist": "4.6.82"
70
71
  }
71
72
  }