afpp 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,12 +1,7 @@
1
1
  # afpp
2
2
 
3
- Another f*cking pdf parser. (alpha)
3
+ Another f\*cking pdf parser. (alpha)
4
4
 
5
- ## todo
6
-
7
- - [x] pdf2string
8
- - [ ] pdf2img
9
-
10
5
  ## Why?
11
6
 
12
7
  If you are parsing PDF files in Node.js and you are satisfied with your current solution, good for you — you don't need this.
@@ -35,10 +30,10 @@ then you might find this package useful.
35
30
  **commonjs**:
36
31
 
37
32
  ```js
38
- const { pdf2string } = require("afpp");
39
- const path = require("node:path");
33
+ const { pdf2string } = require('afpp');
34
+ const path = require('node:path');
40
35
 
41
- const pathToFile = path.join("example.pdf");
36
+ const pathToFile = path.join('example.pdf');
42
37
 
43
38
  (async function start() {
44
39
  const pdfString = await pdf2string(pathToFile);
@@ -49,10 +44,10 @@ const pathToFile = path.join("example.pdf");
49
44
  **esm**:
50
45
 
51
46
  ```js
52
- import { pdf2string } from "afpp";
53
- import path from "node:path";
47
+ import { pdf2string } from 'afpp';
48
+ import path from 'node:path';
54
49
 
55
- const pathToFile = path.join("example.pdf");
50
+ const pathToFile = path.join('example.pdf');
56
51
 
57
52
  (async function start() {
58
53
  const pdfString = await pdf2string(pathToFile);
package/dist/index.js CHANGED
@@ -2,52 +2,67 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.pdf2string = void 0;
4
4
  const promises_1 = require("node:fs/promises");
5
- const parsePdfFileBuffer = async (data, options) => {
6
- return import("pdfjs-dist/legacy/build/pdf.mjs").then(async (pdfjsLib) => {
7
- const loadingTask = pdfjsLib.getDocument({
8
- data,
9
- password: options?.password,
10
- });
11
- const pdfDocument = await loadingTask.promise;
12
- const { numPages } = pdfDocument;
13
- const pageContents = new Array(numPages);
14
- const pagePromises = [];
15
- for (let pageNum = 1; pageNum <= numPages; pageNum += 1) {
16
- pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
17
- const textContent = await page.getTextContent({
18
- includeMarkedContent: false,
19
- });
20
- // ? Type assertion of items to TextItem[] should be safe because {includeMarkedContent: false}
21
- const items = textContent.items;
22
- if (items.length === 0) {
23
- console.log("empty page");
24
- pageContents[pageNum - 1] = "";
25
- }
26
- else {
27
- const pageText = items.map((item) => item.str || "").join(" ");
28
- pageContents[pageNum - 1] = pageText;
29
- }
30
- }));
31
- }
32
- await Promise.all(pagePromises);
33
- return pageContents;
5
+ const parsePdfFileBuffer = async (options) => import('pdfjs-dist/legacy/build/pdf.mjs').then(async (pdfjsLib) => {
6
+ const loadingTask = pdfjsLib.getDocument({
7
+ ...options,
8
+ verbosity: 0, // TODO enable for debug
34
9
  });
35
- };
36
- const pdf2string = async (source, options) => {
37
- if (typeof source === "string") {
38
- const fileBase64 = await (0, promises_1.readFile)(source, {});
39
- const data = new Uint8Array(fileBase64);
40
- return parsePdfFileBuffer(data, options);
10
+ const pdfDocument = await loadingTask.promise;
11
+ const { numPages } = pdfDocument;
12
+ const pageContents = new Array(numPages).fill('');
13
+ const pagePromises = [];
14
+ for (let pageNum = 1; pageNum <= numPages; pageNum += 1) {
15
+ pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
16
+ const textContent = await page.getTextContent({
17
+ includeMarkedContent: false,
18
+ });
19
+ // ? Type assertion of items to TextItem[] should be safe because {includeMarkedContent: false}
20
+ const items = textContent.items;
21
+ if (items.length === 0) {
22
+ pageContents[pageNum - 1] = '';
23
+ }
24
+ else {
25
+ const pageText = items.map((item) => item.str || '').join(' ');
26
+ pageContents[pageNum - 1] = pageText;
27
+ }
28
+ }));
29
+ }
30
+ await Promise.all(pagePromises);
31
+ return pageContents;
32
+ });
33
+ /**
34
+ * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to an array of strings, one entry per page.
35
+ *
36
+ * @async
37
+ * @function pdf2string
38
+ *
39
+ * @param {Buffer|Uint8Array|string|URL} input - The PDF source, which can be a file path, URL, Buffer, or Uint8Array.
40
+ * @param {Object} [options] - Optional parsing options for customizing the PDF parsing process.
41
+ * @param {string} [options.password] - The password for encrypted PDF files, if required.
42
+ *
43
+ * @since — v1.0.0
44
+ *
45
+ * @returns {Promise<string[]>} - A promise that resolves to an array of strings, one per page, holding the text content of the PDF.
46
+ *
47
+ * @throws {Error} Throws an error if the input type is invalid.
48
+ */
49
+ const pdf2string = async (input, options) => {
50
+ if (typeof input === 'string') {
51
+ const fileBuffer = await (0, promises_1.readFile)(input, {});
52
+ const data = new Uint8Array(fileBuffer);
53
+ return parsePdfFileBuffer({ data, ...options });
54
+ }
55
+ if (Buffer.isBuffer(input)) {
56
+ const data = new Uint8Array(input);
57
+ return parsePdfFileBuffer({ data, ...options });
41
58
  }
42
- if (Buffer.isBuffer(source)) {
43
- const fileBase64 = await (0, promises_1.readFile)(source, {});
44
- const data = new Uint8Array(fileBase64);
45
- return parsePdfFileBuffer(data, options);
59
+ if (input instanceof Uint8Array) {
60
+ return parsePdfFileBuffer({ data: input, ...options });
46
61
  }
47
- if (source instanceof Uint8Array) {
48
- return parsePdfFileBuffer(source, options);
62
+ if (input instanceof URL) {
63
+ return parsePdfFileBuffer({ url: input, ...options });
49
64
  }
50
- throw new Error(`Invalid source type: ${typeof source}`);
65
+ throw new Error(`Invalid source type: ${typeof input}`);
51
66
  };
52
67
  exports.pdf2string = pdf2string;
53
68
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":";;;AAAA,+CAA4C;AAY5C,MAAM,kBAAkB,GAAG,KAAK,EAAE,IAAgB,EAAE,OAAsB,EAAE,EAAE;IAC5E,OAAO,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;QACvE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;YACvC,IAAI;YACJ,QAAQ,EAAE,OAAO,EAAE,QAAQ;SAC5B,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;QAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;QACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,YAAY,GAAmC,EAAE,CAAC;QAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;YACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;gBAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;oBAC5C,oBAAoB,EAAE,KAAK;iBAC5B,CAAC,CAAC;gBACH,+FAA+F;gBAC/F,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;gBAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACvB,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;oBAC1B,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;gBACjC,CAAC;qBAAM,CAAC;oBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC;gBACvC,CAAC;YACH,CAAC,CAAC,CACH,CAAC;QACJ,CAAC;QACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAChC,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC;AAMF,MAAM,UAAU,GAAG,KAAK,EACtB,MAAoC,EACpC,OAAsB,EACtB,EAAE;IACF,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,MAAM,YAAY,UAAU,EAAE,CAAC;QACjC,OAAO,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,MAAM,EAAE,CAAC,CAAC;AAC3D,CAAC,CAAC;AAEO
,gCAAU"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,+CAA4C;AAQ5C,MAAM,kBAAkB,GAAG,KAAK,EAAE,OAA+B,EAAE,EAAE,CACnE,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC,EAAE,wBAAwB;KACvC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAS,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpE,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;gBAC5C,oBAAoB,EAAE,KAAK;aAC5B,CAAC,CAAC;YACH,+FAA+F;YAC/F,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC;YACvC,CAAC;QACH,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,GAAG,KAAK,EACtB,KAAyC,EACzC,OAAsB,EACtB,EAAE;IACF,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;I
ACxD,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AAEO,gCAAU"}
@@ -1,5 +1,21 @@
1
1
  type ParseOptions = {
2
2
  password?: string;
3
3
  };
4
- declare const pdf2string: (source: string | Buffer | Uint8Array, options?: ParseOptions) => Promise<string[]>;
4
+ /**
5
+ * Converts a PDF file from various input formats (Buffer, Uint8Array, string path, or URL) to an array of strings, one entry per page.
6
+ *
7
+ * @async
8
+ * @function pdf2string
9
+ *
10
+ * @param {Buffer|Uint8Array|string|URL} input - The PDF source, which can be a file path, URL, Buffer, or Uint8Array.
11
+ * @param {Object} [options] - Optional parsing options for customizing the PDF parsing process.
12
+ * @param {string} [options.password] - The password for encrypted PDF files, if required.
13
+ *
14
+ * @since — v1.0.0
15
+ *
16
+ * @returns {Promise<string[]>} - A promise that resolves to an array of strings, one per page, holding the text content of the PDF.
17
+ *
18
+ * @throws {Error} Throws an error if the input type is invalid.
19
+ */
20
+ declare const pdf2string: (input: Buffer | URL | Uint8Array | string, options?: ParseOptions) => Promise<string[]>;
5
21
  export { pdf2string };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "afpp",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "another f*cking pdf parser",
5
5
  "types": "./dist/types/index.d.ts",
6
6
  "main": "./dist/index.js",
@@ -8,9 +8,14 @@
8
8
  "dist/**"
9
9
  ],
10
10
  "scripts": {
11
- "test": "test",
11
+ "build": "tsc -p tsconfig.build.json",
12
+ "commit": "git-cz",
13
+ "lint": "eslint .",
12
14
  "prebuild": "rm -rf dist",
13
- "build": "tsc -p tsconfig.json"
15
+ "prepare": "husky",
16
+ "test": "NODE_ENV=test npx tsx --test --test-reporter=@voxpelli/node-test-pretty-reporter test/*.test.ts",
17
+ "test:coverage": "NODE_ENV=test npx tsx --test --experimental-test-coverage test/*.test.ts",
18
+ "typecheck": "tsc -p tsconfig.json --noEmit"
14
19
  },
15
20
  "repository": {
16
21
  "type": "git",
@@ -34,12 +39,31 @@
34
39
  },
35
40
  "homepage": "https://github.com/l2ysho/afpp#readme",
36
41
  "dependencies": {
37
- "pdfjs-dist": "4.6.82"
42
+ "pdfjs-dist": "4.6.82",
43
+ "typescript": "5.6.2"
38
44
  },
39
45
  "devDependencies": {
46
+ "@commitlint/cli": "19.5.0",
47
+ "@commitlint/config-conventional": "19.5.0",
40
48
  "@types/node": "22.5.5",
49
+ "@typescript-eslint/eslint-plugin": "7.18.0",
50
+ "@typescript-eslint/parser": "7.18.0",
51
+ "@voxpelli/node-test-pretty-reporter": "1.1.2",
52
+ "commitizen": "4.3.0",
53
+ "cz-conventional-changelog": "3.3.0",
54
+ "eslint": "8.56.0",
55
+ "eslint-config-airbnb": "19.0.4",
56
+ "eslint-config-airbnb-base": "15.0.0",
57
+ "eslint-config-airbnb-typescript": "18.0.0",
58
+ "eslint-config-prettier": "9.1.0",
59
+ "eslint-import-resolver-typescript": "3.6.1",
60
+ "eslint-plugin-import": "2.29.1",
61
+ "eslint-plugin-no-relative-import-paths": "1.5.5",
62
+ "eslint-plugin-perfectionist": "2.11.0",
63
+ "eslint-plugin-prettier": "5.2.1",
64
+ "husky": "9.1.6",
65
+ "lint-staged": "15.2.10",
41
66
  "semantic-release": "24.1.1",
42
- "tsx": "4.19.1",
43
- "typescript": "5.6.2"
67
+ "tsx": "4.19.1"
44
68
  }
45
69
  }