afpp 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,12 +1,7 @@
1
1
  # afpp
2
2
 
3
- Another f*cking pdf parser. (alpha)
3
+ Another f\*cking pdf parser. (alpha)
4
4
 
5
- ## todo
6
-
7
- - [x] pdf2string
8
- - [ ] pdf2img
9
-
10
5
  ## Why?
11
6
 
12
7
  If you are parsing pdf files in nodejs and you are satisfied with your current solution, good for you: you don't need this.
@@ -19,6 +14,8 @@ But if you’ve encountered one or more of these issues:
19
14
  - buggy as shit
20
15
  - not working in esm/commonjs
21
16
  - old pdfjs-dist as peer dependency
17
+ - no typescript support
18
+ - no support for parsing encrypted pdf files (password needed)
22
19
 
23
20
  then you might find this package useful.
24
21
 
@@ -33,10 +30,10 @@ then you might find this package useful.
33
30
  **commonjs**:
34
31
 
35
32
  ```js
36
- const { pdf2string } = require("afpp");
37
- const path = require("node:path");
33
+ const { pdf2string } = require('afpp');
34
+ const path = require('node:path');
38
35
 
39
- const pathToFile = path.join("example.pdf");
36
+ const pathToFile = path.join('example.pdf');
40
37
 
41
38
  (async function start() {
42
39
  const pdfString = await pdf2string(pathToFile);
@@ -47,10 +44,10 @@ const pathToFile = path.join("example.pdf");
47
44
  **esm**:
48
45
 
49
46
  ```js
50
- import { pdf2string } from "afpp";
51
- import path from "node:path";
47
+ import { pdf2string } from 'afpp';
48
+ import path from 'node:path';
52
49
 
53
- const pathToFile = path.join("example.pdf");
50
+ const pathToFile = path.join('example.pdf');
54
51
 
55
52
  (async function start() {
56
53
  const pdfString = await pdf2string(pathToFile);
package/dist/index.js CHANGED
@@ -2,47 +2,47 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.pdf2string = void 0;
4
4
  const promises_1 = require("node:fs/promises");
5
- const parsePdfFileBuffer = async (pathToFile) => {
6
- const fileBase64 = await (0, promises_1.readFile)(pathToFile, {});
7
- return import("pdfjs-dist/legacy/build/pdf.mjs").then(async (pdfjsLib) => {
8
- const loadingTask = pdfjsLib.getDocument({
9
- data: new Uint8Array(fileBase64),
10
- });
11
- const pdfDocument = await loadingTask.promise;
12
- const { numPages } = pdfDocument;
13
- const pageContents = new Array(numPages);
14
- const pagePromises = [];
15
- for (let pageNum = 1; pageNum <= numPages; pageNum += 1) {
16
- pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
17
- const textContent = await page.getTextContent({
18
- includeMarkedContent: false,
19
- });
20
- // ? Type assertion of items to TextItem[] should be safe because {includeMarkedContent: false}
21
- const items = textContent.items;
22
- if (items.length === 0) {
23
- console.log("empty page");
24
- pageContents[pageNum - 1] = "";
25
- }
26
- else {
27
- const pageText = items.map((item) => item.str || "").join(" ");
28
- pageContents[pageNum - 1] = pageText;
29
- }
30
- }));
31
- }
32
- await Promise.all(pagePromises);
33
- return pageContents;
5
+ const parsePdfFileBuffer = async (data, options) => import('pdfjs-dist/legacy/build/pdf.mjs').then(async (pdfjsLib) => {
6
+ const loadingTask = pdfjsLib.getDocument({
7
+ data,
8
+ password: options?.password,
34
9
  });
35
- };
36
- //TODO check more valid types + password
37
- const pdf2string = async (source) => {
38
- if (typeof source === "string") {
39
- return parsePdfFileBuffer(source);
10
+ const pdfDocument = await loadingTask.promise;
11
+ const { numPages } = pdfDocument;
12
+ const pageContents = new Array(numPages).fill('');
13
+ const pagePromises = [];
14
+ for (let pageNum = 1; pageNum <= numPages; pageNum += 1) {
15
+ pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
16
+ const textContent = await page.getTextContent({
17
+ includeMarkedContent: false,
18
+ });
19
+ // ? Type assertion of items to TextItem[] should be safe because {includeMarkedContent: false}
20
+ const items = textContent.items;
21
+ if (items.length === 0) {
22
+ pageContents[pageNum - 1] = '';
23
+ }
24
+ else {
25
+ const pageText = items.map((item) => item.str || '').join(' ');
26
+ pageContents[pageNum - 1] = pageText;
27
+ }
28
+ }));
29
+ }
30
+ await Promise.all(pagePromises);
31
+ return pageContents;
32
+ });
33
+ const pdf2string = async (source, options) => {
34
+ if (typeof source === 'string') {
35
+ const fileBase64 = await (0, promises_1.readFile)(source, {});
36
+ const data = new Uint8Array(fileBase64);
37
+ return parsePdfFileBuffer(data, options);
40
38
  }
41
39
  if (Buffer.isBuffer(source)) {
42
- return;
40
+ const fileBase64 = await (0, promises_1.readFile)(source, {});
41
+ const data = new Uint8Array(fileBase64);
42
+ return parsePdfFileBuffer(data, options);
43
43
  }
44
44
  if (source instanceof Uint8Array) {
45
- return;
45
+ return parsePdfFileBuffer(source, options);
46
46
  }
47
47
  throw new Error(`Invalid source type: ${typeof source}`);
48
48
  };
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":";;;AAAA,+CAA4C;AAY5C,MAAM,kBAAkB,GAAG,KAAK,EAAE,UAAkB,EAAE,EAAE;IACtD,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAClD,OAAO,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;QACvE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;YACvC,IAAI,EAAE,IAAI,UAAU,CAAC,UAAU,CAAC;SACjC,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;QAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;QACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,YAAY,GAAmC,EAAE,CAAC;QAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;YACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;gBAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;oBAC5C,oBAAoB,EAAE,KAAK;iBAC5B,CAAC,CAAC;gBACH,+FAA+F;gBAC/F,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;gBAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACvB,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;oBAC1B,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;gBACjC,CAAC;qBAAM,CAAC;oBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC;gBACvC,CAAC;YACH,CAAC,CAAC,CACH,CAAC;QACJ,CAAC;QACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAChC,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC;AAEF,wCAAwC;AACxC,MAAM,UAAU,GAAG,KAAK,EAAE,MAAoC,EAAE,EAAE;IAChE,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5B,OAAO;IACT,CAAC;IACD,IAAI,MAAM,YAAY,UAAU,EAAE,CAAC;QACjC,OAAO;IACT,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,MAAM,EAAE,CAAC,CAAC;AAC3D,CAAC,CAAC;AAEO,gCAAU"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,+CAA4C;AAK5C,MAAM,kBAAkB,GAAG,KAAK,EAAE,IAAgB,EAAE,OAAsB,EAAE,EAAE,CAC5E,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,IAAI;QACJ,QAAQ,EAAE,OAAO,EAAE,QAAQ;KAC5B,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAa,IAAI,KAAK,CAAS,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpE,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;gBAC5C,oBAAoB,EAAE,KAAK;aAC5B,CAAC,CAAC;YACH,+FAA+F;YAC/F,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC;YACvC,CAAC;QACH,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML,MAAM,UAAU,GAAG,KAAK,EACtB,MAAoC,EACpC,OAAsB,EACtB,EAAE;IACF,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,MAAM,YAAY,UAAU,EAAE,CAAC;QACjC,OAAO,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,MAAM,EAAE,CAAC,CAAC;AAC3D,CAAC,CAAC;AAEO,gCAAU"}
@@ -1,2 +1,5 @@
1
- declare const pdf2string: (source: string | Buffer | Uint8Array) => Promise<string[] | undefined>;
1
+ type ParseOptions = {
2
+ password?: string;
3
+ };
4
+ declare const pdf2string: (source: Buffer | Uint8Array | string, options?: ParseOptions) => Promise<string[]>;
2
5
  export { pdf2string };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "afpp",
3
- "version": "1.0.1",
3
+ "version": "1.1.1",
4
4
  "description": "another f*cking pdf parser",
5
5
  "types": "./dist/types/index.d.ts",
6
6
  "main": "./dist/index.js",
@@ -8,9 +8,13 @@
8
8
  "dist/**"
9
9
  ],
10
10
  "scripts": {
11
- "test": "test",
11
+ "build": "tsc -p tsconfig.json",
12
+ "commit": "git-cz",
13
+ "lint": "eslint .",
12
14
  "prebuild": "rm -rf dist",
13
- "build": "tsc -p tsconfig.json"
15
+ "prepare": "husky",
16
+ "test": "test",
17
+ "typecheck": "tsc -p tsconfig.all.json --noEmit"
14
18
  },
15
19
  "repository": {
16
20
  "type": "git",
@@ -37,7 +41,25 @@
37
41
  "pdfjs-dist": "4.6.82"
38
42
  },
39
43
  "devDependencies": {
44
+ "@commitlint/cli": "19.5.0",
45
+ "@commitlint/config-conventional": "19.5.0",
40
46
  "@types/node": "22.5.5",
47
+ "@typescript-eslint/eslint-plugin": "7.18.0",
48
+ "@typescript-eslint/parser": "7.18.0",
49
+ "commitizen": "4.3.0",
50
+ "cz-conventional-changelog": "3.3.0",
51
+ "eslint": "8.56.0",
52
+ "eslint-config-airbnb": "19.0.4",
53
+ "eslint-config-airbnb-base": "15.0.0",
54
+ "eslint-config-airbnb-typescript": "18.0.0",
55
+ "eslint-config-prettier": "9.1.0",
56
+ "eslint-import-resolver-typescript": "3.6.1",
57
+ "eslint-plugin-import": "2.29.1",
58
+ "eslint-plugin-no-relative-import-paths": "1.5.5",
59
+ "eslint-plugin-perfectionist": "2.11.0",
60
+ "eslint-plugin-prettier": "5.2.1",
61
+ "husky": "9.1.6",
62
+ "lint-staged": "15.2.10",
41
63
  "semantic-release": "24.1.1",
42
64
  "tsx": "4.19.1",
43
65
  "typescript": "5.6.2"