afpp 1.4.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/parsePdf.js CHANGED
@@ -22,16 +22,16 @@ const parsePdfFileBuffer = async (options, callback = defaultParsePdfCallback) =
22
22
  });
23
23
  const items = textContent.items;
24
24
  if (items.length === 0) {
25
- const viewport = page.getViewport({ scale: 1.0 });
25
+ const viewport = page.getViewport({ scale: 2.0 });
26
26
  const canvas = (0, canvas_1.createCanvas)(viewport.width, viewport.height);
27
27
  const context = canvas.getContext('2d');
28
28
  await page.render({ canvasContext: context, viewport }).promise;
29
29
  const imageBuffer = await (0, utils_1.toBufferAsync)(canvas);
30
- pageContents[pageNum - 1] = await callback(imageBuffer);
30
+ pageContents[pageNum - 1] = await callback(imageBuffer, pageNum);
31
31
  }
32
32
  else {
33
33
  const pageText = items.map((item) => item.str || '').join(' ');
34
- pageContents[pageNum - 1] = await callback(pageText);
34
+ pageContents[pageNum - 1] = await callback(pageText, pageNum);
35
35
  }
36
36
  }));
37
37
  }
@@ -1 +1 @@
1
- {"version":3,"file":"parsePdf.js","sourceRoot":"","sources":["../src/parsePdf.ts"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,+CAA4C;AAE5C,mCAAsC;AAOtC,2CAAgD;AAIhD,MAAM,uBAAuB,GAAsC,KAAK,EACtE,OAAO,EAEP,EAAE,CAAC,OAAO,CAAC;AAEb,MAAM,kBAAkB,GAAG,KAAK,EAC9B,OAA+B,EAC/B,WAAgC,uBAA8C,EAC9E,EAAE,CACF,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC;KACb,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAQ,KAAK,CAAC,IAAI,CAClC,EAAE,MAAM,EAAE,QAAQ,EAAE,EACpB,GAAG,EAAE,CAAC,IAAoB,CAC3B,CAAC;IACF,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;gBAC5C,oBAAoB,EAAE,KAAK;aAC5B,CAAC,CAAC;YACH,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;gBAClD,MAAM,MAAM,GAAG,IAAA,qBAAY,EAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAExC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;gBAEhE,MAAM,WAAW,GAAG,MAAM,IAAA,qBAAa,EAAC,MAAM,CAAC,CAAC;gBAEhD,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACvD,CAAC;QACH,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;;GAgBG;AAEI,MAAM,QAAQ,GAAG,KAAK,EAC3B,KAAyC,EACzC,OAAsB,EACtB,WAAgC,uBAA8C,EAC9E,EAAE;IACF,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,QAA
Q,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AAxBW,QAAA,QAAQ,YAwBnB"}
1
+ {"version":3,"file":"parsePdf.js","sourceRoot":"","sources":["../src/parsePdf.ts"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,+CAA4C;AAE5C,mCAAsC;AAOtC,2CAAgD;AAOhD,MAAM,uBAAuB,GAAsC,KAAK,EACtE,OAAO,EAEP,EAAE,CAAC,OAAO,CAAC;AAEb,MAAM,kBAAkB,GAAG,KAAK,EAC9B,OAA+B,EAC/B,WAAgC,uBAA8C,EAC9E,EAAE,CACF,MAAM,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;IAChE,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC;QACvC,GAAG,OAAO;QACV,SAAS,EAAE,CAAC;KACb,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAE9C,MAAM,EAAE,QAAQ,EAAE,GAAG,WAAW,CAAC;IACjC,MAAM,YAAY,GAAQ,KAAK,CAAC,IAAI,CAClC,EAAE,MAAM,EAAE,QAAQ,EAAE,EACpB,GAAG,EAAE,CAAC,IAAoB,CAC3B,CAAC;IACF,MAAM,YAAY,GAAmC,EAAE,CAAC;IAExD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,QAAQ,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CACf,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC/C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC;gBAC5C,oBAAoB,EAAE,KAAK;aAC5B,CAAC,CAAC;YACH,MAAM,KAAK,GAAG,WAAW,CAAC,KAAmB,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;gBAClD,MAAM,MAAM,GAAG,IAAA,qBAAY,EAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;gBAExC,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC;gBAEhE,MAAM,WAAW,GAAG,MAAM,IAAA,qBAAa,EAAC,MAAM,CAAC,CAAC;gBAEhD,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACnE,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC/D,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAChE,CAAC;QACH,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAChC,OAAO,YAAY,CAAC;AACtB,CAAC,CAAC,CAAC;AAML;;;;;;;;;;;;;;;;GAgBG;AAEI,MAAM,QAAQ,GAAG,KAAK,EAC3B,KAAyC,EACzC,OAAsB,EACtB,WAAgC,uBAA8C,EAC9E,EAAE;IACF,IAAI,OAAO,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,
CAAC,0BAA0B,OAAO,QAAQ,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAQ,EAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;QACxC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,KAAK,YAAY,UAAU,EAAE,CAAC;QAChC,OAAO,kBAAkB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IACD,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;QACzB,OAAO,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,OAAO,KAAK,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AAxBW,QAAA,QAAQ,YAwBnB"}
package/dist/pdf2image.js CHANGED
@@ -16,7 +16,7 @@ const parsePdfFileBuffer = async (options) => import('pdfjs-dist/legacy/build/pd
16
16
  const pagePromises = [];
17
17
  for (let pageNum = 1; pageNum <= numPages; pageNum += 1) {
18
18
  pagePromises.push(pdfDocument.getPage(pageNum).then(async (page) => {
19
- const viewport = page.getViewport({ scale: 1.0 });
19
+ const viewport = page.getViewport({ scale: 2.0 });
20
20
  const canvas = (0, canvas_1.createCanvas)(viewport.width, viewport.height);
21
21
  const context = canvas.getContext('2d');
22
22
  await page.render({ canvasContext: context, viewport }).promise;
@@ -1,4 +1,4 @@
1
- type ParsePdfCallback<T> = (content: Buffer | string) => Promise<T>;
1
+ type ParsePdfCallback<T> = (content: Buffer | string, page: number) => Promise<T>;
2
2
  type ParseOptions = {
3
3
  password?: string;
4
4
  };
@@ -1,2 +1,3 @@
1
- import { Canvas } from 'canvas';
1
+ import { Canvas, CanvasRenderingContext2D } from 'canvas';
2
2
  export declare function toBufferAsync(canvas: Canvas): Promise<Buffer>;
3
+ export declare function imageDataToGrayscale(canvas: Canvas, context: CanvasRenderingContext2D): void;
package/dist/utils.js CHANGED
@@ -1,8 +1,27 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.toBufferAsync = toBufferAsync;
4
+ exports.imageDataToGrayscale = imageDataToGrayscale;
4
5
  const util_1 = require("util");
5
6
  function toBufferAsync(canvas) {
6
7
  return (0, util_1.promisify)(canvas.toBuffer.bind(canvas))();
7
8
  }
9
+ // Experimental method to switch context to grayscale, useful for further processing, for example OCR
10
+ function imageDataToGrayscale(canvas, context) {
11
+ // Convert image to grayscale manually
12
+ const imageData = context.getImageData(0, 0, canvas.width, canvas.height);
13
+ const pixels = imageData.data;
14
+ for (let i = 0; i < pixels.length; i += 4) {
15
+ const r = pixels[i] || 0;
16
+ const g = pixels[i + 1] || 0;
17
+ const b = pixels[i + 2] || 0;
18
+ // Grayscale formula: (0.3 * R) + (0.59 * G) + (0.11 * B)
19
+ const grayscale = 0.3 * r + 0.59 * g + 0.11 * b;
20
+ pixels[i] = grayscale; // Red channel
21
+ pixels[i + 1] = grayscale; // Green channel
22
+ pixels[i + 2] = grayscale; // Blue channel
23
+ // Alpha channel remains unchanged (pixels[i + 3])
24
+ }
25
+ context.putImageData(imageData, 0, 0);
26
+ }
8
27
  //# sourceMappingURL=utils.js.map
package/dist/utils.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";;AAIA,sCAEC;AAND,+BAAiC;AAIjC,SAAgB,aAAa,CAAC,MAAc;IAC1C,OAAO,IAAA,gBAAS,EAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAqB,CAAC;AACtE,CAAC"}
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";;AAIA,sCAEC;AAGD,oDAuBC;AAhCD,+BAAiC;AAIjC,SAAgB,aAAa,CAAC,MAAc;IAC1C,OAAO,IAAA,gBAAS,EAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAqB,CAAC;AACtE,CAAC;AAED,mGAAmG;AACnG,SAAgB,oBAAoB,CAClC,MAAc,EACd,OAAiC;IAEjC,sCAAsC;IACtC,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC;IAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QAE7B,yDAAyD;QACzD,MAAM,SAAS,GAAG,GAAG,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC;QAEhD,MAAM,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,cAAc;QACrC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,gBAAgB;QAC3C,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,eAAe;QAC1C,kDAAkD;IACpD,CAAC;IAED,OAAO,CAAC,YAAY,CAAC,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;AACxC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "afpp",
3
- "version": "1.4.4",
3
+ "version": "1.5.1",
4
4
  "description": "another f*cking pdf parser",
5
5
  "types": "./dist/types/index.d.ts",
6
6
  "main": "./dist/index.js",