@credal/actions 0.2.120 → 0.2.122

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- export declare function extractTextFromPdf(buffer: ArrayBuffer): Promise<string>;
1
+ export declare function extractTextFromPdf(input: ArrayBuffer | Uint8Array): Promise<string>;
package/dist/utils/pdf.js CHANGED
@@ -7,34 +7,23 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import PDFParser from "pdf2json";
11
- export function extractTextFromPdf(buffer) {
10
+ // npm i pdfjs-dist
11
+ import { getDocument } from "pdfjs-dist";
12
+ export function extractTextFromPdf(input) {
12
13
  return __awaiter(this, void 0, void 0, function* () {
13
- try {
14
- const extractedText = yield new Promise((resolve, reject) => {
15
- const pdfParser = new PDFParser();
16
- pdfParser.on("pdfParser_dataError", (errData) => {
17
- reject(errData.parserError || new Error("PDF parsing failed"));
18
- });
19
- pdfParser.on("pdfParser_dataReady", (pdfData) => {
20
- try {
21
- const text = pdfData.Pages.map((page) => page.Texts.map((textItem) => {
22
- // Handle cases where R array might be empty or have multiple runs
23
- return textItem.R.map((run) => decodeURIComponent(run.T)).join("");
24
- }).join("")).join("\n");
25
- resolve(text);
26
- }
27
- catch (error) {
28
- reject(error);
29
- }
30
- });
31
- pdfParser.parseBuffer(Buffer.from(buffer));
32
- });
33
- return extractedText;
34
- }
35
- catch (error) {
36
- console.error("Error extracting PDF text:", error);
37
- throw error;
14
+ const data = input instanceof Uint8Array ? input : new Uint8Array(input);
15
+ // Load PDF
16
+ const loadingTask = getDocument({ data });
17
+ const pdf = yield loadingTask.promise;
18
+ const pages = [];
19
+ for (let i = 1; i <= pdf.numPages; i++) {
20
+ const page = yield pdf.getPage(i);
21
+ const content = yield page.getTextContent();
22
+ // content.items is typed as TextItem | TextMarkedContent
23
+ const strings = content.items.map(item => ("str" in item ? item.str : "")).join(" ");
24
+ pages.push(strings.trim());
38
25
  }
26
+ yield pdf.destroy();
27
+ return pages.join("\n\n");
39
28
  });
40
29
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@credal/actions",
3
- "version": "0.2.120",
3
+ "version": "0.2.122",
4
4
  "type": "module",
5
5
  "description": "AI Actions by Credal AI",
6
6
  "sideEffects": false,
@@ -33,6 +33,7 @@
33
33
  "@types/jsonwebtoken": "^9.0.9",
34
34
  "@types/node": "^22.10.1",
35
35
  "@types/node-forge": "^1.3.11",
36
+ "@types/pdf-parse": "^1.1.5",
36
37
  "@typescript-eslint/eslint-plugin": "^8.18.0",
37
38
  "@typescript-eslint/parser": "^8.18.0",
38
39
  "eslint": "^9.16.0",
@@ -68,6 +69,7 @@
68
69
  "node-forge": "^1.3.1",
69
70
  "p-limit": "^7.1.1",
70
71
  "pdf2json": "^3.1.6",
72
+ "pdfjs-dist": "^5.4.149",
71
73
  "resend": "^4.7.0",
72
74
  "snowflake-sdk": "^2.0.2",
73
75
  "ts-node": "^10.9.2",