@credal/actions 0.2.136 → 0.2.138

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/utils/pdf.js +7 -21
  2. package/package.json +2 -3
package/dist/utils/pdf.js CHANGED
@@ -7,31 +7,17 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import DOMMatrix from "@thednp/dommatrix";
11
- // Set global DOMMatrix
12
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
13
- global.DOMMatrix = DOMMatrix;
14
- import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
10
+ import { extractText } from "unpdf";
15
11
  export function extractTextFromPdf(input) {
16
12
  return __awaiter(this, void 0, void 0, function* () {
17
- // Convert Buffer or ArrayBuffer -> plain Uint8Array
13
+ // Normalize input into Uint8Array
18
14
  const data = input instanceof Uint8Array && !(typeof Buffer !== "undefined" && Buffer.isBuffer(input))
19
15
  ? input
20
16
  : typeof Buffer !== "undefined" && Buffer.isBuffer(input)
21
- ? new Uint8Array(input) // copies bytes out of the Buffer
22
- : new Uint8Array(input); // ArrayBuffer case
23
- // Load PDF
24
- const loadingTask = getDocument({ data });
25
- const pdf = yield loadingTask.promise;
26
- const pages = [];
27
- for (let i = 1; i <= pdf.numPages; i++) {
28
- const page = yield pdf.getPage(i);
29
- const content = yield page.getTextContent();
30
- // content.items is typed as TextItem | TextMarkedContent
31
- const strings = content.items.map(item => ("str" in item ? item.str : "")).join(" ");
32
- pages.push(strings.trim());
33
- }
34
- yield pdf.destroy();
35
- return pages.join("\n\n");
17
+ ? new Uint8Array(input)
18
+ : new Uint8Array(input);
19
+ // Extract text using unpdf
20
+ const { text } = yield extractText(data);
21
+ return text.map(page => page.trim()).join("\n\n");
36
22
  });
37
23
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@credal/actions",
3
- "version": "0.2.136",
3
+ "version": "0.2.138",
4
4
  "type": "module",
5
5
  "description": "AI Actions by Credal AI",
6
6
  "sideEffects": false,
@@ -68,11 +68,10 @@
68
68
  "mongodb": "^6.13.1",
69
69
  "node-forge": "^1.3.1",
70
70
  "p-limit": "^7.1.1",
71
- "pdf2json": "^3.1.6",
72
- "pdfjs-dist": "^5.4.149",
73
71
  "resend": "^4.7.0",
74
72
  "snowflake-sdk": "^2.0.2",
75
73
  "ts-node": "^10.9.2",
74
+ "unpdf": "^1.2.2",
76
75
  "uuid": "^11.1.0",
77
76
  "zod": "^3.25.0"
78
77
  }