@credal/actions 0.2.136 → 0.2.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/pdf.js +7 -21
- package/package.json +2 -3
package/dist/utils/pdf.js
CHANGED
|
@@ -7,31 +7,17 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
import
|
|
11
|
-
// Set global DOMMatrix
|
|
12
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
13
|
-
global.DOMMatrix = DOMMatrix;
|
|
14
|
-
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
10
|
+
import { extractText } from "unpdf";
|
|
15
11
|
export function extractTextFromPdf(input) {
|
|
16
12
|
return __awaiter(this, void 0, void 0, function* () {
|
|
17
|
-
//
|
|
13
|
+
// Normalize input into Uint8Array
|
|
18
14
|
const data = input instanceof Uint8Array && !(typeof Buffer !== "undefined" && Buffer.isBuffer(input))
|
|
19
15
|
? input
|
|
20
16
|
: typeof Buffer !== "undefined" && Buffer.isBuffer(input)
|
|
21
|
-
? new Uint8Array(input)
|
|
22
|
-
: new Uint8Array(input);
|
|
23
|
-
//
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
const pages = [];
|
|
27
|
-
for (let i = 1; i <= pdf.numPages; i++) {
|
|
28
|
-
const page = yield pdf.getPage(i);
|
|
29
|
-
const content = yield page.getTextContent();
|
|
30
|
-
// content.items is typed as TextItem | TextMarkedContent
|
|
31
|
-
const strings = content.items.map(item => ("str" in item ? item.str : "")).join(" ");
|
|
32
|
-
pages.push(strings.trim());
|
|
33
|
-
}
|
|
34
|
-
yield pdf.destroy();
|
|
35
|
-
return pages.join("\n\n");
|
|
17
|
+
? new Uint8Array(input)
|
|
18
|
+
: new Uint8Array(input);
|
|
19
|
+
// Extract text using unpdf
|
|
20
|
+
const { text } = yield extractText(data);
|
|
21
|
+
return text.map(page => page.trim()).join("\n\n");
|
|
36
22
|
});
|
|
37
23
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@credal/actions",
|
|
3
|
-
"version": "0.2.136",
|
|
3
|
+
"version": "0.2.138",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "AI Actions by Credal AI",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -68,11 +68,10 @@
|
|
|
68
68
|
"mongodb": "^6.13.1",
|
|
69
69
|
"node-forge": "^1.3.1",
|
|
70
70
|
"p-limit": "^7.1.1",
|
|
71
|
-
"pdf2json": "^3.1.6",
|
|
72
|
-
"pdfjs-dist": "^5.4.149",
|
|
73
71
|
"resend": "^4.7.0",
|
|
74
72
|
"snowflake-sdk": "^2.0.2",
|
|
75
73
|
"ts-node": "^10.9.2",
|
|
74
|
+
"unpdf": "^1.2.2",
|
|
76
75
|
"uuid": "^11.1.0",
|
|
77
76
|
"zod": "^3.25.0"
|
|
78
77
|
}
|