@credal/actions 0.2.120 → 0.2.122
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/utils/pdf.d.ts +1 -1
- package/dist/utils/pdf.js +16 -27
- package/package.json +3 -1
package/dist/utils/pdf.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare function extractTextFromPdf(
|
|
1
|
+
export declare function extractTextFromPdf(input: ArrayBuffer | Uint8Array): Promise<string>;
|
package/dist/utils/pdf.js
CHANGED
|
@@ -7,34 +7,23 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
// npm i pdfjs-dist
|
|
11
|
+
import { getDocument } from "pdfjs-dist";
|
|
12
|
+
export function extractTextFromPdf(input) {
|
|
12
13
|
return __awaiter(this, void 0, void 0, function* () {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}).join("")).join("\n");
|
|
25
|
-
resolve(text);
|
|
26
|
-
}
|
|
27
|
-
catch (error) {
|
|
28
|
-
reject(error);
|
|
29
|
-
}
|
|
30
|
-
});
|
|
31
|
-
pdfParser.parseBuffer(Buffer.from(buffer));
|
|
32
|
-
});
|
|
33
|
-
return extractedText;
|
|
34
|
-
}
|
|
35
|
-
catch (error) {
|
|
36
|
-
console.error("Error extracting PDF text:", error);
|
|
37
|
-
throw error;
|
|
14
|
+
const data = input instanceof Uint8Array ? input : new Uint8Array(input);
|
|
15
|
+
// Load PDF
|
|
16
|
+
const loadingTask = getDocument({ data });
|
|
17
|
+
const pdf = yield loadingTask.promise;
|
|
18
|
+
const pages = [];
|
|
19
|
+
for (let i = 1; i <= pdf.numPages; i++) {
|
|
20
|
+
const page = yield pdf.getPage(i);
|
|
21
|
+
const content = yield page.getTextContent();
|
|
22
|
+
// content.items is typed as TextItem | TextMarkedContent
|
|
23
|
+
const strings = content.items.map(item => ("str" in item ? item.str : "")).join(" ");
|
|
24
|
+
pages.push(strings.trim());
|
|
38
25
|
}
|
|
26
|
+
yield pdf.destroy();
|
|
27
|
+
return pages.join("\n\n");
|
|
39
28
|
});
|
|
40
29
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@credal/actions",
|
|
3
|
-
"version": "0.2.120",
|
|
3
|
+
"version": "0.2.122",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "AI Actions by Credal AI",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"@types/jsonwebtoken": "^9.0.9",
|
|
34
34
|
"@types/node": "^22.10.1",
|
|
35
35
|
"@types/node-forge": "^1.3.11",
|
|
36
|
+
"@types/pdf-parse": "^1.1.5",
|
|
36
37
|
"@typescript-eslint/eslint-plugin": "^8.18.0",
|
|
37
38
|
"@typescript-eslint/parser": "^8.18.0",
|
|
38
39
|
"eslint": "^9.16.0",
|
|
@@ -68,6 +69,7 @@
|
|
|
68
69
|
"node-forge": "^1.3.1",
|
|
69
70
|
"p-limit": "^7.1.1",
|
|
70
71
|
"pdf2json": "^3.1.6",
|
|
72
|
+
"pdfjs-dist": "^5.4.149",
|
|
71
73
|
"resend": "^4.7.0",
|
|
72
74
|
"snowflake-sdk": "^2.0.2",
|
|
73
75
|
"ts-node": "^10.9.2",
|