@uploadista/flow-documents-unpdf 0.0.16-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+
2
+ 
3
+ > @uploadista/flow-documents-unpdf@0.0.1 build /Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/unpdf
4
+ > tsdown
5
+
6
+ ℹ tsdown v0.16.5 powered by rolldown v1.0.0-beta.50
7
+ ℹ entry: src/index.ts
8
+ ℹ tsconfig: tsconfig.json
9
+ ℹ Build start
10
+ ℹ Cleaning 4 files
11
+ ℹ dist/index.mjs 2.25 kB │ gzip: 0.76 kB
12
+ ℹ dist/index.mjs.map 4.30 kB │ gzip: 1.27 kB
13
+ ℹ dist/index.d.mts.map 0.18 kB │ gzip: 0.15 kB
14
+ ℹ dist/index.d.mts 0.39 kB │ gzip: 0.21 kB
15
+ ℹ 4 files, total: 7.12 kB
16
+ ✔ Build complete in 1311ms
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 uploadista
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # @uploadista/flow-documents-unpdf
2
+
3
+ unpdf-based text extraction plugin for Uploadista Flow.
4
+
5
+ ## Features
6
+
7
+ - Fast text extraction from searchable PDFs
8
+ - Modern TypeScript-first API
9
+ - Async/await support
10
+ - Multi-page text extraction with structure preservation
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pnpm add @uploadista/flow-documents-unpdf
16
+ ```
17
+
18
+ ## Usage
19
+
20
+ ```typescript
21
+ import { UnpdfDocumentPluginLive } from "@uploadista/flow-documents-unpdf";
22
+ import { Effect } from "effect";
23
+
24
+ // Provide the plugin for text extraction
25
+ const program = Effect.gen(function* () {
26
+ // Your flow logic here
27
+ }).pipe(Effect.provide(UnpdfDocumentPluginLive));
28
+ ```
29
+
30
+ ## Why unpdf?
31
+
32
+ - **Modern**: TypeScript-first library maintained as an alternative to pdf-parse
33
+ - **Reliable**: Uses pdfjs-dist under the hood (Mozilla's PDF.js)
34
+ - **Fast**: Optimized for text extraction performance
35
+ - **Universal**: Works across all JavaScript runtimes
36
+
37
+ ## Use Cases
38
+
39
+ - Extract text from searchable PDFs for indexing
40
+ - Parse PDF documents for content analysis
41
+ - Extract structured text with paragraph/line preservation
42
+
43
+ ## When NOT to use
44
+
45
+ - For scanned documents or image-based PDFs (use OCR instead)
46
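+ - For PDF metadata extraction, splitting, or merging — this plugin rejects those operations with a `DOCUMENT_PROCESSING_FAILED` error (use `@uploadista/flow-documents-pdflib` instead)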
47
+
48
+ ## License
49
+
50
+ MIT
@@ -0,0 +1,9 @@
1
+ import { DocumentPlugin } from "@uploadista/core/flow";
2
+ import { Layer } from "effect";
3
+ // Generated declarations: both exports share the type Layer.Layer<DocumentPlugin, never, never>. NOTE(review): per Effect's Layer<ROut, E, RIn> this presumably means "provides DocumentPlugin, cannot fail, needs no services" — confirm.
4
+ //#region src/document-plugin.d.ts
5
+ declare const unpdfDocumentPlugin: Layer.Layer<DocumentPlugin, never, never>;
6
+ declare const UnpdfDocumentPluginLive: Layer.Layer<DocumentPlugin, never, never>;
7
+ //#endregion
8
+ export { UnpdfDocumentPluginLive, unpdfDocumentPlugin };
9
+ //# sourceMappingURL=index.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":[],"mappings":";;;;cAKa,qBAAmB,KAAA,CAAA,MAAA;cAqFnB,yBAAuB,KAAA,CAAA,MAAA"}
package/dist/index.mjs ADDED
@@ -0,0 +1,45 @@
1
+ import { UploadistaError } from "@uploadista/core/errors";
2
+ import { DocumentPlugin } from "@uploadista/core/flow";
3
+ import { Effect, Layer } from "effect";
4
+ import { extractText } from "unpdf";
5
+ // Build output (tsdown) of src/document-plugin.ts: a DocumentPlugin layer where only extractText does real work; getMetadata, splitPdf and mergePdfs always fail.
6
+ //#region src/document-plugin.ts
7
+ const unpdfDocumentPlugin = Layer.succeed(DocumentPlugin, DocumentPlugin.of({
8
+ extractText: (input) => {
9
+ return Effect.gen(function* () {
10
+ const text = yield* Effect.tryPromise({
11
+ try: async () => {
12
+ return (await extractText(input, { mergePages: true })).text;
13
+ },
14
+ catch: (error) => { // map a rejection to an UploadistaError code by inspecting the lower-cased message
15
+ const errorMessage = error instanceof Error ? error.message : String(error);
16
+ if (errorMessage.toLowerCase().includes("encrypt") || errorMessage.toLowerCase().includes("password")) return UploadistaError.fromCode("PDF_ENCRYPTED", { cause: errorMessage }); // checked first, so e.g. "invalid password" is reported as encrypted
17
+ if (errorMessage.toLowerCase().includes("corrupt") || errorMessage.toLowerCase().includes("invalid") || errorMessage.toLowerCase().includes("malformed")) return UploadistaError.fromCode("PDF_CORRUPTED", { cause: errorMessage });
18
+ return UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", { cause: errorMessage }); // generic fallback
19
+ }
20
+ });
21
+ if (!text || text.trim().length === 0) yield* Effect.logWarning("No text extracted from PDF. This might be a scanned document or image-based PDF. Consider using OCR instead."); // warning only; the empty text is still returned
22
+ return text;
23
+ });
24
+ },
25
+ getMetadata: () => { // unsupported: always fails with DOCUMENT_PROCESSING_FAILED
26
+ return Effect.gen(function* () {
27
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", { cause: "unpdf does not support metadata extraction. Use @uploadista/flow-documents-pdflib instead." }).toEffect();
28
+ });
29
+ },
30
+ splitPdf: () => { // unsupported: always fails with DOCUMENT_PROCESSING_FAILED
31
+ return Effect.gen(function* () {
32
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", { cause: "unpdf does not support PDF splitting. Use @uploadista/flow-documents-pdflib instead." }).toEffect();
33
+ });
34
+ },
35
+ mergePdfs: () => { // unsupported: always fails with DOCUMENT_PROCESSING_FAILED
36
+ return Effect.gen(function* () {
37
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", { cause: "unpdf does not support PDF merging. Use @uploadista/flow-documents-pdflib instead." }).toEffect();
38
+ });
39
+ }
40
+ }));
41
+ const UnpdfDocumentPluginLive = unpdfDocumentPlugin; // alias: the same Layer value under a second name
42
+
43
+ //#endregion
44
+ export { UnpdfDocumentPluginLive, unpdfDocumentPlugin };
45
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":["import { UploadistaError } from \"@uploadista/core/errors\";\nimport { DocumentPlugin } from \"@uploadista/core/flow\";\nimport { Effect, Layer } from \"effect\";\nimport { extractText } from \"unpdf\";\n\nexport const unpdfDocumentPlugin = Layer.succeed(\n DocumentPlugin,\n DocumentPlugin.of({\n extractText: (input) => {\n return Effect.gen(function* () {\n const text = yield* Effect.tryPromise({\n try: async () => {\n const result = await extractText(input, {\n mergePages: true,\n });\n return result.text;\n },\n catch: (error) => {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n\n if (\n errorMessage.toLowerCase().includes(\"encrypt\") ||\n errorMessage.toLowerCase().includes(\"password\")\n ) {\n return UploadistaError.fromCode(\"PDF_ENCRYPTED\", {\n cause: errorMessage,\n });\n }\n\n if (\n errorMessage.toLowerCase().includes(\"corrupt\") ||\n errorMessage.toLowerCase().includes(\"invalid\") ||\n errorMessage.toLowerCase().includes(\"malformed\")\n ) {\n return UploadistaError.fromCode(\"PDF_CORRUPTED\", {\n cause: errorMessage,\n });\n }\n\n return UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause: errorMessage,\n });\n },\n });\n\n // If no text was extracted, log a warning\n if (!text || text.trim().length === 0) {\n yield* Effect.logWarning(\n \"No text extracted from PDF. This might be a scanned document or image-based PDF. Consider using OCR instead.\",\n );\n }\n\n return text;\n });\n },\n\n getMetadata: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support metadata extraction\n // Return an error indicating that pdf-lib should be used instead\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support metadata extraction. 
Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n splitPdf: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF splitting\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF splitting. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n mergePdfs: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF merging\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF merging. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n }),\n);\n\nexport const UnpdfDocumentPluginLive = unpdfDocumentPlugin;\n"],"mappings":";;;;;;AAKA,MAAa,sBAAsB,MAAM,QACvC,gBACA,eAAe,GAAG;CAChB,cAAc,UAAU;AACtB,SAAO,OAAO,IAAI,aAAa;GAC7B,MAAM,OAAO,OAAO,OAAO,WAAW;IACpC,KAAK,YAAY;AAIf,aAHe,MAAM,YAAY,OAAO,EACtC,YAAY,MACb,CAAC,EACY;;IAEhB,QAAQ,UAAU;KAChB,MAAM,eACJ,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AAExD,SACE,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,WAAW,CAE/C,QAAO,gBAAgB,SAAS,iBAAiB,EAC/C,OAAO,cACR,CAAC;AAGJ,SACE,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,YAAY,CAEhD,QAAO,gBAAgB,SAAS,iBAAiB,EAC/C,OAAO,cACR,CAAC;AAGJ,YAAO,gBAAgB,SAAS,8BAA8B,EAC5D,OAAO,cACR,CAAC;;IAEL,CAAC;AAGF,OAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,WAAW,EAClC,QAAO,OAAO,WACZ,+GACD;AAGH,UAAO;IACP;;CAGJ,mBAAmB;AACjB,SAAO,OAAO,IAAI,aAAa;AAG7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,8FACH,CAAC,CAAC,UAAU;IACb;;CAGJ,gBAAgB;AACd,SAAO,OAAO,IAAI,aAAa;AAE7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,wFACH,CAAC,CAAC,UAAU;IACb;;CAGJ,iBAAiB;AACf,SAAO,OAAO,IAAI,aAAa;AAE7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,sFACH,CAAC,CAAC,UAAU;IACb;;CAEL,CAAC,CACH;AAED,MAAa,0BAA0B"}
package/package.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "name": "@uploadista/flow-documents-unpdf",
3
+ "type": "module",
4
+ "version": "0.0.16-beta.2",
5
+ "description": "unpdf plugin for Uploadista document text extraction",
6
+ "license": "MIT",
7
+ "author": "Uploadista",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.mts",
11
+ "import": "./dist/index.mjs",
12
+ "require": "./dist/index.cjs",
13
+ "default": "./dist/index.mjs"
14
+ }
15
+ },
16
+ "dependencies": {
17
+ "effect": "3.19.4",
18
+ "unpdf": "^1.4.0",
19
+ "@uploadista/core": "0.0.16-beta.2"
20
+ },
21
+ "devDependencies": {
22
+ "@types/node": "24.10.1",
23
+ "tsdown": "0.16.5",
24
+ "@uploadista/typescript-config": "0.0.16-beta.2"
25
+ },
26
+ "scripts": {
27
+ "build": "tsdown",
28
+ "format": "biome format --write ./src",
29
+ "lint": "biome lint --write ./src",
30
+ "check": "biome check --write ./src"
31
+ }
32
+ }
@@ -0,0 +1,91 @@
1
+ import { UploadistaError } from "@uploadista/core/errors";
2
+ import { DocumentPlugin } from "@uploadista/core/flow";
3
+ import { Effect, Layer } from "effect";
4
+ import { extractText } from "unpdf";
5
+ // DocumentPlugin layer backed by unpdf: extractText is implemented; getMetadata, splitPdf and mergePdfs always fail with DOCUMENT_PROCESSING_FAILED.
6
+ export const unpdfDocumentPlugin = Layer.succeed(
7
+ DocumentPlugin,
8
+ DocumentPlugin.of({
9
+ extractText: (input) => {
10
+ return Effect.gen(function* () {
11
+ const text = yield* Effect.tryPromise({
12
+ try: async () => {
13
+ const result = await extractText(input, {
14
+ mergePages: true, // NOTE(review): presumably makes unpdf return one merged string rather than per-page text — confirm against unpdf docs
15
+ });
16
+ return result.text;
17
+ },
18
+ catch: (error) => {
19
+ const errorMessage =
20
+ error instanceof Error ? error.message : String(error);
21
+ // Only the message string is forwarded as `cause`. Classification is by lower-cased substring; the encrypt/password check runs first, so e.g. "invalid password" maps to PDF_ENCRYPTED.
22
+ if (
23
+ errorMessage.toLowerCase().includes("encrypt") ||
24
+ errorMessage.toLowerCase().includes("password")
25
+ ) {
26
+ return UploadistaError.fromCode("PDF_ENCRYPTED", {
27
+ cause: errorMessage,
28
+ });
29
+ }
30
+ // Messages mentioning corrupt / invalid / malformed map to PDF_CORRUPTED.
31
+ if (
32
+ errorMessage.toLowerCase().includes("corrupt") ||
33
+ errorMessage.toLowerCase().includes("invalid") ||
34
+ errorMessage.toLowerCase().includes("malformed")
35
+ ) {
36
+ return UploadistaError.fromCode("PDF_CORRUPTED", {
37
+ cause: errorMessage,
38
+ });
39
+ }
40
+ // Anything else falls back to the generic DOCUMENT_PROCESSING_FAILED code.
41
+ return UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", {
42
+ cause: errorMessage,
43
+ });
44
+ },
45
+ });
46
+
47
+ // Empty or whitespace-only text is not treated as a failure: log a warning and still return the text
48
+ if (!text || text.trim().length === 0) {
49
+ yield* Effect.logWarning(
50
+ "No text extracted from PDF. This might be a scanned document or image-based PDF. Consider using OCR instead.",
51
+ );
52
+ }
53
+
54
+ return text;
55
+ });
56
+ },
57
+
58
+ getMetadata: () => {
59
+ return Effect.gen(function* () {
60
+ // Metadata extraction is not implemented by this plugin
61
+ // Always fail, with a cause that points callers at @uploadista/flow-documents-pdflib
62
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", {
63
+ cause:
64
+ "unpdf does not support metadata extraction. Use @uploadista/flow-documents-pdflib instead.",
65
+ }).toEffect();
66
+ });
67
+ },
68
+
69
+ splitPdf: () => {
70
+ return Effect.gen(function* () {
71
+ // PDF splitting is not implemented by this plugin; always fail and point at the pdflib plugin
72
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", {
73
+ cause:
74
+ "unpdf does not support PDF splitting. Use @uploadista/flow-documents-pdflib instead.",
75
+ }).toEffect();
76
+ });
77
+ },
78
+
79
+ mergePdfs: () => {
80
+ return Effect.gen(function* () {
81
+ // PDF merging is not implemented by this plugin; always fail and point at the pdflib plugin
82
+ return yield* UploadistaError.fromCode("DOCUMENT_PROCESSING_FAILED", {
83
+ cause:
84
+ "unpdf does not support PDF merging. Use @uploadista/flow-documents-pdflib instead.",
85
+ }).toEffect();
86
+ });
87
+ },
88
+ }),
89
+ );
90
+ // Alias of unpdfDocumentPlugin: the same Layer value exported under a second name.
91
+ export const UnpdfDocumentPluginLive = unpdfDocumentPlugin;
package/src/index.ts ADDED
@@ -0,0 +1 @@
1
+ export { unpdfDocumentPlugin, UnpdfDocumentPluginLive } from "./document-plugin";
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "extends": "@uploadista/typescript-config/server.json",
3
+ "compilerOptions": {
4
+ "baseUrl": "./",
5
+ "paths": {
6
+ "@/*": ["./src/*"]
7
+ },
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "lib": ["ESNext", "DOM", "DOM.Iterable"],
11
+ "types": []
12
+ },
13
+ "include": ["src"]
14
+ }