@uploadista/flow-documents-unpdf 0.0.18-beta.9 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +13 -5
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +2 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -4
- package/src/document-plugin.ts +6 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
|
-
> @uploadista/flow-documents-unpdf@0.0.18-beta.
|
|
3
|
+
> @uploadista/flow-documents-unpdf@0.0.18-beta.17 build /Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/unpdf
|
|
4
4
|
> tsdown
|
|
5
5
|
|
|
6
6
|
[34mℹ[39m tsdown [2mv0.16.8[22m powered by rolldown [2mv1.0.0-beta.52[22m
|
|
@@ -8,9 +8,17 @@
|
|
|
8
8
|
[34mℹ[39m tsconfig: [34mtsconfig.json[39m
|
|
9
9
|
[34mℹ[39m Build start
|
|
10
10
|
[34mℹ[39m Cleaning 4 files
|
|
11
|
-
[34mℹ[39m [2mdist/[22m[1mindex.mjs[22m [2m2.
|
|
12
|
-
[34mℹ[39m [2mdist/[22mindex.mjs.map [2m4.
|
|
11
|
+
[34mℹ[39m [2mdist/[22m[1mindex.mjs[22m [2m2.41 kB[22m [2m│ gzip: 0.83 kB[22m
|
|
12
|
+
[34mℹ[39m [2mdist/[22mindex.mjs.map [2m4.58 kB[22m [2m│ gzip: 1.38 kB[22m
|
|
13
13
|
[34mℹ[39m [2mdist/[22mindex.d.mts.map [2m0.18 kB[22m [2m│ gzip: 0.15 kB[22m
|
|
14
14
|
[34mℹ[39m [2mdist/[22m[32m[1mindex.d.mts[22m[39m [2m0.39 kB[22m [2m│ gzip: 0.21 kB[22m
|
|
15
|
-
[34mℹ[39m 4 files, total: 7.
|
|
16
|
-
[
|
|
15
|
+
[34mℹ[39m 4 files, total: 7.56 kB
|
|
16
|
+
[33m[UNRESOLVED_IMPORT] Warning:[0m Could not resolve '@uploadista/observability' in src/document-plugin.ts
|
|
17
|
+
[38;5;246m╭[0m[38;5;246m─[0m[38;5;246m[[0m src/document-plugin.ts:3:35 [38;5;246m][0m
|
|
18
|
+
[38;5;246m│[0m
|
|
19
|
+
[38;5;246m3 │[0m [38;5;249mi[0m[38;5;249mm[0m[38;5;249mp[0m[38;5;249mo[0m[38;5;249mr[0m[38;5;249mt[0m[38;5;249m [0m[38;5;249m{[0m[38;5;249m [0m[38;5;249mw[0m[38;5;249mi[0m[38;5;249mt[0m[38;5;249mh[0m[38;5;249mO[0m[38;5;249mp[0m[38;5;249me[0m[38;5;249mr[0m[38;5;249ma[0m[38;5;249mt[0m[38;5;249mi[0m[38;5;249mo[0m[38;5;249mn[0m[38;5;249mS[0m[38;5;249mp[0m[38;5;249ma[0m[38;5;249mn[0m[38;5;249m [0m[38;5;249m}[0m[38;5;249m [0m[38;5;249mf[0m[38;5;249mr[0m[38;5;249mo[0m[38;5;249mm[0m[38;5;249m [0m"@uploadista/observability"[38;5;249m;[0m
|
|
20
|
+
[38;5;240m │[0m ─────────────┬─────────────
|
|
21
|
+
[38;5;240m │[0m ╰─────────────── Module not found, treating it as an external dependency
|
|
22
|
+
[38;5;246m───╯[0m
|
|
23
|
+
|
|
24
|
+
[32m✔[39m Build complete in [32m4744ms[39m
|
package/dist/index.d.mts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":[],"mappings":";;;;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":[],"mappings":";;;;cAMa,qBAAmB,KAAA,CAAA,MAAA;cAyFnB,yBAAuB,KAAA,CAAA,MAAA"}
|
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { UploadistaError } from "@uploadista/core/errors";
|
|
2
2
|
import { DocumentPlugin } from "@uploadista/core/flow";
|
|
3
|
+
import { withOperationSpan } from "@uploadista/observability";
|
|
3
4
|
import { Effect, Layer } from "effect";
|
|
4
5
|
import { extractText } from "unpdf";
|
|
5
6
|
|
|
@@ -20,7 +21,7 @@ const unpdfDocumentPlugin = Layer.succeed(DocumentPlugin, DocumentPlugin.of({
|
|
|
20
21
|
});
|
|
21
22
|
if (!text || text.trim().length === 0) yield* Effect.logWarning("No text extracted from PDF. This might be a scanned document or image-based PDF. Consider using OCR instead.");
|
|
22
23
|
return text;
|
|
23
|
-
});
|
|
24
|
+
}).pipe(withOperationSpan("document", "extract-text", { "document.input_size": input.byteLength }));
|
|
24
25
|
},
|
|
25
26
|
getMetadata: () => {
|
|
26
27
|
return Effect.gen(function* () {
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":["import { UploadistaError } from \"@uploadista/core/errors\";\nimport { DocumentPlugin } from \"@uploadista/core/flow\";\nimport { Effect, Layer } from \"effect\";\nimport { extractText } from \"unpdf\";\n\nexport const unpdfDocumentPlugin = Layer.succeed(\n DocumentPlugin,\n DocumentPlugin.of({\n extractText: (input) => {\n return Effect.gen(function* () {\n const text = yield* Effect.tryPromise({\n try: async () => {\n const result = await extractText(input, {\n mergePages: true,\n });\n return result.text;\n },\n catch: (error) => {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n\n if (\n errorMessage.toLowerCase().includes(\"encrypt\") ||\n errorMessage.toLowerCase().includes(\"password\")\n ) {\n return UploadistaError.fromCode(\"PDF_ENCRYPTED\", {\n cause: errorMessage,\n });\n }\n\n if (\n errorMessage.toLowerCase().includes(\"corrupt\") ||\n errorMessage.toLowerCase().includes(\"invalid\") ||\n errorMessage.toLowerCase().includes(\"malformed\")\n ) {\n return UploadistaError.fromCode(\"PDF_CORRUPTED\", {\n cause: errorMessage,\n });\n }\n\n return UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause: errorMessage,\n });\n },\n });\n\n // If no text was extracted, log a warning\n if (!text || text.trim().length === 0) {\n yield* Effect.logWarning(\n \"No text extracted from PDF. This might be a scanned document or image-based PDF. Consider using OCR instead.\",\n );\n }\n\n return text;\n });\n },\n\n getMetadata: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support metadata extraction\n // Return an error indicating that pdf-lib should be used instead\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support metadata extraction. 
Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n splitPdf: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF splitting\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF splitting. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n mergePdfs: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF merging\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF merging. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n }),\n);\n\nexport const UnpdfDocumentPluginLive = unpdfDocumentPlugin;\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/document-plugin.ts"],"sourcesContent":["import { UploadistaError } from \"@uploadista/core/errors\";\nimport { DocumentPlugin } from \"@uploadista/core/flow\";\nimport { withOperationSpan } from \"@uploadista/observability\";\nimport { Effect, Layer } from \"effect\";\nimport { extractText } from \"unpdf\";\n\nexport const unpdfDocumentPlugin = Layer.succeed(\n DocumentPlugin,\n DocumentPlugin.of({\n extractText: (input) => {\n return Effect.gen(function* () {\n const text = yield* Effect.tryPromise({\n try: async () => {\n const result = await extractText(input, {\n mergePages: true,\n });\n return result.text;\n },\n catch: (error) => {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n\n if (\n errorMessage.toLowerCase().includes(\"encrypt\") ||\n errorMessage.toLowerCase().includes(\"password\")\n ) {\n return UploadistaError.fromCode(\"PDF_ENCRYPTED\", {\n cause: errorMessage,\n });\n }\n\n if (\n errorMessage.toLowerCase().includes(\"corrupt\") ||\n errorMessage.toLowerCase().includes(\"invalid\") ||\n errorMessage.toLowerCase().includes(\"malformed\")\n ) {\n return UploadistaError.fromCode(\"PDF_CORRUPTED\", {\n cause: errorMessage,\n });\n }\n\n return UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause: errorMessage,\n });\n },\n });\n\n // If no text was extracted, log a warning\n if (!text || text.trim().length === 0) {\n yield* Effect.logWarning(\n \"No text extracted from PDF. This might be a scanned document or image-based PDF. 
Consider using OCR instead.\",\n );\n }\n\n return text;\n }).pipe(\n withOperationSpan(\"document\", \"extract-text\", {\n \"document.input_size\": input.byteLength,\n }),\n );\n },\n\n getMetadata: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support metadata extraction\n // Return an error indicating that pdf-lib should be used instead\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support metadata extraction. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n splitPdf: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF splitting\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF splitting. Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n\n mergePdfs: () => {\n return Effect.gen(function* () {\n // unpdf doesn't support PDF merging\n return yield* UploadistaError.fromCode(\"DOCUMENT_PROCESSING_FAILED\", {\n cause:\n \"unpdf does not support PDF merging. 
Use @uploadista/flow-documents-pdflib instead.\",\n }).toEffect();\n });\n },\n }),\n);\n\nexport const UnpdfDocumentPluginLive = unpdfDocumentPlugin;\n"],"mappings":";;;;;;;AAMA,MAAa,sBAAsB,MAAM,QACvC,gBACA,eAAe,GAAG;CAChB,cAAc,UAAU;AACtB,SAAO,OAAO,IAAI,aAAa;GAC7B,MAAM,OAAO,OAAO,OAAO,WAAW;IACpC,KAAK,YAAY;AAIf,aAHe,MAAM,YAAY,OAAO,EACtC,YAAY,MACb,CAAC,EACY;;IAEhB,QAAQ,UAAU;KAChB,MAAM,eACJ,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AAExD,SACE,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,WAAW,CAE/C,QAAO,gBAAgB,SAAS,iBAAiB,EAC/C,OAAO,cACR,CAAC;AAGJ,SACE,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,UAAU,IAC9C,aAAa,aAAa,CAAC,SAAS,YAAY,CAEhD,QAAO,gBAAgB,SAAS,iBAAiB,EAC/C,OAAO,cACR,CAAC;AAGJ,YAAO,gBAAgB,SAAS,8BAA8B,EAC5D,OAAO,cACR,CAAC;;IAEL,CAAC;AAGF,OAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,WAAW,EAClC,QAAO,OAAO,WACZ,+GACD;AAGH,UAAO;IACP,CAAC,KACD,kBAAkB,YAAY,gBAAgB,EAC5C,uBAAuB,MAAM,YAC9B,CAAC,CACH;;CAGH,mBAAmB;AACjB,SAAO,OAAO,IAAI,aAAa;AAG7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,8FACH,CAAC,CAAC,UAAU;IACb;;CAGJ,gBAAgB;AACd,SAAO,OAAO,IAAI,aAAa;AAE7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,wFACH,CAAC,CAAC,UAAU;IACb;;CAGJ,iBAAiB;AACf,SAAO,OAAO,IAAI,aAAa;AAE7B,UAAO,OAAO,gBAAgB,SAAS,8BAA8B,EACnE,OACE,sFACH,CAAC,CAAC,UAAU;IACb;;CAEL,CAAC,CACH;AAED,MAAa,0BAA0B"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uploadista/flow-documents-unpdf",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.0.18-beta.9",
|
|
4
|
+
"version": "0.0.18",
|
|
5
5
|
"description": "unpdf plugin for Uploadista document text extraction",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Uploadista",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
17
|
"unpdf": "^1.4.0",
|
|
18
|
-
"@uploadista/core": "0.0.18
|
|
18
|
+
"@uploadista/core": "0.0.18"
|
|
19
19
|
},
|
|
20
20
|
"peerDependencies": {
|
|
21
21
|
"effect": "^3.0.0"
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"effect": "3.19.8",
|
|
27
27
|
"pdf-lib": "^1.17.1",
|
|
28
28
|
"tsdown": "0.16.8",
|
|
29
|
-
"vitest": "4.0.
|
|
30
|
-
"@uploadista/typescript-config": "0.0.18
|
|
29
|
+
"vitest": "4.0.15",
|
|
30
|
+
"@uploadista/typescript-config": "0.0.18"
|
|
31
31
|
},
|
|
32
32
|
"scripts": {
|
|
33
33
|
"build": "tsdown",
|
package/src/document-plugin.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { UploadistaError } from "@uploadista/core/errors";
|
|
2
2
|
import { DocumentPlugin } from "@uploadista/core/flow";
|
|
3
|
+
import { withOperationSpan } from "@uploadista/observability";
|
|
3
4
|
import { Effect, Layer } from "effect";
|
|
4
5
|
import { extractText } from "unpdf";
|
|
5
6
|
|
|
@@ -52,7 +53,11 @@ export const unpdfDocumentPlugin = Layer.succeed(
|
|
|
52
53
|
}
|
|
53
54
|
|
|
54
55
|
return text;
|
|
55
|
-
})
|
|
56
|
+
}).pipe(
|
|
57
|
+
withOperationSpan("document", "extract-text", {
|
|
58
|
+
"document.input_size": input.byteLength,
|
|
59
|
+
}),
|
|
60
|
+
);
|
|
56
61
|
},
|
|
57
62
|
|
|
58
63
|
getMetadata: () => {
|