@uploadista/flow-documents-nodes 0.0.16-beta.2 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +26 -9
- package/dist/index-DN0piYEv.d.cts +62 -0
- package/dist/index-DN0piYEv.d.cts.map +1 -0
- package/dist/index-aD9vy0kH.d.mts +62 -0
- package/dist/index-aD9vy0kH.d.mts.map +1 -0
- package/dist/index.cjs +5 -0
- package/dist/index.d.cts +1178 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +37 -36
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +3 -393
- package/dist/index.mjs.map +1 -1
- package/dist/types/index.cjs +1 -0
- package/dist/types/index.d.cts +2 -0
- package/dist/types/index.d.mts +2 -0
- package/dist/types/index.mjs +1 -0
- package/dist/types-CdmvDfq8.cjs +1 -0
- package/dist/types-rDFmPO7S.mjs +2 -0
- package/dist/types-rDFmPO7S.mjs.map +1 -0
- package/package.json +14 -4
- package/src/index.ts +3 -0
- package/src/types/convert-to-markdown-node.ts +12 -0
- package/src/types/describe-document-node.ts +7 -0
- package/src/types/extract-text-node.ts +5 -0
- package/src/types/index.ts +23 -0
- package/src/types/merge-pdf-node.ts +7 -0
- package/src/types/ocr-node.ts +17 -0
- package/src/types/split-pdf-node.ts +9 -0
- package/tests/document-nodes.test.ts +318 -0
- package/tsdown.config.ts +12 -0
- package/vitest.config.ts +21 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,16 +1,33 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
|
-
> @uploadista/flow-documents-nodes@0.0.
|
|
3
|
+
> @uploadista/flow-documents-nodes@0.0.16-beta.3 build /Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/nodes
|
|
4
4
|
> tsdown
|
|
5
5
|
|
|
6
6
|
[34mℹ[39m tsdown [2mv0.16.5[22m powered by rolldown [2mv1.0.0-beta.50[22m
|
|
7
|
-
[34mℹ[39m
|
|
7
|
+
[34mℹ[39m Using tsdown config: [4m/Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/nodes/tsdown.config.ts[24m
|
|
8
|
+
[34mℹ[39m entry: [34msrc/index.ts, src/types/index.ts[39m
|
|
8
9
|
[34mℹ[39m tsconfig: [34mtsconfig.json[39m
|
|
9
10
|
[34mℹ[39m Build start
|
|
10
|
-
[34mℹ[39m Cleaning
|
|
11
|
-
[34mℹ[39m [2mdist/[22m[1mindex.
|
|
12
|
-
[34mℹ[39m [2mdist/[
|
|
13
|
-
[34mℹ[39m [2mdist/[
|
|
14
|
-
[34mℹ[39m [
|
|
15
|
-
[34mℹ[39m
|
|
16
|
-
[
|
|
11
|
+
[34mℹ[39m Cleaning 19 files
|
|
12
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22m[1mindex.cjs[22m [2m10.23 kB[22m [2m│ gzip: 2.44 kB[22m
|
|
13
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22m[1mtypes/index.cjs[22m [2m 0.25 kB[22m [2m│ gzip: 0.15 kB[22m
|
|
14
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22mtypes-CdmvDfq8.cjs [2m 1.09 kB[22m [2m│ gzip: 0.36 kB[22m
|
|
15
|
+
[34mℹ[39m [33m[CJS][39m 3 files, total: 11.58 kB
|
|
16
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22m[1mindex.mjs[22m [2m 9.03 kB[22m [2m│ gzip: 2.51 kB[22m
|
|
17
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22m[1mtypes/index.mjs[22m [2m 0.25 kB[22m [2m│ gzip: 0.17 kB[22m
|
|
18
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22mindex.mjs.map [2m33.27 kB[22m [2m│ gzip: 5.62 kB[22m
|
|
19
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22mindex.d.mts.map [2m 6.75 kB[22m [2m│ gzip: 0.88 kB[22m
|
|
20
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22mtypes-rDFmPO7S.mjs.map [2m 2.57 kB[22m [2m│ gzip: 0.77 kB[22m
|
|
21
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22mindex-aD9vy0kH.d.mts.map [2m 1.02 kB[22m [2m│ gzip: 0.46 kB[22m
|
|
22
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22mtypes-rDFmPO7S.mjs [2m 0.70 kB[22m [2m│ gzip: 0.35 kB[22m
|
|
23
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22m[32m[1mindex.d.mts[22m[39m [2m33.26 kB[22m [2m│ gzip: 1.67 kB[22m
|
|
24
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22m[32m[1mtypes/index.d.mts[22m[39m [2m 0.63 kB[22m [2m│ gzip: 0.24 kB[22m
|
|
25
|
+
[34mℹ[39m [34m[ESM][39m [2mdist/[22m[32mindex-aD9vy0kH.d.mts[39m [2m 2.33 kB[22m [2m│ gzip: 0.64 kB[22m
|
|
26
|
+
[34mℹ[39m [34m[ESM][39m 10 files, total: 89.81 kB
|
|
27
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22mindex.d.cts.map [2m 6.75 kB[22m [2m│ gzip: 0.88 kB[22m
|
|
28
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22mindex-DN0piYEv.d.cts.map [2m 1.02 kB[22m [2m│ gzip: 0.46 kB[22m
|
|
29
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22m[32m[1mindex.d.cts[22m[39m [2m33.26 kB[22m [2m│ gzip: 1.67 kB[22m
|
|
30
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22m[32m[1mtypes/index.d.cts[22m[39m [2m 0.63 kB[22m [2m│ gzip: 0.23 kB[22m
|
|
31
|
+
[34mℹ[39m [33m[CJS][39m [2mdist/[22m[32mindex-DN0piYEv.d.cts[39m [2m 2.33 kB[22m [2m│ gzip: 0.64 kB[22m
|
|
32
|
+
[34mℹ[39m [33m[CJS][39m 5 files, total: 43.99 kB
|
|
33
|
+
[32m✔[39m Build complete in [32m5676ms[39m
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
//#region src/types/convert-to-markdown-node.d.ts
|
|
4
|
+
declare const convertToMarkdownParamsSchema: z.ZodObject<{
|
|
5
|
+
credentialId: z.ZodOptional<z.ZodString>;
|
|
6
|
+
resolution: z.ZodOptional<z.ZodEnum<{
|
|
7
|
+
tiny: "tiny";
|
|
8
|
+
small: "small";
|
|
9
|
+
base: "base";
|
|
10
|
+
gundam: "gundam";
|
|
11
|
+
large: "large";
|
|
12
|
+
}>>;
|
|
13
|
+
}, z.core.$strip>;
|
|
14
|
+
type ConvertToMarkdownParams = z.infer<typeof convertToMarkdownParamsSchema>;
|
|
15
|
+
//#endregion
|
|
16
|
+
//#region src/types/describe-document-node.d.ts
|
|
17
|
+
declare const describeDocumentParamsSchema: z.ZodObject<{}, z.core.$strip>;
|
|
18
|
+
type DescribeDocumentParams = z.infer<typeof describeDocumentParamsSchema>;
|
|
19
|
+
//#endregion
|
|
20
|
+
//#region src/types/extract-text-node.d.ts
|
|
21
|
+
declare const extractTextParamsSchema: z.ZodObject<{}, z.core.$strip>;
|
|
22
|
+
type ExtractTextParams = z.infer<typeof extractTextParamsSchema>;
|
|
23
|
+
//#endregion
|
|
24
|
+
//#region src/types/merge-pdf-node.d.ts
|
|
25
|
+
declare const mergePdfParamsSchema: z.ZodObject<{
|
|
26
|
+
inputCount: z.ZodOptional<z.ZodNumber>;
|
|
27
|
+
}, z.core.$strip>;
|
|
28
|
+
type MergePdfParams = z.infer<typeof mergePdfParamsSchema>;
|
|
29
|
+
//#endregion
|
|
30
|
+
//#region src/types/ocr-node.d.ts
|
|
31
|
+
declare const ocrParamsSchema: z.ZodObject<{
|
|
32
|
+
taskType: z.ZodEnum<{
|
|
33
|
+
convertToMarkdown: "convertToMarkdown";
|
|
34
|
+
freeOcr: "freeOcr";
|
|
35
|
+
parseFigure: "parseFigure";
|
|
36
|
+
locateObject: "locateObject";
|
|
37
|
+
}>;
|
|
38
|
+
resolution: z.ZodOptional<z.ZodEnum<{
|
|
39
|
+
tiny: "tiny";
|
|
40
|
+
small: "small";
|
|
41
|
+
base: "base";
|
|
42
|
+
gundam: "gundam";
|
|
43
|
+
large: "large";
|
|
44
|
+
}>>;
|
|
45
|
+
credentialId: z.ZodOptional<z.ZodString>;
|
|
46
|
+
referenceText: z.ZodOptional<z.ZodString>;
|
|
47
|
+
}, z.core.$strip>;
|
|
48
|
+
type OcrParams = z.infer<typeof ocrParamsSchema>;
|
|
49
|
+
//#endregion
|
|
50
|
+
//#region src/types/split-pdf-node.d.ts
|
|
51
|
+
declare const splitPdfParamsSchema: z.ZodObject<{
|
|
52
|
+
mode: z.ZodEnum<{
|
|
53
|
+
range: "range";
|
|
54
|
+
individual: "individual";
|
|
55
|
+
}>;
|
|
56
|
+
startPage: z.ZodOptional<z.ZodNumber>;
|
|
57
|
+
endPage: z.ZodOptional<z.ZodNumber>;
|
|
58
|
+
}, z.core.$strip>;
|
|
59
|
+
type SplitPdfParams = z.infer<typeof splitPdfParamsSchema>;
|
|
60
|
+
//#endregion
|
|
61
|
+
export { MergePdfParams as a, extractTextParamsSchema as c, ConvertToMarkdownParams as d, convertToMarkdownParamsSchema as f, ocrParamsSchema as i, DescribeDocumentParams as l, splitPdfParamsSchema as n, mergePdfParamsSchema as o, OcrParams as r, ExtractTextParams as s, SplitPdfParams as t, describeDocumentParamsSchema as u };
|
|
62
|
+
//# sourceMappingURL=index-DN0piYEv.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-DN0piYEv.d.cts","names":[],"sources":["../src/types/convert-to-markdown-node.ts","../src/types/describe-document-node.ts","../src/types/extract-text-node.ts","../src/types/merge-pdf-node.ts","../src/types/ocr-node.ts","../src/types/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;cAEa,+BAA6B,CAAA,CAAA;;EAA7B,UAAA,eAAA,UAKX,CAAA;;;;;;EALwC,CAAA,CAAA,CAAA;CAAA,eAAA,CAAA;AAO9B,KAAA,uBAAA,GAA0B,CAAA,CAAE,KAC/B,CAAA,OAAA,6BADoC,CAAA;;;cCPhC,8BAA4B,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAE7B,sBAAA,GAAyB,CAAA,CAAE,aAC9B;;;cCHI,yBAAuB,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAExB,iBAAA,GAAoB,CAAA,CAAE,aAAa;;;cCFlC,sBAAoB,CAAA,CAAA;;AHAjC,CAAA,eAAa,CAAA;KGID,cAAA,GAAiB,CAAA,CAAE,aAAa;;;cCJ/B,iBAAe,CAAA,CAAA;;IJAf,iBAAA,EAAA,mBAKX;;;;;;IALwC,IAAA,EAAA,MAAA;IAAA,KAAA,EAAA,OAAA;IAO9B,IAAA,EAAA,MAAA;;;;ECPC,YAAA,eAAA,YAA2C,CAAA;EAE5C,aAAA,eAAsB,YACzB,CAAA;;KGWG,SAAA,GAAY,CAAA,CAAE,aAAa;;;cCd1B,sBAAoB,CAAA,CAAA;;ILApB,KAAA,EAAA,OAAA;;;;;;AAA6B,KKM9B,cAAA,GAAiB,CAAA,CAAE,KLNW,CAAA,OKME,oBLNF,CAAA"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
//#region src/types/convert-to-markdown-node.d.ts
|
|
4
|
+
declare const convertToMarkdownParamsSchema: z.ZodObject<{
|
|
5
|
+
credentialId: z.ZodOptional<z.ZodString>;
|
|
6
|
+
resolution: z.ZodOptional<z.ZodEnum<{
|
|
7
|
+
tiny: "tiny";
|
|
8
|
+
small: "small";
|
|
9
|
+
base: "base";
|
|
10
|
+
gundam: "gundam";
|
|
11
|
+
large: "large";
|
|
12
|
+
}>>;
|
|
13
|
+
}, z.core.$strip>;
|
|
14
|
+
type ConvertToMarkdownParams = z.infer<typeof convertToMarkdownParamsSchema>;
|
|
15
|
+
//#endregion
|
|
16
|
+
//#region src/types/describe-document-node.d.ts
|
|
17
|
+
declare const describeDocumentParamsSchema: z.ZodObject<{}, z.core.$strip>;
|
|
18
|
+
type DescribeDocumentParams = z.infer<typeof describeDocumentParamsSchema>;
|
|
19
|
+
//#endregion
|
|
20
|
+
//#region src/types/extract-text-node.d.ts
|
|
21
|
+
declare const extractTextParamsSchema: z.ZodObject<{}, z.core.$strip>;
|
|
22
|
+
type ExtractTextParams = z.infer<typeof extractTextParamsSchema>;
|
|
23
|
+
//#endregion
|
|
24
|
+
//#region src/types/merge-pdf-node.d.ts
|
|
25
|
+
declare const mergePdfParamsSchema: z.ZodObject<{
|
|
26
|
+
inputCount: z.ZodOptional<z.ZodNumber>;
|
|
27
|
+
}, z.core.$strip>;
|
|
28
|
+
type MergePdfParams = z.infer<typeof mergePdfParamsSchema>;
|
|
29
|
+
//#endregion
|
|
30
|
+
//#region src/types/ocr-node.d.ts
|
|
31
|
+
declare const ocrParamsSchema: z.ZodObject<{
|
|
32
|
+
taskType: z.ZodEnum<{
|
|
33
|
+
convertToMarkdown: "convertToMarkdown";
|
|
34
|
+
freeOcr: "freeOcr";
|
|
35
|
+
parseFigure: "parseFigure";
|
|
36
|
+
locateObject: "locateObject";
|
|
37
|
+
}>;
|
|
38
|
+
resolution: z.ZodOptional<z.ZodEnum<{
|
|
39
|
+
tiny: "tiny";
|
|
40
|
+
small: "small";
|
|
41
|
+
base: "base";
|
|
42
|
+
gundam: "gundam";
|
|
43
|
+
large: "large";
|
|
44
|
+
}>>;
|
|
45
|
+
credentialId: z.ZodOptional<z.ZodString>;
|
|
46
|
+
referenceText: z.ZodOptional<z.ZodString>;
|
|
47
|
+
}, z.core.$strip>;
|
|
48
|
+
type OcrParams = z.infer<typeof ocrParamsSchema>;
|
|
49
|
+
//#endregion
|
|
50
|
+
//#region src/types/split-pdf-node.d.ts
|
|
51
|
+
declare const splitPdfParamsSchema: z.ZodObject<{
|
|
52
|
+
mode: z.ZodEnum<{
|
|
53
|
+
range: "range";
|
|
54
|
+
individual: "individual";
|
|
55
|
+
}>;
|
|
56
|
+
startPage: z.ZodOptional<z.ZodNumber>;
|
|
57
|
+
endPage: z.ZodOptional<z.ZodNumber>;
|
|
58
|
+
}, z.core.$strip>;
|
|
59
|
+
type SplitPdfParams = z.infer<typeof splitPdfParamsSchema>;
|
|
60
|
+
//#endregion
|
|
61
|
+
export { MergePdfParams as a, extractTextParamsSchema as c, ConvertToMarkdownParams as d, convertToMarkdownParamsSchema as f, ocrParamsSchema as i, DescribeDocumentParams as l, splitPdfParamsSchema as n, mergePdfParamsSchema as o, OcrParams as r, ExtractTextParams as s, SplitPdfParams as t, describeDocumentParamsSchema as u };
|
|
62
|
+
//# sourceMappingURL=index-aD9vy0kH.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-aD9vy0kH.d.mts","names":[],"sources":["../src/types/convert-to-markdown-node.ts","../src/types/describe-document-node.ts","../src/types/extract-text-node.ts","../src/types/merge-pdf-node.ts","../src/types/ocr-node.ts","../src/types/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;cAEa,+BAA6B,CAAA,CAAA;;EAA7B,UAAA,eAAA,UAKX,CAAA;;;;;;EALwC,CAAA,CAAA,CAAA;CAAA,eAAA,CAAA;AAO9B,KAAA,uBAAA,GAA0B,CAAA,CAAE,KAC/B,CAAA,OAAA,6BADoC,CAAA;;;cCPhC,8BAA4B,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAE7B,sBAAA,GAAyB,CAAA,CAAE,aAC9B;;;cCHI,yBAAuB,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAExB,iBAAA,GAAoB,CAAA,CAAE,aAAa;;;cCFlC,sBAAoB,CAAA,CAAA;;AHAjC,CAAA,eAAa,CAAA;KGID,cAAA,GAAiB,CAAA,CAAE,aAAa;;;cCJ/B,iBAAe,CAAA,CAAA;;IJAf,iBAAA,EAAA,mBAKX;;;;;;IALwC,IAAA,EAAA,MAAA;IAAA,KAAA,EAAA,OAAA;IAO9B,IAAA,EAAA,MAAA;;;;ECPC,YAAA,eAAA,YAA2C,CAAA;EAE5C,aAAA,eAAsB,YACzB,CAAA;;KGWG,SAAA,GAAY,CAAA,CAAE,aAAa;;;cCd1B,sBAAoB,CAAA,CAAA;;ILApB,KAAA,EAAA,OAAA;;;;;;AAA6B,KKM9B,cAAA,GAAiB,CAAA,CAAE,KLNW,CAAA,OKME,oBLNF,CAAA"}
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
const e=require(`./types-CdmvDfq8.cjs`);let t=require(`@uploadista/core/errors`),n=require(`@uploadista/core/flow`),r=require(`@uploadista/core/types`),i=require(`@uploadista/core/upload`),a=require(`effect`),o=require(`zod`);function s(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*n.DocumentAiPlugin,l=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Convert to Markdown`,description:`Convert documents to Markdown format (intelligently uses OCR if needed)`,type:n.NodeType.process,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:u,clientId:d})=>a.Effect.gen(function*(){let f={flowId:i,nodeId:e,jobId:u};yield*a.Effect.logInfo(`Converting file ${r.id} to Markdown`);let p=yield*l.read(r.id,d),m=yield*s.extractText(p).pipe(a.Effect.either),h,g;if(a.Either.isRight(m)&&m.right.trim().length>0){let e=m.right;yield*a.Effect.logInfo(`Successfully extracted ${e.length} characters from searchable PDF`),h=e.split(`
|
|
2
|
+
|
|
3
|
+
`).map(e=>e.trim()).filter(e=>e.length>0).join(`
|
|
4
|
+
|
|
5
|
+
`),g=`text`,yield*a.Effect.logInfo(`Converted text to Markdown (${h.length} characters)`)}else{yield*a.Effect.logInfo(`Text extraction failed or returned empty, falling back to OCR`);let e=r.url;if(!e)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR-based markdown conversion`}).toEffect();let n={clientId:d,credentialId:o.credentialId};h=(yield*c.performOCR(e,{taskType:`convertToMarkdown`,resolution:o.resolution||`gundam`},n).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR for markdown conversion`}).toEffect()})))).extractedText,g=`ocr`,yield*a.Effect.logInfo(`Successfully converted scanned document to Markdown using OCR (${h.length} characters)`)}let{metadata:_}=(0,n.resolveUploadMetadata)(r.metadata),v={...r.metadata,..._,markdown:h,markdownSource:g};return yield*a.Effect.logInfo(`Successfully converted file ${r.id} to Markdown via ${g}`),(0,n.completeNodeExecution)({...r,metadata:v,flow:f})})})})}function c(e,o={}){return a.Effect.gen(function*(){let o=yield*n.DocumentPlugin,s=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Describe Document`,description:`Extract metadata from PDF documents`,type:n.NodeType.process,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:c};yield*a.Effect.logInfo(`Extracting metadata from PDF file ${r.id}`);let d=yield*s.read(r.id,l),f=yield*o.getMetadata(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract metadata`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract metadata`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,pageCount:f.pageCount,format:f.format,...f.author&&{author:f.author},...f.title&&{title:f.title},...f.subject&&{subject:f.subject},...f.creator&&{creator:f.creator},...f.creationDate&&{creationDate:f.creationDate},...f.modifiedDate&&{modifiedDate:f.modifiedDate},fileSize:f.fileSize};return yield*a.Effect.logInfo(`Successfully extracted metadata from file ${r.id}: ${f.pageCount} pages`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}function l(e,o={}){return a.Effect.gen(function*(){let o=yield*n.DocumentPlugin,s=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Extract Text`,description:`Extract text from searchable PDF documents`,type:n.NodeType.process,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:c};yield*a.Effect.logInfo(`Extracting text from PDF file ${r.id}`);let d=yield*s.read(r.id,l),f=yield*o.extractText(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract text`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract text`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,extractedText:f};return!f||f.trim().length===0?yield*a.Effect.logWarning(`No text extracted from file ${r.id}. This might be a scanned document. Consider using the OCR node instead.`):yield*a.Effect.logInfo(`Successfully extracted ${f.length} characters from file ${r.id}`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}const u=o.z.array(r.uploadFileSchema);function d(e,o={}){return a.Effect.gen(function*(){let o=yield*n.DocumentPlugin,s=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Merge PDFs`,description:`Merge multiple PDF documents into one`,type:n.NodeType.process,inputSchema:u,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:c};if(!Array.isArray(r))return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`Merge PDF node requires an array of files from a Merge utility node`}).toEffect();if(r.length===0)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`At least one PDF file is required for merging`}).toEffect();let d=[],f=0;for(let e of r){let t=yield*s.read(e.id,l);d.push(t);let r=(0,n.resolveUploadMetadata)(e.metadata).metadata;r?.pageCount&&typeof r.pageCount==`number`&&(f+=r.pageCount)}yield*a.Effect.logInfo(`Merging ${r.length} PDF files`);let p=yield*o.mergePdfs({pdfs:d}).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to merge PDFs`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to merge PDFs`}).toEffect()}))),m=r[0],{metadata:h}=(0,n.resolveUploadMetadata)(m.metadata),g=new ReadableStream({start(e){e.enqueue(p),e.close()}}),_=yield*s.upload({storageId:m.storage.id,size:p.byteLength,type:`application/pdf`,fileName:`merged-${r.length}-documents.pdf`,lastModified:0,metadata:JSON.stringify({...h,pageCount:f,mergedFrom:r.length}),flow:u},l,g),v={...h,pageCount:f,mergedFrom:r.length,fileName:`merged-${r.length}-documents.pdf`};return yield*a.Effect.logInfo(`Successfully merged ${r.length} PDFs into one document with ${f} pages`),(0,n.completeNodeExecution)({..._,metadata:v})})})})}function f(e,i){return a.Effect.gen(function*(){let o=yield*n.DocumentAiPlugin;return yield*(0,n.createFlowNode)({id:e,name:`OCR`,description:`Extract text from scanned documents using AI`,type:n.NodeType.process,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:s,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:s,nodeId:e,jobId:c},d=r.url;if(!d)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR operation`}).toEffect();yield*a.Effect.logInfo(`Starting OCR for file ${r.id} with task type: ${i.taskType}`);let f={clientId:l,credentialId:i.credentialId},p=yield*o.performOCR(d,{taskType:i.taskType,resolution:i.resolution,referenceText:i.referenceText},f).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR`}).toEffect()}))),{metadata:m}=(0,n.resolveUploadMetadata)(r.metadata),h={...r.metadata,...m,ocrText:p.extractedText,ocrFormat:p.format,ocrTaskType:i.taskType};return yield*a.Effect.logInfo(`Successfully completed OCR for file ${r.id}, extracted ${p.extractedText.length} characters`),(0,n.completeNodeExecution)({...r,metadata:h,flow:u})})})})}function p(e,o){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Split PDF`,description:`Split PDF into pages or page ranges`,type:n.NodeType.process,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:l,clientId:u})=>a.Effect.gen(function*(){let d={flowId:i,nodeId:e,jobId:l};yield*a.Effect.logInfo(`Splitting PDF file ${r.id} in ${o.mode} mode`);let f=yield*c.read(r.id,u),p=yield*s.splitPdf(f,o).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to split PDF`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to split PDF`}).toEffect()}))),{metadata:m}=(0,n.resolveUploadMetadata)(r.metadata);if(p.mode===`individual`){yield*a.Effect.logInfo(`Successfully split PDF into ${p.pdfs.length} individual pages`),yield*a.Effect.logWarning(`Individual page mode returns multiple files - flow engine support required`);let e=p.pdfs[0],t=new ReadableStream({start(t){t.enqueue(e),t.close()}}),i=yield*c.upload({storageId:r.storage.id,size:e.byteLength,type:`application/pdf`,fileName:`${m?.fileName||`document`}-page-1.pdf`,lastModified:0,metadata:JSON.stringify({...m,pageCount:1,splitMode:`individual`}),flow:d},u,t),o={...m,pageCount:1,splitMode:`individual`};return(0,n.completeNodeExecution)({...i,metadata:o})}let h=o.endPage&&o.startPage?o.endPage-o.startPage+1:1,g=p.pdf,_=new ReadableStream({start(e){e.enqueue(g),e.close()}}),v=yield*c.upload({storageId:r.storage.id,size:g.byteLength,type:`application/pdf`,fileName:`${m?.fileName||`document`}-pages-${o.startPage}-${o.endPage}.pdf`,lastModified:0,metadata:JSON.stringify({...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`}),flow:d},u,_),y={...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`};return yield*a.Effect.logInfo(`Successfully split PDF to pages ${o.startPage}-${o.endPage}`),(0,n.completeNodeExecution)({...v,metadata:y})})})})}exports.convertToMarkdownParamsSchema=e.o,exports.createConvertToMarkdownNode=s,exports.createDescribeDocumentNode=c,exports.createExtractTextNode=l,exports.createMergePdfNode=d,exports.createOcrNode=f,exports.createSplitPdfNode=p,exports.describeDocumentParamsSchema=e.a,exports.extractTextParamsSchema=e.i,exports.mergePdfParamsSchema=e.r,exports.ocrParamsSchema=e.n,exports.splitPdfParamsSchema=e.t;
|