@uploadista/flow-documents-nodes 0.0.20-beta.7 → 0.0.20-beta.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +11 -11
- package/README.md +186 -19
- package/dist/{index-CN8GG19X.d.cts → index-DN0piYEv.d.cts} +3 -3
- package/dist/{index-CN8GG19X.d.cts.map → index-DN0piYEv.d.cts.map} +1 -1
- package/dist/{index-DXwYQF9t.d.mts → index-aD9vy0kH.d.mts} +3 -3
- package/dist/{index-DXwYQF9t.d.mts.map → index-aD9vy0kH.d.mts.map} +1 -1
- package/dist/index.cjs +2 -2
- package/dist/index.d.cts +7 -7
- package/dist/index.d.mts +7 -7
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +2 -2
- package/dist/index.mjs.map +1 -1
- package/dist/types/index.d.cts +1 -1
- package/dist/types/index.d.mts +1 -1
- package/package.json +8 -8
- package/src/convert-to-markdown-node.ts +3 -3
- package/src/describe-document-node.ts +16 -10
- package/src/extract-text-node.ts +3 -3
- package/src/merge-pdf-node.ts +9 -5
- package/src/split-pdf-node.ts +23 -11
- package/tests/document-nodes.test.ts +25 -19
package/.turbo/turbo-build.log
CHANGED
@@ -1,9 +1,9 @@
 
 
 > @uploadista/flow-documents-nodes@0.0.20-beta.7 build /Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/nodes
-> tsdown
+> tsc --noEmit && tsdown
 
-ℹ tsdown v0.
+ℹ tsdown v0.18.0 powered by rolldown v1.0.0-beta.53
 ℹ config file: /Users/denislaboureyras/Documents/uploadista/dev/uploadista-workspace/uploadista-sdk/packages/flow/documents/nodes/tsdown.config.ts
 ℹ entry: src/index.ts, src/types/index.ts
 ℹ tsconfig: tsconfig.json
@@ -14,21 +14,21 @@
 ℹ [CJS] dist/types-Cz0s5Dxw.cjs  1.09 kB │ gzip: 0.36 kB
 ℹ [CJS] 3 files, total: 12.99 kB
 ℹ [CJS] dist/index.d.cts.map  6.99 kB │ gzip: 1.30 kB
-ℹ [CJS] dist/index-
+ℹ [CJS] dist/index-DN0piYEv.d.cts.map  1.02 kB │ gzip: 0.46 kB
 ℹ [CJS] dist/index.d.cts 36.29 kB │ gzip: 1.98 kB
-ℹ [CJS] dist/types/index.d.cts  0.63 kB │ gzip: 0.
-ℹ [CJS] dist/index-
+ℹ [CJS] dist/types/index.d.cts  0.63 kB │ gzip: 0.23 kB
+ℹ [CJS] dist/index-DN0piYEv.d.cts  2.33 kB │ gzip: 0.64 kB
 ℹ [CJS] 5 files, total: 47.26 kB
+✔ Build complete in 7399ms
 ℹ [ESM] dist/index.mjs 10.28 kB │ gzip: 2.87 kB
 ℹ [ESM] dist/types/index.mjs  0.25 kB │ gzip: 0.17 kB
-ℹ [ESM] dist/index.mjs.map 38.
+ℹ [ESM] dist/index.mjs.map 38.59 kB │ gzip: 6.66 kB
 ℹ [ESM] dist/index.d.mts.map  6.99 kB │ gzip: 1.30 kB
 ℹ [ESM] dist/types-wF9yqL-x.mjs.map  2.57 kB │ gzip: 0.77 kB
-ℹ [ESM] dist/index-
+ℹ [ESM] dist/index-aD9vy0kH.d.mts.map  1.02 kB │ gzip: 0.46 kB
 ℹ [ESM] dist/types-wF9yqL-x.mjs  0.70 kB │ gzip: 0.35 kB
 ℹ [ESM] dist/index.d.mts 36.29 kB │ gzip: 1.98 kB
 ℹ [ESM] dist/types/index.d.mts  0.63 kB │ gzip: 0.24 kB
-ℹ [ESM] dist/index-
-ℹ [ESM] 10 files, total: 99.
-✔ Build complete in
-✔ Build complete in 6388ms
+ℹ [ESM] dist/index-aD9vy0kH.d.mts  2.33 kB │ gzip: 0.64 kB
+ℹ [ESM] 10 files, total: 99.66 kB
+✔ Build complete in 7404ms
package/README.md
CHANGED
@@ -1,23 +1,16 @@
 # @uploadista/flow-documents-nodes
 
-Document processing nodes for Uploadista
-
-## Features
-
-- **OCR Node**: Extract text from scanned documents and images using AI
-- **Extract Text Node**: Extract plain text from searchable PDFs
-- **Split PDF Node**: Split PDFs by page range or into individual pages
-- **Merge PDF Node**: Combine multiple PDFs into a single document
-- **Describe Document Node**: Extract document metadata
-- **Convert to Markdown Node**: Convert documents to Markdown format
+Document processing nodes for Uploadista flows. Process PDFs with text extraction, OCR, splitting, merging, and conversion.
 
 ## Installation
 
 ```bash
+npm install @uploadista/flow-documents-nodes
+# or
 pnpm add @uploadista/flow-documents-nodes
 ```
 
-##
+## Quick Start
 
 ```typescript
 import {
@@ -28,29 +21,203 @@ import {
   createDescribeDocumentNode,
   createConvertToMarkdownNode,
 } from "@uploadista/flow-documents-nodes";
+```
+
+## Node Types
+
+### OCR Node
 
-
+AI-powered text extraction from scanned documents.
+
+```typescript
+import { createOcrNode } from "@uploadista/flow-documents-nodes";
+
+// Convert scanned document to markdown
 const ocrNode = yield* createOcrNode("ocr-1", {
   taskType: "convertToMarkdown",
   resolution: "gundam",
-  credentialId: "replicate-credential
+  credentialId: "my-replicate-credential",
+});
+
+// Free-form OCR for plain text extraction
+const freeOcrNode = yield* createOcrNode("ocr-2", {
+  taskType: "freeOcr",
+  resolution: "base",
+});
+
+// Locate specific content in document
+const locateNode = yield* createOcrNode("ocr-3", {
+  taskType: "locateObject",
+  referenceText: "Invoice Total",
+  resolution: "small",
+});
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `taskType` | `"convertToMarkdown" \| "freeOcr" \| "parseFigure" \| "locateObject"` | Yes | - | OCR task type |
+| `resolution` | `"tiny" \| "small" \| "base" \| "gundam" \| "large"` | No | - | Model resolution |
+| `credentialId` | `string` | No | - | AI service credential ID |
+| `referenceText` | `string` | No | - | Text to locate (for `locateObject` task) |
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
+
+#### Task Types
+
+| Task Type | Description |
+|-----------|-------------|
+| `convertToMarkdown` | Structured markdown output with headings, lists |
+| `freeOcr` | Unstructured plain text extraction |
+| `parseFigure` | Analyze charts and diagrams |
+| `locateObject` | Find specific content using reference text |
+
+### Extract Text Node
+
+Fast text extraction from searchable PDFs.
+
+```typescript
+import { createExtractTextNode } from "@uploadista/flow-documents-nodes";
+
+// Extract text from searchable PDF
+const extractNode = yield* createExtractTextNode("extract-1");
+
+// With keepOutput option
+const keepOutputNode = yield* createExtractTextNode("extract-2", {
+  keepOutput: true,
 });
+```
+
+#### Parameters
 
-
-
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
 
-
-
+**Output:** Adds `extractedText` to file metadata.
+
+### Split PDF Node
+
+Split PDFs by page range or into individual pages.
+
+```typescript
+import { createSplitPdfNode } from "@uploadista/flow-documents-nodes";
+
+// Extract pages 3-5 as single PDF
+const rangeNode = yield* createSplitPdfNode("split-1", {
   mode: "range",
-  startPage:
+  startPage: 3,
   endPage: 5,
 });
+
+// Split each page into separate PDF
+const individualNode = yield* createSplitPdfNode("split-2", {
+  mode: "individual",
+});
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `mode` | `"range" \| "individual"` | Yes | - | Split mode |
+| `startPage` | `number` | No | - | Start page (for range mode) |
+| `endPage` | `number` | No | - | End page (for range mode) |
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
+| `naming` | `FileNamingConfig` | No | - | File naming configuration |
+
+### Merge PDF Node
+
+Combine multiple PDFs into a single document.
+
+```typescript
+import { createMergePdfNode } from "@uploadista/flow-documents-nodes";
+
+// Merge PDFs with default settings
+const mergeNode = yield* createMergePdfNode("merge-1");
+
+// With custom naming
+const namedMergeNode = yield* createMergePdfNode("merge-2", {
+  naming: { mode: "auto" },
+});
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `inputCount` | `number` | No | - | Expected number of input files |
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
+| `naming` | `FileNamingConfig` | No | - | File naming (auto suffix: `merged`) |
+
+**Note:** Requires a Merge utility node upstream to provide multiple files.
+
+### Describe Document Node
+
+Extract PDF metadata (page count, author, title, etc.).
+
+```typescript
+import { createDescribeDocumentNode } from "@uploadista/flow-documents-nodes";
+
+// Extract document metadata
+const describeNode = yield* createDescribeDocumentNode("describe-1");
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
+
+**Output Metadata:**
+```json
+{
+  "pageCount": 10,
+  "format": "pdf",
+  "author": "John Doe",
+  "title": "Document Title",
+  "subject": "Document Subject",
+  "creator": "Adobe Acrobat",
+  "creationDate": "2023-01-01T00:00:00Z",
+  "modifiedDate": "2023-01-02T00:00:00Z",
+  "fileSize": 1024000
+}
+```
+
+### Convert to Markdown Node
+
+Intelligent document-to-markdown conversion.
+
+```typescript
+import { createConvertToMarkdownNode } from "@uploadista/flow-documents-nodes";
+
+// Convert with default settings
+const convertNode = yield* createConvertToMarkdownNode("convert-1");
+
+// With custom resolution and credential
+const customNode = yield* createConvertToMarkdownNode("convert-2", {
+  resolution: "gundam",
+  credentialId: "my-ai-credential",
+});
 ```
 
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `resolution` | `"tiny" \| "small" \| "base" \| "gundam" \| "large"` | No | `"gundam"` | OCR model resolution |
+| `credentialId` | `string` | No | - | AI service credential ID |
+| `keepOutput` | `boolean` | No | `false` | Keep output in flow results |
+
+**How it Works:**
+1. Tries text extraction first (fast, for searchable PDFs)
+2. Falls back to OCR if no text found (for scanned PDFs)
+3. Returns structured markdown in `metadata.markdown`
+
 ## Requirements
 
 - **DocumentPlugin**: Required for PDF operations (split, merge, extract text, metadata)
-- **DocumentAiPlugin**: Required for OCR
+- **DocumentAiPlugin**: Required for OCR and AI-powered conversion
 
 ## License
 
package/dist/{index-CN8GG19X.d.cts → index-DN0piYEv.d.cts}
@@ -4,8 +4,8 @@ import { z } from "zod";
 declare const convertToMarkdownParamsSchema: z.ZodObject<{
 credentialId: z.ZodOptional<z.ZodString>;
 resolution: z.ZodOptional<z.ZodEnum<{
-small: "small";
 tiny: "tiny";
+small: "small";
 base: "base";
 gundam: "gundam";
 large: "large";
@@ -36,8 +36,8 @@ declare const ocrParamsSchema: z.ZodObject<{
 locateObject: "locateObject";
 }>;
 resolution: z.ZodOptional<z.ZodEnum<{
-small: "small";
 tiny: "tiny";
+small: "small";
 base: "base";
 gundam: "gundam";
 large: "large";
@@ -59,4 +59,4 @@ declare const splitPdfParamsSchema: z.ZodObject<{
 type SplitPdfParams = z.infer<typeof splitPdfParamsSchema>;
 //#endregion
 export { MergePdfParams as a, extractTextParamsSchema as c, ConvertToMarkdownParams as d, convertToMarkdownParamsSchema as f, ocrParamsSchema as i, DescribeDocumentParams as l, splitPdfParamsSchema as n, mergePdfParamsSchema as o, OcrParams as r, ExtractTextParams as s, SplitPdfParams as t, describeDocumentParamsSchema as u };
-//# sourceMappingURL=index-
+//# sourceMappingURL=index-DN0piYEv.d.cts.map
package/dist/{index-CN8GG19X.d.cts.map → index-DN0piYEv.d.cts.map}
@@ -1 +1 @@
-{"version":3,"file":"index-
+
{"version":3,"file":"index-DN0piYEv.d.cts","names":[],"sources":["../src/types/convert-to-markdown-node.ts","../src/types/describe-document-node.ts","../src/types/extract-text-node.ts","../src/types/merge-pdf-node.ts","../src/types/ocr-node.ts","../src/types/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;cAEa,+BAA6B,CAAA,CAAA;;EAA7B,UAAA,eAAA,UAKX,CAAA;;;;;;EALwC,CAAA,CAAA,CAAA;CAAA,eAAA,CAAA;AAO9B,KAAA,uBAAA,GAA0B,CAAA,CAAE,KAC/B,CAAA,OAAA,6BADoC,CAAA;;;cCPhC,8BAA4B,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAE7B,sBAAA,GAAyB,CAAA,CAAE,aAC9B;;;cCHI,yBAAuB,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAExB,iBAAA,GAAoB,CAAA,CAAE,aAAa;;;cCFlC,sBAAoB,CAAA,CAAA;;AHAjC,CAAA,eAAa,CAAA;KGID,cAAA,GAAiB,CAAA,CAAE,aAAa;;;cCJ/B,iBAAe,CAAA,CAAA;;IJAf,iBAAA,EAAA,mBAKX;;;;;;IALwC,IAAA,EAAA,MAAA;IAAA,KAAA,EAAA,OAAA;IAO9B,IAAA,EAAA,MAAA;;;;ECPC,YAAA,eAAA,YAA2C,CAAA;EAE5C,aAAA,eAAsB,YACzB,CAAA;;KGWG,SAAA,GAAY,CAAA,CAAE,aAAa;;;cCd1B,sBAAoB,CAAA,CAAA;;ILApB,KAAA,EAAA,OAAA;;;;;;AAA6B,KKM9B,cAAA,GAAiB,CAAA,CAAE,KLNW,CAAA,OKME,oBLNF,CAAA"}
package/dist/{index-DXwYQF9t.d.mts → index-aD9vy0kH.d.mts}
@@ -4,8 +4,8 @@ import { z } from "zod";
 declare const convertToMarkdownParamsSchema: z.ZodObject<{
 credentialId: z.ZodOptional<z.ZodString>;
 resolution: z.ZodOptional<z.ZodEnum<{
-small: "small";
 tiny: "tiny";
+small: "small";
 base: "base";
 gundam: "gundam";
 large: "large";
@@ -36,8 +36,8 @@ declare const ocrParamsSchema: z.ZodObject<{
 locateObject: "locateObject";
 }>;
 resolution: z.ZodOptional<z.ZodEnum<{
-small: "small";
 tiny: "tiny";
+small: "small";
 base: "base";
 gundam: "gundam";
 large: "large";
@@ -59,4 +59,4 @@ declare const splitPdfParamsSchema: z.ZodObject<{
 type SplitPdfParams = z.infer<typeof splitPdfParamsSchema>;
 //#endregion
 export { MergePdfParams as a, extractTextParamsSchema as c, ConvertToMarkdownParams as d, convertToMarkdownParamsSchema as f, ocrParamsSchema as i, DescribeDocumentParams as l, splitPdfParamsSchema as n, mergePdfParamsSchema as o, OcrParams as r, ExtractTextParams as s, SplitPdfParams as t, describeDocumentParamsSchema as u };
-//# sourceMappingURL=index-
+//# sourceMappingURL=index-aD9vy0kH.d.mts.map
package/dist/{index-DXwYQF9t.d.mts.map → index-aD9vy0kH.d.mts.map}
@@ -1 +1 @@
-{"version":3,"file":"index-
+
{"version":3,"file":"index-aD9vy0kH.d.mts","names":[],"sources":["../src/types/convert-to-markdown-node.ts","../src/types/describe-document-node.ts","../src/types/extract-text-node.ts","../src/types/merge-pdf-node.ts","../src/types/ocr-node.ts","../src/types/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;cAEa,+BAA6B,CAAA,CAAA;;EAA7B,UAAA,eAAA,UAKX,CAAA;;;;;;EALwC,CAAA,CAAA,CAAA;CAAA,eAAA,CAAA;AAO9B,KAAA,uBAAA,GAA0B,CAAA,CAAE,KAC/B,CAAA,OAAA,6BADoC,CAAA;;;cCPhC,8BAA4B,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAE7B,sBAAA,GAAyB,CAAA,CAAE,aAC9B;;;cCHI,yBAAuB,CAAA,CAAA,cAAA,CAAA,CAAA,IAAA,CAAA;KAExB,iBAAA,GAAoB,CAAA,CAAE,aAAa;;;cCFlC,sBAAoB,CAAA,CAAA;;AHAjC,CAAA,eAAa,CAAA;KGID,cAAA,GAAiB,CAAA,CAAE,aAAa;;;cCJ/B,iBAAe,CAAA,CAAA;;IJAf,iBAAA,EAAA,mBAKX;;;;;;IALwC,IAAA,EAAA,MAAA;IAAA,KAAA,EAAA,OAAA;IAO9B,IAAA,EAAA,MAAA;;;;ECPC,YAAA,eAAA,YAA2C,CAAA;EAE5C,aAAA,eAAsB,YACzB,CAAA;;KGWG,SAAA,GAAY,CAAA,CAAE,aAAa;;;cCd1B,sBAAoB,CAAA,CAAA;;ILApB,KAAA,EAAA,OAAA;;;;;;AAA6B,KKM9B,cAAA,GAAiB,CAAA,CAAE,KLNW,CAAA,OKME,oBLNF,CAAA"}
package/dist/index.cjs
CHANGED
@@ -1,5 +1,5 @@
-
const e=require(`./types-Cz0s5Dxw.cjs`);let t=require(`@uploadista/core/errors`),n=require(`@uploadista/core/flow`),r=require(`@uploadista/core/types`),i=require(`@uploadista/core/upload`),a=require(`effect`),o=require(`zod`);function s(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*n.DocumentAiPlugin,l=yield*i.
+
const e=require(`./types-Cz0s5Dxw.cjs`);let t=require(`@uploadista/core/errors`),n=require(`@uploadista/core/flow`),r=require(`@uploadista/core/types`),i=require(`@uploadista/core/upload`),a=require(`effect`),o=require(`zod`);function s(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*n.DocumentAiPlugin,l=yield*i.UploadEngine;return yield*(0,n.createFlowNode)({id:e,name:`Convert to Markdown`,description:`Convert documents to Markdown format (intelligently uses OCR if needed)`,type:n.NodeType.process,nodeTypeId:`convert-to-markdown`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:r,flowId:i,jobId:u,clientId:d})=>a.Effect.gen(function*(){let f={flowId:i,nodeId:e,jobId:u};yield*a.Effect.logInfo(`Converting file ${r.id} to Markdown`);let p=yield*l.read(r.id,d),m=yield*s.extractText(p).pipe(a.Effect.either),h,g;if(a.Either.isRight(m)&&m.right.trim().length>0){let e=m.right;yield*a.Effect.logInfo(`Successfully extracted ${e.length} characters from searchable PDF`),h=e.split(`
`).map(e=>e.trim()).filter(e=>e.length>0).join(`
-
`),g=`text`,yield*a.Effect.logInfo(`Converted text to Markdown (${h.length} characters)`)}else{yield*a.Effect.logInfo(`Text extraction failed or returned empty, falling back to OCR`);let e=r.url;if(!e)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR-based markdown conversion`}).toEffect();let n={clientId:d,credentialId:o.credentialId};h=(yield*c.performOCR(e,{taskType:`convertToMarkdown`,resolution:o.resolution||`gundam`},n).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR for markdown conversion`}).toEffect()})))).extractedText,g=`ocr`,yield*a.Effect.logInfo(`Successfully converted scanned document to Markdown using OCR (${h.length} characters)`)}let{metadata:_}=(0,n.resolveUploadMetadata)(r.metadata),v={...r.metadata,..._,markdown:h,markdownSource:g};return yield*a.Effect.logInfo(`Successfully converted file ${r.id} to Markdown via ${g}`),(0,n.completeNodeExecution)({...r,metadata:v,flow:f})})})})}function c(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Describe Document`,description:`Extract metadata from PDF documents`,type:n.NodeType.process,nodeTypeId:`describe-document`,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:o,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:o};yield*a.Effect.logInfo(`Extracting metadata from PDF file ${r.id}`);let d=yield*c.read(r.id,l),f=yield*s.getMetadata(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract metadata`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract metadata`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,pageCount:f.pageCount,format:f.format,...f.author&&{author:f.author},...f.title&&{title:f.title},...f.subject&&{subject:f.subject},...f.creator&&{creator:f.creator},...f.creationDate&&{creationDate:f.creationDate},...f.modifiedDate&&{modifiedDate:f.modifiedDate},fileSize:f.fileSize};return yield*a.Effect.logInfo(`Successfully extracted metadata from file ${r.id}: ${f.pageCount} pages`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}function l(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Extract Text`,description:`Extract text from searchable PDF documents`,type:n.NodeType.process,nodeTypeId:`extract-text`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:o,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:o};yield*a.Effect.logInfo(`Extracting text from PDF file ${r.id}`);let d=yield*c.read(r.id,l),f=yield*s.extractText(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract text`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract text`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,extractedText:f};return!f||f.trim().length===0?yield*a.Effect.logWarning(`No text extracted from file ${r.id}. This might be a scanned document. 
Consider using the OCR node instead.`):yield*a.Effect.logInfo(`Successfully extracted ${f.length} characters from file ${r.id}`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}const u=o.z.array(r.uploadFileSchema);function d(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Merge PDFs`,description:`Merge multiple PDF documents into one`,type:n.NodeType.process,nodeTypeId:`merge-pdf`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:u,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:l,clientId:u})=>a.Effect.gen(function*(){let d={flowId:i,nodeId:e,jobId:l};if(!Array.isArray(r))return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`Merge PDF node requires an array of files from a Merge utility node`}).toEffect();if(r.length===0)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`At least one PDF file is required for merging`}).toEffect();let f=[],p=0;for(let e of r){let t=yield*c.read(e.id,u);f.push(t);let r=(0,n.resolveUploadMetadata)(e.metadata).metadata;r?.pageCount&&typeof r.pageCount==`number`&&(p+=r.pageCount)}yield*a.Effect.logInfo(`Merging ${r.length} PDF files`);let m=yield*s.mergePdfs({pdfs:f}).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to merge PDFs`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to merge PDFs`}).toEffect()}))),h=r[0],{metadata:g}=(0,n.resolveUploadMetadata)(h.metadata),_=new ReadableStream({start(e){e.enqueue(m),e.close()}}),v=`merged-${r.length}-documents.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(()=>`merged`)};v=`${(0,n.getBaseName)((0,n.applyFileNaming)(h,(0,n.buildNamingContext)(h,{flowId:i,jobId:l,nodeId:e,nodeType:`merge-pdf`},{mergedCount:r.length}),t))}.pdf`}let y=yield*c.upload({storageId:h.storage.id,size:m.byteLength,type:`application/pdf`,fileName:v,lastModified:0,metadata:JSON.stringify({...g,pageCount:p,mergedFrom:r.length}),flow:d},u,_),b={...g,pageCount:p,mergedFrom:r.length,fileName:v};return yield*a.Effect.logInfo(`Successfully merged ${r.length} PDFs into one document with ${p} pages`),(0,n.completeNodeExecution)({...y,metadata:b})})})})}function f(e,i){return a.Effect.gen(function*(){let o=yield*n.DocumentAiPlugin;return yield*(0,n.createFlowNode)({id:e,name:`OCR`,description:`Extract text from scanned documents using AI`,type:n.NodeType.process,nodeTypeId:`ocr`,outputTypeId:n.OCR_OUTPUT_TYPE_ID,keepOutput:i.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:n.ocrOutputSchema,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:r,flowId:s,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:s,nodeId:e,jobId:c},d=r.url;if(!d)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR operation`}).toEffect();yield*a.Effect.logInfo(`Starting OCR for file ${r.id} with task type: ${i.taskType}`);let f={clientId:l,credentialId:i.credentialId},p=yield*o.performOCR(d,{taskType:i.taskType,resolution:i.resolution,referenceText:i.referenceText},f).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR`}).toEffect()})));return yield*a.Effect.logInfo(`Successfully completed OCR for file ${r.id}, extracted 
${p.extractedText.length} characters`),(0,n.completeNodeExecution)({extractedText:p.extractedText,format:p.format,taskType:i.taskType,confidence:p.confidence,flow:u})})})})}function p(e,o){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadServer;return yield*(0,n.createFlowNode)({id:e,name:`Split PDF`,description:`Split PDF into pages or page ranges`,type:n.NodeType.process,nodeTypeId:`split-pdf`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:l,clientId:u})=>a.Effect.gen(function*(){let d={flowId:i,nodeId:e,jobId:l};yield*a.Effect.logInfo(`Splitting PDF file ${r.id} in ${o.mode} mode`);let f=yield*c.read(r.id,u),p=yield*s.splitPdf(f,o).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to split PDF`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to split PDF`}).toEffect()}))),{metadata:m}=(0,n.resolveUploadMetadata)(r.metadata);if(p.mode===`individual`){yield*a.Effect.logInfo(`Successfully split PDF into ${p.pdfs.length} individual pages`),yield*a.Effect.logWarning(`Individual page mode returns multiple files - flow engine support required`);let t=p.pdfs[0],s=new ReadableStream({start(e){e.enqueue(t),e.close()}}),f=`${(0,n.getBaseName)(m?.fileName||`document`)}-page-1.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(e=>`page-${e.pageNumber??1}`)};f=`${(0,n.getBaseName)((0,n.applyFileNaming)(r,(0,n.buildNamingContext)(r,{flowId:i,jobId:l,nodeId:e,nodeType:`split-pdf`},{pageNumber:1}),t))}.pdf`}let h=yield*c.upload({storageId:r.storage.id,size:t.byteLength,type:`application/pdf`,fileName:f,lastModified:0,metadata:JSON.stringify({...m,pageCount:1,splitMode:`individual`}),flow:d},u,s),g={...m,pageCount:1,splitMode:`individual`};return(0,n.completeNodeExecution)({...h,metadata:g})}let h=o.endPage&&o.startPage?o.endPage-o.startPage+1:1,g=p.pdf,_=new ReadableStream({start(e){e.enqueue(g),e.close()}}),v=`${(0,n.getBaseName)(m?.fileName||`document`)}-pages-${o.startPage}-${o.endPage}.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(e=>`pages-${o.startPage}-${o.endPage}`)};v=`${(0,n.getBaseName)((0,n.applyFileNaming)(r,(0,n.buildNamingContext)(r,{flowId:i,jobId:l,nodeId:e,nodeType:`split-pdf`},{startPage:o.startPage,endPage:o.endPage}),t))}.pdf`}let y=yield*c.upload({storageId:r.storage.id,size:g.byteLength,type:`application/pdf`,fileName:v,lastModified:0,metadata:JSON.stringify({...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`}),flow:d},u,_),b={...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`};return yield*a.Effect.logInfo(`Successfully split PDF to pages ${o.startPage}-${o.endPage}`),(0,n.completeNodeExecution)({...y,metadata:b})})})})}exports.convertToMarkdownParamsSchema=e.o,exports.createConvertToMarkdownNode=s,exports.createDescribeDocumentNode=c,exports.createExtractTextNode=l,exports.createMergePdfNode=d,exports.createOcrNode=f,exports.createSplitPdfNode=p,exports.describeDocumentParamsSchema=e.a,exports.extractTextParamsSchema=e.i,exports.mergePdfParamsSchema=e.r,exports.ocrParamsSchema=e.n,exports.splitPdfParamsSchema=e.t;
+
`),g=`text`,yield*a.Effect.logInfo(`Converted text to Markdown (${h.length} characters)`)}else{yield*a.Effect.logInfo(`Text extraction failed or returned empty, falling back to OCR`);let e=r.url;if(!e)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR-based markdown conversion`}).toEffect();let n={clientId:d,credentialId:o.credentialId};h=(yield*c.performOCR(e,{taskType:`convertToMarkdown`,resolution:o.resolution||`gundam`},n).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR for markdown conversion`}).toEffect()})))).extractedText,g=`ocr`,yield*a.Effect.logInfo(`Successfully converted scanned document to Markdown using OCR (${h.length} characters)`)}let{metadata:_}=(0,n.resolveUploadMetadata)(r.metadata),v={...r.metadata,..._,markdown:h,markdownSource:g};return yield*a.Effect.logInfo(`Successfully converted file ${r.id} to Markdown via ${g}`),(0,n.completeNodeExecution)({...r,metadata:v,flow:f})})})})}function c(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadEngine;return yield*(0,n.createFlowNode)({id:e,name:`Describe Document`,description:`Extract metadata from PDF documents`,type:n.NodeType.process,nodeTypeId:`describe-document`,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:o,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:o};yield*a.Effect.logInfo(`Extracting metadata from PDF file ${r.id}`);let d=yield*c.read(r.id,l),f=yield*s.getMetadata(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract metadata`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract metadata`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,pageCount:f.pageCount,format:f.format,...f.author&&{author:f.author},...f.title&&{title:f.title},...f.subject&&{subject:f.subject},...f.creator&&{creator:f.creator},...f.creationDate&&{creationDate:f.creationDate},...f.modifiedDate&&{modifiedDate:f.modifiedDate},fileSize:f.fileSize};return yield*a.Effect.logInfo(`Successfully extracted metadata from file ${r.id}: ${f.pageCount} pages`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}function l(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadEngine;return yield*(0,n.createFlowNode)({id:e,name:`Extract Text`,description:`Extract text from searchable PDF documents`,type:n.NodeType.process,nodeTypeId:`extract-text`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:o,clientId:l})=>a.Effect.gen(function*(){let u={flowId:i,nodeId:e,jobId:o};yield*a.Effect.logInfo(`Extracting text from PDF file ${r.id}`);let d=yield*c.read(r.id,l),f=yield*s.extractText(d).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to extract text`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract text`}).toEffect()}))),{metadata:p}=(0,n.resolveUploadMetadata)(r.metadata),m={...r.metadata,...p,extractedText:f};return!f||f.trim().length===0?yield*a.Effect.logWarning(`No text extracted from file ${r.id}. This might be a scanned document. 
Consider using the OCR node instead.`):yield*a.Effect.logInfo(`Successfully extracted ${f.length} characters from file ${r.id}`),(0,n.completeNodeExecution)({...r,metadata:m,flow:u})})})})}const u=o.z.array(r.uploadFileSchema);function d(e,o={}){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadEngine;return yield*(0,n.createFlowNode)({id:e,name:`Merge PDFs`,description:`Merge multiple PDF documents into one`,type:n.NodeType.process,nodeTypeId:`merge-pdf`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:u,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:l,clientId:u})=>a.Effect.gen(function*(){let d={flowId:i,nodeId:e,jobId:l};if(!Array.isArray(r))return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`Merge PDF node requires an array of files from a Merge utility node`}).toEffect();if(r.length===0)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`At least one PDF file is required for merging`}).toEffect();let f=[],p=0;for(let e of r){let t=yield*c.read(e.id,u);f.push(t);let r=(0,n.resolveUploadMetadata)(e.metadata).metadata;r?.pageCount&&typeof r.pageCount==`number`&&(p+=r.pageCount)}yield*a.Effect.logInfo(`Merging ${r.length} PDF files`);let m=yield*s.mergePdfs({pdfs:f}).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to merge PDFs`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to merge PDFs`}).toEffect()}))),h=r[0],{metadata:g}=(0,n.resolveUploadMetadata)(h.metadata),_=new ReadableStream({start(e){e.enqueue(m),e.close()}}),v=`merged-${r.length}-documents.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(()=>`merged`)};v=`${(0,n.getBaseName)((0,n.applyFileNaming)(h,(0,n.buildNamingContext)(h,{flowId:i,jobId:l,nodeId:e,nodeType:`merge-pdf`},{mergedCount:r.length}),t))}.pdf`}let y=yield*c.upload({storageId:h.storage.id,size:m.byteLength,type:`application/pdf`,fileName:v,lastModified:0,metadata:JSON.stringify({...g,pageCount:p,mergedFrom:r.length}),flow:d},u,_),b={...g,pageCount:p,mergedFrom:r.length,fileName:v};return yield*a.Effect.logInfo(`Successfully merged ${r.length} PDFs into one document with ${p} pages`),(0,n.completeNodeExecution)({...y,metadata:b})})})})}function f(e,i){return a.Effect.gen(function*(){let o=yield*n.DocumentAiPlugin;return yield*(0,n.createFlowNode)({id:e,name:`OCR`,description:`Extract text from scanned documents using AI`,type:n.NodeType.process,nodeTypeId:`ocr`,outputTypeId:n.OCR_OUTPUT_TYPE_ID,keepOutput:i.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:n.ocrOutputSchema,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:r,flowId:s,jobId:c,clientId:l})=>a.Effect.gen(function*(){let u={flowId:s,nodeId:e,jobId:c},d=r.url;if(!d)return yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR operation`}).toEffect();yield*a.Effect.logInfo(`Starting OCR for file ${r.id} with task type: ${i.taskType}`);let f={clientId:l,credentialId:i.credentialId},p=yield*o.performOCR(d,{taskType:i.taskType,resolution:i.resolution,referenceText:i.referenceText},f).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to perform OCR`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR`}).toEffect()})));return yield*a.Effect.logInfo(`Successfully completed OCR for file ${r.id}, extracted 
${p.extractedText.length} characters`),(0,n.completeNodeExecution)({extractedText:p.extractedText,format:p.format,taskType:i.taskType,confidence:p.confidence,flow:u})})})})}function p(e,o){return a.Effect.gen(function*(){let s=yield*n.DocumentPlugin,c=yield*i.UploadEngine;return yield*(0,n.createFlowNode)({id:e,name:`Split PDF`,description:`Split PDF into pages or page ranges`,type:n.NodeType.process,nodeTypeId:`split-pdf`,outputTypeId:n.STORAGE_OUTPUT_TYPE_ID,keepOutput:o.keepOutput,inputSchema:r.uploadFileSchema,outputSchema:r.uploadFileSchema,run:({data:r,flowId:i,jobId:l,clientId:u})=>a.Effect.gen(function*(){let d={flowId:i,nodeId:e,jobId:l};yield*a.Effect.logInfo(`Splitting PDF file ${r.id} in ${o.mode} mode`);let f=yield*c.read(r.id,u),p=yield*s.splitPdf(f,o).pipe(a.Effect.catchAll(e=>a.Effect.gen(function*(){return yield*a.Effect.logError(`Failed to split PDF`,e),yield*t.UploadistaError.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to split PDF`}).toEffect()}))),{metadata:m}=(0,n.resolveUploadMetadata)(r.metadata);if(p.mode===`individual`){yield*a.Effect.logInfo(`Successfully split PDF into ${p.pdfs.length} individual pages`),yield*a.Effect.logWarning(`Individual page mode returns multiple files - flow engine support required`);let t=p.pdfs[0],s=new ReadableStream({start(e){e.enqueue(t),e.close()}}),f=`${(0,n.getBaseName)(m?.fileName||`document`)}-page-1.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(e=>`page-${e.pageNumber??1}`)};f=`${(0,n.getBaseName)((0,n.applyFileNaming)(r,(0,n.buildNamingContext)(r,{flowId:i,jobId:l,nodeId:e,nodeType:`split-pdf`},{pageNumber:1}),t))}.pdf`}let h=yield*c.upload({storageId:r.storage.id,size:t.byteLength,type:`application/pdf`,fileName:f,lastModified:0,metadata:JSON.stringify({...m,pageCount:1,splitMode:`individual`}),flow:d},u,s),g={...m,pageCount:1,splitMode:`individual`};return(0,n.completeNodeExecution)({...h,metadata:g})}let h=o.endPage&&o.startPage?o.endPage-o.startPage+1:1,g=p.pdf,_=new ReadableStream({start(e){e.enqueue(g),e.close()}}),v=`${(0,n.getBaseName)(m?.fileName||`document`)}-pages-${o.startPage}-${o.endPage}.pdf`;if(o.naming){let t={...o.naming,autoSuffix:o.naming.autoSuffix??(()=>`pages-${o.startPage}-${o.endPage}`)};v=`${(0,n.getBaseName)((0,n.applyFileNaming)(r,(0,n.buildNamingContext)(r,{flowId:i,jobId:l,nodeId:e,nodeType:`split-pdf`},{startPage:o.startPage,endPage:o.endPage}),t))}.pdf`}let y=yield*c.upload({storageId:r.storage.id,size:g.byteLength,type:`application/pdf`,fileName:v,lastModified:0,metadata:JSON.stringify({...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`}),flow:d},u,_),b={...m,pageCount:h,splitMode:`range`,splitRange:`${o.startPage}-${o.endPage}`};return yield*a.Effect.logInfo(`Successfully split PDF to pages ${o.startPage}-${o.endPage}`),(0,n.completeNodeExecution)({...y,metadata:b})})})})}exports.convertToMarkdownParamsSchema=e.o,exports.createConvertToMarkdownNode=s,exports.createDescribeDocumentNode=c,exports.createExtractTextNode=l,exports.createMergePdfNode=d,exports.createOcrNode=f,exports.createSplitPdfNode=p,exports.describeDocumentParamsSchema=e.a,exports.extractTextParamsSchema=e.i,exports.mergePdfParamsSchema=e.r,exports.ocrParamsSchema=e.n,exports.splitPdfParamsSchema=e.t;
package/dist/index.d.cts
CHANGED
@@ -1,11 +1,11 @@
-import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "./index-
+import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "./index-DN0piYEv.cjs";
 import * as _uploadista_core_flow2 from "@uploadista/core/flow";
 import { DocumentAiPlugin, DocumentPlugin, FileNamingConfig, NodeType, OcrResolution, OcrTaskType } from "@uploadista/core/flow";
 import * as zod_v4_core1 from "zod/v4/core";
 import * as zod1 from "zod";
 import { z } from "zod";
 import { UploadistaError } from "@uploadista/core/errors";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
 import { Effect } from "effect";
 
 //#region src/convert-to-markdown-node.d.ts
@@ -233,7 +233,7 @@ declare function createConvertToMarkdownNode(id: string, params?: ConvertToMarkd
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin | DocumentAiPlugin>;
 //#endregion
 //#region src/describe-document-node.d.ts
 type DescribeDocumentNodeParams = {
@@ -458,7 +458,7 @@ declare function createDescribeDocumentNode(id: string, params?: DescribeDocumen
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/extract-text-node.d.ts
 type ExtractTextNodeParams = {
@@ -683,7 +683,7 @@ declare function createExtractTextNode(id: string, params?: ExtractTextNodeParam
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/merge-pdf-node.d.ts
 type MergePdfNodeParams = {
@@ -914,7 +914,7 @@ declare function createMergePdfNode(id: string, params?: MergePdfNodeParams): Ef
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/ocr-node.d.ts
 type OcrNodeParams = {
@@ -1295,7 +1295,7 @@ declare function createSplitPdfNode(id: string, params: SplitPdfNodeParams): Eff
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 export { type ConvertToMarkdownNodeParams, ConvertToMarkdownParams, type DescribeDocumentNodeParams, DescribeDocumentParams, type ExtractTextNodeParams, ExtractTextParams, type MergePdfNodeParams, MergePdfParams, type OcrNodeParams, OcrParams, type SplitPdfNodeParams, SplitPdfParams, convertToMarkdownParamsSchema, createConvertToMarkdownNode, createDescribeDocumentNode, createExtractTextNode, createMergePdfNode, createOcrNode, createSplitPdfNode, describeDocumentParamsSchema, extractTextParamsSchema, mergePdfParamsSchema, ocrParamsSchema, splitPdfParamsSchema };
 //# sourceMappingURL=index.d.cts.map
package/dist/index.d.mts
CHANGED
@@ -1,8 +1,8 @@
-import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "./index-
+import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "./index-aD9vy0kH.mjs";
 import { UploadistaError } from "@uploadista/core/errors";
 import * as _uploadista_core_flow2 from "@uploadista/core/flow";
 import { DocumentAiPlugin, DocumentPlugin, FileNamingConfig, NodeType, OcrResolution, OcrTaskType } from "@uploadista/core/flow";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
 import { Effect } from "effect";
 import * as zod1 from "zod";
 import { z } from "zod";
@@ -233,7 +233,7 @@ declare function createConvertToMarkdownNode(id: string, params?: ConvertToMarkd
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin | DocumentAiPlugin>;
 //#endregion
 //#region src/describe-document-node.d.ts
 type DescribeDocumentNodeParams = {
@@ -458,7 +458,7 @@ declare function createDescribeDocumentNode(id: string, params?: DescribeDocumen
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/extract-text-node.d.ts
 type ExtractTextNodeParams = {
@@ -683,7 +683,7 @@ declare function createExtractTextNode(id: string, params?: ExtractTextNodeParam
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/merge-pdf-node.d.ts
 type MergePdfNodeParams = {
@@ -914,7 +914,7 @@ declare function createMergePdfNode(id: string, params?: MergePdfNodeParams): Ef
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 //#region src/ocr-node.d.ts
 type OcrNodeParams = {
@@ -1295,7 +1295,7 @@ declare function createSplitPdfNode(id: string, params: SplitPdfNodeParams): Eff
 circuitBreaker?: _uploadista_core_flow2.FlowCircuitBreakerConfig;
 } & {
 type: NodeType.process;
-}, UploadistaError,
+}, UploadistaError, UploadEngine | DocumentPlugin>;
 //#endregion
 export { type ConvertToMarkdownNodeParams, ConvertToMarkdownParams, type DescribeDocumentNodeParams, DescribeDocumentParams, type ExtractTextNodeParams, ExtractTextParams, type MergePdfNodeParams, MergePdfParams, type OcrNodeParams, OcrParams, type SplitPdfNodeParams, SplitPdfParams, convertToMarkdownParamsSchema, createConvertToMarkdownNode, createDescribeDocumentNode, createExtractTextNode, createMergePdfNode, createOcrNode, createSplitPdfNode, describeDocumentParamsSchema, extractTextParamsSchema, mergePdfParamsSchema, ocrParamsSchema, splitPdfParamsSchema };
 //# sourceMappingURL=index.d.mts.map
package/dist/index.d.mts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/convert-to-markdown-node.ts","../src/describe-document-node.ts","../src/extract-text-node.ts","../src/merge-pdf-node.ts","../src/ocr-node.ts","../src/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;;;;;;;;;KAcY,2BAAA;;;;;iBAMI,2BAAA,sBAEN,8BAAgC,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;;IAR9B,EAAA,EAAA,MAAA;IAMI,MAAA,EAAA,MAAA;IAEN,OAAA,EAAA;MAAgC,EAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;;MAAA,KAAA,EAAA,MAAA;IAAA,CAAA,GAAA,SAAA;;;;MCV9B,UAAA,EAAA,MAAA;IAII,CAAA,GAAA,SAAA;EAEN,CAAA,EAAA,OAAA,gCAAA,CAAA;IAA+B,EAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,IAAA,CAAA,EAAA;;;;ICL7B,CAAA,GAAA,SAAA;IAII,YAAA,CAAA,EAAA;MAEN,OAAA,EAAA,MAAA;MAA0B,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,QAAA,CAAA,QAAA,CAAA,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,OAAA,CAAA,GAAA,SAAA;IAAA,YAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ICDxB,iBAAkB,CAAA,EAAA,MAAA,GAOnB,SAAA;IAMK,IAAA,CAAA,EAAA;MAEN,MAAA,EAAA,MAAA;MAAuB,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,IAAA,EAAA,MAAA;MAAA,CAAA,EAAA,GAAA,SAAA;;;;ICnBrB,YAAa,CAAA,EAAA,MAAA,GACb,SAAA;IAOI,GAAA,CAAA,EAAA,MAAa,GAAA,SAAA;IAAqB,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;IAAa,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;MAAA,MAAA,EAAA,MAAA;MAAA,OAAA,EAAA;;;;QCLnD,QAAA,CAAkB,EAAA,MAAA,
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/convert-to-markdown-node.ts","../src/describe-document-node.ts","../src/extract-text-node.ts","../src/merge-pdf-node.ts","../src/ocr-node.ts","../src/split-pdf-node.ts"],"sourcesContent":[],"mappings":";;;;;;;;;;;KAcY,2BAAA;;;;;iBAMI,2BAAA,sBAEN,8BAAgC,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;;IAR9B,EAAA,EAAA,MAAA;IAMI,MAAA,EAAA,MAAA;IAEN,OAAA,EAAA;MAAgC,EAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;;MAAA,KAAA,EAAA,MAAA;IAAA,CAAA,GAAA,SAAA;;;;MCV9B,UAAA,EAAA,MAAA;IAII,CAAA,GAAA,SAAA;EAEN,CAAA,EAAA,OAAA,gCAAA,CAAA;IAA+B,EAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,IAAA,CAAA,EAAA;;;;ICL7B,CAAA,GAAA,SAAA;IAII,YAAA,CAAA,EAAA;MAEN,OAAA,EAAA,MAAA;MAA0B,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,QAAA,CAAA,QAAA,CAAA,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,OAAA,CAAA,GAAA,SAAA;IAAA,YAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ICDxB,iBAAkB,CAAA,EAAA,MAAA,GAOnB,SAAA;IAMK,IAAA,CAAA,EAAA;MAEN,MAAA,EAAA,MAAA;MAAuB,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,IAAA,EAAA,MAAA;MAAA,CAAA,EAAA,GAAA,SAAA;;;;ICnBrB,YAAa,CAAA,EAAA,MAAA,GACb,SAAA;IAOI,GAAA,CAAA,EAAA,MAAa,GAAA,SAAA;IAAqB,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;IAAa,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;MAAA,MAAA,EAAA,MAAA;MAAA,OAAA,EAAA;;;;QCLnD,QAAA,CAAkB,EAAA,MAAA,GAAA,SASnB;QAGK,MAAA,CAAA,EAAkB,MAAA,GAAA,SAAA;QAAqB,KAAA,CAAA,EAAA;UAAkB,UAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,UAAA,EAAA,MAAA;MAAA,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KJjB7D,0BAAA;;;iBAII,0BAAA,sBAEN,6BAA+B,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;;;;IDJ7B,OAAA,EAAA;MAMI,EAAA,EAAA,MAAA;MAEN,IAAA,EAAA,MAAA;MAAgC,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;;IAAA,YAAA,CAAA,EAAA;MAAA,OAAA,EAAA,MAAA;;;;ECV9B,CAAA,EAAA,OAAA,gCAA0B,CAAA;IAItB,EAAA,EAAA,MAAA;IAEN,MAAA,EAAA,MAAA;IAA+B,OAAA,EAAA;;;;;;;;;;;;;;;;;;;;MAAA,MAAA,EAAA,MAAA;MAAA,MAAA,EAAA,MAAA;;;;MCL7B,OAAA,EAAA,MAAA;MAII,MAAA,EAAA,MAAA;MAEN,UAAA,EAAA,MAAA;IAA0B,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;IAAA,GAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;;;;MCDxB,MAAA,EAAA,MAAkB;MAad,MAAA,EAAA,MAAkB;MAExB,KAAA,EAAA,MAAA;IAAuB,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;IAAA,CAAA;IAAA,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ICnBrB,cAAa,CAAA,EAAA,OACb,GAAA,SAAA;IAOI,QAAA,CAAA,EAAA,MAAa,GAAA,SAAA;IAAqB,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAa,IAAA,CAAA,EAAA;;;;;;;;;;;;;;;;QAAA,EAAA,EAAA,MAAA;QAAA,IAAA,EAAA,MAAA;;;;QCLnD,KAAA,CAAA,EAAA;UAYI,UAAkB,EAAA,MAAA;UAAqB,IAAA,EAAA,MAAA;UAAkB,IAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,CAAA;IAAA,KAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KHhB7D,qBAAA;;;iBAII,qBAAA,sBAEN,wBAA0B,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;;;;IFLxB,OAAA,EAAA;MAMI,EAAA,EAAA,MAAA;MAEN,IAAA,EAAA,MAAA;MAAgC,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;;IAAA,YAAA,CAAA,EAAA;MAAA,OAAA,EAAA,MAAA;;;;ECV9B,CAAA,EAAA,OAAA,gCAA0B,CAAA;IAItB,EAAA,EAAA,MAAA;IAEN,MAAA,EAAA,MAAA;IAA+B,OAAA,EAAA;;;;;;;;;;;;;;;;;;;;MAAA,MAAA,EAAA,MAAA;MAAA,MAAA,EAAA,MAAA;;;;MCL7B,OAAA,EAAA,MAAA;MAII,MAAA,EAAA,MAAA;MAEN,UAAA,EAAA,MAAA;IAA0B,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;IAAA,GAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;;;;MCDxB,MAAA,EAAA,MAAkB;MAad,MAAA,EAAA,MAAkB;MAExB,KAAA,EAAA,MAAA;IAAuB,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;IAAA,CAAA;IAAA,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ICnBrB,cAAa,CAAA,EAAA,OACb,GAAA,SAAA;IAOI,QAAA,CAAA,EAAA,MAAa,GAAA,SAAA;IAAqB,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAa,IAAA,CAAA,EAAA;;;;;;;;;;;;;;;;QAAA,EAAA,EAAA,MAAA;QAAA,IAAA,EAAA,MAAA;;;;QCLnD,KAAA,CAAA,EAAA;UAYI,UAAkB,EAAA,MAAA;UAAqB,IAAA,EAAA,MAAA;UAAkB,IAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,
CAAA;IAAA,KAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KFX7D,kBAAA;;;;;;AHJZ;EAMgB,MAAA,CAAA,EGKL,gBHLK;CAEN;AAAgC,iBGS1B,kBAAA,CHT0B,EAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EGWhC,kBHXgC,CAAA,EGWT,MAAA,CAAA,MHXS,CGWT,sBAAA,CAAA,YAAA,GHXS;;;;;;;;;;;;;;;;;;;;;IAAA,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;MCV9B,KAAA,EAAA,MAAA;IAII,CAAA,GAAA,SAAA;IAEN,YAAA,CAAA,EAAA;MAA+B,OAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;IAAA,QAAA,CAAA,QAAA,CAAA,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,OAAA,CAAA,GAAA,SAAA;IAAA,YAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ICL7B,iBAAA,CAAA,EAAqB,MAAA,GAAA,SAAA;IAIjB,IAAA,CAAA,EAAA;MAEN,MAAA,EAAA,MAAA;MAA0B,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,IAAA,EAAA,MAAA;QAAA,IAAA,EAAA,MAAA;;;;ICDxB,QAAA,CAAA,QAAkB,CAAA,MAAA,EAAA,MAOnB,GAAA,MAAA,GAAA,OAAgB,CAAA,GAAA,SAAA;IAMX,YAAA,CAAA,EAAA,MAAkB,GAAA,SAAA;IAExB,GAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAuB,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;MAAA,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;MAAA,MAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;QCnBrB,IAAa,EAAA,MAAA;MAQT,CAAA,EAAA,GAAA,SAAa;IAAqB,CAAA;IAAa,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;IAAA,CAAA,GAAA,SAAA;EAAA,CAAA,EAAA,OAAA,CAAA,CAAA;;;;MCLnD,MAAA,EAAA,MAAkB;MAYd,OAAA,EAAA;QAAuC,EAAA,EAAA,MAAA;QAAkB,IAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,KAAA,EAAA,MAAA;MAAA,CAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KDf7D,aAAA;YACA;eACG;;;;;iBAMC,aAAA,qBAAkC,gBAAa,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;EJRnD,WAAA,cAA2B,CAAA;IAMvB,EAAA,EAAA,MAAA;IAEN,MAAA,EAAA,MAAA;IAAgC,OAAA,EAAA;;;;;;;;;;;;;;;;;;;;;MAAA,MAAA,EAAA,MAAA;MAAA,KAAA,EAAA,MAAA;;;;MCV9B,MAAA,EAAA,MAAA;MAII,UAAA,EAAA,MAAA;IAEN,CAAA,GAAA,SAAA;EAA+B,CAAA,EAAA,OAAA,gCAAA,CAAA;;;;;;;;;;;;;;;;;;;;IAAA,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,iBAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;MCL7B,KAAA,EAAA,MAAA;IAII,CAAA,GAAA,SAAA;IAEN,YAAA,CAAA,EAAA;MAA0B,OAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;MAAA,OAAA,EAAA;QAAA,EAAA,EAAA,MAAA;;;;QCDxB,MAAA,CAAA,EAAkB,MAAA,GAAA,SAOnB;QAMK,KAAA,CAAA,EAAA;UAEN,UAAA,EAAA,MAAA;UAAuB,IAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;MAAA,CAAA,GAAA,SAAA;IAAA,CAAA;;;;ICnBrB,MAAA,CAAA,QAAa,CAAA,MAAA,EACb,OAAA,CAAA;IAOI,QAAA,EAAA,MAAa,GAAA,IAAA;EAAqB,CAAA,EAAA,gBAAA,2CAAA,CAAA;IAAa,aAAA,EAAA,MAAA;;;;;;;;;;;;;;;;IAAA,kBAAA,CAAA,EAAA,OAAA;EAAA,CAAA;;;;ACL/D,CAAA,iBAAY,kBAAkB,CASnB;;;KATC,kBAAA;;;;;;;ALHZ;AAMA;EAEU,MAAA,CAAA,EKIC,gBLJD;CAAgC;iBKO1B,kBAAA,qBAAuC,qBAAkB,MAAA,CAAA,OAAA,sBAAA,CAAA,YAAA;;;;;;;;;;;;;;;;;;;;ILP/B,cAAA,CAAA,EAAA,OAAA,GAAA,SAAA;IAAA,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;MCV9B,MAAA,EAAA,MAAA;MAII,KAAA,EAAA,MAAA;IAEN,CAAA,GAAA,SAAA;IAA+B,YAAA,CAAA,EAAA;;;;;;;;;;;;;;;;;;;;IAAA,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAA,QAAA,CAAA,QAAA,CAAA,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,OAAA,CAAA,GAAA,SAAA;;;;ICL7B,QAAA,CAAA,EAAA,MAAA,GAAqB,SAAA;IAIjB,iBAAA,CAAA,EAAqB,MAAA,GAAA,SAAA;IAE3B,IAAA,CAAA,EAAA;MAA0B,MAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,UAAA,EAAA,MAAA;QAAA,IAAA,EAAA,MAAA;;;;ICDxB,IAAA,CAAA,EAAA,MAAA,GAAA,SAAkB;IAad,QAAA,CAAA,QAAkB,CAAA,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,OAAA,CAAA,GAAA,SAAA;IAExB,YAAA,CAAA,EAAA,MAAA,GAAA,SAAA;IAAuB,GAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;;;;;;;;;;;;;;;;;MAAA,IAAA,CAAA,EAAA,MAAA,GAAA,SAAA;MAAA,QAAA,CAAA,EAAA,MAAA,GAAA,SAAA;;;;QCnBrB,IAAa,EAAA,MAAA;QAQT,IAAa,EAAA,MAAA;MAAqB,CAAA,EAAA,GAAA,SAAA;IAAa,CAAA;;;;;;;;;;;;;;;;MAAA,UAAA,EAAA,MAAA;IAAA,CAAA,GAAA,SAAA;;;;MCLnD,EAAA,EAAA,MAAA;MAYI,MAAA,EAAA,MAAkB;MAAqB,OAAA,EAAA;QAAkB,EAAA,EAAA,MAAA;;;;;;;;;;;;;;;;;;;;QAAA,MAAA,EAAA,MAAA;QAAA,KAAA,EAAA,MAAA"}
package/dist/index.mjs
CHANGED
@@ -1,6 +1,6 @@
-
import{a as e,i as t,n,o as r,r as i,t as a}from"./types-wF9yqL-x.mjs";import{UploadistaError as o}from"@uploadista/core/errors";import{DocumentAiPlugin as s,DocumentPlugin as c,NodeType as l,OCR_OUTPUT_TYPE_ID as u,STORAGE_OUTPUT_TYPE_ID as d,applyFileNaming as f,buildNamingContext as p,completeNodeExecution as m,createFlowNode as h,getBaseName as g,ocrOutputSchema as _,resolveUploadMetadata as v}from"@uploadista/core/flow";import{uploadFileSchema as y}from"@uploadista/core/types";import{
+
import{a as e,i as t,n,o as r,r as i,t as a}from"./types-wF9yqL-x.mjs";import{UploadistaError as o}from"@uploadista/core/errors";import{DocumentAiPlugin as s,DocumentPlugin as c,NodeType as l,OCR_OUTPUT_TYPE_ID as u,STORAGE_OUTPUT_TYPE_ID as d,applyFileNaming as f,buildNamingContext as p,completeNodeExecution as m,createFlowNode as h,getBaseName as g,ocrOutputSchema as _,resolveUploadMetadata as v}from"@uploadista/core/flow";import{uploadFileSchema as y}from"@uploadista/core/types";import{UploadEngine as b}from"@uploadista/core/upload";import{Effect as x,Either as S}from"effect";import{z as C}from"zod";function w(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*s,i=yield*b;return yield*h({id:e,name:`Convert to Markdown`,description:`Convert documents to Markdown format (intelligently uses OCR if needed)`,type:l.process,nodeTypeId:`convert-to-markdown`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:a,flowId:s,jobId:c,clientId:l})=>x.gen(function*(){let u={flowId:s,nodeId:e,jobId:c};yield*x.logInfo(`Converting file ${a.id} to Markdown`);let d=yield*i.read(a.id,l),f=yield*n.extractText(d).pipe(x.either),p,h;if(S.isRight(f)&&f.right.trim().length>0){let e=f.right;yield*x.logInfo(`Successfully extracted ${e.length} characters from searchable PDF`),p=e.split(`
`).map(e=>e.trim()).filter(e=>e.length>0).join(`
-
`),h=`text`,yield*x.logInfo(`Converted text to Markdown (${p.length} characters)`)}else{yield*x.logInfo(`Text extraction failed or returned empty, falling back to OCR`);let e=a.url;if(!e)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR-based markdown conversion`}).toEffect();let n={clientId:l,credentialId:t.credentialId};p=(yield*r.performOCR(e,{taskType:`convertToMarkdown`,resolution:t.resolution||`gundam`},n).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to perform OCR`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR for markdown conversion`}).toEffect()})))).extractedText,h=`ocr`,yield*x.logInfo(`Successfully converted scanned document to Markdown using OCR (${p.length} characters)`)}let{metadata:g}=v(a.metadata),_={...a.metadata,...g,markdown:p,markdownSource:h};return yield*x.logInfo(`Successfully converted file ${a.id} to Markdown via ${h}`),m({...a,metadata:_,flow:u})})})})}function T(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Describe Document`,description:`Extract metadata from PDF documents`,type:l.process,nodeTypeId:`describe-document`,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:t,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a};yield*x.logInfo(`Extracting metadata from PDF file ${t.id}`);let l=yield*r.read(t.id,s),u=yield*n.getMetadata(l).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to extract metadata`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract metadata`}).toEffect()}))),{metadata:d}=v(t.metadata),f={...t.metadata,...d,pageCount:u.pageCount,format:u.format,...u.author&&{author:u.author},...u.title&&{title:u.title},...u.subject&&{subject:u.subject},...u.creator&&{creator:u.creator},...u.creationDate&&{creationDate:u.creationDate},...u.modifiedDate&&{modifiedDate:u.modifiedDate},fileSize:u.fileSize};return yield*x.logInfo(`Successfully extracted metadata from file ${t.id}: ${u.pageCount} pages`),m({...t,metadata:f,flow:c})})})})}function E(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Extract Text`,description:`Extract text from searchable PDF documents`,type:l.process,nodeTypeId:`extract-text`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:t,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a};yield*x.logInfo(`Extracting text from PDF file ${t.id}`);let l=yield*r.read(t.id,s),u=yield*n.extractText(l).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to extract text`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract text`}).toEffect()}))),{metadata:d}=v(t.metadata),f={...t.metadata,...d,extractedText:u};return!u||u.trim().length===0?yield*x.logWarning(`No text extracted from file ${t.id}. This might be a scanned document. 
Consider using the OCR node instead.`):yield*x.logInfo(`Successfully extracted ${u.length} characters from file ${t.id}`),m({...t,metadata:f,flow:c})})})})}const D=C.array(y);function O(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Merge PDFs`,description:`Merge multiple PDF documents into one`,type:l.process,nodeTypeId:`merge-pdf`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:D,outputSchema:y,run:({data:i,flowId:a,jobId:s,clientId:c})=>x.gen(function*(){let l={flowId:a,nodeId:e,jobId:s};if(!Array.isArray(i))return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`Merge PDF node requires an array of files from a Merge utility node`}).toEffect();if(i.length===0)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`At least one PDF file is required for merging`}).toEffect();let u=[],d=0;for(let e of i){let t=yield*r.read(e.id,c);u.push(t);let n=v(e.metadata).metadata;n?.pageCount&&typeof n.pageCount==`number`&&(d+=n.pageCount)}yield*x.logInfo(`Merging ${i.length} PDF files`);let h=yield*n.mergePdfs({pdfs:u}).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to merge PDFs`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to merge PDFs`}).toEffect()}))),_=i[0],{metadata:y}=v(_.metadata),b=new ReadableStream({start(e){e.enqueue(h),e.close()}}),S=`merged-${i.length}-documents.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(()=>`merged`)};S=`${g(f(_,p(_,{flowId:a,jobId:s,nodeId:e,nodeType:`merge-pdf`},{mergedCount:i.length}),n))}.pdf`}let C=yield*r.upload({storageId:_.storage.id,size:h.byteLength,type:`application/pdf`,fileName:S,lastModified:0,metadata:JSON.stringify({...y,pageCount:d,mergedFrom:i.length}),flow:l},c,b),w={...y,pageCount:d,mergedFrom:i.length,fileName:S};return yield*x.logInfo(`Successfully merged ${i.length} PDFs into one document with ${d} pages`),m({...C,metadata:w})})})})}function k(e,t){return x.gen(function*(){let n=yield*s;return yield*h({id:e,name:`OCR`,description:`Extract text from scanned documents using AI`,type:l.process,nodeTypeId:`ocr`,outputTypeId:u,keepOutput:t.keepOutput,inputSchema:y,outputSchema:_,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:r,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a},l=r.url;if(!l)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR operation`}).toEffect();yield*x.logInfo(`Starting OCR for file ${r.id} with task type: ${t.taskType}`);let u={clientId:s,credentialId:t.credentialId},d=yield*n.performOCR(l,{taskType:t.taskType,resolution:t.resolution,referenceText:t.referenceText},u).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to perform OCR`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR`}).toEffect()})));return yield*x.logInfo(`Successfully completed OCR for file ${r.id}, extracted ${d.extractedText.length} characters`),m({extractedText:d.extractedText,format:d.format,taskType:t.taskType,confidence:d.confidence,flow:c})})})})}function A(e,t){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Split PDF`,description:`Split PDF into pages or page ranges`,type:l.process,nodeTypeId:`split-pdf`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:i,flowId:a,jobId:s,clientId:c})=>x.gen(function*(){let l={flowId:a,nodeId:e,jobId:s};yield*x.logInfo(`Splitting PDF file ${i.id} in ${t.mode} 
mode`);let u=yield*r.read(i.id,c),d=yield*n.splitPdf(u,t).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to split PDF`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to split PDF`}).toEffect()}))),{metadata:h}=v(i.metadata);if(d.mode===`individual`){yield*x.logInfo(`Successfully split PDF into ${d.pdfs.length} individual pages`),yield*x.logWarning(`Individual page mode returns multiple files - flow engine support required`);let n=d.pdfs[0],o=new ReadableStream({start(e){e.enqueue(n),e.close()}}),u=`${g(h?.fileName||`document`)}-page-1.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(e=>`page-${e.pageNumber??1}`)};u=`${g(f(i,p(i,{flowId:a,jobId:s,nodeId:e,nodeType:`split-pdf`},{pageNumber:1}),n))}.pdf`}let _=yield*r.upload({storageId:i.storage.id,size:n.byteLength,type:`application/pdf`,fileName:u,lastModified:0,metadata:JSON.stringify({...h,pageCount:1,splitMode:`individual`}),flow:l},c,o),v={...h,pageCount:1,splitMode:`individual`};return m({..._,metadata:v})}let _=t.endPage&&t.startPage?t.endPage-t.startPage+1:1,y=d.pdf,b=new ReadableStream({start(e){e.enqueue(y),e.close()}}),S=`${g(h?.fileName||`document`)}-pages-${t.startPage}-${t.endPage}.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(
+
`),h=`text`,yield*x.logInfo(`Converted text to Markdown (${p.length} characters)`)}else{yield*x.logInfo(`Text extraction failed or returned empty, falling back to OCR`);let e=a.url;if(!e)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR-based markdown conversion`}).toEffect();let n={clientId:l,credentialId:t.credentialId};p=(yield*r.performOCR(e,{taskType:`convertToMarkdown`,resolution:t.resolution||`gundam`},n).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to perform OCR`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR for markdown conversion`}).toEffect()})))).extractedText,h=`ocr`,yield*x.logInfo(`Successfully converted scanned document to Markdown using OCR (${p.length} characters)`)}let{metadata:g}=v(a.metadata),_={...a.metadata,...g,markdown:p,markdownSource:h};return yield*x.logInfo(`Successfully converted file ${a.id} to Markdown via ${h}`),m({...a,metadata:_,flow:u})})})})}function T(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Describe Document`,description:`Extract metadata from PDF documents`,type:l.process,nodeTypeId:`describe-document`,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:t,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a};yield*x.logInfo(`Extracting metadata from PDF file ${t.id}`);let l=yield*r.read(t.id,s),u=yield*n.getMetadata(l).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to extract metadata`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract metadata`}).toEffect()}))),{metadata:d}=v(t.metadata),f={...t.metadata,...d,pageCount:u.pageCount,format:u.format,...u.author&&{author:u.author},...u.title&&{title:u.title},...u.subject&&{subject:u.subject},...u.creator&&{creator:u.creator},...u.creationDate&&{creationDate:u.creationDate},...u.modifiedDate&&{modifiedDate:u.modifiedDate},fileSize:u.fileSize};return yield*x.logInfo(`Successfully extracted metadata from file ${t.id}: ${u.pageCount} pages`),m({...t,metadata:f,flow:c})})})})}function E(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Extract Text`,description:`Extract text from searchable PDF documents`,type:l.process,nodeTypeId:`extract-text`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:t,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a};yield*x.logInfo(`Extracting text from PDF file ${t.id}`);let l=yield*r.read(t.id,s),u=yield*n.extractText(l).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to extract text`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to extract text`}).toEffect()}))),{metadata:d}=v(t.metadata),f={...t.metadata,...d,extractedText:u};return!u||u.trim().length===0?yield*x.logWarning(`No text extracted from file ${t.id}. This might be a scanned document. 
Consider using the OCR node instead.`):yield*x.logInfo(`Successfully extracted ${u.length} characters from file ${t.id}`),m({...t,metadata:f,flow:c})})})})}const D=C.array(y);function O(e,t={}){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Merge PDFs`,description:`Merge multiple PDF documents into one`,type:l.process,nodeTypeId:`merge-pdf`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:D,outputSchema:y,run:({data:i,flowId:a,jobId:s,clientId:c})=>x.gen(function*(){let l={flowId:a,nodeId:e,jobId:s};if(!Array.isArray(i))return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`Merge PDF node requires an array of files from a Merge utility node`}).toEffect();if(i.length===0)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`At least one PDF file is required for merging`}).toEffect();let u=[],d=0;for(let e of i){let t=yield*r.read(e.id,c);u.push(t);let n=v(e.metadata).metadata;n?.pageCount&&typeof n.pageCount==`number`&&(d+=n.pageCount)}yield*x.logInfo(`Merging ${i.length} PDF files`);let h=yield*n.mergePdfs({pdfs:u}).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to merge PDFs`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to merge PDFs`}).toEffect()}))),_=i[0],{metadata:y}=v(_.metadata),b=new ReadableStream({start(e){e.enqueue(h),e.close()}}),S=`merged-${i.length}-documents.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(()=>`merged`)};S=`${g(f(_,p(_,{flowId:a,jobId:s,nodeId:e,nodeType:`merge-pdf`},{mergedCount:i.length}),n))}.pdf`}let C=yield*r.upload({storageId:_.storage.id,size:h.byteLength,type:`application/pdf`,fileName:S,lastModified:0,metadata:JSON.stringify({...y,pageCount:d,mergedFrom:i.length}),flow:l},c,b),w={...y,pageCount:d,mergedFrom:i.length,fileName:S};return yield*x.logInfo(`Successfully merged ${i.length} PDFs into one document with ${d} pages`),m({...C,metadata:w})})})})}function k(e,t){return x.gen(function*(){let n=yield*s;return yield*h({id:e,name:`OCR`,description:`Extract text from scanned documents using AI`,type:l.process,nodeTypeId:`ocr`,outputTypeId:u,keepOutput:t.keepOutput,inputSchema:y,outputSchema:_,circuitBreaker:{enabled:!0,failureThreshold:5,resetTimeout:6e4,fallback:{type:`skip`,passThrough:!0}},run:({data:r,flowId:i,jobId:a,clientId:s})=>x.gen(function*(){let c={flowId:i,nodeId:e,jobId:a},l=r.url;if(!l)return yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:`URL is required for OCR operation`}).toEffect();yield*x.logInfo(`Starting OCR for file ${r.id} with task type: ${t.taskType}`);let u={clientId:s,credentialId:t.credentialId},d=yield*n.performOCR(l,{taskType:t.taskType,resolution:t.resolution,referenceText:t.referenceText},u).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to perform OCR`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to perform OCR`}).toEffect()})));return yield*x.logInfo(`Successfully completed OCR for file ${r.id}, extracted ${d.extractedText.length} characters`),m({extractedText:d.extractedText,format:d.format,taskType:t.taskType,confidence:d.confidence,flow:c})})})})}function A(e,t){return x.gen(function*(){let n=yield*c,r=yield*b;return yield*h({id:e,name:`Split PDF`,description:`Split PDF into pages or page ranges`,type:l.process,nodeTypeId:`split-pdf`,outputTypeId:d,keepOutput:t.keepOutput,inputSchema:y,outputSchema:y,run:({data:i,flowId:a,jobId:s,clientId:c})=>x.gen(function*(){let l={flowId:a,nodeId:e,jobId:s};yield*x.logInfo(`Splitting PDF file ${i.id} in ${t.mode} 
mode`);let u=yield*r.read(i.id,c),d=yield*n.splitPdf(u,t).pipe(x.catchAll(e=>x.gen(function*(){return yield*x.logError(`Failed to split PDF`,e),yield*o.fromCode(`FLOW_NODE_ERROR`,{cause:e instanceof Error?e.message:`Failed to split PDF`}).toEffect()}))),{metadata:h}=v(i.metadata);if(d.mode===`individual`){yield*x.logInfo(`Successfully split PDF into ${d.pdfs.length} individual pages`),yield*x.logWarning(`Individual page mode returns multiple files - flow engine support required`);let n=d.pdfs[0],o=new ReadableStream({start(e){e.enqueue(n),e.close()}}),u=`${g(h?.fileName||`document`)}-page-1.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(e=>`page-${e.pageNumber??1}`)};u=`${g(f(i,p(i,{flowId:a,jobId:s,nodeId:e,nodeType:`split-pdf`},{pageNumber:1}),n))}.pdf`}let _=yield*r.upload({storageId:i.storage.id,size:n.byteLength,type:`application/pdf`,fileName:u,lastModified:0,metadata:JSON.stringify({...h,pageCount:1,splitMode:`individual`}),flow:l},c,o),v={...h,pageCount:1,splitMode:`individual`};return m({..._,metadata:v})}let _=t.endPage&&t.startPage?t.endPage-t.startPage+1:1,y=d.pdf,b=new ReadableStream({start(e){e.enqueue(y),e.close()}}),S=`${g(h?.fileName||`document`)}-pages-${t.startPage}-${t.endPage}.pdf`;if(t.naming){let n={...t.naming,autoSuffix:t.naming.autoSuffix??(()=>`pages-${t.startPage}-${t.endPage}`)};S=`${g(f(i,p(i,{flowId:a,jobId:s,nodeId:e,nodeType:`split-pdf`},{startPage:t.startPage,endPage:t.endPage}),n))}.pdf`}let C=yield*r.upload({storageId:i.storage.id,size:y.byteLength,type:`application/pdf`,fileName:S,lastModified:0,metadata:JSON.stringify({...h,pageCount:_,splitMode:`range`,splitRange:`${t.startPage}-${t.endPage}`}),flow:l},c,b),w={...h,pageCount:_,splitMode:`range`,splitRange:`${t.startPage}-${t.endPage}`};return yield*x.logInfo(`Successfully split PDF to pages ${t.startPage}-${t.endPage}`),m({...C,metadata:w})})})})}export{r as convertToMarkdownParamsSchema,w as createConvertToMarkdownNode,T as createDescribeDocumentNode,E as createExtractTextNode,O as createMergePdfNode,k as createOcrNode,A as createSplitPdfNode,e as describeDocumentParamsSchema,t as extractTextParamsSchema,i as mergePdfParamsSchema,n as ocrParamsSchema,a as splitPdfParamsSchema};
//# sourceMappingURL=index.mjs.map
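The substantive change visible in this bundle appears to be a rename of the upload service dependency: the sources embedded in the map below switch from UploadServer to UploadEngine (both imported from "@uploadista/core/upload"), while the exported node factories keep the same names and signatures. As a rough consumer-side sketch only — the node id and page values are made up, and it assumes the package entry point re-exports the factories listed above — one of the factories is resolved inside an Effect generator like this:

    import { Effect } from "effect";
    import { createSplitPdfNode } from "@uploadista/flow-documents-nodes";

    // Builds a flow node that extracts pages 1-3 as a single range PDF.
    // The factory is itself an Effect; it resolves DocumentPlugin and the
    // renamed UploadEngine service from the surrounding environment.
    const makeSplitNode = Effect.gen(function* () {
      return yield* createSplitPdfNode("split-pdf-1", {
        mode: "range",
        startPage: 1,
        endPage: 3,
      });
    });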
package/dist/index.mjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"index.mjs","names":["markdown: string","markdownSource: \"text\" | \"ocr\"","pdfBuffers: Uint8Array[]","namingConfig: FileNamingConfig","pdfBytes","stream","namingConfig: FileNamingConfig","uploadResult","newMetadata"],"sources":["../src/convert-to-markdown-node.ts","../src/describe-document-node.ts","../src/extract-text-node.ts","../src/merge-pdf-node.ts","../src/ocr-node.ts","../src/split-pdf-node.ts"],"sourcesContent":["import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentAiPlugin,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadServer } from \"@uploadista/core/upload\";\nimport { Effect, Either } from \"effect\";\n\nexport type ConvertToMarkdownNodeParams = {\n credentialId?: string;\n resolution?: \"tiny\" | \"small\" | \"base\" | \"gundam\" | \"large\";\n keepOutput?: boolean;\n};\n\nexport function createConvertToMarkdownNode(\n id: string,\n params: ConvertToMarkdownNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const documentAiService = yield* DocumentAiPlugin;\n const uploadServer = yield* UploadServer;\n\n return yield* createFlowNode({\n id,\n name: \"Convert to Markdown\",\n description:\n \"Convert documents to Markdown format (intelligently uses OCR if needed)\",\n type: NodeType.process,\n nodeTypeId: \"convert-to-markdown\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n // AI service (OCR) - enable circuit breaker with skip fallback\n circuitBreaker: {\n enabled: true,\n failureThreshold: 5,\n resetTimeout: 60000,\n fallback: { type: \"skip\", passThrough: true },\n },\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(`Converting file ${file.id} to Markdown`);\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadServer.read(file.id, clientId);\n\n // Try to extract text first (for searchable PDFs)\n const extractResult = yield* documentService\n .extractText(fileBytes)\n .pipe(Effect.either);\n\n let markdown: string;\n let markdownSource: \"text\" | \"ocr\";\n\n if (\n Either.isRight(extractResult) &&\n extractResult.right.trim().length > 0\n ) {\n // Successfully extracted text from searchable PDF\n const text = extractResult.right;\n\n yield* Effect.logInfo(\n `Successfully extracted ${text.length} characters from searchable PDF`,\n );\n\n // Simple text-to-markdown conversion\n // In a real implementation, this could be more sophisticated\n markdown = text\n .split(\"\\n\\n\")\n .map((para: string) => para.trim())\n .filter((para: string) => para.length > 0)\n .join(\"\\n\\n\");\n\n markdownSource = \"text\";\n\n yield* Effect.logInfo(\n `Converted text to Markdown (${markdown.length} characters)`,\n );\n } else {\n // Text extraction failed or returned empty - use OCR\n yield* Effect.logInfo(\n \"Text extraction failed or returned empty, falling back to OCR\",\n );\n\n const fileUrl = file.url;\n\n if (!fileUrl) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"URL is required for OCR-based markdown conversion\",\n }).toEffect();\n }\n\n // Build context for DocumentAI plugin\n const context = {\n clientId,\n 
credentialId: params.credentialId,\n };\n\n // Perform OCR with markdown conversion\n const ocrResult = yield* documentAiService\n .performOCR(\n fileUrl,\n {\n taskType: \"convertToMarkdown\",\n resolution: params.resolution || \"gundam\",\n },\n context,\n )\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to perform OCR\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to perform OCR for markdown conversion\",\n }).toEffect();\n }),\n ),\n );\n\n markdown = ocrResult.extractedText;\n markdownSource = \"ocr\";\n\n yield* Effect.logInfo(\n `Successfully converted scanned document to Markdown using OCR (${markdown.length} characters)`,\n );\n }\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add markdown to metadata\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n markdown,\n markdownSource,\n };\n\n yield* Effect.logInfo(\n `Successfully converted file ${file.id} to Markdown via ${markdownSource}`,\n );\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadServer } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type DescribeDocumentNodeParams = {\n keepOutput?: boolean;\n};\n\nexport function createDescribeDocumentNode(\n id: string,\n params: DescribeDocumentNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadServer = yield* UploadServer;\n\n return yield* createFlowNode({\n id,\n name: \"Describe Document\",\n description: \"Extract metadata from PDF documents\",\n type: NodeType.process,\n nodeTypeId: \"describe-document\",\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(\n `Extracting metadata from PDF file ${file.id}`,\n );\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadServer.read(file.id, clientId);\n\n // Get metadata with error handling\n const documentMetadata = yield* documentService\n .getMetadata(fileBytes)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to extract metadata\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to extract metadata\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add document metadata to file metadata (filter out null values)\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n pageCount: documentMetadata.pageCount,\n format: documentMetadata.format,\n ...(documentMetadata.author && { author: documentMetadata.author }),\n ...(documentMetadata.title && { title: documentMetadata.title }),\n ...(documentMetadata.subject && { subject: documentMetadata.subject }),\n ...(documentMetadata.creator && { creator: documentMetadata.creator }),\n ...(documentMetadata.creationDate && { creationDate: documentMetadata.creationDate }),\n ...(documentMetadata.modifiedDate && { modifiedDate: documentMetadata.modifiedDate }),\n fileSize: documentMetadata.fileSize,\n };\n\n yield* Effect.logInfo(\n `Successfully extracted metadata from file ${file.id}: ${documentMetadata.pageCount} pages`,\n );\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadServer } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type ExtractTextNodeParams = {\n keepOutput?: boolean;\n};\n\nexport function createExtractTextNode(\n id: string,\n params: ExtractTextNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadServer = yield* UploadServer;\n\n return yield* createFlowNode({\n id,\n name: \"Extract Text\",\n description: \"Extract text from searchable PDF documents\",\n type: NodeType.process,\n nodeTypeId: \"extract-text\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(`Extracting text from PDF file ${file.id}`);\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadServer.read(file.id, clientId);\n\n // Extract text with error handling\n const extractedText = yield* documentService\n .extractText(fileBytes)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to extract text\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to extract text\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add extracted text to metadata\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n extractedText,\n };\n\n if (!extractedText || extractedText.trim().length === 0) {\n yield* Effect.logWarning(\n `No text extracted from file ${file.id}. This might be a scanned document. 
Consider using the OCR node instead.`,\n );\n } else {\n yield* Effect.logInfo(\n `Successfully extracted ${extractedText.length} characters from file ${file.id}`,\n );\n }\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n applyFileNaming,\n buildNamingContext,\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n type FileNamingConfig,\n getBaseName,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadServer } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\nimport { z } from \"zod\";\n\nexport type MergePdfNodeParams = {\n inputCount?: number;\n keepOutput?: boolean;\n /**\n * Optional file naming configuration.\n * Auto suffix: `merged`\n */\n naming?: FileNamingConfig;\n};\n\n// Schema for multiple file inputs\nconst multipleFilesSchema = z.array(uploadFileSchema);\n\nexport function createMergePdfNode(\n id: string,\n params: MergePdfNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadServer = yield* UploadServer;\n\n return yield* createFlowNode({\n id,\n name: \"Merge PDFs\",\n description: \"Merge multiple PDF documents into one\",\n type: NodeType.process,\n nodeTypeId: \"merge-pdf\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: multipleFilesSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: files, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n // Validate that we have an array of files\n if (!Array.isArray(files)) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n \"Merge PDF node requires an array of files from a Merge utility node\",\n }).toEffect();\n }\n\n if (files.length === 0) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"At least one PDF file is required for merging\",\n }).toEffect();\n }\n\n // Read buffers from all files\n const pdfBuffers: Uint8Array[] = [];\n let totalPages = 0;\n\n for (const file of files) {\n // Read file bytes from upload server\n const fileBytes = yield* uploadServer.read(file.id, clientId);\n pdfBuffers.push(fileBytes);\n\n // Sum up page counts if available\n const fileMetadata = resolveUploadMetadata(file.metadata).metadata;\n if (\n fileMetadata?.pageCount &&\n typeof fileMetadata.pageCount === \"number\"\n ) {\n totalPages += fileMetadata.pageCount;\n }\n }\n\n yield* Effect.logInfo(`Merging ${files.length} PDF files`);\n\n // Merge PDFs with error handling\n const mergedPdf = yield* documentService\n .mergePdfs({ pdfs: pdfBuffers })\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to merge PDFs\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to merge PDFs\",\n }).toEffect();\n }),\n ),\n );\n\n // Use metadata from first file as base\n const firstFile = files[0];\n const { metadata } = resolveUploadMetadata(firstFile.metadata);\n\n // Create a stream from the merged PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(mergedPdf);\n controller.close();\n },\n });\n\n // Generate output filename\n let outputFileName = `merged-${files.length}-documents.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix: params.naming.autoSuffix ?? (() => \"merged\"),\n };\n const namingContext = buildNamingContext(\n firstFile,\n { flowId, jobId, nodeId: id, nodeType: \"merge-pdf\" },\n { mergedCount: files.length },\n );\n const namedFile = applyFileNaming(firstFile, namingContext, namingConfig);\n outputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the merged PDF back to the upload server\n const result = yield* uploadServer.upload(\n {\n storageId: firstFile.storage.id,\n size: mergedPdf.byteLength,\n type: \"application/pdf\",\n fileName: outputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount: totalPages,\n mergedFrom: files.length,\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount: totalPages,\n mergedFrom: files.length,\n fileName: outputFileName,\n };\n\n yield* Effect.logInfo(\n `Successfully merged ${files.length} PDFs into one document with ${totalPages} pages`,\n );\n\n return completeNodeExecution({\n ...result,\n metadata: newMetadata,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentAiPlugin,\n NodeType,\n OCR_OUTPUT_TYPE_ID,\n type OcrResolution,\n type OcrTaskType,\n ocrOutputSchema,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { Effect } from \"effect\";\n\nexport type OcrNodeParams = {\n taskType: OcrTaskType;\n resolution?: OcrResolution;\n credentialId?: string;\n referenceText?: string;\n keepOutput?: boolean;\n};\n\nexport function createOcrNode(id: string, params: OcrNodeParams) {\n return Effect.gen(function* () {\n const documentAiService = yield* DocumentAiPlugin;\n\n return yield* createFlowNode({\n id,\n name: \"OCR\",\n description: \"Extract text from scanned documents using AI\",\n type: NodeType.process,\n nodeTypeId: \"ocr\",\n outputTypeId: OCR_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: ocrOutputSchema,\n // AI service - enable circuit breaker with skip fallback\n circuitBreaker: {\n enabled: true,\n failureThreshold: 5,\n resetTimeout: 60000,\n fallback: { type: \"skip\", passThrough: true },\n },\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n const fileUrl = file.url;\n\n // Validate input\n if (!fileUrl) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"URL is required for OCR operation\",\n }).toEffect();\n }\n\n yield* Effect.logInfo(\n `Starting OCR for file ${file.id} with task type: ${params.taskType}`,\n );\n\n // Build context for DocumentAI plugin\n const context = {\n clientId,\n credentialId: params.credentialId,\n };\n\n // Perform OCR with error handling\n const ocrResult = yield* documentAiService\n .performOCR(\n fileUrl,\n {\n 
taskType: params.taskType,\n resolution: params.resolution,\n referenceText: params.referenceText,\n },\n context,\n )\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to perform OCR\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to perform OCR\",\n }).toEffect();\n }),\n ),\n );\n\n yield* Effect.logInfo(\n `Successfully completed OCR for file ${file.id}, extracted ${ocrResult.extractedText.length} characters`,\n );\n\n // Return structured OCR output (not UploadFile)\n return completeNodeExecution({\n extractedText: ocrResult.extractedText,\n format: ocrResult.format,\n taskType: params.taskType,\n confidence: ocrResult.confidence,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n applyFileNaming,\n buildNamingContext,\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n type FileNamingConfig,\n getBaseName,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadServer } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type SplitPdfNodeParams = {\n mode: \"range\" | \"individual\";\n startPage?: number;\n endPage?: number;\n keepOutput?: boolean;\n /**\n * Optional file naming configuration.\n * Auto suffix: `page-${pageNumber}` for individual mode, `pages-${start}-${end}` for range mode\n */\n naming?: FileNamingConfig;\n};\n\nexport function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadServer = yield* UploadServer;\n\n return yield* createFlowNode({\n id,\n name: \"Split PDF\",\n description: \"Split PDF into pages or page ranges\",\n type: NodeType.process,\n nodeTypeId: \"split-pdf\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(\n `Splitting PDF file ${file.id} in ${params.mode} mode`,\n );\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadServer.read(file.id, clientId);\n\n // Split PDF with error handling\n const result = yield* documentService\n .splitPdf(fileBytes, params)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to split PDF\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to split PDF\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n if (result.mode === \"individual\") {\n // Return array of files (one per page)\n yield* Effect.logInfo(\n `Successfully split PDF into ${result.pdfs.length} individual pages`,\n );\n\n // For individual mode, we'd need to return multiple files\n // This requires special handling in the flow engine\n // For now, we'll return the first page and log a warning\n yield* Effect.logWarning(\n \"Individual page mode returns multiple files - flow engine support required\",\n );\n\n const pdfBytes = result.pdfs[0];\n\n // Create a stream from the PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(pdfBytes);\n controller.close();\n },\n });\n\n // Generate output filename\n let outputFileName = `${getBaseName(metadata?.fileName as string || \"document\")}-page-1.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix: params.naming.autoSuffix ?? ((ctx) => `page-${ctx.pageNumber ?? 1}`),\n };\n const namingContext = buildNamingContext(\n file,\n { flowId, jobId, nodeId: id, nodeType: \"split-pdf\" },\n { pageNumber: 1 },\n );\n const namedFile = applyFileNaming(file, namingContext, namingConfig);\n outputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the split PDF back to the upload server\n const uploadResult = yield* uploadServer.upload(\n {\n storageId: file.storage.id,\n size: pdfBytes.byteLength,\n type: \"application/pdf\",\n fileName: outputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount: 1,\n splitMode: \"individual\",\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount: 1,\n splitMode: \"individual\",\n };\n\n return completeNodeExecution({\n ...uploadResult,\n metadata: newMetadata,\n });\n }\n\n // Range mode - return single PDF with selected pages\n const pageCount =\n params.endPage && params.startPage\n ? params.endPage - params.startPage + 1\n : 1;\n\n const pdfBytes = result.pdf;\n\n // Create a stream from the PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(pdfBytes);\n controller.close();\n },\n });\n\n // Generate output filename for range mode\n let rangeOutputFileName = `${getBaseName(metadata?.fileName as string || \"document\")}-pages-${params.startPage}-${params.endPage}.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix: params.naming.autoSuffix ?? 
((ctx) => `pages-${params.startPage}-${params.endPage}`),\n };\n const namingContext = buildNamingContext(\n file,\n { flowId, jobId, nodeId: id, nodeType: \"split-pdf\" },\n { startPage: params.startPage, endPage: params.endPage },\n );\n const namedFile = applyFileNaming(file, namingContext, namingConfig);\n rangeOutputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the split PDF back to the upload server\n const uploadResult = yield* uploadServer.upload(\n {\n storageId: file.storage.id,\n size: pdfBytes.byteLength,\n type: \"application/pdf\",\n fileName: rangeOutputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount,\n splitMode: \"range\",\n splitRange: `${params.startPage}-${params.endPage}`,\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount,\n splitMode: \"range\",\n splitRange: `${params.startPage}-${params.endPage}`,\n };\n\n yield* Effect.logInfo(\n `Successfully split PDF to pages ${params.startPage}-${params.endPage}`,\n );\n\n return completeNodeExecution({\n ...uploadResult,\n metadata: newMetadata,\n });\n });\n },\n });\n });\n}\n"],"mappings":"kmBAoBA,SAAgB,EACd,EACA,EAAsC,EAAE,CACxC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAoB,MAAO,EAC3B,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,sBACN,YACE,0EACF,KAAM,EAAS,QACf,WAAY,sBACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EAEd,eAAgB,CACd,QAAS,GACT,iBAAkB,EAClB,aAAc,IACd,SAAU,CAAE,KAAM,OAAQ,YAAa,GAAM,CAC9C,CACD,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QAAQ,mBAAmB,EAAK,GAAG,cAAc,CAG/D,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAgB,MAAO,EAC1B,YAAY,EAAU,CACtB,KAAK,EAAO,OAAO,CAElBA,EACAC,EAEJ,GACE,EAAO,QAAQ,EAAc,EAC7B,EAAc,MAAM,MAAM,CAAC,OAAS,EACpC,CAEA,IAAM,EAAO,EAAc,MAE3B,MAAO,EAAO,QACZ,0BAA0B,EAAK,OAAO,iCACvC,CAID,EAAW,EACR,MAAM;;EAAO,CACb,IAAK,GAAiB,EAAK,MAAM,CAAC,CAClC,OAAQ,GAAiB,EAAK,OAAS,EAAE,CACzC,KAAK;;EAAO,CAEf,EAAiB,OAEjB,MAAO,EAAO,QACZ,+BAA+B,EAAS,OAAO,cAChD,KACI,CAEL,MAAO,EAAO,QACZ,gEACD,CAED,IAAM,EAAU,EAAK,IAErB,GAAI,CAAC,EACH,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,oDACR,CAAC,CAAC,UAAU,CAIf,IAAM,EAAU,CACd,WACA,aAAc,EAAO,aACtB,CA0BD,GAvBkB,MAAO,EACtB,WACC,EACA,CACE,SAAU,oBACV,WAAY,EAAO,YAAc,SAClC,CACD,EACD,CACA,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,wBAAyB,EAAM,CAC/C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,gDACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,EAEkB,cACrB,EAAiB,MAEjB,MAAO,EAAO,QACZ,kEAAkE,EAAS,OAAO,cACnF,CAGH,GAAM,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,WACA,iBACD,CAMD,OAJA,MAAO,EAAO,QACZ,+BAA+B,EAAK,GAAG,mBAAmB,IAC3D,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CCtJJ,SAAgB,EACd,EACA,EAAqC,EAAE,CACvC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,oBACN,YAAa,sCACb,KAAM,EAAS,QACf,WAAY,oBACZ,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QACZ,qCAAqC,EAAK,KAC3C,CAGD,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAmB,MAAO,EAC7B,YAAY,EAAU,CACtB,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,6BAA8B,EAAM,CACpD,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,6BACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,UAAW,EAAiB,UAC5B,OAAQ,EAAiB,OACzB,GAAI,EAAiB,QAAU,CAAE,OAAQ,EAAiB,OAAQ,CAClE,GAAI,EAAiB,OAAS,CAAE,MAAO,EAAiB,MAAO,CAC/D,GAAI,EAAiB,SAAW,C
AAE,QAAS,EAAiB,QAAS,CACrE,GAAI,EAAiB,SAAW,CAAE,QAAS,EAAiB,QAAS,CACrE,GAAI,EAAiB,cAAgB,CAAE,aAAc,EAAiB,aAAc,CACpF,GAAI,EAAiB,cAAgB,CAAE,aAAc,EAAiB,aAAc,CACpF,SAAU,EAAiB,SAC5B,CAMD,OAJA,MAAO,EAAO,QACZ,6CAA6C,EAAK,GAAG,IAAI,EAAiB,UAAU,QACrF,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CC7EJ,SAAgB,EACd,EACA,EAAgC,EAAE,CAClC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,eACN,YAAa,6CACb,KAAM,EAAS,QACf,WAAY,eACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QAAQ,iCAAiC,EAAK,KAAK,CAGjE,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAgB,MAAO,EAC1B,YAAY,EAAU,CACtB,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,yBAA0B,EAAM,CAChD,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,yBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,gBACD,CAYD,MAVI,CAAC,GAAiB,EAAc,MAAM,CAAC,SAAW,EACpD,MAAO,EAAO,WACZ,+BAA+B,EAAK,GAAG,0EACxC,CAED,MAAO,EAAO,QACZ,0BAA0B,EAAc,OAAO,wBAAwB,EAAK,KAC7E,CAGI,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CC/DJ,MAAM,EAAsB,EAAE,MAAM,EAAiB,CAErD,SAAgB,EACd,EACA,EAA6B,EAAE,CAC/B,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,aACN,YAAa,wCACb,KAAM,EAAS,QACf,WAAY,YACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAO,SAAQ,QAAO,cAC3B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAGD,GAAI,CAAC,MAAM,QAAQ,EAAM,CACvB,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,sEACH,CAAC,CAAC,UAAU,CAGf,GAAI,EAAM,SAAW,EACnB,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,gDACR,CAAC,CAAC,UAAU,CAIf,IAAMC,EAA2B,EAAE,CAC/B,EAAa,EAEjB,IAAK,IAAM,KAAQ,EAAO,CAExB,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAC7D,EAAW,KAAK,EAAU,CAG1B,IAAM,EAAe,EAAsB,EAAK,SAAS,CAAC,SAExD,GAAc,WACd,OAAO,EAAa,WAAc,WAElC,GAAc,EAAa,WAI/B,MAAO,EAAO,QAAQ,WAAW,EAAM,OAAO,YAAY,CAG1D,IAAM,EAAY,MAAO,EACtB,UAAU,CAAE,KAAM,EAAY,CAAC,CAC/B,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,uBAAwB,EAAM,CAC9C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,uBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAGG,EAAY,EAAM,GAClB,CAAE,YAAa,EAAsB,EAAU,SAAS,CAGxD,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQ,EAAU,CAC7B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAiB,UAAU,EAAM,OAAO,gBAC5C,GAAI,EAAO,OAAQ,CACjB,IAAMC,EAAiC,CACrC,GAAG,EAAO,OACV,WAAY,EAAO,OAAO,iBAAqB,UAChD,CAOD,EAAiB,GAAG,EADF,EAAgB,EALZ,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,YAAa,EAAM,OAAQ,CAC9B,CAC2D,EAAa,CAC/B,CAAC,MAI7C,IAAM,EAAS,MAAO,EAAa,OACjC,CACE,UAAW,EAAU,QAAQ,GAC7B,KAAM,EAAU,WAChB,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,UAAW,EACX,WAAY,EAAM,OACnB,CAAC,CACF,OACD,CACD,EACA,EACD,CAEK,EAAc,CAClB,GAAG,EACH,UAAW,EACX,WAAY,EAAM,OAClB,SAAU,EACX,CAMD,OAJA,MAAO,EAAO,QACZ,uBAAuB,EAAM,OAAO,+BAA+B,EAAW,QAC/E,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACX,CAAC,EACF,CAEL,CAAC,EACF,CCxJJ,SAAgB,EAAc,EAAY,EAAuB,CAC/D,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAoB,MAAO,EAEjC,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,MACN,YAAa,+CACb,KAAM,EAAS,QACf,WAAY,MACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EAEd,eAAgB,CACd,QAAS,GACT,iBAAkB,EAClB,aAAc,IACd,SAAU,CAAE,KAAM,OAAQ,YAAa,GAAM,CAC9C,CACD,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAEK,EAAU,EAAK,IAGrB,GAAI,CAAC,EACH,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,oCACR,CAAC,CAAC,UAAU,CAGf,MAAO,EAAO,QACZ,yBAAyB,EAAK,GAAG,mBAAmB,EAAO,WAC5D,CAGD,IAAM,EAAU,CACd,WACA,aAAc,EAAO,aACtB,CAGK,EAAY,MAAO,EA
CtB,WACC,EACA,CACE,SAAU,EAAO,SACjB,WAAY,EAAO,WACnB,cAAe,EAAO,cACvB,CACD,EACD,CACA,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,wBAAyB,EAAM,CAC/C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,wBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAOH,OALA,MAAO,EAAO,QACZ,uCAAuC,EAAK,GAAG,cAAc,EAAU,cAAc,OAAO,aAC7F,CAGM,EAAsB,CAC3B,cAAe,EAAU,cACzB,OAAQ,EAAU,OAClB,SAAU,EAAO,SACjB,WAAY,EAAU,WACtB,OACD,CAAC,EACF,CAEL,CAAC,EACF,CCjFJ,SAAgB,EAAmB,EAAY,EAA4B,CACzE,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,YACN,YAAa,sCACb,KAAM,EAAS,QACf,WAAY,YACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QACZ,sBAAsB,EAAK,GAAG,MAAM,EAAO,KAAK,OACjD,CAGD,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAS,MAAO,EACnB,SAAS,EAAW,EAAO,CAC3B,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,sBAAuB,EAAM,CAC7C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,sBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAEzD,GAAI,EAAO,OAAS,aAAc,CAEhC,MAAO,EAAO,QACZ,+BAA+B,EAAO,KAAK,OAAO,mBACnD,CAKD,MAAO,EAAO,WACZ,6EACD,CAED,IAAMC,EAAW,EAAO,KAAK,GAGvBC,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQD,EAAS,CAC5B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAiB,GAAG,EAAY,GAAU,UAAsB,WAAW,CAAC,aAChF,GAAI,EAAO,OAAQ,CACjB,IAAME,EAAiC,CACrC,GAAG,EAAO,OACV,WAAY,EAAO,OAAO,aAAgB,GAAQ,QAAQ,EAAI,YAAc,KAC7E,CAOD,EAAiB,GAAG,EADF,EAAgB,EALZ,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,WAAY,EAAG,CAClB,CACsD,EAAa,CAC1B,CAAC,MAI7C,IAAMC,EAAe,MAAO,EAAa,OACvC,CACE,UAAW,EAAK,QAAQ,GACxB,KAAMH,EAAS,WACf,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,UAAW,EACX,UAAW,aACZ,CAAC,CACF,OACD,CACD,EACAC,EACD,CAEKG,EAAc,CAClB,GAAG,EACH,UAAW,EACX,UAAW,aACZ,CAED,OAAO,EAAsB,CAC3B,GAAGD,EACH,SAAUC,EACX,CAAC,CAIJ,IAAM,EACJ,EAAO,SAAW,EAAO,UACrB,EAAO,QAAU,EAAO,UAAY,EACpC,EAEA,EAAW,EAAO,IAGlB,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQ,EAAS,CAC5B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAsB,GAAG,EAAY,GAAU,UAAsB,WAAW,CAAC,SAAS,EAAO,UAAU,GAAG,EAAO,QAAQ,MACjI,GAAI,EAAO,OAAQ,CACjB,IAAMF,EAAiC,CACrC,GAAG,EAAO,OACV,WAAY,EAAO,OAAO,aAAgB,GAAQ,SAAS,EAAO,UAAU,GAAG,EAAO,WACvF,CAOD,EAAsB,GAAG,EADP,EAAgB,EALZ,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,UAAW,EAAO,UAAW,QAAS,EAAO,QAAS,CACzD,CACsD,EAAa,CACrB,CAAC,MAIlD,IAAM,EAAe,MAAO,EAAa,OACvC,CACE,UAAW,EAAK,QAAQ,GACxB,KAAM,EAAS,WACf,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,YACA,UAAW,QACX,WAAY,GAAG,EAAO,UAAU,GAAG,EAAO,UAC3C,CAAC,CACF,OACD,CACD,EACA,EACD,CAEK,EAAc,CAClB,GAAG,EACH,YACA,UAAW,QACX,WAAY,GAAG,EAAO,UAAU,GAAG,EAAO,UAC3C,CAMD,OAJA,MAAO,EAAO,QACZ,mCAAmC,EAAO,UAAU,GAAG,EAAO,UAC/D,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACX,CAAC,EACF,CAEL,CAAC,EACF"}
+
{"version":3,"file":"index.mjs","names":["markdown: string","markdownSource: \"text\" | \"ocr\"","pdfBuffers: Uint8Array[]","namingConfig: FileNamingConfig","pdfBytes","stream","namingConfig: FileNamingConfig","uploadResult","newMetadata"],"sources":["../src/convert-to-markdown-node.ts","../src/describe-document-node.ts","../src/extract-text-node.ts","../src/merge-pdf-node.ts","../src/ocr-node.ts","../src/split-pdf-node.ts"],"sourcesContent":["import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentAiPlugin,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadEngine } from \"@uploadista/core/upload\";\nimport { Effect, Either } from \"effect\";\n\nexport type ConvertToMarkdownNodeParams = {\n credentialId?: string;\n resolution?: \"tiny\" | \"small\" | \"base\" | \"gundam\" | \"large\";\n keepOutput?: boolean;\n};\n\nexport function createConvertToMarkdownNode(\n id: string,\n params: ConvertToMarkdownNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const documentAiService = yield* DocumentAiPlugin;\n const uploadEngine = yield* UploadEngine;\n\n return yield* createFlowNode({\n id,\n name: \"Convert to Markdown\",\n description:\n \"Convert documents to Markdown format (intelligently uses OCR if needed)\",\n type: NodeType.process,\n nodeTypeId: \"convert-to-markdown\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n // AI service (OCR) - enable circuit breaker with skip fallback\n circuitBreaker: {\n enabled: true,\n failureThreshold: 5,\n resetTimeout: 60000,\n fallback: { type: \"skip\", passThrough: true },\n },\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(`Converting file ${file.id} to Markdown`);\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadEngine.read(file.id, clientId);\n\n // Try to extract text first (for searchable PDFs)\n const extractResult = yield* documentService\n .extractText(fileBytes)\n .pipe(Effect.either);\n\n let markdown: string;\n let markdownSource: \"text\" | \"ocr\";\n\n if (\n Either.isRight(extractResult) &&\n extractResult.right.trim().length > 0\n ) {\n // Successfully extracted text from searchable PDF\n const text = extractResult.right;\n\n yield* Effect.logInfo(\n `Successfully extracted ${text.length} characters from searchable PDF`,\n );\n\n // Simple text-to-markdown conversion\n // In a real implementation, this could be more sophisticated\n markdown = text\n .split(\"\\n\\n\")\n .map((para: string) => para.trim())\n .filter((para: string) => para.length > 0)\n .join(\"\\n\\n\");\n\n markdownSource = \"text\";\n\n yield* Effect.logInfo(\n `Converted text to Markdown (${markdown.length} characters)`,\n );\n } else {\n // Text extraction failed or returned empty - use OCR\n yield* Effect.logInfo(\n \"Text extraction failed or returned empty, falling back to OCR\",\n );\n\n const fileUrl = file.url;\n\n if (!fileUrl) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"URL is required for OCR-based markdown conversion\",\n }).toEffect();\n }\n\n // Build context for DocumentAI plugin\n const context = {\n clientId,\n 
credentialId: params.credentialId,\n };\n\n // Perform OCR with markdown conversion\n const ocrResult = yield* documentAiService\n .performOCR(\n fileUrl,\n {\n taskType: \"convertToMarkdown\",\n resolution: params.resolution || \"gundam\",\n },\n context,\n )\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to perform OCR\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to perform OCR for markdown conversion\",\n }).toEffect();\n }),\n ),\n );\n\n markdown = ocrResult.extractedText;\n markdownSource = \"ocr\";\n\n yield* Effect.logInfo(\n `Successfully converted scanned document to Markdown using OCR (${markdown.length} characters)`,\n );\n }\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add markdown to metadata\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n markdown,\n markdownSource,\n };\n\n yield* Effect.logInfo(\n `Successfully converted file ${file.id} to Markdown via ${markdownSource}`,\n );\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadEngine } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type DescribeDocumentNodeParams = {\n keepOutput?: boolean;\n};\n\nexport function createDescribeDocumentNode(\n id: string,\n params: DescribeDocumentNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadEngine = yield* UploadEngine;\n\n return yield* createFlowNode({\n id,\n name: \"Describe Document\",\n description: \"Extract metadata from PDF documents\",\n type: NodeType.process,\n nodeTypeId: \"describe-document\",\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(`Extracting metadata from PDF file ${file.id}`);\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadEngine.read(file.id, clientId);\n\n // Get metadata with error handling\n const documentMetadata = yield* documentService\n .getMetadata(fileBytes)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to extract metadata\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to extract metadata\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add document metadata to file metadata (filter out null values)\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n pageCount: documentMetadata.pageCount,\n format: documentMetadata.format,\n ...(documentMetadata.author && { author: documentMetadata.author }),\n ...(documentMetadata.title && { title: documentMetadata.title }),\n ...(documentMetadata.subject && {\n subject: documentMetadata.subject,\n }),\n ...(documentMetadata.creator && {\n creator: documentMetadata.creator,\n }),\n ...(documentMetadata.creationDate && {\n creationDate: documentMetadata.creationDate,\n }),\n ...(documentMetadata.modifiedDate && {\n modifiedDate: documentMetadata.modifiedDate,\n }),\n fileSize: documentMetadata.fileSize,\n };\n\n yield* Effect.logInfo(\n `Successfully extracted metadata from file ${file.id}: ${documentMetadata.pageCount} pages`,\n );\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadEngine } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type ExtractTextNodeParams = {\n keepOutput?: boolean;\n};\n\nexport function createExtractTextNode(\n id: string,\n params: ExtractTextNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadEngine = yield* UploadEngine;\n\n return yield* createFlowNode({\n id,\n name: \"Extract Text\",\n description: \"Extract text from searchable PDF documents\",\n type: NodeType.process,\n nodeTypeId: \"extract-text\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(`Extracting text from PDF file ${file.id}`);\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadEngine.read(file.id, clientId);\n\n // Extract text with error handling\n const extractedText = yield* documentService\n .extractText(fileBytes)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to extract text\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to extract text\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n // Add extracted text to metadata\n const newMetadata = {\n ...file.metadata,\n ...metadata,\n extractedText,\n };\n\n if (!extractedText || extractedText.trim().length === 0) {\n yield* Effect.logWarning(\n `No text extracted from file ${file.id}. This might be a scanned document. 
Consider using the OCR node instead.`,\n );\n } else {\n yield* Effect.logInfo(\n `Successfully extracted ${extractedText.length} characters from file ${file.id}`,\n );\n }\n\n return completeNodeExecution({\n ...file,\n metadata: newMetadata,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n applyFileNaming,\n buildNamingContext,\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n type FileNamingConfig,\n getBaseName,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadEngine } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\nimport { z } from \"zod\";\n\nexport type MergePdfNodeParams = {\n inputCount?: number;\n keepOutput?: boolean;\n /**\n * Optional file naming configuration.\n * Auto suffix: `merged`\n */\n naming?: FileNamingConfig;\n};\n\n// Schema for multiple file inputs\nconst multipleFilesSchema = z.array(uploadFileSchema);\n\nexport function createMergePdfNode(\n id: string,\n params: MergePdfNodeParams = {},\n) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadEngine = yield* UploadEngine;\n\n return yield* createFlowNode({\n id,\n name: \"Merge PDFs\",\n description: \"Merge multiple PDF documents into one\",\n type: NodeType.process,\n nodeTypeId: \"merge-pdf\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: multipleFilesSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: files, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n // Validate that we have an array of files\n if (!Array.isArray(files)) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n \"Merge PDF node requires an array of files from a Merge utility node\",\n }).toEffect();\n }\n\n if (files.length === 0) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"At least one PDF file is required for merging\",\n }).toEffect();\n }\n\n // Read buffers from all files\n const pdfBuffers: Uint8Array[] = [];\n let totalPages = 0;\n\n for (const file of files) {\n // Read file bytes from upload server\n const fileBytes = yield* uploadEngine.read(file.id, clientId);\n pdfBuffers.push(fileBytes);\n\n // Sum up page counts if available\n const fileMetadata = resolveUploadMetadata(file.metadata).metadata;\n if (\n fileMetadata?.pageCount &&\n typeof fileMetadata.pageCount === \"number\"\n ) {\n totalPages += fileMetadata.pageCount;\n }\n }\n\n yield* Effect.logInfo(`Merging ${files.length} PDF files`);\n\n // Merge PDFs with error handling\n const mergedPdf = yield* documentService\n .mergePdfs({ pdfs: pdfBuffers })\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to merge PDFs\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to merge PDFs\",\n }).toEffect();\n }),\n ),\n );\n\n // Use metadata from first file as base\n const firstFile = files[0];\n const { metadata } = resolveUploadMetadata(firstFile.metadata);\n\n // Create a stream from the merged PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(mergedPdf);\n controller.close();\n },\n });\n\n // Generate output filename\n let outputFileName = `merged-${files.length}-documents.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix: params.naming.autoSuffix ?? (() => \"merged\"),\n };\n const namingContext = buildNamingContext(\n firstFile,\n { flowId, jobId, nodeId: id, nodeType: \"merge-pdf\" },\n { mergedCount: files.length },\n );\n const namedFile = applyFileNaming(\n firstFile,\n namingContext,\n namingConfig,\n );\n outputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the merged PDF back to the upload server\n const result = yield* uploadEngine.upload(\n {\n storageId: firstFile.storage.id,\n size: mergedPdf.byteLength,\n type: \"application/pdf\",\n fileName: outputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount: totalPages,\n mergedFrom: files.length,\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount: totalPages,\n mergedFrom: files.length,\n fileName: outputFileName,\n };\n\n yield* Effect.logInfo(\n `Successfully merged ${files.length} PDFs into one document with ${totalPages} pages`,\n );\n\n return completeNodeExecution({\n ...result,\n metadata: newMetadata,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n completeNodeExecution,\n createFlowNode,\n DocumentAiPlugin,\n NodeType,\n OCR_OUTPUT_TYPE_ID,\n type OcrResolution,\n type OcrTaskType,\n ocrOutputSchema,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { Effect } from \"effect\";\n\nexport type OcrNodeParams = {\n taskType: OcrTaskType;\n resolution?: OcrResolution;\n credentialId?: string;\n referenceText?: string;\n keepOutput?: boolean;\n};\n\nexport function createOcrNode(id: string, params: OcrNodeParams) {\n return Effect.gen(function* () {\n const documentAiService = yield* DocumentAiPlugin;\n\n return yield* createFlowNode({\n id,\n name: \"OCR\",\n description: \"Extract text from scanned documents using AI\",\n type: NodeType.process,\n nodeTypeId: \"ocr\",\n outputTypeId: OCR_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: ocrOutputSchema,\n // AI service - enable circuit breaker with skip fallback\n circuitBreaker: {\n enabled: true,\n failureThreshold: 5,\n resetTimeout: 60000,\n fallback: { type: \"skip\", passThrough: true },\n },\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n const fileUrl = file.url;\n\n // Validate input\n if (!fileUrl) {\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause: \"URL is required for OCR operation\",\n }).toEffect();\n }\n\n yield* Effect.logInfo(\n `Starting OCR for file ${file.id} with task type: ${params.taskType}`,\n );\n\n // Build context for DocumentAI plugin\n const context = {\n clientId,\n credentialId: params.credentialId,\n };\n\n // Perform OCR with error handling\n const ocrResult = yield* documentAiService\n .performOCR(\n 
fileUrl,\n {\n taskType: params.taskType,\n resolution: params.resolution,\n referenceText: params.referenceText,\n },\n context,\n )\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to perform OCR\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? error.message\n : \"Failed to perform OCR\",\n }).toEffect();\n }),\n ),\n );\n\n yield* Effect.logInfo(\n `Successfully completed OCR for file ${file.id}, extracted ${ocrResult.extractedText.length} characters`,\n );\n\n // Return structured OCR output (not UploadFile)\n return completeNodeExecution({\n extractedText: ocrResult.extractedText,\n format: ocrResult.format,\n taskType: params.taskType,\n confidence: ocrResult.confidence,\n flow,\n });\n });\n },\n });\n });\n}\n","import { UploadistaError } from \"@uploadista/core/errors\";\nimport {\n applyFileNaming,\n buildNamingContext,\n completeNodeExecution,\n createFlowNode,\n DocumentPlugin,\n type FileNamingConfig,\n getBaseName,\n NodeType,\n resolveUploadMetadata,\n STORAGE_OUTPUT_TYPE_ID,\n} from \"@uploadista/core/flow\";\nimport { uploadFileSchema } from \"@uploadista/core/types\";\nimport { UploadEngine } from \"@uploadista/core/upload\";\nimport { Effect } from \"effect\";\n\nexport type SplitPdfNodeParams = {\n mode: \"range\" | \"individual\";\n startPage?: number;\n endPage?: number;\n keepOutput?: boolean;\n /**\n * Optional file naming configuration.\n * Auto suffix: `page-${pageNumber}` for individual mode, `pages-${start}-${end}` for range mode\n */\n naming?: FileNamingConfig;\n};\n\nexport function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {\n return Effect.gen(function* () {\n const documentService = yield* DocumentPlugin;\n const uploadEngine = yield* UploadEngine;\n\n return yield* createFlowNode({\n id,\n name: \"Split PDF\",\n description: \"Split PDF into pages or page ranges\",\n type: NodeType.process,\n nodeTypeId: \"split-pdf\",\n outputTypeId: STORAGE_OUTPUT_TYPE_ID,\n keepOutput: params.keepOutput,\n inputSchema: uploadFileSchema,\n outputSchema: uploadFileSchema,\n run: ({ data: file, flowId, jobId, clientId }) => {\n return Effect.gen(function* () {\n const flow = {\n flowId,\n nodeId: id,\n jobId,\n };\n\n yield* Effect.logInfo(\n `Splitting PDF file ${file.id} in ${params.mode} mode`,\n );\n\n // Read file bytes from upload server\n const fileBytes = yield* uploadEngine.read(file.id, clientId);\n\n // Split PDF with error handling\n const result = yield* documentService\n .splitPdf(fileBytes, params)\n .pipe(\n Effect.catchAll((error) =>\n Effect.gen(function* () {\n yield* Effect.logError(\"Failed to split PDF\", error);\n return yield* UploadistaError.fromCode(\"FLOW_NODE_ERROR\", {\n cause:\n error instanceof Error\n ? 
error.message\n : \"Failed to split PDF\",\n }).toEffect();\n }),\n ),\n );\n\n const { metadata } = resolveUploadMetadata(file.metadata);\n\n if (result.mode === \"individual\") {\n // Return array of files (one per page)\n yield* Effect.logInfo(\n `Successfully split PDF into ${result.pdfs.length} individual pages`,\n );\n\n // For individual mode, we'd need to return multiple files\n // This requires special handling in the flow engine\n // For now, we'll return the first page and log a warning\n yield* Effect.logWarning(\n \"Individual page mode returns multiple files - flow engine support required\",\n );\n\n const pdfBytes = result.pdfs[0];\n\n // Create a stream from the PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(pdfBytes);\n controller.close();\n },\n });\n\n // Generate output filename\n let outputFileName = `${getBaseName((metadata?.fileName as string) || \"document\")}-page-1.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix:\n params.naming.autoSuffix ??\n ((ctx) => `page-${ctx.pageNumber ?? 1}`),\n };\n const namingContext = buildNamingContext(\n file,\n { flowId, jobId, nodeId: id, nodeType: \"split-pdf\" },\n { pageNumber: 1 },\n );\n const namedFile = applyFileNaming(\n file,\n namingContext,\n namingConfig,\n );\n outputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the split PDF back to the upload server\n const uploadResult = yield* uploadEngine.upload(\n {\n storageId: file.storage.id,\n size: pdfBytes.byteLength,\n type: \"application/pdf\",\n fileName: outputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount: 1,\n splitMode: \"individual\",\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount: 1,\n splitMode: \"individual\",\n };\n\n return completeNodeExecution({\n ...uploadResult,\n metadata: newMetadata,\n });\n }\n\n // Range mode - return single PDF with selected pages\n const pageCount =\n params.endPage && params.startPage\n ? 
params.endPage - params.startPage + 1\n : 1;\n\n const pdfBytes = result.pdf;\n\n // Create a stream from the PDF bytes\n const stream = new ReadableStream({\n start(controller) {\n controller.enqueue(pdfBytes);\n controller.close();\n },\n });\n\n // Generate output filename for range mode\n let rangeOutputFileName = `${getBaseName((metadata?.fileName as string) || \"document\")}-pages-${params.startPage}-${params.endPage}.pdf`;\n if (params.naming) {\n const namingConfig: FileNamingConfig = {\n ...params.naming,\n autoSuffix:\n params.naming.autoSuffix ??\n (() => `pages-${params.startPage}-${params.endPage}`),\n };\n const namingContext = buildNamingContext(\n file,\n { flowId, jobId, nodeId: id, nodeType: \"split-pdf\" },\n { startPage: params.startPage, endPage: params.endPage },\n );\n const namedFile = applyFileNaming(\n file,\n namingContext,\n namingConfig,\n );\n rangeOutputFileName = `${getBaseName(namedFile)}.pdf`;\n }\n\n // Upload the split PDF back to the upload server\n const uploadResult = yield* uploadEngine.upload(\n {\n storageId: file.storage.id,\n size: pdfBytes.byteLength,\n type: \"application/pdf\",\n fileName: rangeOutputFileName,\n lastModified: 0,\n metadata: JSON.stringify({\n ...metadata,\n pageCount,\n splitMode: \"range\",\n splitRange: `${params.startPage}-${params.endPage}`,\n }),\n flow,\n },\n clientId,\n stream,\n );\n\n const newMetadata = {\n ...metadata,\n pageCount,\n splitMode: \"range\",\n splitRange: `${params.startPage}-${params.endPage}`,\n };\n\n yield* Effect.logInfo(\n `Successfully split PDF to pages ${params.startPage}-${params.endPage}`,\n );\n\n return completeNodeExecution({\n ...uploadResult,\n metadata: newMetadata,\n });\n });\n },\n });\n });\n}\n"],"mappings":"kmBAoBA,SAAgB,EACd,EACA,EAAsC,EAAE,CACxC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAoB,MAAO,EAC3B,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,sBACN,YACE,0EACF,KAAM,EAAS,QACf,WAAY,sBACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EAEd,eAAgB,CACd,QAAS,GACT,iBAAkB,EAClB,aAAc,IACd,SAAU,CAAE,KAAM,OAAQ,YAAa,GAAM,CAC9C,CACD,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QAAQ,mBAAmB,EAAK,GAAG,cAAc,CAG/D,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAgB,MAAO,EAC1B,YAAY,EAAU,CACtB,KAAK,EAAO,OAAO,CAElBA,EACAC,EAEJ,GACE,EAAO,QAAQ,EAAc,EAC7B,EAAc,MAAM,MAAM,CAAC,OAAS,EACpC,CAEA,IAAM,EAAO,EAAc,MAE3B,MAAO,EAAO,QACZ,0BAA0B,EAAK,OAAO,iCACvC,CAID,EAAW,EACR,MAAM;;EAAO,CACb,IAAK,GAAiB,EAAK,MAAM,CAAC,CAClC,OAAQ,GAAiB,EAAK,OAAS,EAAE,CACzC,KAAK;;EAAO,CAEf,EAAiB,OAEjB,MAAO,EAAO,QACZ,+BAA+B,EAAS,OAAO,cAChD,KACI,CAEL,MAAO,EAAO,QACZ,gEACD,CAED,IAAM,EAAU,EAAK,IAErB,GAAI,CAAC,EACH,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,oDACR,CAAC,CAAC,UAAU,CAIf,IAAM,EAAU,CACd,WACA,aAAc,EAAO,aACtB,CA0BD,GAvBkB,MAAO,EACtB,WACC,EACA,CACE,SAAU,oBACV,WAAY,EAAO,YAAc,SAClC,CACD,EACD,CACA,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,wBAAyB,EAAM,CAC/C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,gDACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,EAEkB,cACrB,EAAiB,MAEjB,MAAO,EAAO,QACZ,kEAAkE,EAAS,OAAO,cACnF,CAGH,GAAM,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,WACA,iBACD,CAMD,OAJA,MAAO,EAAO,QACZ,+BAA+B,EAAK,GAAG,mBAAmB,IAC3D,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CCtJJ,SAAgB,EACd,EACA,EAAqC,EAAE,CACvC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,oBACN,YAAa,sCACb,KAAM,EAAS,QACf,WAAY,oBACZ,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,
SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QAAQ,qCAAqC,EAAK,KAAK,CAGrE,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAmB,MAAO,EAC7B,YAAY,EAAU,CACtB,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,6BAA8B,EAAM,CACpD,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,6BACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,UAAW,EAAiB,UAC5B,OAAQ,EAAiB,OACzB,GAAI,EAAiB,QAAU,CAAE,OAAQ,EAAiB,OAAQ,CAClE,GAAI,EAAiB,OAAS,CAAE,MAAO,EAAiB,MAAO,CAC/D,GAAI,EAAiB,SAAW,CAC9B,QAAS,EAAiB,QAC3B,CACD,GAAI,EAAiB,SAAW,CAC9B,QAAS,EAAiB,QAC3B,CACD,GAAI,EAAiB,cAAgB,CACnC,aAAc,EAAiB,aAChC,CACD,GAAI,EAAiB,cAAgB,CACnC,aAAc,EAAiB,aAChC,CACD,SAAU,EAAiB,SAC5B,CAMD,OAJA,MAAO,EAAO,QACZ,6CAA6C,EAAK,GAAG,IAAI,EAAiB,UAAU,QACrF,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CCnFJ,SAAgB,EACd,EACA,EAAgC,EAAE,CAClC,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,eACN,YAAa,6CACb,KAAM,EAAS,QACf,WAAY,eACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QAAQ,iCAAiC,EAAK,KAAK,CAGjE,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAgB,MAAO,EAC1B,YAAY,EAAU,CACtB,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,yBAA0B,EAAM,CAChD,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,yBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAGnD,EAAc,CAClB,GAAG,EAAK,SACR,GAAG,EACH,gBACD,CAYD,MAVI,CAAC,GAAiB,EAAc,MAAM,CAAC,SAAW,EACpD,MAAO,EAAO,WACZ,+BAA+B,EAAK,GAAG,0EACxC,CAED,MAAO,EAAO,QACZ,0BAA0B,EAAc,OAAO,wBAAwB,EAAK,KAC7E,CAGI,EAAsB,CAC3B,GAAG,EACH,SAAU,EACV,OACD,CAAC,EACF,CAEL,CAAC,EACF,CC/DJ,MAAM,EAAsB,EAAE,MAAM,EAAiB,CAErD,SAAgB,EACd,EACA,EAA6B,EAAE,CAC/B,CACA,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,aACN,YAAa,wCACb,KAAM,EAAS,QACf,WAAY,YACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAO,SAAQ,QAAO,cAC3B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAGD,GAAI,CAAC,MAAM,QAAQ,EAAM,CACvB,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,sEACH,CAAC,CAAC,UAAU,CAGf,GAAI,EAAM,SAAW,EACnB,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,gDACR,CAAC,CAAC,UAAU,CAIf,IAAMC,EAA2B,EAAE,CAC/B,EAAa,EAEjB,IAAK,IAAM,KAAQ,EAAO,CAExB,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAC7D,EAAW,KAAK,EAAU,CAG1B,IAAM,EAAe,EAAsB,EAAK,SAAS,CAAC,SAExD,GAAc,WACd,OAAO,EAAa,WAAc,WAElC,GAAc,EAAa,WAI/B,MAAO,EAAO,QAAQ,WAAW,EAAM,OAAO,YAAY,CAG1D,IAAM,EAAY,MAAO,EACtB,UAAU,CAAE,KAAM,EAAY,CAAC,CAC/B,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,uBAAwB,EAAM,CAC9C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,uBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAGG,EAAY,EAAM,GAClB,CAAE,YAAa,EAAsB,EAAU,SAAS,CAGxD,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQ,EAAU,CAC7B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAiB,UAAU,EAAM,OAAO,gBAC5C,GAAI,EAAO,OAAQ,CACjB,IAAMC,EAAiC,CACrC,GAAG,EAAO,OACV,WAAY,EAAO,OAAO,iBAAqB,UAChD,CAWD,EAAiB,GAAG,EALF,EAChB,EANoB,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,YAAa,EAAM,OAAQ,CAC9B,CAIC,EACD,CACyC,CAAC,MAI7C,IAAM,EAAS,MAAO,EAAa,OACjC,CACE,UAAW,EAAU,QAAQ,GAC7B,KAAM,EAAU,WAChB,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,UAAW,EACX,WAAY,EAAM,OACnB,CAAC,CACF,OACD,CACD,EACA,EACD,CAEK,EAAc,CAClB,GAAG,EACH,UAAW,EACX,WAAY,EAAM,OAClB,SAAU,EACX,CAMD,OAJA,MAAO,EAAO,QACZ,uBAAuB,EAAM,OAAO,+BAA+B,EAAW,QAC/E,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACX,CAAC,EACF,CAEL,CAAC,EACF,CC5JJ,SA
AgB,EAAc,EAAY,EAAuB,CAC/D,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAoB,MAAO,EAEjC,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,MACN,YAAa,+CACb,KAAM,EAAS,QACf,WAAY,MACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EAEd,eAAgB,CACd,QAAS,GACT,iBAAkB,EAClB,aAAc,IACd,SAAU,CAAE,KAAM,OAAQ,YAAa,GAAM,CAC9C,CACD,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAEK,EAAU,EAAK,IAGrB,GAAI,CAAC,EACH,OAAO,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MAAO,oCACR,CAAC,CAAC,UAAU,CAGf,MAAO,EAAO,QACZ,yBAAyB,EAAK,GAAG,mBAAmB,EAAO,WAC5D,CAGD,IAAM,EAAU,CACd,WACA,aAAc,EAAO,aACtB,CAGK,EAAY,MAAO,EACtB,WACC,EACA,CACE,SAAU,EAAO,SACjB,WAAY,EAAO,WACnB,cAAe,EAAO,cACvB,CACD,EACD,CACA,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,wBAAyB,EAAM,CAC/C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,wBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAOH,OALA,MAAO,EAAO,QACZ,uCAAuC,EAAK,GAAG,cAAc,EAAU,cAAc,OAAO,aAC7F,CAGM,EAAsB,CAC3B,cAAe,EAAU,cACzB,OAAQ,EAAU,OAClB,SAAU,EAAO,SACjB,WAAY,EAAU,WACtB,OACD,CAAC,EACF,CAEL,CAAC,EACF,CCjFJ,SAAgB,EAAmB,EAAY,EAA4B,CACzE,OAAO,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAkB,MAAO,EACzB,EAAe,MAAO,EAE5B,OAAO,MAAO,EAAe,CAC3B,KACA,KAAM,YACN,YAAa,sCACb,KAAM,EAAS,QACf,WAAY,YACZ,aAAc,EACd,WAAY,EAAO,WACnB,YAAa,EACb,aAAc,EACd,KAAM,CAAE,KAAM,EAAM,SAAQ,QAAO,cAC1B,EAAO,IAAI,WAAa,CAC7B,IAAM,EAAO,CACX,SACA,OAAQ,EACR,QACD,CAED,MAAO,EAAO,QACZ,sBAAsB,EAAK,GAAG,MAAM,EAAO,KAAK,OACjD,CAGD,IAAM,EAAY,MAAO,EAAa,KAAK,EAAK,GAAI,EAAS,CAGvD,EAAS,MAAO,EACnB,SAAS,EAAW,EAAO,CAC3B,KACC,EAAO,SAAU,GACf,EAAO,IAAI,WAAa,CAEtB,OADA,MAAO,EAAO,SAAS,sBAAuB,EAAM,CAC7C,MAAO,EAAgB,SAAS,kBAAmB,CACxD,MACE,aAAiB,MACb,EAAM,QACN,sBACP,CAAC,CAAC,UAAU,EACb,CACH,CACF,CAEG,CAAE,YAAa,EAAsB,EAAK,SAAS,CAEzD,GAAI,EAAO,OAAS,aAAc,CAEhC,MAAO,EAAO,QACZ,+BAA+B,EAAO,KAAK,OAAO,mBACnD,CAKD,MAAO,EAAO,WACZ,6EACD,CAED,IAAMC,EAAW,EAAO,KAAK,GAGvBC,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQD,EAAS,CAC5B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAiB,GAAG,EAAa,GAAU,UAAuB,WAAW,CAAC,aAClF,GAAI,EAAO,OAAQ,CACjB,IAAME,EAAiC,CACrC,GAAG,EAAO,OACV,WACE,EAAO,OAAO,aACZ,GAAQ,QAAQ,EAAI,YAAc,KACvC,CAWD,EAAiB,GAAG,EALF,EAChB,EANoB,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,WAAY,EAAG,CAClB,CAIC,EACD,CACyC,CAAC,MAI7C,IAAMC,EAAe,MAAO,EAAa,OACvC,CACE,UAAW,EAAK,QAAQ,GACxB,KAAMH,EAAS,WACf,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,UAAW,EACX,UAAW,aACZ,CAAC,CACF,OACD,CACD,EACAC,EACD,CAEKG,EAAc,CAClB,GAAG,EACH,UAAW,EACX,UAAW,aACZ,CAED,OAAO,EAAsB,CAC3B,GAAGD,EACH,SAAUC,EACX,CAAC,CAIJ,IAAM,EACJ,EAAO,SAAW,EAAO,UACrB,EAAO,QAAU,EAAO,UAAY,EACpC,EAEA,EAAW,EAAO,IAGlB,EAAS,IAAI,eAAe,CAChC,MAAM,EAAY,CAChB,EAAW,QAAQ,EAAS,CAC5B,EAAW,OAAO,EAErB,CAAC,CAGE,EAAsB,GAAG,EAAa,GAAU,UAAuB,WAAW,CAAC,SAAS,EAAO,UAAU,GAAG,EAAO,QAAQ,MACnI,GAAI,EAAO,OAAQ,CACjB,IAAMF,EAAiC,CACrC,GAAG,EAAO,OACV,WACE,EAAO,OAAO,iBACP,SAAS,EAAO,UAAU,GAAG,EAAO,WAC9C,CAWD,EAAsB,GAAG,EALP,EAChB,EANoB,EACpB,EACA,CAAE,SAAQ,QAAO,OAAQ,EAAI,SAAU,YAAa,CACpD,CAAE,UAAW,EAAO,UAAW,QAAS,EAAO,QAAS,CACzD,CAIC,EACD,CAC8C,CAAC,MAIlD,IAAM,EAAe,MAAO,EAAa,OACvC,CACE,UAAW,EAAK,QAAQ,GACxB,KAAM,EAAS,WACf,KAAM,kBACN,SAAU,EACV,aAAc,EACd,SAAU,KAAK,UAAU,CACvB,GAAG,EACH,YACA,UAAW,QACX,WAAY,GAAG,EAAO,UAAU,GAAG,EAAO,UAC3C,CAAC,CACF,OACD,CACD,EACA,EACD,CAEK,EAAc,CAClB,GAAG,EACH,YACA,UAAW,QACX,WAAY,GAAG,EAAO,UAAU,GAAG,EAAO,UAC3C,CAMD,OAJA,MAAO,EAAO,QACZ,mCAAmC,EAAO,UAAU,GAAG,EAAO,UAC/D,CAEM,EAAsB,CAC3B,GAAG,EACH,SAAU,EACX,CAAC,EACF,CAEL,CAAC,EACF"}
package/dist/types/index.d.cts
CHANGED
@@ -1,2 +1,2 @@
-import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "../index-
+import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "../index-DN0piYEv.cjs";
export { ConvertToMarkdownParams, DescribeDocumentParams, ExtractTextParams, MergePdfParams, OcrParams, SplitPdfParams, convertToMarkdownParamsSchema, describeDocumentParamsSchema, extractTextParamsSchema, mergePdfParamsSchema, ocrParamsSchema, splitPdfParamsSchema };
package/dist/types/index.d.mts
CHANGED
@@ -1,2 +1,2 @@
-import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "../index-
+import { a as MergePdfParams, c as extractTextParamsSchema, d as ConvertToMarkdownParams, f as convertToMarkdownParamsSchema, i as ocrParamsSchema, l as DescribeDocumentParams, n as splitPdfParamsSchema, o as mergePdfParamsSchema, r as OcrParams, s as ExtractTextParams, t as SplitPdfParams, u as describeDocumentParamsSchema } from "../index-aD9vy0kH.mjs";
export { ConvertToMarkdownParams, DescribeDocumentParams, ExtractTextParams, MergePdfParams, OcrParams, SplitPdfParams, convertToMarkdownParamsSchema, describeDocumentParamsSchema, extractTextParamsSchema, mergePdfParamsSchema, ocrParamsSchema, splitPdfParamsSchema };
package/package.json
CHANGED
@@ -1,7 +1,7 @@
{
  "name": "@uploadista/flow-documents-nodes",
  "type": "module",
-  "version": "0.0.20-beta.
+  "version": "0.0.20-beta.8",
  "description": "Document processing nodes for Uploadista Flow",
  "license": "MIT",
  "author": "Uploadista",
@@ -20,7 +20,7 @@
    }
  },
  "dependencies": {
-    "@uploadista/core": "0.0.20-beta.
+    "@uploadista/core": "0.0.20-beta.8"
  },
  "peerDependencies": {
    "effect": "^3.0.0",
@@ -28,15 +28,15 @@
  },
  "devDependencies": {
    "@effect/vitest": "0.27.0",
-    "@types/node": "24.10.
-    "effect": "3.19.
-    "tsdown": "0.
+    "@types/node": "24.10.4",
+    "effect": "3.19.12",
+    "tsdown": "0.18.0",
    "vitest": "4.0.15",
-    "zod": "4.
-    "@uploadista/typescript-config": "0.0.20-beta.
+    "zod": "4.2.0",
+    "@uploadista/typescript-config": "0.0.20-beta.8"
  },
  "scripts": {
-    "build": "tsdown",
+    "build": "tsc --noEmit && tsdown",
    "format": "biome format --write ./src",
    "lint": "biome lint --write ./src",
    "check": "biome check --write ./src",
package/src/convert-to-markdown-node.ts
CHANGED
@@ -9,7 +9,7 @@ import {
  STORAGE_OUTPUT_TYPE_ID,
} from "@uploadista/core/flow";
import { uploadFileSchema } from "@uploadista/core/types";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
import { Effect, Either } from "effect";

export type ConvertToMarkdownNodeParams = {
@@ -25,7 +25,7 @@ export function createConvertToMarkdownNode(
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
    const documentAiService = yield* DocumentAiPlugin;
-    const
+    const uploadEngine = yield* UploadEngine;

    return yield* createFlowNode({
      id,
@@ -56,7 +56,7 @@ export function createConvertToMarkdownNode(
          yield* Effect.logInfo(`Converting file ${file.id} to Markdown`);

          // Read file bytes from upload server
-          const fileBytes = yield*
+          const fileBytes = yield* uploadEngine.read(file.id, clientId);

          // Try to extract text first (for searchable PDFs)
          const extractResult = yield* documentService
package/src/describe-document-node.ts
CHANGED
@@ -7,7 +7,7 @@ import {
  resolveUploadMetadata,
} from "@uploadista/core/flow";
import { uploadFileSchema } from "@uploadista/core/types";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
import { Effect } from "effect";

export type DescribeDocumentNodeParams = {
@@ -20,7 +20,7 @@ export function createDescribeDocumentNode(
) {
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
-    const
+    const uploadEngine = yield* UploadEngine;

    return yield* createFlowNode({
      id,
@@ -39,12 +39,10 @@ export function createDescribeDocumentNode(
            jobId,
          };

-          yield* Effect.logInfo(
-            `Extracting metadata from PDF file ${file.id}`,
-          );
+          yield* Effect.logInfo(`Extracting metadata from PDF file ${file.id}`);

          // Read file bytes from upload server
-          const fileBytes = yield*
+          const fileBytes = yield* uploadEngine.read(file.id, clientId);

          // Get metadata with error handling
          const documentMetadata = yield* documentService
@@ -73,10 +71,18 @@ export function createDescribeDocumentNode(
            format: documentMetadata.format,
            ...(documentMetadata.author && { author: documentMetadata.author }),
            ...(documentMetadata.title && { title: documentMetadata.title }),
-            ...(documentMetadata.subject && {
-
-
-            ...(documentMetadata.
+            ...(documentMetadata.subject && {
+              subject: documentMetadata.subject,
+            }),
+            ...(documentMetadata.creator && {
+              creator: documentMetadata.creator,
+            }),
+            ...(documentMetadata.creationDate && {
+              creationDate: documentMetadata.creationDate,
+            }),
+            ...(documentMetadata.modifiedDate && {
+              modifiedDate: documentMetadata.modifiedDate,
+            }),
            fileSize: documentMetadata.fileSize,
          };

package/src/extract-text-node.ts
CHANGED
@@ -8,7 +8,7 @@ import {
  STORAGE_OUTPUT_TYPE_ID,
} from "@uploadista/core/flow";
import { uploadFileSchema } from "@uploadista/core/types";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
import { Effect } from "effect";

export type ExtractTextNodeParams = {
@@ -21,7 +21,7 @@ export function createExtractTextNode(
) {
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
-    const
+    const uploadEngine = yield* UploadEngine;

    return yield* createFlowNode({
      id,
@@ -44,7 +44,7 @@ export function createExtractTextNode(
          yield* Effect.logInfo(`Extracting text from PDF file ${file.id}`);

          // Read file bytes from upload server
-          const fileBytes = yield*
+          const fileBytes = yield* uploadEngine.read(file.id, clientId);

          // Extract text with error handling
          const extractedText = yield* documentService
package/src/merge-pdf-node.ts
CHANGED
@@ -12,7 +12,7 @@ import {
  STORAGE_OUTPUT_TYPE_ID,
} from "@uploadista/core/flow";
import { uploadFileSchema } from "@uploadista/core/types";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
import { Effect } from "effect";
import { z } from "zod";

@@ -35,7 +35,7 @@ export function createMergePdfNode(
) {
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
-    const
+    const uploadEngine = yield* UploadEngine;

    return yield* createFlowNode({
      id,
@@ -75,7 +75,7 @@ export function createMergePdfNode(

          for (const file of files) {
            // Read file bytes from upload server
-            const fileBytes = yield*
+            const fileBytes = yield* uploadEngine.read(file.id, clientId);
            pdfBuffers.push(fileBytes);

            // Sum up page counts if available
@@ -131,12 +131,16 @@ export function createMergePdfNode(
              { flowId, jobId, nodeId: id, nodeType: "merge-pdf" },
              { mergedCount: files.length },
            );
-            const namedFile = applyFileNaming(
+            const namedFile = applyFileNaming(
+              firstFile,
+              namingContext,
+              namingConfig,
+            );
            outputFileName = `${getBaseName(namedFile)}.pdf`;
          }

          // Upload the merged PDF back to the upload server
-          const result = yield*
+          const result = yield* uploadEngine.upload(
            {
              storageId: firstFile.storage.id,
              size: mergedPdf.byteLength,
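
For orientation, a minimal usage sketch of the merge node touched above. It assumes createMergePdfNode is re-exported from the package root (src/index.ts is the build entry); the flow wiring and the DocumentPlugin/UploadEngine layers that must be provided before the effect can run are omitted.

// Sketch only: the root import path is an assumption, not confirmed by this diff.
import { Effect } from "effect";
import { createMergePdfNode } from "@uploadista/flow-documents-nodes";

const program = Effect.gen(function* () {
  // Building the node is itself an Effect; running it requires the
  // DocumentPlugin and UploadEngine services imported in merge-pdf-node.ts.
  const mergeNode = yield* createMergePdfNode("merge-1", {
    inputCount: 2, // the node expects an array of UploadFile inputs (multipleFilesSchema)
    keepOutput: true,
    // naming omitted: output falls back to `merged-${files.length}-documents.pdf`;
    // with a naming config, autoSuffix defaults to "merged".
  });
  return mergeNode;
});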
package/src/split-pdf-node.ts
CHANGED
@@ -12,7 +12,7 @@ import {
  STORAGE_OUTPUT_TYPE_ID,
} from "@uploadista/core/flow";
import { uploadFileSchema } from "@uploadista/core/types";
-import {
+import { UploadEngine } from "@uploadista/core/upload";
import { Effect } from "effect";

export type SplitPdfNodeParams = {
@@ -30,7 +30,7 @@ export type SplitPdfNodeParams = {
export function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
-    const
+    const uploadEngine = yield* UploadEngine;

    return yield* createFlowNode({
      id,
@@ -55,7 +55,7 @@ export function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {
          );

          // Read file bytes from upload server
-          const fileBytes = yield*
+          const fileBytes = yield* uploadEngine.read(file.id, clientId);

          // Split PDF with error handling
          const result = yield* documentService
@@ -100,23 +100,29 @@ export function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {
            });

            // Generate output filename
-            let outputFileName = `${getBaseName(metadata?.fileName as string || "document")}-page-1.pdf`;
+            let outputFileName = `${getBaseName((metadata?.fileName as string) || "document")}-page-1.pdf`;
            if (params.naming) {
              const namingConfig: FileNamingConfig = {
                ...params.naming,
-                autoSuffix:
+                autoSuffix:
+                  params.naming.autoSuffix ??
+                  ((ctx) => `page-${ctx.pageNumber ?? 1}`),
              };
              const namingContext = buildNamingContext(
                file,
                { flowId, jobId, nodeId: id, nodeType: "split-pdf" },
                { pageNumber: 1 },
              );
-              const namedFile = applyFileNaming(
+              const namedFile = applyFileNaming(
+                file,
+                namingContext,
+                namingConfig,
+              );
              outputFileName = `${getBaseName(namedFile)}.pdf`;
            }

            // Upload the split PDF back to the upload server
-            const uploadResult = yield*
+            const uploadResult = yield* uploadEngine.upload(
              {
                storageId: file.storage.id,
                size: pdfBytes.byteLength,
@@ -163,23 +169,29 @@ export function createSplitPdfNode(id: string, params: SplitPdfNodeParams) {
          });

          // Generate output filename for range mode
-          let rangeOutputFileName = `${getBaseName(metadata?.fileName as string || "document")}-pages-${params.startPage}-${params.endPage}.pdf`;
+          let rangeOutputFileName = `${getBaseName((metadata?.fileName as string) || "document")}-pages-${params.startPage}-${params.endPage}.pdf`;
          if (params.naming) {
            const namingConfig: FileNamingConfig = {
              ...params.naming,
-              autoSuffix:
+              autoSuffix:
+                params.naming.autoSuffix ??
+                (() => `pages-${params.startPage}-${params.endPage}`),
            };
            const namingContext = buildNamingContext(
              file,
              { flowId, jobId, nodeId: id, nodeType: "split-pdf" },
              { startPage: params.startPage, endPage: params.endPage },
            );
-            const namedFile = applyFileNaming(
+            const namedFile = applyFileNaming(
+              file,
+              namingContext,
+              namingConfig,
+            );
            rangeOutputFileName = `${getBaseName(namedFile)}.pdf`;
          }

          // Upload the split PDF back to the upload server
-          const uploadResult = yield*
+          const uploadResult = yield* uploadEngine.upload(
            {
              storageId: file.storage.id,
              size: pdfBytes.byteLength,
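
A similar sketch for the split node in range mode, showing the naming defaults introduced above (again assuming a root re-export; the identifiers here are illustrative only).

// Sketch only: the root import path is an assumption.
import { Effect } from "effect";
import { createSplitPdfNode } from "@uploadista/flow-documents-nodes";

const program = Effect.gen(function* () {
  // Range mode keeps pages 2-4 in one output PDF. Without a naming config the
  // file is named `<base>-pages-2-4.pdf`; with `naming` but no autoSuffix, the
  // node now falls back to the `pages-${startPage}-${endPage}` suffix.
  return yield* createSplitPdfNode("split-1", {
    mode: "range",
    startPage: 2,
    endPage: 4,
    keepOutput: true,
  });
});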
package/tests/document-nodes.test.ts
CHANGED
@@ -2,7 +2,7 @@ import { describe, expect, it, vi } from "@effect/vitest";
import {
  TestDocumentAiPlugin,
  TestDocumentPlugin,
-
+  TestUploadEngine,
} from "@uploadista/core/testing";
import type { UploadFile } from "@uploadista/core/types";
import { Effect, Layer } from "effect";
@@ -43,7 +43,7 @@ const createTestUploadFile = (overrides?: Partial<UploadFile>): UploadFile => ({
const TestLayer = Layer.mergeAll(
  TestDocumentPlugin,
  TestDocumentAiPlugin,
-
+  TestUploadEngine,
);

// Mock fetch for URL availability tests
@@ -82,7 +82,9 @@ describe("Document Nodes", () => {
        expect(result.type).toBe("complete");
        if (result.type === "complete") {
          expect(result.data.metadata?.extractedText).toBeDefined();
-          expect(result.data.metadata?.extractedText).toContain(
+          expect(result.data.metadata?.extractedText).toContain(
+            "extracted text",
+          );
        }
      }).pipe(Effect.provide(TestLayer)),
    );
@@ -188,14 +190,16 @@ describe("Document Nodes", () => {
  });

  describe("DescribeDocumentNode", () => {
-    it.effect(
-
-
-
-
-
-
-
+    it.effect(
+      "should create describe document node with correct properties",
+      () =>
+        Effect.gen(function* () {
+          const node = yield* createDescribeDocumentNode("describe-1", {});
+
+          expect(node.id).toBe("describe-1");
+          expect(node.name).toBe("Describe Document");
+          expect(node.description).toContain("metadata");
+        }).pipe(Effect.provide(TestLayer)),
    );

    it.effect("should extract document metadata", () =>
@@ -285,14 +289,16 @@ describe("Document Nodes", () => {
  });

  describe("ConvertToMarkdownNode", () => {
-    it.effect(
-
-
-
-
-
-
-
+    it.effect(
+      "should create convert to markdown node with correct properties",
+      () =>
+        Effect.gen(function* () {
+          const node = yield* createConvertToMarkdownNode("convert-1", {});
+
+          expect(node.id).toBe("convert-1");
+          expect(node.name).toBe("Convert to Markdown");
+          expect(node.description).toContain("Markdown");
+        }).pipe(Effect.provide(TestLayer)),
    );

    it.effect("should convert document to markdown", () =>
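
The updated test setup above boils down to providing the UploadEngine test service alongside the document plugins. A minimal sketch of that pattern follows; the factory import from the package root is an assumption, everything else mirrors the test diff.

import { expect, it } from "@effect/vitest";
import {
  TestDocumentAiPlugin,
  TestDocumentPlugin,
  TestUploadEngine,
} from "@uploadista/core/testing";
import { Effect, Layer } from "effect";
import { createDescribeDocumentNode } from "@uploadista/flow-documents-nodes"; // assumed root export

// Merge the test services the node factories resolve at construction time.
const TestLayer = Layer.mergeAll(
  TestDocumentPlugin,
  TestDocumentAiPlugin,
  TestUploadEngine,
);

it.effect("builds the describe-document node against the test services", () =>
  Effect.gen(function* () {
    const node = yield* createDescribeDocumentNode("describe-1", {});
    expect(node.name).toBe("Describe Document");
  }).pipe(Effect.provide(TestLayer)),
);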