@uploadista/flow-documents-nodes 0.0.16-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +16 -0
- package/LICENSE +21 -0
- package/README.md +57 -0
- package/dist/index.d.mts +1177 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +396 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +32 -0
- package/src/convert-to-markdown-node.ts +156 -0
- package/src/describe-document-node.ts +92 -0
- package/src/extract-text-node.ts +90 -0
- package/src/index.ts +27 -0
- package/src/merge-pdf-node.ts +144 -0
- package/src/ocr-node.ts +111 -0
- package/src/split-pdf-node.ts +176 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { UploadistaError } from "@uploadista/core/errors";
|
|
2
|
+
import {
|
|
3
|
+
completeNodeExecution,
|
|
4
|
+
createFlowNode,
|
|
5
|
+
DocumentPlugin,
|
|
6
|
+
NodeType,
|
|
7
|
+
resolveUploadMetadata,
|
|
8
|
+
} from "@uploadista/core/flow";
|
|
9
|
+
import { uploadFileSchema } from "@uploadista/core/types";
|
|
10
|
+
import { UploadServer } from "@uploadista/core/upload";
|
|
11
|
+
import { Effect } from "effect";
|
|
12
|
+
|
|
13
|
+
/**
 * Options accepted by `createSplitPdfNode`.
 *
 * The object is forwarded unchanged to the document plugin's `splitPdf` call,
 * so the exact interpretation of each field is ultimately up to that plugin.
 */
export type SplitPdfNodeParams = {
  /**
   * Splitting strategy: `"range"` yields one PDF containing the selected
   * pages, `"individual"` yields one PDF per page.
   */
  mode: "individual" | "range";
  /**
   * First page of the selection when `mode` is `"range"`.
   * NOTE(review): presumably 1-based and inclusive - confirm against the plugin.
   */
  startPage?: number;
  /**
   * Last page of the selection when `mode` is `"range"`.
   * NOTE(review): presumably 1-based and inclusive - confirm against the plugin.
   */
  endPage?: number;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Creates the "Split PDF" flow node.
 *
 * When run, the node reads the input file's bytes from the upload server,
 * hands them to the document plugin's `splitPdf`, uploads the resulting PDF to
 * the same storage as the input file, and completes with the uploaded file
 * plus split-related metadata (`pageCount`, `splitMode`, and `splitRange` in
 * range mode).
 *
 * Limitation: in `"individual"` mode the plugin returns one PDF per page, but
 * this node can only emit a single file, so only the first page is uploaded
 * and a warning is logged.
 *
 * @param id - Node identifier; also recorded as `nodeId` on uploaded files.
 * @param params - Split options, forwarded unchanged to the document plugin.
 * @returns An Effect that requires `DocumentPlugin` and `UploadServer` and
 *   yields the configured flow node.
 *
 * The node's `run` effect fails with a `FLOW_NODE_ERROR` `UploadistaError`
 * when the plugin fails to split the document or returns no pages in
 * individual mode.
 */
export function createSplitPdfNode(
  id: string,
  params: SplitPdfNodeParams,
) {
  return Effect.gen(function* () {
    const documentService = yield* DocumentPlugin;
    const uploadServer = yield* UploadServer;

    return yield* createFlowNode({
      id,
      name: "Split PDF",
      description: "Split PDF into pages or page ranges",
      type: NodeType.process,
      inputSchema: uploadFileSchema,
      outputSchema: uploadFileSchema,
      run: ({ data: file, flowId, jobId, clientId }) => {
        return Effect.gen(function* () {
          const flow = {
            flowId,
            nodeId: id,
            jobId,
          };

          yield* Effect.logInfo(
            `Splitting PDF file ${file.id} in ${params.mode} mode`,
          );

          // Read file bytes from upload server
          const fileBytes = yield* uploadServer.read(file.id, clientId);

          // Split PDF; any plugin failure is logged and re-raised as a
          // FLOW_NODE_ERROR so callers see a single error code for this node.
          const result = yield* documentService
            .splitPdf(fileBytes, params)
            .pipe(
              Effect.catchAll((error) =>
                Effect.gen(function* () {
                  yield* Effect.logError("Failed to split PDF", error);
                  return yield* UploadistaError.fromCode("FLOW_NODE_ERROR", {
                    cause:
                      error instanceof Error
                        ? error.message
                        : "Failed to split PDF",
                  }).toEffect();
                }),
              ),
            );

          const { metadata } = resolveUploadMetadata(file.metadata);

          // `||` (not `??`) is deliberate: an empty file name also falls back.
          const baseName = metadata?.fileName || 'document';

          // Uploads one split PDF next to the input file and completes the
          // node with the uploaded file plus the merged metadata. Shared by
          // both split modes so they cannot drift apart.
          const storeSplitPdf = (
            pdfBytes: Uint8Array,
            fileName: string,
            splitMetadata: Record<string, string | number>,
          ) =>
            Effect.gen(function* () {
              const outputMetadata = {
                ...metadata,
                ...splitMetadata,
              };

              // Create a single-chunk stream from the PDF bytes
              const stream = new ReadableStream({
                start(controller) {
                  controller.enqueue(pdfBytes);
                  controller.close();
                },
              });

              // Upload the split PDF back to the upload server
              const uploadResult = yield* uploadServer.upload(
                {
                  storageId: file.storage.id,
                  size: pdfBytes.byteLength,
                  type: "application/pdf",
                  fileName,
                  lastModified: 0,
                  metadata: JSON.stringify(outputMetadata),
                  flow,
                },
                clientId,
                stream,
              );

              return completeNodeExecution({
                ...uploadResult,
                metadata: outputMetadata,
              });
            });

          if (result.mode === "individual") {
            const firstPage = result.pdfs[0];

            // Guard against an empty result: without it, reading `byteLength`
            // of `undefined` would throw inside the generator and surface as
            // an untyped defect instead of a FLOW_NODE_ERROR failure.
            if (firstPage === undefined) {
              yield* Effect.logError("PDF split produced no pages");
              return yield* UploadistaError.fromCode("FLOW_NODE_ERROR", {
                cause: "PDF split produced no pages",
              }).toEffect();
            }

            yield* Effect.logInfo(
              `Successfully split PDF into ${result.pdfs.length} individual pages`,
            );

            // For individual mode, we'd need to return multiple files
            // This requires special handling in the flow engine
            // For now, we'll return the first page and log a warning
            yield* Effect.logWarning(
              "Individual page mode returns multiple files - flow engine support required",
            );

            return yield* storeSplitPdf(firstPage, `${baseName}-page-1.pdf`, {
              pageCount: 1,
              splitMode: "individual",
            });
          }

          // Range mode - return single PDF with selected pages
          const { startPage, endPage } = params;

          // NOTE(review): when either bound is missing the real page count is
          // unknown here; the fallback of 1 is kept from the previous
          // implementation - confirm what the plugin does with open bounds.
          const pageCount = endPage && startPage ? endPage - startPage + 1 : 1;

          // Both bounds are optional, so name an open bound explicitly rather
          // than leaking the text "undefined" into file names, metadata and
          // logs.
          const rangeLabel = `${startPage ?? "start"}-${endPage ?? "end"}`;

          const completed = yield* storeSplitPdf(
            result.pdf,
            `${baseName}-pages-${rangeLabel}.pdf`,
            {
              pageCount,
              splitMode: "range",
              splitRange: rangeLabel,
            },
          );

          yield* Effect.logInfo(
            `Successfully split PDF to pages ${rangeLabel}`,
          );

          return completed;
        });
      },
    });
  });
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "@uploadista/typescript-config/server.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"baseUrl": "./",
|
|
5
|
+
"paths": {
|
|
6
|
+
"@/*": ["./src/*"]
|
|
7
|
+
},
|
|
8
|
+
"outDir": "./dist",
|
|
9
|
+
"rootDir": "./src",
|
|
10
|
+
"lib": ["ESNext", "DOM", "DOM.Iterable"],
|
|
11
|
+
"types": []
|
|
12
|
+
},
|
|
13
|
+
"include": ["src"]
|
|
14
|
+
}
|