@uploadista/flow-documents-nodes 0.0.16-beta.2 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +26 -9
- package/dist/index-DN0piYEv.d.cts +62 -0
- package/dist/index-DN0piYEv.d.cts.map +1 -0
- package/dist/index-aD9vy0kH.d.mts +62 -0
- package/dist/index-aD9vy0kH.d.mts.map +1 -0
- package/dist/index.cjs +5 -0
- package/dist/index.d.cts +1178 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +37 -36
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +3 -393
- package/dist/index.mjs.map +1 -1
- package/dist/types/index.cjs +1 -0
- package/dist/types/index.d.cts +2 -0
- package/dist/types/index.d.mts +2 -0
- package/dist/types/index.mjs +1 -0
- package/dist/types-CdmvDfq8.cjs +1 -0
- package/dist/types-rDFmPO7S.mjs +2 -0
- package/dist/types-rDFmPO7S.mjs.map +1 -0
- package/package.json +14 -4
- package/src/index.ts +3 -0
- package/src/types/convert-to-markdown-node.ts +12 -0
- package/src/types/describe-document-node.ts +7 -0
- package/src/types/extract-text-node.ts +5 -0
- package/src/types/index.ts +23 -0
- package/src/types/merge-pdf-node.ts +7 -0
- package/src/types/ocr-node.ts +17 -0
- package/src/types/split-pdf-node.ts +9 -0
- package/tests/document-nodes.test.ts +318 -0
- package/tsdown.config.ts +12 -0
- package/vitest.config.ts +21 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uploadista/flow-documents-nodes",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.0.16-beta.2",
|
|
4
|
+
"version": "0.0.16",
|
|
5
5
|
"description": "Document processing nodes for Uploadista Flow",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Uploadista",
|
|
@@ -11,22 +11,32 @@
|
|
|
11
11
|
"import": "./dist/index.mjs",
|
|
12
12
|
"require": "./dist/index.cjs",
|
|
13
13
|
"default": "./dist/index.mjs"
|
|
14
|
+
},
|
|
15
|
+
"./types": {
|
|
16
|
+
"types": "./dist/types/index.d.mts",
|
|
17
|
+
"import": "./dist/types/index.mjs",
|
|
18
|
+
"require": "./dist/types/index.cjs",
|
|
19
|
+
"default": "./dist/types/index.mjs"
|
|
14
20
|
}
|
|
15
21
|
},
|
|
16
22
|
"dependencies": {
|
|
17
23
|
"effect": "3.19.4",
|
|
18
24
|
"zod": "4.1.12",
|
|
19
|
-
"@uploadista/core": "0.0.16-beta.2"
|
|
25
|
+
"@uploadista/core": "0.0.16"
|
|
20
26
|
},
|
|
21
27
|
"devDependencies": {
|
|
28
|
+
"@effect/vitest": "0.27.0",
|
|
22
29
|
"@types/node": "24.10.1",
|
|
23
30
|
"tsdown": "0.16.5",
|
|
24
|
-
"@uploadista/typescript-config": "0.0.16-beta.2"
|
|
31
|
+
"vitest": "4.0.9",
|
|
32
|
+
"@uploadista/typescript-config": "0.0.16"
|
|
25
33
|
},
|
|
26
34
|
"scripts": {
|
|
27
35
|
"build": "tsdown",
|
|
28
36
|
"format": "biome format --write ./src",
|
|
29
37
|
"lint": "biome lint --write ./src",
|
|
30
|
-
"check": "biome check --write ./src"
|
|
38
|
+
"check": "biome check --write ./src",
|
|
39
|
+
"test": "vitest run",
|
|
40
|
+
"test:watch": "vitest"
|
|
31
41
|
}
|
|
32
42
|
}
|
package/src/index.ts
CHANGED
package/src/types/convert-to-markdown-node.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
export const convertToMarkdownParamsSchema = z.object({
|
|
4
|
+
credentialId: z.string().optional(),
|
|
5
|
+
resolution: z
|
|
6
|
+
.enum(["tiny", "small", "base", "gundam", "large"])
|
|
7
|
+
.optional(),
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
export type ConvertToMarkdownParams = z.infer<
|
|
11
|
+
typeof convertToMarkdownParamsSchema
|
|
12
|
+
>;
|
package/src/types/index.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// Document node parameter schemas
|
|
2
|
+
|
|
3
|
+
export {
|
|
4
|
+
convertToMarkdownParamsSchema,
|
|
5
|
+
type ConvertToMarkdownParams,
|
|
6
|
+
} from "./convert-to-markdown-node.js";
|
|
7
|
+
export {
|
|
8
|
+
describeDocumentParamsSchema,
|
|
9
|
+
type DescribeDocumentParams,
|
|
10
|
+
} from "./describe-document-node.js";
|
|
11
|
+
export {
|
|
12
|
+
extractTextParamsSchema,
|
|
13
|
+
type ExtractTextParams,
|
|
14
|
+
} from "./extract-text-node.js";
|
|
15
|
+
export {
|
|
16
|
+
mergePdfParamsSchema,
|
|
17
|
+
type MergePdfParams,
|
|
18
|
+
} from "./merge-pdf-node.js";
|
|
19
|
+
export { ocrParamsSchema, type OcrParams } from "./ocr-node.js";
|
|
20
|
+
export {
|
|
21
|
+
splitPdfParamsSchema,
|
|
22
|
+
type SplitPdfParams,
|
|
23
|
+
} from "./split-pdf-node.js";
|
package/src/types/ocr-node.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
export const ocrParamsSchema = z.object({
|
|
4
|
+
taskType: z.enum([
|
|
5
|
+
"convertToMarkdown",
|
|
6
|
+
"freeOcr",
|
|
7
|
+
"parseFigure",
|
|
8
|
+
"locateObject",
|
|
9
|
+
]),
|
|
10
|
+
resolution: z
|
|
11
|
+
.enum(["tiny", "small", "base", "gundam", "large"])
|
|
12
|
+
.optional(),
|
|
13
|
+
credentialId: z.string().optional(),
|
|
14
|
+
referenceText: z.string().optional(),
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
export type OcrParams = z.infer<typeof ocrParamsSchema>;
|
package/src/types/split-pdf-node.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
export const splitPdfParamsSchema = z.object({
|
|
4
|
+
mode: z.enum(["range", "individual"]),
|
|
5
|
+
startPage: z.number().positive().optional(),
|
|
6
|
+
endPage: z.number().positive().optional(),
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
export type SplitPdfParams = z.infer<typeof splitPdfParamsSchema>;
|
package/tests/document-nodes.test.ts
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from "@effect/vitest";
|
|
2
|
+
import {
|
|
3
|
+
TestDocumentAiPlugin,
|
|
4
|
+
TestDocumentPlugin,
|
|
5
|
+
TestUploadServer,
|
|
6
|
+
} from "@uploadista/core/testing";
|
|
7
|
+
import type { UploadFile } from "@uploadista/core/types";
|
|
8
|
+
import { Effect, Layer } from "effect";
|
|
9
|
+
import {
|
|
10
|
+
createConvertToMarkdownNode,
|
|
11
|
+
createDescribeDocumentNode,
|
|
12
|
+
createExtractTextNode,
|
|
13
|
+
createMergePdfNode,
|
|
14
|
+
createOcrNode,
|
|
15
|
+
createSplitPdfNode,
|
|
16
|
+
} from "../src/index";
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Test utilities for creating sample data
|
|
20
|
+
*/
|
|
21
|
+
const createTestUploadFile = (overrides?: Partial<UploadFile>): UploadFile => ({
|
|
22
|
+
id: "test-file-1",
|
|
23
|
+
offset: 0,
|
|
24
|
+
size: 2048,
|
|
25
|
+
storage: {
|
|
26
|
+
id: "test-storage",
|
|
27
|
+
type: "memory",
|
|
28
|
+
},
|
|
29
|
+
metadata: {
|
|
30
|
+
mimeType: "application/pdf",
|
|
31
|
+
originalName: "test-document.pdf",
|
|
32
|
+
fileName: "test-document.pdf",
|
|
33
|
+
extension: "pdf",
|
|
34
|
+
},
|
|
35
|
+
url: "https://example.com/test-document.pdf",
|
|
36
|
+
creationDate: new Date().toISOString(),
|
|
37
|
+
...overrides,
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Test layer combining all mocks
|
|
42
|
+
*/
|
|
43
|
+
const TestLayer = Layer.mergeAll(
|
|
44
|
+
TestDocumentPlugin,
|
|
45
|
+
TestDocumentAiPlugin,
|
|
46
|
+
TestUploadServer,
|
|
47
|
+
);
|
|
48
|
+
|
|
49
|
+
// Mock fetch for URL availability tests
|
|
50
|
+
global.fetch = vi.fn(() =>
|
|
51
|
+
Promise.resolve({
|
|
52
|
+
ok: true,
|
|
53
|
+
status: 200,
|
|
54
|
+
} as Response),
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
describe("Document Nodes", () => {
|
|
58
|
+
describe("ExtractTextNode", () => {
|
|
59
|
+
it.effect("should create extract text node with correct properties", () =>
|
|
60
|
+
Effect.gen(function* () {
|
|
61
|
+
const node = yield* createExtractTextNode("extract-1", {});
|
|
62
|
+
|
|
63
|
+
expect(node.id).toBe("extract-1");
|
|
64
|
+
expect(node.name).toBe("Extract Text");
|
|
65
|
+
expect(node.description).toContain("Extract text");
|
|
66
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
67
|
+
);
|
|
68
|
+
|
|
69
|
+
it.effect("should extract text from PDF", () =>
|
|
70
|
+
Effect.gen(function* () {
|
|
71
|
+
const node = yield* createExtractTextNode("extract-text", {});
|
|
72
|
+
const testFile = createTestUploadFile();
|
|
73
|
+
|
|
74
|
+
const result = yield* node.run({
|
|
75
|
+
data: testFile,
|
|
76
|
+
jobId: "test-job",
|
|
77
|
+
flowId: "test-flow",
|
|
78
|
+
storageId: "test-storage",
|
|
79
|
+
clientId: "test-client",
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
expect(result.type).toBe("complete");
|
|
83
|
+
if (result.type === "complete") {
|
|
84
|
+
expect(result.data.metadata?.extractedText).toBeDefined();
|
|
85
|
+
expect(result.data.metadata?.extractedText).toContain("extracted text");
|
|
86
|
+
}
|
|
87
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
88
|
+
);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
describe("SplitPdfNode", () => {
|
|
92
|
+
it.effect("should create split PDF node with correct properties", () =>
|
|
93
|
+
Effect.gen(function* () {
|
|
94
|
+
const node = yield* createSplitPdfNode("split-1", {
|
|
95
|
+
mode: "range",
|
|
96
|
+
startPage: 1,
|
|
97
|
+
endPage: 3,
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
expect(node.id).toBe("split-1");
|
|
101
|
+
expect(node.name).toBe("Split PDF");
|
|
102
|
+
expect(node.description).toContain("Split");
|
|
103
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
104
|
+
);
|
|
105
|
+
|
|
106
|
+
it.effect("should split PDF by page range", () =>
|
|
107
|
+
Effect.gen(function* () {
|
|
108
|
+
const node = yield* createSplitPdfNode("split-range", {
|
|
109
|
+
mode: "range",
|
|
110
|
+
startPage: 2,
|
|
111
|
+
endPage: 4,
|
|
112
|
+
});
|
|
113
|
+
const testFile = createTestUploadFile();
|
|
114
|
+
|
|
115
|
+
const result = yield* node.run({
|
|
116
|
+
data: testFile,
|
|
117
|
+
jobId: "test-job",
|
|
118
|
+
flowId: "test-flow",
|
|
119
|
+
storageId: "test-storage",
|
|
120
|
+
clientId: "test-client",
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
expect(result.type).toBe("complete");
|
|
124
|
+
if (result.type === "complete") {
|
|
125
|
+
expect(result.data).toBeDefined();
|
|
126
|
+
}
|
|
127
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
128
|
+
);
|
|
129
|
+
|
|
130
|
+
it.effect("should split PDF into individual pages", () =>
|
|
131
|
+
Effect.gen(function* () {
|
|
132
|
+
const node = yield* createSplitPdfNode("split-individual", {
|
|
133
|
+
mode: "individual",
|
|
134
|
+
});
|
|
135
|
+
const testFile = createTestUploadFile();
|
|
136
|
+
|
|
137
|
+
const result = yield* node.run({
|
|
138
|
+
data: testFile,
|
|
139
|
+
jobId: "test-job",
|
|
140
|
+
flowId: "test-flow",
|
|
141
|
+
storageId: "test-storage",
|
|
142
|
+
clientId: "test-client",
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
expect(result.type).toBe("complete");
|
|
146
|
+
if (result.type === "complete") {
|
|
147
|
+
// Individual mode outputs array of files
|
|
148
|
+
expect(result.data).toBeDefined();
|
|
149
|
+
}
|
|
150
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
151
|
+
);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
describe("MergePdfNode", () => {
|
|
155
|
+
it.effect("should create merge PDF node with correct properties", () =>
|
|
156
|
+
Effect.gen(function* () {
|
|
157
|
+
const node = yield* createMergePdfNode("merge-1", {});
|
|
158
|
+
|
|
159
|
+
expect(node.id).toBe("merge-1");
|
|
160
|
+
expect(node.name).toBe("Merge PDFs");
|
|
161
|
+
expect(node.description).toContain("Merge");
|
|
162
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
163
|
+
);
|
|
164
|
+
|
|
165
|
+
it.effect("should merge multiple PDFs", () =>
|
|
166
|
+
Effect.gen(function* () {
|
|
167
|
+
const node = yield* createMergePdfNode("merge-pdfs", {});
|
|
168
|
+
const testFiles = [
|
|
169
|
+
createTestUploadFile({ id: "file-1" }),
|
|
170
|
+
createTestUploadFile({ id: "file-2" }),
|
|
171
|
+
createTestUploadFile({ id: "file-3" }),
|
|
172
|
+
];
|
|
173
|
+
|
|
174
|
+
const result = yield* node.run({
|
|
175
|
+
data: testFiles,
|
|
176
|
+
jobId: "test-job",
|
|
177
|
+
flowId: "test-flow",
|
|
178
|
+
storageId: "test-storage",
|
|
179
|
+
clientId: "test-client",
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
expect(result.type).toBe("complete");
|
|
183
|
+
if (result.type === "complete") {
|
|
184
|
+
expect(result.data).toBeDefined();
|
|
185
|
+
}
|
|
186
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
187
|
+
);
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
describe("DescribeDocumentNode", () => {
|
|
191
|
+
it.effect("should create describe document node with correct properties", () =>
|
|
192
|
+
Effect.gen(function* () {
|
|
193
|
+
const node = yield* createDescribeDocumentNode("describe-1", {});
|
|
194
|
+
|
|
195
|
+
expect(node.id).toBe("describe-1");
|
|
196
|
+
expect(node.name).toBe("Describe Document");
|
|
197
|
+
expect(node.description).toContain("metadata");
|
|
198
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
199
|
+
);
|
|
200
|
+
|
|
201
|
+
it.effect("should extract document metadata", () =>
|
|
202
|
+
Effect.gen(function* () {
|
|
203
|
+
const node = yield* createDescribeDocumentNode("describe-doc", {});
|
|
204
|
+
const testFile = createTestUploadFile();
|
|
205
|
+
|
|
206
|
+
const result = yield* node.run({
|
|
207
|
+
data: testFile,
|
|
208
|
+
jobId: "test-job",
|
|
209
|
+
flowId: "test-flow",
|
|
210
|
+
storageId: "test-storage",
|
|
211
|
+
clientId: "test-client",
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
expect(result.type).toBe("complete");
|
|
215
|
+
if (result.type === "complete") {
|
|
216
|
+
expect(result.data.metadata?.pageCount).toBeDefined();
|
|
217
|
+
expect(result.data.metadata?.author).toBeDefined();
|
|
218
|
+
expect(result.data.metadata?.title).toBeDefined();
|
|
219
|
+
}
|
|
220
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
221
|
+
);
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
describe("OcrNode", () => {
|
|
225
|
+
it.effect("should create OCR node with correct properties", () =>
|
|
226
|
+
Effect.gen(function* () {
|
|
227
|
+
const node = yield* createOcrNode("ocr-1", {
|
|
228
|
+
taskType: "convertToMarkdown",
|
|
229
|
+
resolution: "gundam",
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
expect(node.id).toBe("ocr-1");
|
|
233
|
+
expect(node.name).toBe("OCR");
|
|
234
|
+
expect(node.description).toContain("text");
|
|
235
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
236
|
+
);
|
|
237
|
+
|
|
238
|
+
it.effect("should perform OCR with markdown conversion", () =>
|
|
239
|
+
Effect.gen(function* () {
|
|
240
|
+
const node = yield* createOcrNode("ocr-markdown", {
|
|
241
|
+
taskType: "convertToMarkdown",
|
|
242
|
+
resolution: "gundam",
|
|
243
|
+
});
|
|
244
|
+
const testFile = createTestUploadFile();
|
|
245
|
+
|
|
246
|
+
const result = yield* node.run({
|
|
247
|
+
data: testFile,
|
|
248
|
+
jobId: "test-job",
|
|
249
|
+
flowId: "test-flow",
|
|
250
|
+
storageId: "test-storage",
|
|
251
|
+
clientId: "test-client",
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
expect(result.type).toBe("complete");
|
|
255
|
+
if (result.type === "complete") {
|
|
256
|
+
expect(result.data.metadata?.ocrText).toBeDefined();
|
|
257
|
+
expect(result.data.metadata?.ocrFormat).toBe("markdown");
|
|
258
|
+
}
|
|
259
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
260
|
+
);
|
|
261
|
+
|
|
262
|
+
it.effect("should perform free OCR", () =>
|
|
263
|
+
Effect.gen(function* () {
|
|
264
|
+
const node = yield* createOcrNode("ocr-free", {
|
|
265
|
+
taskType: "freeOcr",
|
|
266
|
+
resolution: "base",
|
|
267
|
+
});
|
|
268
|
+
const testFile = createTestUploadFile();
|
|
269
|
+
|
|
270
|
+
const result = yield* node.run({
|
|
271
|
+
data: testFile,
|
|
272
|
+
jobId: "test-job",
|
|
273
|
+
flowId: "test-flow",
|
|
274
|
+
storageId: "test-storage",
|
|
275
|
+
clientId: "test-client",
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
expect(result.type).toBe("complete");
|
|
279
|
+
if (result.type === "complete") {
|
|
280
|
+
expect(result.data.metadata?.ocrText).toBeDefined();
|
|
281
|
+
expect(result.data.metadata?.ocrFormat).toBe("plain");
|
|
282
|
+
}
|
|
283
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
284
|
+
);
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
describe("ConvertToMarkdownNode", () => {
|
|
288
|
+
it.effect("should create convert to markdown node with correct properties", () =>
|
|
289
|
+
Effect.gen(function* () {
|
|
290
|
+
const node = yield* createConvertToMarkdownNode("convert-1", {});
|
|
291
|
+
|
|
292
|
+
expect(node.id).toBe("convert-1");
|
|
293
|
+
expect(node.name).toBe("Convert to Markdown");
|
|
294
|
+
expect(node.description).toContain("Markdown");
|
|
295
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
296
|
+
);
|
|
297
|
+
|
|
298
|
+
it.effect("should convert document to markdown", () =>
|
|
299
|
+
Effect.gen(function* () {
|
|
300
|
+
const node = yield* createConvertToMarkdownNode("convert-md", {});
|
|
301
|
+
const testFile = createTestUploadFile();
|
|
302
|
+
|
|
303
|
+
const result = yield* node.run({
|
|
304
|
+
data: testFile,
|
|
305
|
+
jobId: "test-job",
|
|
306
|
+
flowId: "test-flow",
|
|
307
|
+
storageId: "test-storage",
|
|
308
|
+
clientId: "test-client",
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
expect(result.type).toBe("complete");
|
|
312
|
+
if (result.type === "complete") {
|
|
313
|
+
expect(result.data.metadata?.markdown).toBeDefined();
|
|
314
|
+
}
|
|
315
|
+
}).pipe(Effect.provide(TestLayer)),
|
|
316
|
+
);
|
|
317
|
+
});
|
|
318
|
+
});
|
package/tsdown.config.ts
ADDED
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { defineConfig } from "vitest/config";
|
|
2
|
+
|
|
3
|
+
export default defineConfig({
|
|
4
|
+
test: {
|
|
5
|
+
globals: true,
|
|
6
|
+
environment: "node",
|
|
7
|
+
include: ["tests/**/*.{test,spec}.{js,mjs,cjs,ts,mts,cts,jsx,tsx}"],
|
|
8
|
+
exclude: ["node_modules", "dist"],
|
|
9
|
+
coverage: {
|
|
10
|
+
provider: "v8",
|
|
11
|
+
reporter: ["text", "json", "html"],
|
|
12
|
+
exclude: [
|
|
13
|
+
"node_modules/",
|
|
14
|
+
"dist/",
|
|
15
|
+
"**/*.d.ts",
|
|
16
|
+
"**/*.test.ts",
|
|
17
|
+
"**/*.spec.ts",
|
|
18
|
+
],
|
|
19
|
+
},
|
|
20
|
+
},
|
|
21
|
+
});
|