@workglow/tasks 0.0.83 → 0.0.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index.js → browser.js} +333 -6
- package/dist/{index.js.map → browser.js.map} +6 -5
- package/dist/bun.js +5714 -0
- package/dist/bun.js.map +21 -0
- package/dist/{index.d.ts → common.d.ts} +1 -1
- package/dist/common.d.ts.map +1 -0
- package/dist/node.js +5714 -0
- package/dist/node.js.map +21 -0
- package/dist/task/FetchUrlTask.d.ts +36 -0
- package/dist/task/FetchUrlTask.d.ts.map +1 -1
- package/dist/task/FileLoaderTask.d.ts +162 -0
- package/dist/task/FileLoaderTask.d.ts.map +1 -0
- package/dist/task/FileLoaderTask.server.d.ts +38 -0
- package/dist/task/FileLoaderTask.server.d.ts.map +1 -0
- package/dist/types.d.ts +2 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +24 -15
- package/dist/index.d.ts.map +0 -1
|
@@ -178,6 +178,7 @@ var inputSchema3 = {
|
|
|
178
178
|
response_type: {
|
|
179
179
|
anyOf: [{ type: "null" }, { enum: ["json", "text", "blob", "arraybuffer"] }],
|
|
180
180
|
title: "Response Type",
|
|
181
|
+
description: "The forced type of response to return. If null, the response type is inferred from the Content-Type header.",
|
|
181
182
|
default: null
|
|
182
183
|
},
|
|
183
184
|
timeout: {
|
|
@@ -214,6 +215,16 @@ var outputSchema3 = {
|
|
|
214
215
|
arraybuffer: {
|
|
215
216
|
title: "ArrayBuffer",
|
|
216
217
|
description: "The arraybuffer response"
|
|
218
|
+
},
|
|
219
|
+
metadata: {
|
|
220
|
+
type: "object",
|
|
221
|
+
properties: {
|
|
222
|
+
contentType: { type: "string" },
|
|
223
|
+
headers: { type: "object", additionalProperties: { type: "string" } }
|
|
224
|
+
},
|
|
225
|
+
additionalProperties: false,
|
|
226
|
+
title: "Response Metadata",
|
|
227
|
+
description: "HTTP response metadata including content type and headers"
|
|
217
228
|
}
|
|
218
229
|
},
|
|
219
230
|
additionalProperties: false
|
|
@@ -281,9 +292,17 @@ class FetchUrlJob extends Job {
|
|
|
281
292
|
signal: context.signal
|
|
282
293
|
}, async (progress) => await context.updateProgress(progress));
|
|
283
294
|
if (response.ok) {
|
|
295
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
296
|
+
const headers = {};
|
|
297
|
+
response.headers.forEach((value, key) => {
|
|
298
|
+
headers[key] = value;
|
|
299
|
+
});
|
|
300
|
+
const metadata = {
|
|
301
|
+
contentType,
|
|
302
|
+
headers
|
|
303
|
+
};
|
|
284
304
|
let responseType = input.response_type;
|
|
285
305
|
if (!responseType) {
|
|
286
|
-
const contentType = response.headers.get("content-type") ?? "";
|
|
287
306
|
if (contentType.includes("application/json")) {
|
|
288
307
|
responseType = "json";
|
|
289
308
|
} else if (contentType.includes("text/")) {
|
|
@@ -298,13 +317,13 @@ class FetchUrlJob extends Job {
|
|
|
298
317
|
input.response_type = responseType;
|
|
299
318
|
}
|
|
300
319
|
if (responseType === "json") {
|
|
301
|
-
return { json: await response.json() };
|
|
320
|
+
return { json: await response.json(), metadata };
|
|
302
321
|
} else if (responseType === "text") {
|
|
303
|
-
return { text: await response.text() };
|
|
322
|
+
return { text: await response.text(), metadata };
|
|
304
323
|
} else if (responseType === "blob") {
|
|
305
|
-
return { blob: await response.blob() };
|
|
324
|
+
return { blob: await response.blob(), metadata };
|
|
306
325
|
} else if (responseType === "arraybuffer") {
|
|
307
|
-
return { arraybuffer: await response.arrayBuffer() };
|
|
326
|
+
return { arraybuffer: await response.arrayBuffer(), metadata };
|
|
308
327
|
}
|
|
309
328
|
throw new TaskInvalidInputError(`Invalid response type: ${responseType}`);
|
|
310
329
|
} else {
|
|
@@ -364,6 +383,9 @@ class FetchUrlTask extends JobQueueTask {
|
|
|
364
383
|
} else if (responseType === "arraybuffer" && staticSchema.properties.arraybuffer) {
|
|
365
384
|
properties.arraybuffer = staticSchema.properties.arraybuffer;
|
|
366
385
|
}
|
|
386
|
+
if (staticSchema.properties.metadata) {
|
|
387
|
+
properties.metadata = staticSchema.properties.metadata;
|
|
388
|
+
}
|
|
367
389
|
if (Object.keys(properties).length === 0) {
|
|
368
390
|
return staticSchema;
|
|
369
391
|
}
|
|
@@ -5195,6 +5217,309 @@ var split = (input2, config = {}) => {
|
|
|
5195
5217
|
return task.run();
|
|
5196
5218
|
};
|
|
5197
5219
|
Workflow8.prototype.split = CreateWorkflow8(SplitTask);
|
|
5220
|
+
// src/task/FileLoaderTask.ts
|
|
5221
|
+
import {
|
|
5222
|
+
CreateWorkflow as CreateWorkflow9,
|
|
5223
|
+
Task as Task7,
|
|
5224
|
+
TaskAbortedError as TaskAbortedError2,
|
|
5225
|
+
TaskRegistry as TaskRegistry9,
|
|
5226
|
+
Workflow as Workflow9
|
|
5227
|
+
} from "@workglow/task-graph";
|
|
5228
|
+
import { parse as parse2 } from "csv-parse/sync";
|
|
5229
|
+
// JSON Schema describing the input object: a required `url` string and an
// optional `format` hint that defaults to "auto".
var inputSchema9 = {
  type: "object",
  properties: {
    // Location of the document to load.
    url: {
      type: "string",
      title: "URL",
      description: "URL to load document from (http://, https://)",
      format: "uri"
    },
    // Explicit file format, or "auto" to derive it from the URL.
    format: {
      type: "string",
      enum: ["text", "markdown", "json", "csv", "pdf", "image", "html", "auto"],
      title: "Format",
      description: "File format (auto-detected from URL if 'auto')",
      default: "auto"
    }
  },
  required: ["url"],
  additionalProperties: false
};
|
|
5249
|
+
// JSON Schema describing the output object. Only `metadata` is required; the
// remaining properties are the per-format payload slots (text, json, csv,
// image, pdf).
var outputSchema9 = {
  type: "object",
  properties: {
    text: {
      type: "string",
      title: "Text",
      description: "Text content (for text, markdown, html formats)"
    },
    // No `type` constraint: the schema leaves the JSON payload untyped.
    json: {
      title: "JSON",
      description: "Parsed JSON object or array"
    },
    csv: {
      type: "array",
      title: "CSV",
      description: "Parsed CSV data as array of objects"
    },
    image: {
      type: "string",
      title: "Image",
      description: "Base64 data URL for image files",
      format: "image:data-uri"
    },
    pdf: {
      type: "string",
      title: "PDF",
      description: "Base64 data URL for PDF files"
    },
    metadata: {
      type: "object",
      properties: {
        url: { type: "string" },
        format: { type: "string" },
        size: { type: "number" },
        title: { type: "string" },
        mimeType: { type: "string" }
      },
      additionalProperties: false,
      title: "Metadata",
      description: "File metadata"
    }
  },
  required: ["metadata"],
  additionalProperties: false
};
|
|
5294
|
+
|
|
5295
|
+
/**
 * Task that loads a document from a URL and returns it in a format-specific
 * slot (`text`, `json`, `csv`, `image` or `pdf`) together with `metadata`
 * (url, format, size, title, mimeType).
 *
 * The download is delegated to a FetchUrlTask owned by the execution context;
 * this class decides which response type to request and how to turn the
 * fetched payload into the output shape.
 */
class FileLoaderTask extends Task7 {
  static type = "FileLoaderTask";
  static category = "Document";
  static title = "File Loader";
  static description = "Load documents from URLs (http://, https://)";
  static cacheable = true;

  /** Image MIME types keyed by lower-case file extension (without the dot). */
  static #IMAGE_MIME_TYPES = {
    png: "image/png",
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    gif: "image/gif",
    webp: "image/webp",
    bmp: "image/bmp",
    svg: "image/svg+xml",
    ico: "image/x-icon"
  };

  static inputSchema() {
    return inputSchema9;
  }

  static outputSchema() {
    return outputSchema9;
  }

  /**
   * Lower-cased strings whose suffix should be inspected for a file extension.
   *
   * The raw URL comes first so every URL that matched before still matches the
   * same way; the path without query string / fragment comes second so that
   * URLs such as `https://host/data.json?token=abc` are recognised as well.
   *
   * @param {string} url
   * @returns {string[]} one or two candidates, in priority order
   */
  static #extensionCandidates(url) {
    const full = url.toLowerCase();
    let path;
    try {
      path = new URL(url).pathname.toLowerCase();
    } catch {
      // Not an absolute URL: drop everything from the first "?" or "#".
      path = full.split(/[?#]/, 1)[0];
    }
    return path === full ? [full] : [full, path];
  }

  /**
   * Maps the extension at the end of `name` to a format.
   *
   * @param {string} name lower-cased URL or path
   * @returns {string | undefined} the format, or undefined when the extension is not recognised
   */
  static #formatFromExtension(name) {
    if (name.endsWith(".md") || name.endsWith(".markdown")) {
      return "markdown";
    }
    if (name.endsWith(".json")) {
      return "json";
    }
    if (name.endsWith(".csv")) {
      return "csv";
    }
    if (name.endsWith(".pdf")) {
      return "pdf";
    }
    if (/\.(jpg|jpeg|png|gif|bmp|webp|svg|ico)$/.test(name)) {
      return "image";
    }
    if (name.endsWith(".html") || name.endsWith(".htm")) {
      return "html";
    }
    return undefined;
  }

  /**
   * Detects the format, fetches the URL, parses the response and reports
   * progress (0, 10, 60, 100) along the way.
   *
   * @param input2 object with `url` and optional `format` (defaults to "auto")
   * @param context execution context; this method uses `signal`, `updateProgress` and `own`
   * @returns the parsed payload slots plus `metadata`
   * @throws TaskAbortedError2 when `context.signal` is aborted at any checkpoint
   */
  async execute(input2, context) {
    const { url, format = "auto" } = input2;
    // Abort checkpoint, evaluated before every progress update.
    const throwIfAborted = () => {
      if (context.signal.aborted) {
        throw new TaskAbortedError2("Task aborted");
      }
    };

    throwIfAborted();
    await context.updateProgress(0, "Detecting file format");
    const detectedFormat = this.detectFormat(url, format);
    const responseType = this.detectResponseType(detectedFormat);

    throwIfAborted();
    await context.updateProgress(10, `Fetching ${detectedFormat} file from ${url}`);
    const fetchTask = context.own(new FetchUrlTask({
      url,
      response_type: responseType,
      queue: false
    }));
    const response = await fetchTask.run();

    throwIfAborted();
    await context.updateProgress(60, "Parsing file content");
    // Last path segment of the URL, or the whole URL when that segment is empty.
    const title = url.split("/").pop() || url;
    const { text, json: json2, csv, image, pdf, size, mimeType } = await this.parseResponse(response, url, detectedFormat);

    throwIfAborted();
    await context.updateProgress(100, "File loaded successfully");
    return {
      text,
      json: json2,
      csv,
      image,
      pdf,
      metadata: {
        url,
        format: detectedFormat,
        size,
        title,
        mimeType
      }
    };
  }

  /**
   * Parses a JSON string.
   *
   * @param {string} content
   * @returns the parsed value
   * @throws SyntaxError (from JSON.parse) on malformed input
   */
  parseJsonContent(content) {
    return JSON.parse(content);
  }

  /**
   * Parses CSV text with the first row as column names, skipping empty lines
   * and trimming values.
   *
   * @param {string} content
   * @returns the parser's result
   * @throws Error "Failed to parse CSV: …" with the parser error attached as `cause`
   */
  parseCsvContent(content) {
    try {
      return parse2(content, {
        columns: true,
        skip_empty_lines: true,
        trim: true
      });
    } catch (error) {
      // Keep the original error reachable for callers that need details.
      throw new Error(`Failed to parse CSV: ${error}`, { cause: error });
    }
  }

  /**
   * Converts a fetch result into the loader's payload shape.
   *
   * @param response fetch output; reads `json`, `text`, `blob` and `metadata.contentType`
   * @param {string} url source URL, used in error messages and as MIME-type fallback for images
   * @param {string} detectedFormat format returned by `detectFormat`
   * @returns object with `text`, `json`, `csv`, `image`, `pdf` (exactly one set,
   *   the others `undefined`), `size` and `mimeType`
   * @throws Error when the payload expected for the format is missing or empty
   */
  async parseResponse(response, url, detectedFormat) {
    const responseMimeType = response.metadata?.contentType || "";
    // Every result carries all five payload keys; unused ones stay undefined.
    const emptyPayload = {
      text: undefined,
      json: undefined,
      csv: undefined,
      image: undefined,
      pdf: undefined
    };

    if (detectedFormat === "json") {
      // Only a missing value is a failure: 0, false, "" and null are valid JSON documents.
      if (response.json === undefined) {
        throw new Error(`Failed to load JSON from ${url}`);
      }
      const jsonData = response.json;
      return {
        ...emptyPayload,
        json: jsonData,
        // Size is the length of the pretty-printed serialization.
        size: JSON.stringify(jsonData, null, 2).length,
        mimeType: responseMimeType || "application/json"
      };
    }

    if (detectedFormat === "csv") {
      const csvText = response.text || "";
      if (!csvText) {
        throw new Error(`Failed to load CSV from ${url}`);
      }
      return {
        ...emptyPayload,
        csv: this.parseCsvContent(csvText),
        size: csvText.length,
        mimeType: responseMimeType || "text/csv"
      };
    }

    if (detectedFormat === "image") {
      if (!response.blob) {
        throw new Error(`Failed to load image from ${url}`);
      }
      const blob = response.blob;
      // Preference order: response header, blob's own type, extension-based guess.
      const imageMimeType = responseMimeType || blob.type || this.getImageMimeType(url);
      return {
        ...emptyPayload,
        image: await this.blobToBase64DataURL(blob, imageMimeType),
        size: blob.size,
        mimeType: imageMimeType
      };
    }

    if (detectedFormat === "pdf") {
      if (!response.blob) {
        throw new Error(`Failed to load PDF from ${url}`);
      }
      const blob = response.blob;
      const pdfMimeType = responseMimeType || "application/pdf";
      return {
        ...emptyPayload,
        pdf: await this.blobToBase64DataURL(blob, pdfMimeType),
        size: blob.size,
        mimeType: pdfMimeType
      };
    }

    // Everything else (text, markdown, html, …) is returned as plain text.
    const content = response.text || "";
    if (!content) {
      throw new Error(`Failed to load content from ${url}`);
    }
    let fallbackMimeType = "text/plain";
    if (detectedFormat === "markdown") {
      fallbackMimeType = "text/markdown";
    } else if (detectedFormat === "html") {
      fallbackMimeType = "text/html";
    }
    return {
      ...emptyPayload,
      text: content,
      size: content.length,
      mimeType: responseMimeType || fallbackMimeType
    };
  }

  /**
   * Chooses the response type to request from FetchUrlTask.
   *
   * @param {string} detectedFormat
   * @returns {"json" | "blob" | "text"} "json" for json, "blob" for image/pdf, "text" otherwise
   */
  detectResponseType(detectedFormat) {
    if (detectedFormat === "json") {
      return "json";
    }
    if (detectedFormat === "image" || detectedFormat === "pdf") {
      return "blob";
    }
    return "text";
  }

  /**
   * Resolves the effective format.
   *
   * @param {string} url
   * @param {string} format explicit format, or "auto" to derive it from the URL's extension
   * @returns {string} `format` unchanged unless it is "auto"; otherwise the
   *   format matching the extension, or "text" when none matches
   */
  detectFormat(url, format) {
    if (format !== "auto") {
      return format;
    }
    for (const candidate of FileLoaderTask.#extensionCandidates(url)) {
      const detected = FileLoaderTask.#formatFromExtension(candidate);
      if (detected) {
        return detected;
      }
    }
    return "text";
  }

  /**
   * Guesses an image MIME type from the URL's file extension.
   *
   * @param {string} url
   * @returns {string} the matching MIME type, or "image/jpeg" when the extension is unknown
   */
  getImageMimeType(url) {
    for (const candidate of FileLoaderTask.#extensionCandidates(url)) {
      const match = /\.(png|jpe?g|gif|webp|bmp|svg|ico)$/.exec(candidate);
      if (match) {
        return FileLoaderTask.#IMAGE_MIME_TYPES[match[1]];
      }
    }
    return "image/jpeg";
  }

  /**
   * Encodes a Blob as a base64 data URL.
   *
   * Uses `Buffer` when it is defined, otherwise `FileReader`.
   *
   * @param blob Blob to encode
   * @param {string} mimeType MIME type to place in the data URL
   * @returns {Promise<string>} `data:<mimeType>;base64,<payload>`; on the FileReader
   *   path the reader's own data URL is returned as-is unless it lacks a MIME type
   */
  async blobToBase64DataURL(blob, mimeType) {
    if (typeof Buffer !== "undefined") {
      const bytes = Buffer.from(await blob.arrayBuffer());
      return `data:${mimeType};base64,${bytes.toString("base64")}`;
    }
    return new Promise((resolve, reject) => {
      const untypedPrefix = "data:;base64,";
      const reader = new FileReader();
      reader.onloadend = () => {
        const result = reader.result;
        // loadend also fires after a failed or aborted read, where result is null.
        if (typeof result !== "string") {
          reject(reader.error ?? new Error("Failed to read blob as data URL"));
          return;
        }
        if (result.startsWith(untypedPrefix)) {
          // The blob had no type: substitute the MIME type we were given.
          resolve(`data:${mimeType};base64,${result.slice(untypedPrefix.length)}`);
        } else {
          resolve(result);
        }
      };
      reader.onerror = reject;
      reader.readAsDataURL(blob);
    });
  }
}
|
|
5518
|
+
// Register the task class with the task registry.
TaskRegistry9.registerTask(FileLoaderTask);

/**
 * Convenience wrapper: constructs a FileLoaderTask from `input2` and `config`
 * and runs it immediately.
 *
 * @param input2 task input, passed to the FileLoaderTask constructor
 * @param config task configuration, passed to the FileLoaderTask constructor
 * @returns the result of the task's `run()` call
 */
var fileLoader = (input2, config) => new FileLoaderTask(input2, config).run();

// Attach the workflow helper created by CreateWorkflow9 as `fileLoader` on Workflow9 instances.
Workflow9.prototype.fileLoader = CreateWorkflow9(FileLoaderTask);
|
|
5198
5523
|
export {
|
|
5199
5524
|
split,
|
|
5200
5525
|
process,
|
|
@@ -5202,6 +5527,7 @@ export {
|
|
|
5202
5527
|
lambda,
|
|
5203
5528
|
json,
|
|
5204
5529
|
javaScript,
|
|
5530
|
+
fileLoader,
|
|
5205
5531
|
fetchUrl,
|
|
5206
5532
|
delay,
|
|
5207
5533
|
debugLog,
|
|
@@ -5210,10 +5536,11 @@ export {
|
|
|
5210
5536
|
LambdaTask,
|
|
5211
5537
|
JsonTask,
|
|
5212
5538
|
JavaScriptTask,
|
|
5539
|
+
FileLoaderTask,
|
|
5213
5540
|
FetchUrlTask,
|
|
5214
5541
|
FetchUrlJob,
|
|
5215
5542
|
DelayTask,
|
|
5216
5543
|
DebugLogTask
|
|
5217
5544
|
};
|
|
5218
5545
|
|
|
5219
|
-
//# debugId=
|
|
5546
|
+
//# debugId=0A217C65AC31B63164756E2164756E21
|