@workglow/tasks 0.0.83 → 0.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -178,6 +178,7 @@ var inputSchema3 = {
178
178
  response_type: {
179
179
  anyOf: [{ type: "null" }, { enum: ["json", "text", "blob", "arraybuffer"] }],
180
180
  title: "Response Type",
181
+ description: "The forced type of response to return. If null, the response type is inferred from the Content-Type header.",
181
182
  default: null
182
183
  },
183
184
  timeout: {
@@ -214,6 +215,16 @@ var outputSchema3 = {
214
215
  arraybuffer: {
215
216
  title: "ArrayBuffer",
216
217
  description: "The arraybuffer response"
218
+ },
219
+ metadata: {
220
+ type: "object",
221
+ properties: {
222
+ contentType: { type: "string" },
223
+ headers: { type: "object", additionalProperties: { type: "string" } }
224
+ },
225
+ additionalProperties: false,
226
+ title: "Response Metadata",
227
+ description: "HTTP response metadata including content type and headers"
217
228
  }
218
229
  },
219
230
  additionalProperties: false
@@ -281,9 +292,17 @@ class FetchUrlJob extends Job {
281
292
  signal: context.signal
282
293
  }, async (progress) => await context.updateProgress(progress));
283
294
  if (response.ok) {
295
+ const contentType = response.headers.get("content-type") ?? "";
296
+ const headers = {};
297
+ response.headers.forEach((value, key) => {
298
+ headers[key] = value;
299
+ });
300
+ const metadata = {
301
+ contentType,
302
+ headers
303
+ };
284
304
  let responseType = input.response_type;
285
305
  if (!responseType) {
286
- const contentType = response.headers.get("content-type") ?? "";
287
306
  if (contentType.includes("application/json")) {
288
307
  responseType = "json";
289
308
  } else if (contentType.includes("text/")) {
@@ -298,13 +317,13 @@ class FetchUrlJob extends Job {
298
317
  input.response_type = responseType;
299
318
  }
300
319
  if (responseType === "json") {
301
- return { json: await response.json() };
320
+ return { json: await response.json(), metadata };
302
321
  } else if (responseType === "text") {
303
- return { text: await response.text() };
322
+ return { text: await response.text(), metadata };
304
323
  } else if (responseType === "blob") {
305
- return { blob: await response.blob() };
324
+ return { blob: await response.blob(), metadata };
306
325
  } else if (responseType === "arraybuffer") {
307
- return { arraybuffer: await response.arrayBuffer() };
326
+ return { arraybuffer: await response.arrayBuffer(), metadata };
308
327
  }
309
328
  throw new TaskInvalidInputError(`Invalid response type: ${responseType}`);
310
329
  } else {
@@ -364,6 +383,9 @@ class FetchUrlTask extends JobQueueTask {
364
383
  } else if (responseType === "arraybuffer" && staticSchema.properties.arraybuffer) {
365
384
  properties.arraybuffer = staticSchema.properties.arraybuffer;
366
385
  }
386
+ if (staticSchema.properties.metadata) {
387
+ properties.metadata = staticSchema.properties.metadata;
388
+ }
367
389
  if (Object.keys(properties).length === 0) {
368
390
  return staticSchema;
369
391
  }
@@ -5195,6 +5217,309 @@ var split = (input2, config = {}) => {
5195
5217
  return task.run();
5196
5218
  };
5197
5219
  Workflow8.prototype.split = CreateWorkflow8(SplitTask);
5220
+ // src/task/FileLoaderTask.ts
5221
+ import {
5222
+ CreateWorkflow as CreateWorkflow9,
5223
+ Task as Task7,
5224
+ TaskAbortedError as TaskAbortedError2,
5225
+ TaskRegistry as TaskRegistry9,
5226
+ Workflow as Workflow9
5227
+ } from "@workglow/task-graph";
5228
+ import { parse as parse2 } from "csv-parse/sync";
5229
// JSON-schema description of FileLoaderTask's input: a required `url` and an
// optional `format` selector that defaults to "auto".
var inputSchema9 = {
  type: "object",
  properties: {
    url: {
      type: "string",
      title: "URL",
      description: "URL to load document from (http://, https://)",
      format: "uri",
    },
    format: {
      type: "string",
      enum: [
        "text",
        "markdown",
        "json",
        "csv",
        "pdf",
        "image",
        "html",
        "auto",
      ],
      title: "Format",
      description: "File format (auto-detected from URL if 'auto')",
      default: "auto",
    },
  },
  required: ["url"],
  additionalProperties: false,
};
5249
// JSON-schema description of FileLoaderTask's output: one optional payload slot
// per supported kind of content plus a required `metadata` object.
var outputSchema9 = {
  type: "object",
  properties: {
    text: {
      type: "string",
      title: "Text",
      description: "Text content (for text, markdown, html formats)",
    },
    // `json` deliberately declares no `type`: any parsed JSON value is allowed.
    json: { title: "JSON", description: "Parsed JSON object or array" },
    csv: {
      type: "array",
      title: "CSV",
      description: "Parsed CSV data as array of objects",
    },
    image: {
      type: "string",
      title: "Image",
      description: "Base64 data URL for image files",
      format: "image:data-uri",
    },
    pdf: {
      type: "string",
      title: "PDF",
      description: "Base64 data URL for PDF files",
    },
    metadata: {
      type: "object",
      properties: {
        url: { type: "string" },
        format: { type: "string" },
        size: { type: "number" },
        title: { type: "string" },
        mimeType: { type: "string" },
      },
      additionalProperties: false,
      title: "Metadata",
      description: "File metadata",
    },
  },
  required: ["metadata"],
  additionalProperties: false,
};
5294
+
5295
/**
 * Task that loads a document from a URL and returns it in a format-specific
 * output slot (`text`, `json`, `csv`, `image` or `pdf`) together with metadata.
 *
 * The download itself is delegated to a `FetchUrlTask` owned by the running
 * context; this class only decides which response type to request and how to
 * turn the fetched payload into the output shape.
 */
class FileLoaderTask extends Task7 {
  static type = "FileLoaderTask";
  static category = "Document";
  static title = "File Loader";
  static description = "Load documents from URLs (http://, https://)";
  static cacheable = true;

  static inputSchema() {
    return inputSchema9;
  }

  static outputSchema() {
    return outputSchema9;
  }

  /**
   * Fetch and parse the file described by `input2`.
   *
   * @param {{ url: string, format?: string }} input2 - URL to load and an
   *   optional format override; `format` defaults to "auto".
   * @param {object} context - Execution context; this method uses its `signal`,
   *   `updateProgress` and `own` members.
   * @returns {Promise<object>} Object with `text`, `json`, `csv`, `image`, `pdf`
   *   (only the slot matching the detected format is defined) and `metadata`
   *   (`url`, `format`, `size`, `title`, `mimeType`).
   * @throws {TaskAbortedError2} If the context signal is aborted at a checkpoint.
   * @throws {Error} If the payload is missing or cannot be parsed.
   */
  async execute(input2, context) {
    const { url, format = "auto" } = input2;

    this.assertNotAborted(context);
    await context.updateProgress(0, "Detecting file format");
    const detectedFormat = this.detectFormat(url, format);
    const responseType = this.detectResponseType(detectedFormat);

    this.assertNotAborted(context);
    await context.updateProgress(10, `Fetching ${detectedFormat} file from ${url}`);
    const fetchTask = context.own(new FetchUrlTask({
      url,
      response_type: responseType,
      queue: false
    }));
    const response = await fetchTask.run();

    this.assertNotAborted(context);
    await context.updateProgress(60, "Parsing file content");
    // Last path segment, without any query string or fragment; fall back to the
    // whole URL when the path ends in "/".
    const title = this.stripQueryAndFragment(url).split("/").pop() || url;
    const { text, json: json2, csv, image, pdf, size, mimeType } =
      await this.parseResponse(response, url, detectedFormat);

    this.assertNotAborted(context);
    await context.updateProgress(100, "File loaded successfully");
    return {
      text,
      json: json2,
      csv,
      image,
      pdf,
      metadata: {
        url,
        format: detectedFormat,
        size,
        title,
        mimeType
      }
    };
  }

  /**
   * Throw if the context's abort signal has fired.
   *
   * @param {object} context - Execution context exposing `signal.aborted`.
   * @throws {TaskAbortedError2} When `context.signal.aborted` is true.
   */
  assertNotAborted(context) {
    if (context.signal.aborted) {
      throw new TaskAbortedError2("Task aborted");
    }
  }

  /**
   * Return `url` up to (not including) the first `?` or `#`, so that extension
   * checks and title extraction are not confused by query strings or fragments.
   *
   * @param {string} url
   * @returns {string}
   */
  stripQueryAndFragment(url) {
    return url.split(/[?#]/, 1)[0];
  }

  /**
   * Parse a JSON document.
   *
   * @param {string} content - JSON text.
   * @returns {*} The parsed value.
   * @throws {SyntaxError} If `content` is not valid JSON.
   */
  parseJsonContent(content) {
    return JSON.parse(content);
  }

  /**
   * Parse CSV text with the first row as column names, skipping empty lines and
   * trimming values.
   *
   * @param {string} content - CSV text.
   * @returns {*} Whatever the CSV parser returns for these options.
   * @throws {Error} If parsing fails; the parser's error is attached as `cause`.
   */
  parseCsvContent(content) {
    try {
      return parse2(content, {
        columns: true,
        skip_empty_lines: true,
        trim: true
      });
    } catch (error) {
      throw new Error(`Failed to parse CSV: ${error}`, { cause: error });
    }
  }

  /**
   * Convert the fetch result into the payload for `detectedFormat`.
   *
   * Note: for json/csv/text formats `size` is the string length (UTF-16 code
   * units), whereas for image/pdf it is `blob.size`.
   *
   * @param {object} response - Result of the fetch task; this method reads
   *   `json`, `text`, `blob` and `metadata.contentType` from it.
   * @param {string} url - Source URL, used in error messages and as a MIME-type hint.
   * @param {string} detectedFormat - Format returned by `detectFormat`.
   * @returns {Promise<object>} Object with `text`, `json`, `csv`, `image`, `pdf`,
   *   `size` and `mimeType`; payload slots that do not apply are `undefined`.
   * @throws {Error} If the expected payload is missing or empty.
   */
  async parseResponse(response, url, detectedFormat) {
    const responseMimeType = response.metadata?.contentType || "";
    // Every result exposes all five payload slots so callers can destructure uniformly.
    const emptyPayload = {
      text: undefined,
      json: undefined,
      csv: undefined,
      image: undefined,
      pdf: undefined
    };

    if (detectedFormat === "json") {
      // Compare against undefined rather than using a truthiness check: 0, false,
      // "" and null are all valid JSON documents.
      if (response.json === undefined) {
        throw new Error(`Failed to load JSON from ${url}`);
      }
      const jsonData = response.json;
      const serialized = JSON.stringify(jsonData, null, 2);
      return {
        ...emptyPayload,
        json: jsonData,
        size: serialized.length,
        mimeType: responseMimeType || "application/json"
      };
    }

    if (detectedFormat === "csv") {
      const csvText = response.text || "";
      if (!csvText) {
        throw new Error(`Failed to load CSV from ${url}`);
      }
      return {
        ...emptyPayload,
        csv: this.parseCsvContent(csvText),
        size: csvText.length,
        mimeType: responseMimeType || "text/csv"
      };
    }

    if (detectedFormat === "image") {
      if (!response.blob) {
        throw new Error(`Failed to load image from ${url}`);
      }
      const blob = response.blob;
      // Preference order: Content-Type header, the blob's own type, URL extension.
      const imageMimeType = responseMimeType || blob.type || this.getImageMimeType(url);
      return {
        ...emptyPayload,
        image: await this.blobToBase64DataURL(blob, imageMimeType),
        size: blob.size,
        mimeType: imageMimeType
      };
    }

    if (detectedFormat === "pdf") {
      if (!response.blob) {
        throw new Error(`Failed to load PDF from ${url}`);
      }
      const blob = response.blob;
      const pdfMimeType = responseMimeType || "application/pdf";
      return {
        ...emptyPayload,
        pdf: await this.blobToBase64DataURL(blob, pdfMimeType),
        size: blob.size,
        mimeType: pdfMimeType
      };
    }

    // Everything else (text, markdown, html, ...) is returned as plain text.
    const content = response.text || "";
    if (!content) {
      throw new Error(`Failed to load content from ${url}`);
    }
    const fallbackMimeTypes = { markdown: "text/markdown", html: "text/html" };
    const mimeType = responseMimeType || fallbackMimeTypes[detectedFormat] || "text/plain";
    return {
      ...emptyPayload,
      text: content,
      size: content.length,
      mimeType
    };
  }

  /**
   * Choose the `response_type` to request from the fetch task.
   *
   * @param {string} detectedFormat - Format returned by `detectFormat`.
   * @returns {"json" | "blob" | "text"} "json" for json, "blob" for image/pdf,
   *   "text" for everything else.
   */
  detectResponseType(detectedFormat) {
    if (detectedFormat === "json") {
      return "json";
    }
    if (detectedFormat === "image" || detectedFormat === "pdf") {
      return "blob";
    }
    return "text";
  }

  /**
   * Resolve the effective format. An explicit `format` is returned unchanged;
   * "auto" is resolved from the URL's file extension, ignoring any query string
   * or fragment, and falls back to "text".
   *
   * @param {string} url
   * @param {string} format - Requested format or "auto".
   * @returns {string} The resolved format.
   */
  detectFormat(url, format) {
    if (format !== "auto") {
      return format;
    }
    const path = this.stripQueryAndFragment(url).toLowerCase();
    if (path.endsWith(".md") || path.endsWith(".markdown")) {
      return "markdown";
    }
    if (path.endsWith(".json")) {
      return "json";
    }
    if (path.endsWith(".csv")) {
      return "csv";
    }
    if (path.endsWith(".pdf")) {
      return "pdf";
    }
    if (/\.(jpg|jpeg|png|gif|bmp|webp|svg|ico)$/.test(path)) {
      return "image";
    }
    if (path.endsWith(".html") || path.endsWith(".htm")) {
      return "html";
    }
    return "text";
  }

  /**
   * Guess an image MIME type from the URL's file extension, ignoring any query
   * string or fragment.
   *
   * @param {string} url
   * @returns {string} The matching MIME type, or "image/jpeg" when the extension
   *   is not recognised.
   */
  getImageMimeType(url) {
    const path = this.stripQueryAndFragment(url).toLowerCase();
    const mimeTypesByExtension = [
      [".png", "image/png"],
      [".jpg", "image/jpeg"],
      [".jpeg", "image/jpeg"],
      [".gif", "image/gif"],
      [".webp", "image/webp"],
      [".bmp", "image/bmp"],
      [".svg", "image/svg+xml"],
      [".ico", "image/x-icon"]
    ];
    const match = mimeTypesByExtension.find(([extension]) => path.endsWith(extension));
    return match ? match[1] : "image/jpeg";
  }

  /**
   * Encode a blob as a base64 `data:` URL.
   *
   * Uses `Buffer` when it exists, otherwise falls back to `FileReader`.
   *
   * @param {Blob} blob - Binary payload to encode.
   * @param {string} mimeType - MIME type to put in the data URL. In the
   *   `FileReader` branch it is only applied when the reader produced a URL
   *   with an empty type (`data:;base64,`).
   * @returns {Promise<string>} The data URL.
   */
  async blobToBase64DataURL(blob, mimeType) {
    if (typeof Buffer !== "undefined") {
      const bytes = Buffer.from(await blob.arrayBuffer());
      return `data:${mimeType};base64,${bytes.toString("base64")}`;
    }
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      // Use `onload`, not `onloadend`: loadend also fires after a failed read,
      // when `reader.result` is null and must not be treated as a string.
      reader.onload = () => {
        const result = reader.result;
        if (typeof result !== "string") {
          reject(new Error("FileReader did not produce a data URL"));
          return;
        }
        const untypedPrefix = "data:;base64,";
        resolve(
          result.startsWith(untypedPrefix)
            ? `data:${mimeType};base64,${result.substring(untypedPrefix.length)}`
            : result
        );
      };
      reader.onerror = () => {
        reject(reader.error ?? new Error("Failed to read blob"));
      };
      reader.readAsDataURL(blob);
    });
  }
}
5518
// Make FileLoaderTask known to the task registry.
TaskRegistry9.registerTask(FileLoaderTask);

/**
 * Convenience wrapper: build a FileLoaderTask from `input2` and `config` and run it.
 *
 * @param {object} input2 - Task input, passed to the FileLoaderTask constructor.
 * @param {object} [config] - Task configuration, passed to the constructor.
 * @returns {*} Whatever the task's `run()` returns.
 */
var fileLoader = (input2, config) => new FileLoaderTask(input2, config).run();

// Install a `fileLoader` entry on Workflow9.prototype, built by CreateWorkflow9.
Workflow9.prototype.fileLoader = CreateWorkflow9(FileLoaderTask);
5198
5523
  export {
5199
5524
  split,
5200
5525
  process,
@@ -5202,6 +5527,7 @@ export {
5202
5527
  lambda,
5203
5528
  json,
5204
5529
  javaScript,
5530
+ fileLoader,
5205
5531
  fetchUrl,
5206
5532
  delay,
5207
5533
  debugLog,
@@ -5210,10 +5536,11 @@ export {
5210
5536
  LambdaTask,
5211
5537
  JsonTask,
5212
5538
  JavaScriptTask,
5539
+ FileLoaderTask,
5213
5540
  FetchUrlTask,
5214
5541
  FetchUrlJob,
5215
5542
  DelayTask,
5216
5543
  DebugLogTask
5217
5544
  };
5218
5545
 
5219
- //# debugId=5A48AA5322FF43C164756E2164756E21
5546
+ //# debugId=0A217C65AC31B63164756E2164756E21