npm - @huggingface/inference - Versions diffs - 3.0.0 → 3.1.0 - Mend

@huggingface/inference 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/README.md +11 -6
package/dist/index.cjs +193 -76
package/dist/index.js +193 -76
package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
package/dist/src/providers/fal-ai.d.ts.map +1 -1
package/dist/src/providers/replicate.d.ts.map +1 -1
package/dist/src/providers/together.d.ts.map +1 -1
package/dist/src/tasks/audio/audioClassification.d.ts +4 -18
package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -1
package/dist/src/tasks/audio/audioToAudio.d.ts +10 -9
package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -1
package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +3 -12
package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -1
package/dist/src/tasks/audio/textToSpeech.d.ts +4 -8
package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -1
package/dist/src/tasks/audio/utils.d.ts +11 -0
package/dist/src/tasks/audio/utils.d.ts.map +1 -0
package/dist/src/tasks/cv/imageClassification.d.ts +3 -17
package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -1
package/dist/src/tasks/cv/imageSegmentation.d.ts +3 -21
package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -1
package/dist/src/tasks/cv/imageToImage.d.ts +3 -49
package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -1
package/dist/src/tasks/cv/imageToText.d.ts +3 -12
package/dist/src/tasks/cv/imageToText.d.ts.map +1 -1
package/dist/src/tasks/cv/objectDetection.d.ts +3 -26
package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -1
package/dist/src/tasks/cv/textToImage.d.ts +3 -38
package/dist/src/tasks/cv/textToImage.d.ts.map +1 -1
package/dist/src/tasks/cv/textToVideo.d.ts +6 -0
package/dist/src/tasks/cv/textToVideo.d.ts.map +1 -0
package/dist/src/tasks/cv/utils.d.ts +11 -0
package/dist/src/tasks/cv/utils.d.ts.map +1 -0
package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +7 -15
package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -1
package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +5 -28
package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +5 -20
package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/fillMask.d.ts +2 -21
package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -1
package/dist/src/tasks/nlp/questionAnswering.d.ts +3 -25
package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +2 -13
package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -1
package/dist/src/tasks/nlp/summarization.d.ts +2 -42
package/dist/src/tasks/nlp/summarization.d.ts.map +1 -1
package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +3 -31
package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/textClassification.d.ts +2 -16
package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -1
package/dist/src/tasks/nlp/tokenClassification.d.ts +2 -45
package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -1
package/dist/src/tasks/nlp/translation.d.ts +2 -13
package/dist/src/tasks/nlp/translation.d.ts.map +1 -1
package/dist/src/tasks/nlp/zeroShotClassification.d.ts +2 -22
package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -1
package/dist/src/types.d.ts +4 -0
package/dist/src/types.d.ts.map +1 -1
package/package.json +2 -2
package/src/lib/makeRequestOptions.ts +7 -5
package/src/providers/fal-ai.ts +12 -0
package/src/providers/replicate.ts +6 -3
package/src/providers/together.ts +2 -0
package/src/tasks/audio/audioClassification.ts +7 -22
package/src/tasks/audio/audioToAudio.ts +43 -23
package/src/tasks/audio/automaticSpeechRecognition.ts +35 -23
package/src/tasks/audio/textToSpeech.ts +23 -14
package/src/tasks/audio/utils.ts +18 -0
package/src/tasks/cv/imageClassification.ts +5 -20
package/src/tasks/cv/imageSegmentation.ts +5 -24
package/src/tasks/cv/imageToImage.ts +4 -52
package/src/tasks/cv/imageToText.ts +6 -15
package/src/tasks/cv/objectDetection.ts +5 -30
package/src/tasks/cv/textToImage.ts +14 -50
package/src/tasks/cv/textToVideo.ts +67 -0
package/src/tasks/cv/utils.ts +13 -0
package/src/tasks/cv/zeroShotImageClassification.ts +32 -31
package/src/tasks/multimodal/documentQuestionAnswering.ts +25 -43
package/src/tasks/multimodal/visualQuestionAnswering.ts +20 -36
package/src/tasks/nlp/fillMask.ts +2 -22
package/src/tasks/nlp/questionAnswering.ts +22 -36
package/src/tasks/nlp/sentenceSimilarity.ts +12 -15
package/src/tasks/nlp/summarization.ts +2 -43
package/src/tasks/nlp/tableQuestionAnswering.ts +25 -41
package/src/tasks/nlp/textClassification.ts +3 -18
package/src/tasks/nlp/tokenClassification.ts +2 -47
package/src/tasks/nlp/translation.ts +3 -17
package/src/tasks/nlp/zeroShotClassification.ts +2 -24
package/src/types.ts +7 -1

package/dist/index.js CHANGED Viewed

@@ -49,10 +49,22 @@ var FAL_AI_API_BASE_URL = "https://fal.run";
 var FAL_AI_SUPPORTED_MODEL_IDS = {
   "text-to-image": {
     "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
-    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev"
+    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
+    "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
+    "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
+    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
+    "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
+    "Warlord-K/Sana-1024": "fal-ai/sana",
+    "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
+    "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
+    "Kwai-Kolors/Kolors": "fal-ai/kolors"
   },
   "automatic-speech-recognition": {
     "openai/whisper-large-v3": "fal-ai/whisper"
+  },
+  "text-to-video": {
+    "genmo/mochi-1-preview": "fal-ai/mochi-v1",
+    "tencent/HunyuanVideo": "fal-ai/hunyuan-video"
   }
 };
@@ -62,10 +74,13 @@ var REPLICATE_SUPPORTED_MODEL_IDS = {
   "text-to-image": {
     "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
     "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637"
+  },
+  "text-to-speech": {
+    "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26"
+  },
+  "text-to-video": {
+    "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
   }
-  // "text-to-speech": {
-  // 	"SWivid/F5-TTS": "x-lance/f5-tts:87faf6dd7a692dd82043f662e76369cab126a2cf1937e25a9d41e0b834fd230e"
-  // },
 };
 // src/providers/sambanova.ts
@@ -101,6 +116,8 @@ var TOGETHER_SUPPORTED_MODEL_IDS = {
   },
   conversational: {
     "databricks/dbrx-instruct": "databricks/dbrx-instruct",
+    "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
+    "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
     "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
     "google/gemma-2-9b-it": "google/gemma-2-9b-it",
     "google/gemma-2b-it": "google/gemma-2-27b-it",
@@ -146,7 +163,8 @@ function isUrl(modelOrUrl) {
 var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
 var tasks = null;
 async function makeRequestOptions(args, options) {
-  const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...otherArgs } = args;
+  const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
+  let otherArgs = remainingArgs;
   const provider = maybeProvider ?? "hf-inference";
   const { forceTask, includeCredentials, taskHint, wait_for_model, use_cache, dont_load_model, chatCompletion: chatCompletion2 } = options ?? {};
   if (endpointUrl && provider !== "hf-inference") {
@@ -205,9 +223,9 @@ async function makeRequestOptions(args, options) {
   } else if (includeCredentials === true) {
     credentials = "include";
   }
-  if (provider === "replicate" && model.includes(":")) {
-    const version = model.split(":")[1];
-    otherArgs.version = version;
+  if (provider === "replicate") {
+    const version = model.includes(":") ? model.split(":")[1] : void 0;
+    otherArgs = { input: otherArgs, version };
   }
   const info = {
     headers,
@@ -527,9 +545,42 @@ var InferenceOutputError = class extends TypeError {
   }
 };
+// src/utils/pick.ts
+function pick(o, props) {
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== void 0) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
+// src/utils/typedInclude.ts
+function typedInclude(arr, v) {
+  return arr.includes(v);
+}
+// src/utils/omit.ts
+function omit(o, props) {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
+// src/tasks/audio/utils.ts
+function preparePayload(args) {
+  return "data" in args ? args : {
+    ...omit(args, "inputs"),
+    data: args.inputs
+  };
+}
 // src/tasks/audio/audioClassification.ts
 async function audioClassification(args, options) {
-  const res = await request(args, {
+  const payload = preparePayload(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "audio-classification"
   });
@@ -555,15 +606,8 @@ function base64FromBytes(arr) {
 // src/tasks/audio/automaticSpeechRecognition.ts
 async function automaticSpeechRecognition(args, options) {
-  if (args.provider === "fal-ai") {
-    const contentType = args.data instanceof Blob ? args.data.type : "audio/mpeg";
-    const base64audio = base64FromBytes(
-      new Uint8Array(args.data instanceof ArrayBuffer ? args.data : await args.data.arrayBuffer())
-    );
-    args.audio_url = `data:${contentType};base64,${base64audio}`;
-    delete args.data;
-  }
-  const res = await request(args, {
+  const payload = await buildPayload(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "automatic-speech-recognition"
   });
@@ -573,6 +617,32 @@ async function automaticSpeechRecognition(args, options) {
   }
   return res;
 }
+var FAL_AI_SUPPORTED_BLOB_TYPES = ["audio/mpeg", "audio/mp4", "audio/wav", "audio/x-wav"];
+async function buildPayload(args) {
+  if (args.provider === "fal-ai") {
+    const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : void 0;
+    const contentType = blob?.type;
+    if (!contentType) {
+      throw new Error(
+        `Unable to determine the input's content-type. Make sure your are passing a Blob when using provider fal-ai.`
+      );
+    }
+    if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
+      throw new Error(
+        `Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
+          ", "
+        )}`
+      );
+    }
+    const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
+    return {
+      ..."data" in args ? omit(args, "data") : omit(args, "inputs"),
+      audio_url: `data:${contentType};base64,${base64audio}`
+    };
+  } else {
+    return preparePayload(args);
+  }
+}
 // src/tasks/audio/textToSpeech.ts
 async function textToSpeech(args, options) {
@@ -580,31 +650,55 @@ async function textToSpeech(args, options) {
     ...options,
     taskHint: "text-to-speech"
   });
-  const isValidOutput = res && res instanceof Blob;
-  if (!isValidOutput) {
-    throw new InferenceOutputError("Expected Blob");
+  if (res instanceof Blob) {
+    return res;
   }
-  return res;
+  if (res && typeof res === "object") {
+    if ("output" in res) {
+      if (typeof res.output === "string") {
+        const urlResponse = await fetch(res.output);
+        const blob = await urlResponse.blob();
+        return blob;
+      } else if (Array.isArray(res.output)) {
+        const urlResponse = await fetch(res.output[0]);
+        const blob = await urlResponse.blob();
+        return blob;
+      }
+    }
+  }
+  throw new InferenceOutputError("Expected Blob or object with output");
 }
 // src/tasks/audio/audioToAudio.ts
 async function audioToAudio(args, options) {
-  const res = await request(args, {
+  const payload = preparePayload(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "audio-to-audio"
   });
-  const isValidOutput = Array.isArray(res) && res.every(
-    (x) => typeof x.label === "string" && typeof x.blob === "string" && typeof x["content-type"] === "string"
-  );
-  if (!isValidOutput) {
-    throw new InferenceOutputError("Expected Array<{label: string, blob: string, content-type: string}>");
+  return validateOutput(res);
+}
+function validateOutput(output) {
+  if (!Array.isArray(output)) {
+    throw new InferenceOutputError("Expected Array");
   }
-  return res;
+  if (!output.every((elem) => {
+    return typeof elem === "object" && elem && "label" in elem && typeof elem.label === "string" && "content-type" in elem && typeof elem["content-type"] === "string" && "blob" in elem && typeof elem.blob === "string";
+  })) {
+    throw new InferenceOutputError("Expected Array<{label: string, audio: Blob}>");
+  }
+  return output;
+}
+// src/tasks/cv/utils.ts
+function preparePayload2(args) {
+  return "data" in args ? args : { ...omit(args, "inputs"), data: args.inputs };
 }
 // src/tasks/cv/imageClassification.ts
 async function imageClassification(args, options) {
-  const res = await request(args, {
+  const payload = preparePayload2(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "image-classification"
   });
@@ -617,7 +711,8 @@ async function imageClassification(args, options) {
 // src/tasks/cv/imageSegmentation.ts
 async function imageSegmentation(args, options) {
-  const res = await request(args, {
+  const payload = preparePayload2(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "image-segmentation"
   });
@@ -630,7 +725,8 @@ async function imageSegmentation(args, options) {
 // src/tasks/cv/imageToText.ts
 async function imageToText(args, options) {
-  const res = (await request(args, {
+  const payload = preparePayload2(args);
+  const res = (await request(payload, {
     ...options,
     taskHint: "image-to-text"
   }))?.[0];
@@ -642,7 +738,8 @@ async function imageToText(args, options) {
 // src/tasks/cv/objectDetection.ts
 async function objectDetection(args, options) {
-  const res = await request(args, {
+  const payload = preparePayload2(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "object-detection"
   });
@@ -659,15 +756,13 @@ async function objectDetection(args, options) {
 // src/tasks/cv/textToImage.ts
 async function textToImage(args, options) {
-  if (args.provider === "together" || args.provider === "fal-ai") {
-    args.prompt = args.inputs;
-    args.inputs = "";
-    args.response_format = "base64";
-  } else if (args.provider === "replicate") {
-    args.input = { prompt: args.inputs };
-    delete args.inputs;
-  }
-  const res = await request(args, {
+  const payload = args.provider === "together" || args.provider === "fal-ai" || args.provider === "replicate" ? {
+    ...omit(args, ["inputs", "parameters"]),
+    ...args.parameters,
+    ...args.provider !== "replicate" ? { response_format: "base64" } : void 0,
+    prompt: args.inputs
+  } : args;
+  const res = await request(payload, {
     ...options,
     taskHint: "text-to-image"
   });
@@ -724,18 +819,30 @@ async function imageToImage(args, options) {
 }
 // src/tasks/cv/zeroShotImageClassification.ts
-async function zeroShotImageClassification(args, options) {
-  const reqArgs = {
-    ...args,
-    inputs: {
-      image: base64FromBytes(
-        new Uint8Array(
-          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+async function preparePayload3(args) {
+  if (args.inputs instanceof Blob) {
+    return {
+      ...args,
+      inputs: {
+        image: base64FromBytes(new Uint8Array(await args.inputs.arrayBuffer()))
+      }
+    };
+  } else {
+    return {
+      ...args,
+      inputs: {
+        image: base64FromBytes(
+          new Uint8Array(
+            args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+          )
         )
-      )
-    }
-  };
-  const res = await request(reqArgs, {
+      }
+    };
+  }
+}
+async function zeroShotImageClassification(args, options) {
+  const payload = await preparePayload3(args);
+  const res = await request(payload, {
     ...options,
     taskHint: "zero-shot-image-classification"
   });
@@ -824,17 +931,19 @@ async function questionAnswering(args, options) {
     ...options,
     taskHint: "question-answering"
   });
-  const isValidOutput = typeof res === "object" && !!res && typeof res.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
+  const isValidOutput = Array.isArray(res) ? res.every(
+    (elem) => typeof elem === "object" && !!elem && typeof elem.answer === "string" && typeof elem.end === "number" && typeof elem.score === "number" && typeof elem.start === "number"
+  ) : typeof res === "object" && !!res && typeof res.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
   if (!isValidOutput) {
-    throw new InferenceOutputError("Expected {answer: string, end: number, score: number, start: number}");
+    throw new InferenceOutputError("Expected Array<{answer: string, end: number, score: number, start: number}>");
   }
-  return res;
+  return Array.isArray(res) ? res[0] : res;
 }
 // src/tasks/nlp/sentenceSimilarity.ts
 async function sentenceSimilarity(args, options) {
   const defaultTask = args.model ? await getDefaultTask(args.model, args.accessToken, options) : void 0;
-  const res = await request(args, {
+  const res = await request(prepareInput(args), {
     ...options,
     taskHint: "sentence-similarity",
     ...defaultTask === "feature-extraction" && { forceTask: "sentence-similarity" }
@@ -845,6 +954,13 @@ async function sentenceSimilarity(args, options) {
   }
   return res;
 }
+function prepareInput(args) {
+  return {
+    ...omit(args, ["inputs", "parameters"]),
+    inputs: { ...omit(args.inputs, "sourceSentence") },
+    parameters: { source_sentence: args.inputs.sourceSentence, ...args.parameters }
+  };
+}
 // src/tasks/nlp/summarization.ts
 async function summarization(args, options) {
@@ -865,13 +981,18 @@ async function tableQuestionAnswering(args, options) {
     ...options,
     taskHint: "table-question-answering"
   });
-  const isValidOutput = typeof res?.aggregator === "string" && typeof res.answer === "string" && Array.isArray(res.cells) && res.cells.every((x) => typeof x === "string") && Array.isArray(res.coordinates) && res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
+  const isValidOutput = Array.isArray(res) ? res.every((elem) => validate(elem)) : validate(res);
   if (!isValidOutput) {
     throw new InferenceOutputError(
       "Expected {aggregator: string, answer: string, cells: string[], coordinates: number[][]}"
     );
   }
-  return res;
+  return Array.isArray(res) ? res[0] : res;
+}
+function validate(elem) {
+  return typeof elem === "object" && !!elem && "aggregator" in elem && typeof elem.aggregator === "string" && "answer" in elem && typeof elem.answer === "string" && "cells" in elem && Array.isArray(elem.cells) && elem.cells.every((x) => typeof x === "string") && "coordinates" in elem && Array.isArray(elem.coordinates) && elem.coordinates.every(
+    (coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number")
+  );
 }
 // src/tasks/nlp/textClassification.ts
@@ -1014,11 +1135,7 @@ async function documentQuestionAnswering(args, options) {
     inputs: {
       question: args.inputs.question,
       // convert Blob or ArrayBuffer to base64
-      image: base64FromBytes(
-        new Uint8Array(
-          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
-        )
-      )
+      image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
     }
   };
   const res = toArray(
@@ -1026,12 +1143,14 @@ async function documentQuestionAnswering(args, options) {
       ...options,
       taskHint: "document-question-answering"
     })
-  )?.[0];
-  const isValidOutput = typeof res?.answer === "string" && (typeof res.end === "number" || typeof res.end === "undefined") && (typeof res.score === "number" || typeof res.score === "undefined") && (typeof res.start === "number" || typeof res.start === "undefined");
+  );
+  const isValidOutput = Array.isArray(res) && res.every(
+    (elem) => typeof elem === "object" && !!elem && typeof elem?.answer === "string" && (typeof elem.end === "number" || typeof elem.end === "undefined") && (typeof elem.score === "number" || typeof elem.score === "undefined") && (typeof elem.start === "number" || typeof elem.start === "undefined")
+  );
   if (!isValidOutput) {
     throw new InferenceOutputError("Expected Array<{answer: string, end?: number, score?: number, start?: number}>");
   }
-  return res;
+  return res[0];
 }
 // src/tasks/multimodal/visualQuestionAnswering.ts
@@ -1041,22 +1160,20 @@ async function visualQuestionAnswering(args, options) {
     inputs: {
       question: args.inputs.question,
       // convert Blob or ArrayBuffer to base64
-      image: base64FromBytes(
-        new Uint8Array(
-          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
-        )
-      )
+      image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
     }
   };
-  const res = (await request(reqArgs, {
+  const res = await request(reqArgs, {
     ...options,
     taskHint: "visual-question-answering"
-  }))?.[0];
-  const isValidOutput = typeof res?.answer === "string" && typeof res.score === "number";
+  });
+  const isValidOutput = Array.isArray(res) && res.every(
+    (elem) => typeof elem === "object" && !!elem && typeof elem?.answer === "string" && typeof elem.score === "number"
+  );
   if (!isValidOutput) {
     throw new InferenceOutputError("Expected Array<{answer: string, score: number}>");
   }
-  return res;
+  return res[0];
 }
 // src/tasks/tabular/tabularRegression.ts

package/dist/src/lib/makeRequestOptions.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAWpE;;GAEG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;IACnC,sCAAsC;IACtC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,~~CA+G7C~~"}
1	+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAWpE;;GAEG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;IACnC,sCAAsC;IACtC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAiH7C"}

package/dist/src/providers/fal-ai.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,~~CAQ~~/D,CAAC"}
1	+ {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,CAoB/D,CAAC"}

package/dist/src/providers/replicate.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"replicate.d.ts","sourceRoot":"","sources":["../../../src/providers/replicate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,sBAAsB,8BAA8B,CAAC;AAElE,KAAK,WAAW,GAAG,MAAM,CAAC;AAE1B,eAAO,MAAM,6BAA6B,EAAE,eAAe,CAAC,WAAW,~~CAStE~~,CAAC"}
1	+ {"version":3,"file":"replicate.d.ts","sourceRoot":"","sources":["../../../src/providers/replicate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,sBAAsB,8BAA8B,CAAC;AAElE,KAAK,WAAW,GAAG,MAAM,CAAC;AAE1B,eAAO,MAAM,6BAA6B,EAAE,eAAe,CAAC,WAAW,CAYtE,CAAC"}

package/dist/src/providers/together.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"together.d.ts","sourceRoot":"","sources":["../../../src/providers/together.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,qBAAqB,6BAA6B,CAAC;AAEhE;;GAEG;AACH,KAAK,UAAU,GAAG,MAAM,CAAC;AAEzB;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,eAAe,CAAC,UAAU,~~CA6CpE~~,CAAC"}
1	+ {"version":3,"file":"together.d.ts","sourceRoot":"","sources":["../../../src/providers/together.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,qBAAqB,6BAA6B,CAAC;AAEhE;;GAEG;AACH,KAAK,UAAU,GAAG,MAAM,CAAC;AAEzB;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,eAAe,CAAC,UAAU,CA+CpE,CAAC"}

package/dist/src/tasks/audio/audioClassification.d.ts CHANGED Viewed

@@ -1,24 +1,10 @@
+import type { AudioClassificationInput, AudioClassificationOutput } from "@huggingface/tasks";
 import type { BaseArgs, Options } from "../../types";
-export type AudioClassificationArgs = BaseArgs & {
-    /**
-     * Binary audio data
-     */
-    data: Blob | ArrayBuffer;
-};
-export interface AudioClassificationOutputValue {
-    /**
-     * The label for the class (model specific)
-     */
-    label: string;
-    /**
-     * A float that represents how likely it is that the audio file belongs to this class.
-     */
-    score: number;
-}
-export type AudioClassificationReturn = AudioClassificationOutputValue[];
+import type { LegacyAudioInput } from "./utils";
+export type AudioClassificationArgs = BaseArgs & (AudioClassificationInput | LegacyAudioInput);
 /**
  * This task reads some audio input and outputs the likelihood of classes.
  * Recommended model:  superb/hubert-large-superb-er
  */
-export declare function audioClassification(args: AudioClassificationArgs, options?: Options): Promise<AudioClassificationReturn>;
+export declare function audioClassification(args: AudioClassificationArgs, options?: Options): Promise<AudioClassificationOutput>;
 //# sourceMappingURL=audioClassification.d.ts.map

package/dist/src/tasks/audio/audioClassification.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"audioClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioClassification.ts"],"names":[],"mappings":"~~AACA~~,OAAO,KAAK,EAAE,~~QAAQ~~,EAAE,~~OAAO~~,EAAE,MAAM,~~aAAa~~,CAAC;~~AAGrD~~,~~MAAM~~,~~MAAM~~,~~uBAAuB~~,~~GAAG,~~QAAQ,~~GAAG;IAChD;;OAEG;IACH~~,~~IAAI~~,EAAE,~~IAAI~~,~~GAAG~~,~~WAAW,~~CAAC;~~CACzB~~,~~CAAC;AAEF~~,~~MAAM,WAAW,8BAA8B;IAC9C;;OAEG;IACH,~~KAAK,EAAE,~~MAAM~~,~~CAAC;IAEd;;OAEG;IACH,KAAK,~~EAAE,MAAM,CAAC;~~CACd;AAED~~,MAAM,MAAM,~~yBAAyB~~,GAAG,~~8BAA8B~~,~~EAAE~~,CAAC;~~AAEzE~~;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,~~CAWpC~~"}
1	+ {"version":3,"file":"audioClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioClassification.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE9F,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAGhD,MAAM,MAAM,uBAAuB,GAAG,QAAQ,GAAG,CAAC,wBAAwB,GAAG,gBAAgB,CAAC,CAAC;AAE/F;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,CAYpC"}

package/dist/src/tasks/audio/audioToAudio.d.ts CHANGED Viewed

@@ -1,11 +1,12 @@
 import type { BaseArgs, Options } from "../../types";
-export type AudioToAudioArgs = BaseArgs & {
+import type { LegacyAudioInput } from "./utils";
+export type AudioToAudioArgs = (BaseArgs & {
     /**
      * Binary audio data
      */
-    data: Blob | ArrayBuffer;
-};
-export interface AudioToAudioOutputValue {
+    inputs: Blob;
+}) | LegacyAudioInput;
+export interface AudioToAudioOutputElem {
     /**
      * The label for the audio output (model specific)
      */
@@ -13,16 +14,16 @@ export interface AudioToAudioOutputValue {
     /**
      * Base64 encoded audio output.
      */
+    audio: Blob;
+}
+export interface AudioToAudioOutput {
     blob: string;
-    /**
-     * Content-type for blob, e.g. audio/flac
-     */
     "content-type": string;
+    label: string;
 }
-export type AudioToAudioReturn = AudioToAudioOutputValue[];
 /**
  * This task reads some audio input and outputs one or multiple audio files.
  * Example model: speechbrain/sepformer-wham does audio source separation.
  */
-export declare function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioReturn>;
+export declare function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioOutput[]>;
 //# sourceMappingURL=audioToAudio.d.ts.map

package/dist/src/tasks/audio/audioToAudio.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"audioToAudio.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioToAudio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;~~AAGrD~~,MAAM,MAAM,gBAAgB,~~GAAG~~,QAAQ,GAAG;~~IACzC~~;;OAEG;IACH,~~IAAI~~,EAAE,IAAI,~~GAAG,WAAW,~~CAAC;~~CACzB~~,CAAC;~~AAEF~~,MAAM,WAAW,~~uBAAuB~~;~~IACvC~~;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,~~IAAI~~,EAAE,~~MAAM~~,CAAC;~~IAEb;;OAEG~~;~~IACH~~,~~cAAc~~,EAAE,MAAM,CAAC;~~CACvB;AAED~~,~~MAAM~~,MAAM,~~kBAAkB~~,~~GAAG~~,~~uBAAuB,~~EAAE,CAAC;~~AAE3D~~;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,CAAC,~~CAczG~~"}
1	+ {"version":3,"file":"audioToAudio.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioToAudio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAGhD,MAAM,MAAM,gBAAgB,GACzB,CAAC,QAAQ,GAAG;IACZ;;OAEG;IACH,MAAM,EAAE,IAAI,CAAC;CACZ,CAAC,GACF,gBAAgB,CAAC;AAEpB,MAAM,WAAW,sBAAsB;IACtC;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,KAAK,EAAE,IAAI,CAAC;CACZ;AAED,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAQ3G"}

package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts CHANGED Viewed

@@ -1,16 +1,7 @@
+import type { AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";
 import type { BaseArgs, Options } from "../../types";
-export type AutomaticSpeechRecognitionArgs = BaseArgs & {
-    /**
-     * Binary audio data
-     */
-    data: Blob | ArrayBuffer;
-};
-export interface AutomaticSpeechRecognitionOutput {
-    /**
-     * The text that was recognized from the audio
-     */
-    text: string;
-}
+import type { LegacyAudioInput } from "./utils";
+export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput);
 /**
  * This task reads some audio input and outputs the said words within the audio files.
  * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self

package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"automaticSpeechRecognition.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/automaticSpeechRecognition.ts"],"names":[],"mappings":"~~AACA~~,OAAO,KAAK,EAAE,~~QAAQ,~~EAAE,~~OAAO~~,~~EAAe~~,MAAM,~~aAAa~~,CAAC;~~AAIlE~~,~~MAAM~~,~~MAAM~~,~~8BAA8B~~,~~GAAG,~~QAAQ,~~GAAG~~;~~IACvD;;OAEG;IACH~~,~~IAAI~~,EAAE,~~IAAI~~,~~GAAG~~,~~WAAW~~,~~CAAC;CACzB~~,CAAC;~~AAEF~~,MAAM,~~WAAW~~,~~gCAAgC;IAChD;;OAEG;IACH~~,~~IAAI~~,~~EAAE~~,~~MAAM~~,CAAC;~~CACb;AAED~~;;;GAGG;AACH,wBAAsB,0BAA0B,CAC/C,IAAI,EAAE,8BAA8B,EACpC,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,gCAAgC,CAAC,~~CAkB3C~~"}
1	+ {"version":3,"file":"automaticSpeechRecognition.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/automaticSpeechRecognition.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,+BAA+B,EAAE,gCAAgC,EAAE,MAAM,oBAAoB,CAAC;AAE5G,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAe,MAAM,aAAa,CAAC;AAGlE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAIhD,MAAM,MAAM,8BAA8B,GAAG,QAAQ,GAAG,CAAC,+BAA+B,GAAG,gBAAgB,CAAC,CAAC;AAC7G;;;GAGG;AACH,wBAAsB,0BAA0B,CAC/C,IAAI,EAAE,8BAA8B,EACpC,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,gCAAgC,CAAC,CAW3C"}

package/dist/src/tasks/audio/textToSpeech.d.ts CHANGED Viewed

@@ -1,14 +1,10 @@
+import type { TextToSpeechInput } from "@huggingface/tasks";
 import type { BaseArgs, Options } from "../../types";
-export type TextToSpeechArgs = BaseArgs & {
-    /**
-     * The text to generate an audio from
-     */
-    inputs: string;
-};
-export type TextToSpeechOutput = Blob;
+type TextToSpeechArgs = BaseArgs & TextToSpeechInput;
 /**
  * This task synthesize an audio of a voice pronouncing a given text.
  * Recommended model: espnet/kan-bayashi_ljspeech_vits
  */
-export declare function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput>;
+export declare function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<Blob>;
+export {};
 //# sourceMappingURL=textToSpeech.d.ts.map

package/dist/src/tasks/audio/textToSpeech.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"~~AACA~~,OAAO,KAAK,EAAE,~~QAAQ~~,EAAE,~~OAAO,EAAE,~~MAAM,~~aAAa~~,CAAC;~~AAGrD~~,~~MAAM~~,~~MAAM~~,~~gBAAgB~~,~~GAAG,~~QAAQ,~~GAAG;IACzC;;OAEG;IACH~~,~~MAAM~~,EAAE,MAAM,~~CAAC;CACf~~,CAAC;~~AAEF~~,~~MAAM~~,~~MAAM~~,~~kBAAkB~~,GAAG,~~IAAI~~,CAAC;~~AAEtC~~;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,~~kBAAkB~~,CAAC,~~CAUzG~~"}
1	+ {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE5D,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,KAAK,gBAAgB,GAAG,QAAQ,GAAG,iBAAiB,CAAC;AAKrD;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAsB3F"}

package/dist/src/tasks/audio/utils.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import type { BaseArgs, RequestArgs } from "../../types";
+/**
+ * @deprecated
+ */
+export interface LegacyAudioInput {
+    data: Blob | ArrayBuffer;
+}
+export declare function preparePayload(args: BaseArgs & ({
+    inputs: Blob;
+} | LegacyAudioInput)): RequestArgs;
+//# sourceMappingURL=utils.d.ts.map

package/dist/src/tasks/audio/utils.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAGzD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,QAAQ,GAAG,CAAC;IAAE,MAAM,EAAE,IAAI,CAAA;CAAE,GAAG,gBAAgB,CAAC,GAAG,WAAW,CAOlG"}

package/dist/src/tasks/cv/imageClassification.d.ts CHANGED Viewed

@@ -1,21 +1,7 @@
+import type { ImageClassificationInput, ImageClassificationOutput } from "@huggingface/tasks";
 import type { BaseArgs, Options } from "../../types";
-export type ImageClassificationArgs = BaseArgs & {
-    /**
-     * Binary image data
-     */
-    data: Blob | ArrayBuffer;
-};
-export interface ImageClassificationOutputValue {
-    /**
-     * The label for the class (model specific)
-     */
-    label: string;
-    /**
-     * A float that represents how likely it is that the image file belongs to this class.
-     */
-    score: number;
-}
-export type ImageClassificationOutput = ImageClassificationOutputValue[];
+import { type LegacyImageInput } from "./utils";
+export type ImageClassificationArgs = BaseArgs & (ImageClassificationInput | LegacyImageInput);
 /**
  * This task reads some image input and outputs the likelihood of classes.
  * Recommended model: google/vit-base-patch16-224