npm - @huggingface/inference - Versions diffs - 3.7.0 → 3.7.1 - Mend

@huggingface/inference 3.7.0 → 3.7.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (126)

package/dist/index.cjs +1152 -839
package/dist/index.js +1154 -841
package/dist/src/lib/getProviderHelper.d.ts +37 -0
package/dist/src/lib/getProviderHelper.d.ts.map +1 -0
package/dist/src/lib/makeRequestOptions.d.ts +0 -2
package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
package/dist/src/providers/black-forest-labs.d.ts +14 -18
package/dist/src/providers/black-forest-labs.d.ts.map +1 -1
package/dist/src/providers/cerebras.d.ts +4 -2
package/dist/src/providers/cerebras.d.ts.map +1 -1
package/dist/src/providers/cohere.d.ts +5 -2
package/dist/src/providers/cohere.d.ts.map +1 -1
package/dist/src/providers/fal-ai.d.ts +50 -3
package/dist/src/providers/fal-ai.d.ts.map +1 -1
package/dist/src/providers/fireworks-ai.d.ts +5 -2
package/dist/src/providers/fireworks-ai.d.ts.map +1 -1
package/dist/src/providers/hf-inference.d.ts +125 -2
package/dist/src/providers/hf-inference.d.ts.map +1 -1
package/dist/src/providers/hyperbolic.d.ts +31 -2
package/dist/src/providers/hyperbolic.d.ts.map +1 -1
package/dist/src/providers/nebius.d.ts +20 -18
package/dist/src/providers/nebius.d.ts.map +1 -1
package/dist/src/providers/novita.d.ts +21 -18
package/dist/src/providers/novita.d.ts.map +1 -1
package/dist/src/providers/openai.d.ts +4 -2
package/dist/src/providers/openai.d.ts.map +1 -1
package/dist/src/providers/providerHelper.d.ts +182 -0
package/dist/src/providers/providerHelper.d.ts.map +1 -0
package/dist/src/providers/replicate.d.ts +23 -19
package/dist/src/providers/replicate.d.ts.map +1 -1
package/dist/src/providers/sambanova.d.ts +4 -2
package/dist/src/providers/sambanova.d.ts.map +1 -1
package/dist/src/providers/together.d.ts +32 -2
package/dist/src/providers/together.d.ts.map +1 -1
package/dist/src/snippets/getInferenceSnippets.d.ts.map +1 -1
package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -1
package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -1
package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -1
package/dist/src/tasks/audio/utils.d.ts +2 -1
package/dist/src/tasks/audio/utils.d.ts.map +1 -1
package/dist/src/tasks/custom/request.d.ts +0 -2
package/dist/src/tasks/custom/request.d.ts.map +1 -1
package/dist/src/tasks/custom/streamingRequest.d.ts +0 -2
package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -1
package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -1
package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -1
package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -1
package/dist/src/tasks/cv/imageToText.d.ts.map +1 -1
package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -1
package/dist/src/tasks/cv/textToImage.d.ts.map +1 -1
package/dist/src/tasks/cv/textToVideo.d.ts.map +1 -1
package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -1
package/dist/src/tasks/index.d.ts +6 -6
package/dist/src/tasks/index.d.ts.map +1 -1
package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -1
package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -1
package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -1
package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -1
package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -1
package/dist/src/tasks/nlp/summarization.d.ts.map +1 -1
package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -1
package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -1
package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -1
package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -1
package/dist/src/tasks/nlp/translation.d.ts.map +1 -1
package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -1
package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -1
package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -1
package/dist/src/types.d.ts +3 -13
package/dist/src/types.d.ts.map +1 -1
package/package.json +3 -3
package/src/lib/getProviderHelper.ts +270 -0
package/src/lib/makeRequestOptions.ts +34 -91
package/src/providers/black-forest-labs.ts +73 -22
package/src/providers/cerebras.ts +6 -27
package/src/providers/cohere.ts +9 -28
package/src/providers/fal-ai.ts +195 -77
package/src/providers/fireworks-ai.ts +8 -29
package/src/providers/hf-inference.ts +555 -34
package/src/providers/hyperbolic.ts +107 -29
package/src/providers/nebius.ts +65 -29
package/src/providers/novita.ts +68 -32
package/src/providers/openai.ts +6 -32
package/src/providers/providerHelper.ts +354 -0
package/src/providers/replicate.ts +124 -34
package/src/providers/sambanova.ts +5 -30
package/src/providers/together.ts +92 -28
package/src/snippets/getInferenceSnippets.ts +16 -9
package/src/snippets/templates.exported.ts +1 -1
package/src/tasks/audio/audioClassification.ts +4 -7
package/src/tasks/audio/audioToAudio.ts +3 -26
package/src/tasks/audio/automaticSpeechRecognition.ts +4 -3
package/src/tasks/audio/textToSpeech.ts +5 -29
package/src/tasks/audio/utils.ts +2 -1
package/src/tasks/custom/request.ts +0 -2
package/src/tasks/custom/streamingRequest.ts +0 -2
package/src/tasks/cv/imageClassification.ts +3 -7
package/src/tasks/cv/imageSegmentation.ts +3 -8
package/src/tasks/cv/imageToImage.ts +3 -6
package/src/tasks/cv/imageToText.ts +3 -6
package/src/tasks/cv/objectDetection.ts +3 -18
package/src/tasks/cv/textToImage.ts +9 -137
package/src/tasks/cv/textToVideo.ts +11 -62
package/src/tasks/cv/zeroShotImageClassification.ts +3 -7
package/src/tasks/index.ts +6 -6
package/src/tasks/multimodal/documentQuestionAnswering.ts +3 -19
package/src/tasks/multimodal/visualQuestionAnswering.ts +3 -11
package/src/tasks/nlp/chatCompletion.ts +5 -20
package/src/tasks/nlp/chatCompletionStream.ts +1 -2
package/src/tasks/nlp/featureExtraction.ts +3 -18
package/src/tasks/nlp/fillMask.ts +3 -16
package/src/tasks/nlp/questionAnswering.ts +3 -22
package/src/tasks/nlp/sentenceSimilarity.ts +3 -7
package/src/tasks/nlp/summarization.ts +3 -6
package/src/tasks/nlp/tableQuestionAnswering.ts +3 -27
package/src/tasks/nlp/textClassification.ts +3 -8
package/src/tasks/nlp/textGeneration.ts +12 -79
package/src/tasks/nlp/tokenClassification.ts +3 -18
package/src/tasks/nlp/translation.ts +3 -6
package/src/tasks/nlp/zeroShotClassification.ts +3 -16
package/src/tasks/tabular/tabularClassification.ts +3 -6
package/src/tasks/tabular/tabularRegression.ts +3 -6
package/src/types.ts +3 -14

package/src/snippets/getInferenceSnippets.ts CHANGED Viewed

@@ -1,15 +1,15 @@
-import type { PipelineType, WidgetType } from "@huggingface/tasks/src/pipelines.js";
-import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks/src/tasks/index.js";
+import { Template } from "@huggingface/jinja";
 import {
 	type InferenceSnippet,
 	type InferenceSnippetLanguage,
 	type ModelDataMinimal,
-	inferenceSnippetLanguages,
 	getModelInputSnippet,
+	inferenceSnippetLanguages,
 } from "@huggingface/tasks";
-import type { InferenceProvider, InferenceTask, RequestArgs } from "../types";
-import { Template } from "@huggingface/jinja";
+import type { PipelineType, WidgetType } from "@huggingface/tasks/src/pipelines.js";
+import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks/src/tasks/index.js";
 import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions";
+import type { InferenceProvider, InferenceTask, RequestArgs } from "../types";
 import { templates } from "./templates.exported";
 const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
@@ -120,6 +120,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 		opts?: Record<string, unknown>
 	): InferenceSnippet[] => {
 		/// Hacky: hard-code conversational templates here
+		let task = model.pipeline_tag as InferenceTask;
 		if (
 			model.pipeline_tag &&
 			["text-generation", "image-text-to-text"].includes(model.pipeline_tag) &&
@@ -127,14 +128,20 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 		) {
 			templateName = opts?.streaming ? "conversationalStream" : "conversational";
 			inputPreparationFn = prepareConversationalInput;
+			task = "conversational";
 		}
 		/// Prepare inputs + make request
 		const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
 		const request = makeRequestOptionsFromResolvedModel(
 			providerModelId ?? model.id,
-			{ accessToken: accessToken, provider: provider, ...inputs } as RequestArgs,
-			{ chatCompletion: templateName.includes("conversational"), task: model.pipeline_tag as InferenceTask }
+			{
+				accessToken: accessToken,
+				provider: provider,
+				...inputs,
+			} as RequestArgs,
+			{
+				task: task,
+			}
 		);
 		/// Parse request.info.body if not a binary.
@@ -247,7 +254,7 @@ const prepareConversationalInput = (
 	return {
 		messages: opts?.messages ?? getModelInputSnippet(model),
 		...(opts?.temperature ? { temperature: opts?.temperature } : undefined),
-		max_tokens: opts?.max_tokens ?? 500,
+		max_tokens: opts?.max_tokens ?? 512,
 		...(opts?.top_p ? { top_p: opts?.top_p } : undefined),
 	};
 };

package/src/snippets/templates.exported.ts CHANGED Viewed

@@ -20,7 +20,7 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
     },
     "openai": {
       "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
-      "conversationalStream": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n});\n\nlet out = \"\";\n\nconst stream = await client.chat.completions.create({\n    provider: \"{{ provider }}\",\n    model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}  \n}"
+      "conversationalStream": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n});\n\nconst stream = await client.chat.completions.create({\n    model: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n    stream: true,\n});\n\nfor await (const chunk of stream) {\n    process.stdout.write(chunk.choices[0]?.delta?.content || \"\");\n}"
     }
   },
   "python": {

package/src/tasks/audio/audioClassification.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { AudioClassificationInput, AudioClassificationOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import type { LegacyAudioInput } from "./utils";
@@ -15,15 +15,12 @@ export async function audioClassification(
 	args: AudioClassificationArgs,
 	options?: Options
 ): Promise<AudioClassificationOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "audio-classification");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<AudioClassificationOutput>(payload, {
 		...options,
 		task: "audio-classification",
 	});
-	const isValidOutput =
-		Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
-	if (!isValidOutput) {
-		throw new InferenceOutputError("Expected Array<{label: string, score: number}>");
-	}
-	return res;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/audio/audioToAudio.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import type { LegacyAudioInput } from "./utils";
@@ -36,34 +36,11 @@ export interface AudioToAudioOutput {
  * Example model: speechbrain/sepformer-wham does audio source separation.
  */
 export async function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioOutput[]> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "audio-to-audio");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<AudioToAudioOutput>(payload, {
 		...options,
 		task: "audio-to-audio",
 	});
-	return validateOutput(res);
-}
-function validateOutput(output: unknown): AudioToAudioOutput[] {
-	if (!Array.isArray(output)) {
-		throw new InferenceOutputError("Expected Array");
-	}
-	if (
-		!output.every((elem): elem is AudioToAudioOutput => {
-			return (
-				typeof elem === "object" &&
-				elem &&
-				"label" in elem &&
-				typeof elem.label === "string" &&
-				"content-type" in elem &&
-				typeof elem["content-type"] === "string" &&
-				"blob" in elem &&
-				typeof elem.blob === "string"
-			);
-		})
-	) {
-		throw new InferenceOutputError("Expected Array<{label: string, audio: Blob}>");
-	}
-	return output;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/audio/automaticSpeechRecognition.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import type { AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { FAL_AI_SUPPORTED_BLOB_TYPES } from "../../providers/fal-ai";
 import type { BaseArgs, Options, RequestArgs } from "../../types";
 import { base64FromBytes } from "../../utils/base64FromBytes";
 import { omit } from "../../utils/omit";
@@ -16,6 +18,7 @@ export async function automaticSpeechRecognition(
 	args: AutomaticSpeechRecognitionArgs,
 	options?: Options
 ): Promise<AutomaticSpeechRecognitionOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "automatic-speech-recognition");
 	const payload = await buildPayload(args);
 	const { data: res } = await innerRequest<AutomaticSpeechRecognitionOutput>(payload, {
 		...options,
@@ -25,11 +28,9 @@ export async function automaticSpeechRecognition(
 	if (!isValidOutput) {
 		throw new InferenceOutputError("Expected {text: string}");
 	}
-	return res;
+	return providerHelper.getResponse(res);
 }
-const FAL_AI_SUPPORTED_BLOB_TYPES = ["audio/mpeg", "audio/mp4", "audio/wav", "audio/x-wav"];
 async function buildPayload(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs> {
 	if (args.provider === "fal-ai") {
 		const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : undefined;

package/src/tasks/audio/textToSpeech.ts CHANGED Viewed

@@ -1,7 +1,6 @@
 import type { TextToSpeechInput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
-import { omit } from "../../utils/omit";
 import { innerRequest } from "../../utils/request";
 type TextToSpeechArgs = BaseArgs & TextToSpeechInput;
@@ -13,34 +12,11 @@ interface OutputUrlTextToSpeechGeneration {
  * Recommended model: espnet/kan-bayashi_ljspeech_vits
  */
 export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<Blob> {
-	// Replicate models expects "text" instead of "inputs"
-	const payload =
-		args.provider === "replicate"
-			? {
-					...omit(args, ["inputs", "parameters"]),
-					...args.parameters,
-					text: args.inputs,
-			  }
-			: args;
-	const { data: res } = await innerRequest<Blob | OutputUrlTextToSpeechGeneration>(payload, {
+	const provider = args.provider ?? "hf-inference";
+	const providerHelper = getProviderHelper(provider, "text-to-speech");
+	const { data: res } = await innerRequest<Blob | OutputUrlTextToSpeechGeneration>(args, {
 		...options,
 		task: "text-to-speech",
 	});
-	if (res instanceof Blob) {
-		return res;
-	}
-	if (res && typeof res === "object") {
-		if ("output" in res) {
-			if (typeof res.output === "string") {
-				const urlResponse = await fetch(res.output);
-				const blob = await urlResponse.blob();
-				return blob;
-			} else if (Array.isArray(res.output)) {
-				const urlResponse = await fetch(res.output[0]);
-				const blob = await urlResponse.blob();
-				return blob;
-			}
-		}
-	}
-	throw new InferenceOutputError("Expected Blob or object with output");
+	return providerHelper.getResponse(res);
 }

package/src/tasks/audio/utils.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { BaseArgs, RequestArgs } from "../../types";
+import type { BaseArgs, InferenceProvider, RequestArgs } from "../../types";
 import { omit } from "../../utils/omit";
 /**
@@ -6,6 +6,7 @@ import { omit } from "../../utils/omit";
  */
 export interface LegacyAudioInput {
 	data: Blob | ArrayBuffer;
+	provider?: InferenceProvider;
 }
 export function preparePayload(args: BaseArgs & ({ inputs: Blob } | LegacyAudioInput)): RequestArgs {

package/src/tasks/custom/request.ts CHANGED Viewed

@@ -10,8 +10,6 @@ export async function request<T>(
 	options?: Options & {
 		/** In most cases (unless we pass a endpointUrl) we know the task */
 		task?: InferenceTask;
-		/** Is chat completion compatible */
-		chatCompletion?: boolean;
 	}
 ): Promise<T> {
 	console.warn(

package/src/tasks/custom/streamingRequest.ts CHANGED Viewed

@@ -9,8 +9,6 @@ export async function* streamingRequest<T>(
 	options?: Options & {
 		/** In most cases (unless we pass a endpointUrl) we know the task */
 		task?: InferenceTask;
-		/** Is chat completion compatible */
-		chatCompletion?: boolean;
 	}
 ): AsyncGenerator<T> {
 	console.warn(

package/src/tasks/cv/imageClassification.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ImageClassificationInput, ImageClassificationOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import { preparePayload, type LegacyImageInput } from "./utils";
@@ -14,15 +14,11 @@ export async function imageClassification(
 	args: ImageClassificationArgs,
 	options?: Options
 ): Promise<ImageClassificationOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "image-classification");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<ImageClassificationOutput>(payload, {
 		...options,
 		task: "image-classification",
 	});
-	const isValidOutput =
-		Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
-	if (!isValidOutput) {
-		throw new InferenceOutputError("Expected Array<{label: string, score: number}>");
-	}
-	return res;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/cv/imageSegmentation.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ImageSegmentationInput, ImageSegmentationOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import { preparePayload, type LegacyImageInput } from "./utils";
@@ -14,16 +14,11 @@ export async function imageSegmentation(
 	args: ImageSegmentationArgs,
 	options?: Options
 ): Promise<ImageSegmentationOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "image-segmentation");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<ImageSegmentationOutput>(payload, {
 		...options,
 		task: "image-segmentation",
 	});
-	const isValidOutput =
-		Array.isArray(res) &&
-		res.every((x) => typeof x.label === "string" && typeof x.mask === "string" && typeof x.score === "number");
-	if (!isValidOutput) {
-		throw new InferenceOutputError("Expected Array<{label: string, mask: string, score: number}>");
-	}
-	return res;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/cv/imageToImage.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ImageToImageInput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options, RequestArgs } from "../../types";
 import { base64FromBytes } from "../../utils/base64FromBytes";
 import { innerRequest } from "../../utils/request";
@@ -11,6 +11,7 @@ export type ImageToImageArgs = BaseArgs & ImageToImageInput;
  * Recommended model: lllyasviel/sd-controlnet-depth
  */
 export async function imageToImage(args: ImageToImageArgs, options?: Options): Promise<Blob> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "image-to-image");
 	let reqArgs: RequestArgs;
 	if (!args.parameters) {
 		reqArgs = {
@@ -30,9 +31,5 @@ export async function imageToImage(args: ImageToImageArgs, options?: Options): P
 		...options,
 		task: "image-to-image",
 	});
-	const isValidOutput = res && res instanceof Blob;
-	if (!isValidOutput) {
-		throw new InferenceOutputError("Expected Blob");
-	}
-	return res;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/cv/imageToText.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ImageToTextInput, ImageToTextOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import type { LegacyImageInput } from "./utils";
@@ -10,15 +10,12 @@ export type ImageToTextArgs = BaseArgs & (ImageToTextInput | LegacyImageInput);
  * This task reads some image input and outputs the text caption.
  */
 export async function imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "image-to-text");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<[ImageToTextOutput]>(payload, {
 		...options,
 		task: "image-to-text",
 	});
-	if (typeof res?.[0]?.generated_text !== "string") {
-		throw new InferenceOutputError("Expected {generated_text: string}");
-	}
-	return res?.[0];
+	return providerHelper.getResponse(res[0]);
 }

package/src/tasks/cv/objectDetection.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ObjectDetectionInput, ObjectDetectionOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import { getProviderHelper } from "../../lib/getProviderHelper";
 import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 import { preparePayload, type LegacyImageInput } from "./utils";
@@ -11,26 +11,11 @@ export type ObjectDetectionArgs = BaseArgs & (ObjectDetectionInput | LegacyImage
  * Recommended model: facebook/detr-resnet-50
  */
 export async function objectDetection(args: ObjectDetectionArgs, options?: Options): Promise<ObjectDetectionOutput> {
+	const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "object-detection");
 	const payload = preparePayload(args);
 	const { data: res } = await innerRequest<ObjectDetectionOutput>(payload, {
 		...options,
 		task: "object-detection",
 	});
-	const isValidOutput =
-		Array.isArray(res) &&
-		res.every(
-			(x) =>
-				typeof x.label === "string" &&
-				typeof x.score === "number" &&
-				typeof x.box.xmin === "number" &&
-				typeof x.box.ymin === "number" &&
-				typeof x.box.xmax === "number" &&
-				typeof x.box.ymax === "number"
-		);
-	if (!isValidOutput) {
-		throw new InferenceOutputError(
-			"Expected Array<{label:string; score:number; box:{xmin:number; ymin:number; xmax:number; ymax:number}}>"
-		);
-	}
-	return res;
+	return providerHelper.getResponse(res);
 }

package/src/tasks/cv/textToImage.ts CHANGED Viewed

@@ -1,48 +1,15 @@
-import type { TextToImageInput, TextToImageOutput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
-import type { BaseArgs, InferenceProvider, Options } from "../../types";
-import { delay } from "../../utils/delay";
-import { omit } from "../../utils/omit";
+import type { TextToImageInput } from "@huggingface/tasks";
+import { getProviderHelper } from "../../lib/getProviderHelper";
+import { makeRequestOptions } from "../../lib/makeRequestOptions";
+import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
 export type TextToImageArgs = BaseArgs & TextToImageInput;
-interface Base64ImageGeneration {
-	data: Array<{
-		b64_json: string;
-	}>;
-}
-interface OutputUrlImageGeneration {
-	output: string[];
-}
-interface HyperbolicTextToImageOutput {
-	images: Array<{ image: string }>;
-}
-interface BlackForestLabsResponse {
-	id: string;
-	polling_url: string;
-}
 interface TextToImageOptions extends Options {
 	outputType?: "url" | "blob";
 }
-function getResponseFormatArg(provider: InferenceProvider) {
-	switch (provider) {
-		case "fal-ai":
-			return { sync_mode: true };
-		case "nebius":
-			return { response_format: "b64_json" };
-		case "replicate":
-			return undefined;
-		case "together":
-			return { response_format: "base64" };
-		default:
-			return undefined;
-	}
-}
 /**
  * This task reads some text input and outputs an image.
  * Recommended model: stabilityai/stable-diffusion-2
@@ -56,108 +23,13 @@ export async function textToImage(
 	options?: TextToImageOptions & { outputType?: undefined | "blob" }
 ): Promise<Blob>;
 export async function textToImage(args: TextToImageArgs, options?: TextToImageOptions): Promise<Blob | string> {
-	const payload =
-		!args.provider || args.provider === "hf-inference" || args.provider === "sambanova"
-			? args
-			: {
-					...omit(args, ["inputs", "parameters"]),
-					...args.parameters,
-					...getResponseFormatArg(args.provider),
-					prompt: args.inputs,
-			  };
-	const { data: res } = await innerRequest<
-		| TextToImageOutput
-		| Base64ImageGeneration
-		| OutputUrlImageGeneration
-		| BlackForestLabsResponse
-		| HyperbolicTextToImageOutput
-	>(payload, {
+	const provider = args.provider ?? "hf-inference";
+	const providerHelper = getProviderHelper(provider, "text-to-image");
+	const { data: res } = await innerRequest<Record<string, unknown>>(args, {
 		...options,
 		task: "text-to-image",
 	});
-	if (res && typeof res === "object") {
-		if (args.provider === "black-forest-labs" && "polling_url" in res && typeof res.polling_url === "string") {
-			return await pollBflResponse(res.polling_url, options?.outputType);
-		}
-		if (args.provider === "fal-ai" && "images" in res && Array.isArray(res.images) && res.images[0].url) {
-			if (options?.outputType === "url") {
-				return res.images[0].url;
-			} else {
-				const image = await fetch(res.images[0].url);
-				return await image.blob();
-			}
-		}
-		if (
-			args.provider === "hyperbolic" &&
-			"images" in res &&
-			Array.isArray(res.images) &&
-			res.images[0] &&
-			typeof res.images[0].image === "string"
-		) {
-			if (options?.outputType === "url") {
-				return `data:image/jpeg;base64,${res.images[0].image}`;
-			}
-			const base64Response = await fetch(`data:image/jpeg;base64,${res.images[0].image}`);
-			return await base64Response.blob();
-		}
-		if ("data" in res && Array.isArray(res.data) && res.data[0].b64_json) {
-			const base64Data = res.data[0].b64_json;
-			if (options?.outputType === "url") {
-				return `data:image/jpeg;base64,${base64Data}`;
-			}
-			const base64Response = await fetch(`data:image/jpeg;base64,${base64Data}`);
-			return await base64Response.blob();
-		}
-		if ("output" in res && Array.isArray(res.output)) {
-			if (options?.outputType === "url") {
-				return res.output[0];
-			}
-			const urlResponse = await fetch(res.output[0]);
-			const blob = await urlResponse.blob();
-			return blob;
-		}
-	}
-	const isValidOutput = res && res instanceof Blob;
-	if (!isValidOutput) {
-		throw new InferenceOutputError("Expected Blob");
-	}
-	if (options?.outputType === "url") {
-		const b64 = await res.arrayBuffer().then((buf) => Buffer.from(buf).toString("base64"));
-		return `data:image/jpeg;base64,${b64}`;
-	}
-	return res;
-}
-async function pollBflResponse(url: string, outputType?: "url" | "blob"): Promise<Blob> {
-	const urlObj = new URL(url);
-	for (let step = 0; step < 5; step++) {
-		await delay(1000);
-		console.debug(`Polling Black Forest Labs API for the result... ${step + 1}/5`);
-		urlObj.searchParams.set("attempt", step.toString(10));
-		const resp = await fetch(urlObj, { headers: { "Content-Type": "application/json" } });
-		if (!resp.ok) {
-			throw new InferenceOutputError("Failed to fetch result from black forest labs API");
-		}
-		const payload = await resp.json();
-		if (
-			typeof payload === "object" &&
-			payload &&
-			"status" in payload &&
-			typeof payload.status === "string" &&
-			payload.status === "Ready" &&
-			"result" in payload &&
-			typeof payload.result === "object" &&
-			payload.result &&
-			"sample" in payload.result &&
-			typeof payload.result.sample === "string"
-		) {
-			if (outputType === "url") {
-				return payload.result.sample;
-			}
-			const image = await fetch(payload.result.sample);
-			return await image.blob();
-		}
-	}
-	throw new InferenceOutputError("Failed to fetch result from black forest labs API");
+	const { url, info } = await makeRequestOptions(args, { ...options, task: "text-to-image" });
+	return providerHelper.getResponse(res, url, info.headers as Record<string, string>, options?.outputType);
 }

package/src/tasks/cv/textToVideo.ts CHANGED Viewed

@@ -1,74 +1,23 @@
 import type { TextToVideoInput } from "@huggingface/tasks";
-import { InferenceOutputError } from "../../lib/InferenceOutputError";
-import { isUrl } from "../../lib/isUrl";
-import { pollFalResponse, type FalAiQueueOutput } from "../../providers/fal-ai";
-import type { BaseArgs, InferenceProvider, Options } from "../../types";
-import { omit } from "../../utils/omit";
+import { getProviderHelper } from "../../lib/getProviderHelper";
+import { makeRequestOptions } from "../../lib/makeRequestOptions";
+import type { FalAiQueueOutput } from "../../providers/fal-ai";
+import type { NovitaOutput } from "../../providers/novita";
+import type { ReplicateOutput } from "../../providers/replicate";
+import type { BaseArgs, Options } from "../../types";
 import { innerRequest } from "../../utils/request";
-import { typedInclude } from "../../utils/typedInclude";
 export type TextToVideoArgs = BaseArgs & TextToVideoInput;
 export type TextToVideoOutput = Blob;
-interface ReplicateOutput {
-	output: string;
-}
-interface NovitaOutput {
-	video: {
-		video_url: string;
-	};
-}
-const SUPPORTED_PROVIDERS = ["fal-ai", "novita", "replicate"] as const satisfies readonly InferenceProvider[];
 export async function textToVideo(args: TextToVideoArgs, options?: Options): Promise<TextToVideoOutput> {
-	if (!args.provider || !typedInclude(SUPPORTED_PROVIDERS, args.provider)) {
-		throw new Error(
-			`textToVideo inference is only supported for the following providers: ${SUPPORTED_PROVIDERS.join(", ")}`
-		);
-	}
-	const payload =
-		args.provider === "fal-ai" || args.provider === "replicate" || args.provider === "novita"
-			? { ...omit(args, ["inputs", "parameters"]), ...args.parameters, prompt: args.inputs }
-			: args;
-	const { data, requestContext } = await innerRequest<FalAiQueueOutput | ReplicateOutput | NovitaOutput>(payload, {
+	const provider = args.provider ?? "hf-inference";
+	const providerHelper = getProviderHelper(provider, "text-to-video");
+	const { data: response } = await innerRequest<FalAiQueueOutput | ReplicateOutput | NovitaOutput>(args, {
 		...options,
 		task: "text-to-video",
 	});
-	if (args.provider === "fal-ai") {
-		return await pollFalResponse(
-			data as FalAiQueueOutput,
-			requestContext.url,
-			requestContext.info.headers as Record<string, string>
-		);
-	} else if (args.provider === "novita") {
-		const isValidOutput =
-			typeof data === "object" &&
-			!!data &&
-			"video" in data &&
-			typeof data.video === "object" &&
-			!!data.video &&
-			"video_url" in data.video &&
-			typeof data.video.video_url === "string" &&
-			isUrl(data.video.video_url);
-		if (!isValidOutput) {
-			throw new InferenceOutputError("Expected { video: { video_url: string } }");
-		}
-		const urlResponse = await fetch((data as NovitaOutput).video.video_url);
-		return await urlResponse.blob();
-	} else {
-		/// TODO: Replicate: handle the case where the generation request "times out" / is async (ie output is null)
-		/// https://replicate.com/docs/topics/predictions/create-a-prediction
-		const isValidOutput =
-			typeof data === "object" && !!data && "output" in data && typeof data.output === "string" && isUrl(data.output);
-		if (!isValidOutput) {
-			throw new InferenceOutputError("Expected { output: string }");
-		}
-		const urlResponse = await fetch(data.output);
-		return await urlResponse.blob();
-	}
+	const { url, info } = await makeRequestOptions(args, { ...options, task: "text-to-video" });
+	return providerHelper.getResponse(response, url, info.headers as Record<string, string>);
 }