@huggingface/inference 2.8.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +39 -16
- package/dist/index.cjs +364 -134
- package/dist/index.js +359 -134
- package/dist/src/config.d.ts +3 -0
- package/dist/src/config.d.ts.map +1 -0
- package/dist/src/index.d.ts +5 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/lib/getDefaultTask.d.ts +0 -1
- package/dist/src/lib/getDefaultTask.d.ts.map +1 -1
- package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/src/providers/fal-ai.d.ts +6 -0
- package/dist/src/providers/fal-ai.d.ts.map +1 -0
- package/dist/src/providers/replicate.d.ts +6 -0
- package/dist/src/providers/replicate.d.ts.map +1 -0
- package/dist/src/providers/sambanova.d.ts +6 -0
- package/dist/src/providers/sambanova.d.ts.map +1 -0
- package/dist/src/providers/together.d.ts +12 -0
- package/dist/src/providers/together.d.ts.map +1 -0
- package/dist/src/providers/types.d.ts +4 -0
- package/dist/src/providers/types.d.ts.map +1 -0
- package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -1
- package/dist/src/tasks/custom/request.d.ts +1 -1
- package/dist/src/tasks/custom/request.d.ts.map +1 -1
- package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -1
- package/dist/src/tasks/cv/textToImage.d.ts +8 -0
- package/dist/src/tasks/cv/textToImage.d.ts.map +1 -1
- package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -1
- package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -1
- package/dist/src/types.d.ts +16 -2
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/config.ts +2 -0
- package/src/index.ts +5 -0
- package/src/lib/getDefaultTask.ts +1 -1
- package/src/lib/makeRequestOptions.ts +199 -59
- package/src/providers/fal-ai.ts +15 -0
- package/src/providers/replicate.ts +16 -0
- package/src/providers/sambanova.ts +23 -0
- package/src/providers/together.ts +58 -0
- package/src/providers/types.ts +6 -0
- package/src/tasks/audio/automaticSpeechRecognition.ts +10 -1
- package/src/tasks/custom/request.ts +12 -6
- package/src/tasks/custom/streamingRequest.ts +18 -3
- package/src/tasks/cv/textToImage.ts +44 -1
- package/src/tasks/nlp/chatCompletion.ts +2 -2
- package/src/tasks/nlp/textGeneration.ts +43 -9
- package/src/types.ts +20 -2
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { ProviderMapping } from "./types";

// Base URL for all Together API calls made through makeRequestOptions.
export const TOGETHER_API_BASE_URL = "https://api.together.xyz";

/**
 * A Together model id (e.g. "meta-llama/Llama-3.3-70B-Instruct-Turbo").
 * Aliased so the value side of the mapping below is self-documenting
 * (same rationale as the equivalent alias in sambanova.ts).
 */
type TogetherId = string;

/**
 * Mapping from Hugging Face Hub model ids (keys) to the Together model ids
 * (values) used when routing a request to the Together provider, grouped by task.
 *
 * Together's model catalog: https://docs.together.ai/reference/models-1
 *
 * NOTE(review): several entries map to a *different* model on Together's side,
 * e.g. "FLUX.1-schnell" -> "FLUX.1-pro", "google/gemma-2b-it" ->
 * "google/gemma-2-27b-it", and "Meta-Llama-3.1-405B-Instruct" ->
 * "Llama-3.2-11B-Vision-Instruct-Turbo". Presumably these are
 * closest-available substitutes — confirm each is intentional.
 */
export const TOGETHER_SUPPORTED_MODEL_IDS: ProviderMapping<TogetherId> = {
	"text-to-image": {
		"black-forest-labs/FLUX.1-Canny-dev": "black-forest-labs/FLUX.1-canny",
		"black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
		"black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
		"black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
		"black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
		"stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0",
	},
	conversational: {
		"databricks/dbrx-instruct": "databricks/dbrx-instruct",
		"deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
		"google/gemma-2-9b-it": "google/gemma-2-9b-it",
		"google/gemma-2b-it": "google/gemma-2-27b-it",
		"llava-hf/llava-v1.6-mistral-7b-hf": "llava-hf/llava-v1.6-mistral-7b-hf",
		"meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
		"meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
		"meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
		"meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
		"meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
		"meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
		"meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		"meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
		"meta-llama/Meta-Llama-3-8B-Instruct": "togethercomputer/Llama-3-8b-chat-hf-int4",
		"meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
		"meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
		"meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
		"microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
		"mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
		"mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
		"mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
		"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
		"Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
		"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
		"Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
		"Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
		"Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
		"scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
		"scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316",
	},
	"text-generation": {
		"meta-llama/Meta-Llama-3-8B": "meta-llama/Meta-Llama-3-8B",
		"mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1",
	},
};
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { InferenceOutputError } from "../../lib/InferenceOutputError";
|
|
2
|
-
import type { BaseArgs, Options } from "../../types";
|
|
2
|
+
import type { BaseArgs, Options, RequestArgs } from "../../types";
|
|
3
|
+
import { base64FromBytes } from "../../utils/base64FromBytes";
|
|
3
4
|
import { request } from "../custom/request";
|
|
4
5
|
|
|
5
6
|
export type AutomaticSpeechRecognitionArgs = BaseArgs & {
|
|
@@ -24,6 +25,14 @@ export async function automaticSpeechRecognition(
|
|
|
24
25
|
args: AutomaticSpeechRecognitionArgs,
|
|
25
26
|
options?: Options
|
|
26
27
|
): Promise<AutomaticSpeechRecognitionOutput> {
|
|
28
|
+
if (args.provider === "fal-ai") {
|
|
29
|
+
const contentType = args.data instanceof Blob ? args.data.type : "audio/mpeg";
|
|
30
|
+
const base64audio = base64FromBytes(
|
|
31
|
+
new Uint8Array(args.data instanceof ArrayBuffer ? args.data : await args.data.arrayBuffer())
|
|
32
|
+
);
|
|
33
|
+
(args as RequestArgs & { audio_url: string }).audio_url = `data:${contentType};base64,${base64audio}`;
|
|
34
|
+
delete (args as RequestArgs & { data: unknown }).data;
|
|
35
|
+
}
|
|
27
36
|
const res = await request<AutomaticSpeechRecognitionOutput>(args, {
|
|
28
37
|
...options,
|
|
29
38
|
taskHint: "automatic-speech-recognition",
|
|
@@ -2,7 +2,7 @@ import type { InferenceTask, Options, RequestArgs } from "../../types";
|
|
|
2
2
|
import { makeRequestOptions } from "../../lib/makeRequestOptions";
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
|
-
* Primitive to make custom calls to
|
|
5
|
+
* Primitive to make custom calls to the inference provider
|
|
6
6
|
*/
|
|
7
7
|
export async function request<T>(
|
|
8
8
|
args: RequestArgs,
|
|
@@ -26,16 +26,22 @@ export async function request<T>(
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
if (!response.ok) {
|
|
29
|
-
|
|
29
|
+
const contentType = response.headers.get("Content-Type");
|
|
30
|
+
if (["application/json", "application/problem+json"].some((ct) => contentType?.startsWith(ct))) {
|
|
30
31
|
const output = await response.json();
|
|
31
32
|
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
32
|
-
throw new Error(
|
|
33
|
+
throw new Error(
|
|
34
|
+
`Server ${args.model} does not seem to support chat completion. Error: ${JSON.stringify(output.error)}`
|
|
35
|
+
);
|
|
33
36
|
}
|
|
34
|
-
if (output.error) {
|
|
35
|
-
throw new Error(output.error);
|
|
37
|
+
if (output.error || output.detail) {
|
|
38
|
+
throw new Error(JSON.stringify(output.error ?? output.detail));
|
|
39
|
+
} else {
|
|
40
|
+
throw new Error(output);
|
|
36
41
|
}
|
|
37
42
|
}
|
|
38
|
-
|
|
43
|
+
const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined;
|
|
44
|
+
throw new Error(message ?? "An error occurred while fetching the blob");
|
|
39
45
|
}
|
|
40
46
|
|
|
41
47
|
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
|
|
@@ -32,9 +32,13 @@ export async function* streamingRequest<T>(
|
|
|
32
32
|
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
33
33
|
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
|
|
34
34
|
}
|
|
35
|
-
if (output.error) {
|
|
35
|
+
if (typeof output.error === "string") {
|
|
36
36
|
throw new Error(output.error);
|
|
37
37
|
}
|
|
38
|
+
if (output.error && "message" in output.error && typeof output.error.message === "string") {
|
|
39
|
+
/// OpenAI errors
|
|
40
|
+
throw new Error(output.error.message);
|
|
41
|
+
}
|
|
38
42
|
}
|
|
39
43
|
|
|
40
44
|
throw new Error(`Server response contains error: ${response.status}`);
|
|
@@ -68,7 +72,9 @@ export async function* streamingRequest<T>(
|
|
|
68
72
|
try {
|
|
69
73
|
while (true) {
|
|
70
74
|
const { done, value } = await reader.read();
|
|
71
|
-
if (done)
|
|
75
|
+
if (done) {
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
72
78
|
onChunk(value);
|
|
73
79
|
for (const event of events) {
|
|
74
80
|
if (event.data.length > 0) {
|
|
@@ -77,7 +83,16 @@ export async function* streamingRequest<T>(
|
|
|
77
83
|
}
|
|
78
84
|
const data = JSON.parse(event.data);
|
|
79
85
|
if (typeof data === "object" && data !== null && "error" in data) {
|
|
80
|
-
|
|
86
|
+
const errorStr =
|
|
87
|
+
typeof data.error === "string"
|
|
88
|
+
? data.error
|
|
89
|
+
: typeof data.error === "object" &&
|
|
90
|
+
data.error &&
|
|
91
|
+
"message" in data.error &&
|
|
92
|
+
typeof data.error.message === "string"
|
|
93
|
+
? data.error.message
|
|
94
|
+
: JSON.stringify(data.error);
|
|
95
|
+
throw new Error(`Error forwarded from backend: ` + errorStr);
|
|
81
96
|
}
|
|
82
97
|
yield data as T;
|
|
83
98
|
}
|
|
@@ -8,6 +8,15 @@ export type TextToImageArgs = BaseArgs & {
|
|
|
8
8
|
*/
|
|
9
9
|
inputs: string;
|
|
10
10
|
|
|
11
|
+
/**
|
|
12
|
+
* Same param but for external providers like Together, Replicate
|
|
13
|
+
*/
|
|
14
|
+
prompt?: string;
|
|
15
|
+
response_format?: "base64";
|
|
16
|
+
input?: {
|
|
17
|
+
prompt: string;
|
|
18
|
+
};
|
|
19
|
+
|
|
11
20
|
parameters?: {
|
|
12
21
|
/**
|
|
13
22
|
* An optional negative prompt for the image generation
|
|
@@ -34,15 +43,49 @@ export type TextToImageArgs = BaseArgs & {
|
|
|
34
43
|
|
|
35
44
|
export type TextToImageOutput = Blob;
|
|
36
45
|
|
|
46
|
+
interface Base64ImageGeneration {
|
|
47
|
+
data: Array<{
|
|
48
|
+
b64_json: string;
|
|
49
|
+
}>;
|
|
50
|
+
}
|
|
51
|
+
interface OutputUrlImageGeneration {
|
|
52
|
+
output: string[];
|
|
53
|
+
}
|
|
54
|
+
|
|
37
55
|
/**
|
|
38
56
|
* This task reads some text input and outputs an image.
|
|
39
57
|
* Recommended model: stabilityai/stable-diffusion-2
|
|
40
58
|
*/
|
|
41
59
|
export async function textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageOutput> {
|
|
42
|
-
|
|
60
|
+
if (args.provider === "together" || args.provider === "fal-ai") {
|
|
61
|
+
args.prompt = args.inputs;
|
|
62
|
+
args.inputs = "";
|
|
63
|
+
args.response_format = "base64";
|
|
64
|
+
} else if (args.provider === "replicate") {
|
|
65
|
+
args.input = { prompt: args.inputs };
|
|
66
|
+
delete (args as unknown as { inputs: unknown }).inputs;
|
|
67
|
+
}
|
|
68
|
+
const res = await request<TextToImageOutput | Base64ImageGeneration | OutputUrlImageGeneration>(args, {
|
|
43
69
|
...options,
|
|
44
70
|
taskHint: "text-to-image",
|
|
45
71
|
});
|
|
72
|
+
if (res && typeof res === "object") {
|
|
73
|
+
if (args.provider === "fal-ai" && "images" in res && Array.isArray(res.images) && res.images[0].url) {
|
|
74
|
+
const image = await fetch(res.images[0].url);
|
|
75
|
+
return await image.blob();
|
|
76
|
+
}
|
|
77
|
+
if ("data" in res && Array.isArray(res.data) && res.data[0].b64_json) {
|
|
78
|
+
const base64Data = res.data[0].b64_json;
|
|
79
|
+
const base64Response = await fetch(`data:image/jpeg;base64,${base64Data}`);
|
|
80
|
+
const blob = await base64Response.blob();
|
|
81
|
+
return blob;
|
|
82
|
+
}
|
|
83
|
+
if ("output" in res && Array.isArray(res.output)) {
|
|
84
|
+
const urlResponse = await fetch(res.output[0]);
|
|
85
|
+
const blob = await urlResponse.blob();
|
|
86
|
+
return blob;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
46
89
|
const isValidOutput = res && res instanceof Blob;
|
|
47
90
|
if (!isValidOutput) {
|
|
48
91
|
throw new InferenceOutputError("Expected Blob");
|
|
@@ -6,7 +6,6 @@ import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tas
|
|
|
6
6
|
/**
|
|
7
7
|
* Use the chat completion endpoint to generate a response to a prompt, using OpenAI message completion API no stream
|
|
8
8
|
*/
|
|
9
|
-
|
|
10
9
|
export async function chatCompletion(
|
|
11
10
|
args: BaseArgs & ChatCompletionInput,
|
|
12
11
|
options?: Options
|
|
@@ -22,7 +21,8 @@ export async function chatCompletion(
|
|
|
22
21
|
typeof res?.created === "number" &&
|
|
23
22
|
typeof res?.id === "string" &&
|
|
24
23
|
typeof res?.model === "string" &&
|
|
25
|
-
|
|
24
|
+
/// Together.ai does not output a system_fingerprint
|
|
25
|
+
(res.system_fingerprint === undefined || typeof res.system_fingerprint === "string") &&
|
|
26
26
|
typeof res?.usage === "object";
|
|
27
27
|
|
|
28
28
|
if (!isValidOutput) {
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
ChatCompletionOutput,
|
|
3
|
+
TextGenerationInput,
|
|
4
|
+
TextGenerationOutput,
|
|
5
|
+
TextGenerationOutputFinishReason,
|
|
6
|
+
} from "@huggingface/tasks";
|
|
2
7
|
import { InferenceOutputError } from "../../lib/InferenceOutputError";
|
|
3
8
|
import type { BaseArgs, Options } from "../../types";
|
|
4
9
|
import { toArray } from "../../utils/toArray";
|
|
@@ -6,6 +11,16 @@ import { request } from "../custom/request";
|
|
|
6
11
|
|
|
7
12
|
export type { TextGenerationInput, TextGenerationOutput };
|
|
8
13
|
|
|
14
|
+
/**
 * Shape of Together's text-completion response: structurally an OpenAI-style
 * ChatCompletionOutput, except each choice carries raw `text` instead of a
 * chat `message`.
 *
 * NOTE(review): "Togeteher" is a typo for "Together". The interface is not
 * exported, so it is safe to rename once its single use in textGeneration()
 * is updated in the same change.
 */
interface TogeteherTextCompletionOutput extends Omit<ChatCompletionOutput, "choices"> {
	choices: Array<{
		/** Generated completion text for this choice. */
		text: string;
		finish_reason: TextGenerationOutputFinishReason;
		/** Sampling seed reported by the provider. */
		seed: number;
		/** Token log-probabilities; not consumed here, so kept opaque. */
		logprobs: unknown;
		/** Position of this choice in the response array. */
		index: number;
	}>;
}
|
|
23
|
+
|
|
9
24
|
/**
|
|
10
25
|
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
|
|
11
26
|
*/
|
|
@@ -13,15 +28,34 @@ export async function textGeneration(
|
|
|
13
28
|
args: BaseArgs & TextGenerationInput,
|
|
14
29
|
options?: Options
|
|
15
30
|
): Promise<TextGenerationOutput> {
|
|
16
|
-
|
|
17
|
-
|
|
31
|
+
if (args.provider === "together") {
|
|
32
|
+
args.prompt = args.inputs;
|
|
33
|
+
const raw = await request<TogeteherTextCompletionOutput>(args, {
|
|
18
34
|
...options,
|
|
19
35
|
taskHint: "text-generation",
|
|
20
|
-
})
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
36
|
+
});
|
|
37
|
+
const isValidOutput =
|
|
38
|
+
typeof raw === "object" && "choices" in raw && Array.isArray(raw?.choices) && typeof raw?.model === "string";
|
|
39
|
+
if (!isValidOutput) {
|
|
40
|
+
throw new InferenceOutputError("Expected ChatCompletionOutput");
|
|
41
|
+
}
|
|
42
|
+
const completion = raw.choices[0];
|
|
43
|
+
return {
|
|
44
|
+
generated_text: completion.text,
|
|
45
|
+
};
|
|
46
|
+
} else {
|
|
47
|
+
const res = toArray(
|
|
48
|
+
await request<TextGenerationOutput | TextGenerationOutput[]>(args, {
|
|
49
|
+
...options,
|
|
50
|
+
taskHint: "text-generation",
|
|
51
|
+
})
|
|
52
|
+
);
|
|
53
|
+
|
|
54
|
+
const isValidOutput =
|
|
55
|
+
Array.isArray(res) && res.every((x) => "generated_text" in x && typeof x?.generated_text === "string");
|
|
56
|
+
if (!isValidOutput) {
|
|
57
|
+
throw new InferenceOutputError("Expected Array<{generated_text: string}>");
|
|
58
|
+
}
|
|
59
|
+
return (res as TextGenerationOutput[])?.[0];
|
|
25
60
|
}
|
|
26
|
-
return res?.[0];
|
|
27
61
|
}
|
package/src/types.ts
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import type { PipelineType } from "@huggingface/tasks";
|
|
2
2
|
import type { ChatCompletionInput } from "@huggingface/tasks";
|
|
3
3
|
|
|
4
|
+
/**
 * A Hugging Face Hub model id, e.g. "meta-llama/Llama-3.3-70B-Instruct".
 * Plain string alias, used purely to make signatures self-documenting.
 */
export type ModelId = string;
|
|
8
|
+
|
|
4
9
|
export interface Options {
|
|
5
10
|
/**
|
|
6
11
|
* (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
|
|
@@ -40,22 +45,28 @@ export interface Options {
|
|
|
40
45
|
|
|
41
46
|
export type InferenceTask = Exclude<PipelineType, "other">;
|
|
42
47
|
|
|
48
|
+
/**
 * All inference providers this client can route requests to.
 * "hf-inference" is the Hugging Face-hosted backend; the others are
 * external partner providers.
 */
export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"] as const;

/** Union of supported provider names, derived from INFERENCE_PROVIDERS. */
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
|
|
50
|
+
|
|
43
51
|
export interface BaseArgs {
|
|
44
52
|
/**
|
|
45
53
|
* The access token to use. Without it, you'll get rate-limited quickly.
|
|
46
54
|
*
|
|
47
55
|
* Can be created for free in hf.co/settings/token
|
|
56
|
+
*
|
|
57
|
+
* You can also pass an external Inference provider's key if you intend to call a compatible provider like Sambanova, Together, Replicate...
|
|
48
58
|
*/
|
|
49
59
|
accessToken?: string;
|
|
60
|
+
|
|
50
61
|
/**
|
|
51
|
-
* The model to use.
|
|
62
|
+
* The HF model to use.
|
|
52
63
|
*
|
|
53
64
|
* If not specified, will call huggingface.co/api/tasks to get the default model for the task.
|
|
54
65
|
*
|
|
55
66
|
* /!\ Legacy behavior allows this to be an URL, but this is deprecated and will be removed in the future.
|
|
56
67
|
* Use the `endpointUrl` parameter instead.
|
|
57
68
|
*/
|
|
58
|
-
model?:
|
|
69
|
+
model?: ModelId;
|
|
59
70
|
|
|
60
71
|
/**
|
|
61
72
|
* The URL of the endpoint to use. If not specified, will call huggingface.co/api/tasks to get the default endpoint for the task.
|
|
@@ -63,6 +74,13 @@ export interface BaseArgs {
|
|
|
63
74
|
* If specified, will use this URL instead of the default one.
|
|
64
75
|
*/
|
|
65
76
|
endpointUrl?: string;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Set an Inference provider to run this model on.
|
|
80
|
+
*
|
|
81
|
+
* Defaults to the first provider in your user settings that is compatible with this model.
|
|
82
|
+
*/
|
|
83
|
+
provider?: InferenceProvider;
|
|
66
84
|
}
|
|
67
85
|
|
|
68
86
|
export type RequestArgs = BaseArgs &
|