@huggingface/inference 3.4.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,487 @@
1
+ import { openAIbaseUrl, type SnippetInferenceProvider } from "@huggingface/tasks";
2
+ import type { PipelineType, WidgetType } from "@huggingface/tasks/src/pipelines.js";
3
+ import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks/src/tasks/index.js";
4
+ import {
5
+ type InferenceSnippet,
6
+ type ModelDataMinimal,
7
+ getModelInputSnippet,
8
+ stringifyGenerationConfig,
9
+ stringifyMessages,
10
+ } from "@huggingface/tasks";
11
+
12
// Maps a widget/pipeline task name to the corresponding Python `huggingface_hub`
// InferenceClient method name (snake_case). `snippetBasic` only emits a
// `huggingface_hub` snippet for tasks present in this map; tasks missing here
// fall back to a raw `requests` snippet only.
const HFH_INFERENCE_CLIENT_METHODS: Partial<Record<WidgetType, string>> = {
	"audio-classification": "audio_classification",
	"audio-to-audio": "audio_to_audio",
	"automatic-speech-recognition": "automatic_speech_recognition",
	"text-to-speech": "text_to_speech",
	"image-classification": "image_classification",
	"image-segmentation": "image_segmentation",
	"image-to-image": "image_to_image",
	"image-to-text": "image_to_text",
	"object-detection": "object_detection",
	"text-to-image": "text_to_image",
	"text-to-video": "text_to_video",
	"zero-shot-image-classification": "zero_shot_image_classification",
	"document-question-answering": "document_question_answering",
	"visual-question-answering": "visual_question_answering",
	"feature-extraction": "feature_extraction",
	"fill-mask": "fill_mask",
	"question-answering": "question_answering",
	"sentence-similarity": "sentence_similarity",
	summarization: "summarization",
	"table-question-answering": "table_question_answering",
	"text-classification": "text_classification",
	"text-generation": "text_generation",
	"token-classification": "token_classification",
	translation: "translation",
	"zero-shot-classification": "zero_shot_classification",
	"tabular-classification": "tabular_classification",
	"tabular-regression": "tabular_regression",
};
41
+
42
+ const snippetImportInferenceClient = (accessToken: string, provider: SnippetInferenceProvider): string =>
43
+ `\
44
+ from huggingface_hub import InferenceClient
45
+
46
+ client = InferenceClient(
47
+ provider="${provider}",
48
+ api_key="${accessToken || "{API_TOKEN}"}"
49
+ )`;
50
+
51
+ export const snippetConversational = (
52
+ model: ModelDataMinimal,
53
+ accessToken: string,
54
+ provider: SnippetInferenceProvider,
55
+ providerModelId?: string,
56
+ opts?: {
57
+ streaming?: boolean;
58
+ messages?: ChatCompletionInputMessage[];
59
+ temperature?: GenerationParameters["temperature"];
60
+ max_tokens?: GenerationParameters["max_tokens"];
61
+ top_p?: GenerationParameters["top_p"];
62
+ }
63
+ ): InferenceSnippet[] => {
64
+ const streaming = opts?.streaming ?? true;
65
+ const exampleMessages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
66
+ const messages = opts?.messages ?? exampleMessages;
67
+ const messagesStr = stringifyMessages(messages, { attributeKeyQuotes: true });
68
+
69
+ const config = {
70
+ ...(opts?.temperature ? { temperature: opts.temperature } : undefined),
71
+ max_tokens: opts?.max_tokens ?? 500,
72
+ ...(opts?.top_p ? { top_p: opts.top_p } : undefined),
73
+ };
74
+ const configStr = stringifyGenerationConfig(config, {
75
+ indent: "\n\t",
76
+ attributeValueConnector: "=",
77
+ });
78
+
79
+ if (streaming) {
80
+ return [
81
+ {
82
+ client: "huggingface_hub",
83
+ content: `\
84
+ ${snippetImportInferenceClient(accessToken, provider)}
85
+
86
+ messages = ${messagesStr}
87
+
88
+ stream = client.chat.completions.create(
89
+ model="${model.id}",
90
+ messages=messages,
91
+ ${configStr}
92
+ stream=True
93
+ )
94
+
95
+ for chunk in stream:
96
+ print(chunk.choices[0].delta.content, end="")`,
97
+ },
98
+ {
99
+ client: "openai",
100
+ content: `\
101
+ from openai import OpenAI
102
+
103
+ client = OpenAI(
104
+ base_url="${openAIbaseUrl(provider)}",
105
+ api_key="${accessToken || "{API_TOKEN}"}"
106
+ )
107
+
108
+ messages = ${messagesStr}
109
+
110
+ stream = client.chat.completions.create(
111
+ model="${providerModelId ?? model.id}",
112
+ messages=messages,
113
+ ${configStr}
114
+ stream=True
115
+ )
116
+
117
+ for chunk in stream:
118
+ print(chunk.choices[0].delta.content, end="")`,
119
+ },
120
+ ];
121
+ } else {
122
+ return [
123
+ {
124
+ client: "huggingface_hub",
125
+ content: `\
126
+ ${snippetImportInferenceClient(accessToken, provider)}
127
+
128
+ messages = ${messagesStr}
129
+
130
+ completion = client.chat.completions.create(
131
+ model="${model.id}",
132
+ messages=messages,
133
+ ${configStr}
134
+ )
135
+
136
+ print(completion.choices[0].message)`,
137
+ },
138
+ {
139
+ client: "openai",
140
+ content: `\
141
+ from openai import OpenAI
142
+
143
+ client = OpenAI(
144
+ base_url="${openAIbaseUrl(provider)}",
145
+ api_key="${accessToken || "{API_TOKEN}"}"
146
+ )
147
+
148
+ messages = ${messagesStr}
149
+
150
+ completion = client.chat.completions.create(
151
+ model="${providerModelId ?? model.id}",
152
+ messages=messages,
153
+ ${configStr}
154
+ )
155
+
156
+ print(completion.choices[0].message)`,
157
+ },
158
+ ];
159
+ }
160
+ };
161
+
162
/**
 * Python `requests` snippet for zero-shot text classification.
 * The generated code references `API_URL` and `headers`, which are prepended
 * later by `getPythonInferenceSnippet` (for snippets with client === "requests").
 */
export const snippetZeroShotClassification = (model: ModelDataMinimal): InferenceSnippet[] => {
	return [
		{
			client: "requests",
			content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"inputs": ${getModelInputSnippet(model)},
	"parameters": {"candidate_labels": ["refund", "legal", "faq"]},
})`,
		},
	];
};
178
+
179
/**
 * Python `requests` snippet for zero-shot image classification: the image is
 * read from disk and sent base64-encoded alongside candidate labels.
 * `API_URL` / `headers` are prepended later by `getPythonInferenceSnippet`.
 */
export const snippetZeroShotImageClassification = (model: ModelDataMinimal): InferenceSnippet[] => {
	return [
		{
			client: "requests",
			content: `\
def query(data):
	with open(data["image_path"], "rb") as f:
		img = f.read()
	payload={
		"parameters": data["parameters"],
		"inputs": base64.b64encode(img).decode("utf-8")
	}
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"image_path": ${getModelInputSnippet(model)},
	"parameters": {"candidate_labels": ["cat", "dog", "llama"]},
})`,
		},
	];
};
201
+
202
/**
 * Default snippet for simple JSON-in / JSON-out tasks.
 * Emits a `huggingface_hub` snippet only when the model's pipeline_tag has a
 * matching InferenceClient method in HFH_INFERENCE_CLIENT_METHODS, plus a raw
 * `requests` snippet (whose API_URL/headers preamble is added later by
 * `getPythonInferenceSnippet`).
 */
export const snippetBasic = (
	model: ModelDataMinimal,
	accessToken: string,
	provider: SnippetInferenceProvider
): InferenceSnippet[] => {
	return [
		...(model.pipeline_tag && model.pipeline_tag in HFH_INFERENCE_CLIENT_METHODS
			? [
					{
						client: "huggingface_hub",
						content: `\
${snippetImportInferenceClient(accessToken, provider)}

result = client.${HFH_INFERENCE_CLIENT_METHODS[model.pipeline_tag]}(
	model="${model.id}",
	inputs=${getModelInputSnippet(model)},
	provider="${provider}",
)

print(result)
`,
					},
			  ]
			: []),
		{
			client: "requests",
			content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"inputs": ${getModelInputSnippet(model)},
})`,
		},
	];
};
239
+
240
/**
 * Python `requests` snippet for binary-input tasks (audio/image files): the
 * file is read from disk and POSTed as the raw request body.
 * `API_URL` / `headers` are prepended later by `getPythonInferenceSnippet`.
 */
export const snippetFile = (model: ModelDataMinimal): InferenceSnippet[] => {
	return [
		{
			client: "requests",
			content: `\
def query(filename):
	with open(filename, "rb") as f:
		data = f.read()
	response = requests.post(API_URL, headers=headers, data=data)
	return response.json()

output = query(${getModelInputSnippet(model)})`,
		},
	];
};
255
+
256
/**
 * Python snippets for text-to-image models.
 * Always emits a `huggingface_hub` snippet; additionally emits a provider-specific
 * snippet: `fal-client` when provider === "fal-ai", raw `requests` when
 * provider === "hf-inference".
 */
export const snippetTextToImage = (
	model: ModelDataMinimal,
	accessToken: string,
	provider: SnippetInferenceProvider,
	providerModelId?: string
): InferenceSnippet[] => {
	return [
		{
			client: "huggingface_hub",
			content: `\
${snippetImportInferenceClient(accessToken, provider)}

# output is a PIL.Image object
image = client.text_to_image(
	${getModelInputSnippet(model)},
	model="${model.id}"
)`,
		},
		...(provider === "fal-ai"
			? [
					{
						client: "fal-client",
						content: `\
import fal_client

result = fal_client.subscribe(
	"${providerModelId ?? model.id}",
	arguments={
		"prompt": ${getModelInputSnippet(model)},
	},
)
print(result)
`,
					},
			  ]
			: []),
		...(provider === "hf-inference"
			? [
					{
						client: "requests",
						content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.content

image_bytes = query({
	"inputs": ${getModelInputSnippet(model)},
})

# You can access the image with PIL.Image for example
import io
from PIL import Image
image = Image.open(io.BytesIO(image_bytes))`,
					},
			  ]
			: []),
	];
};
314
+
315
/**
 * Python snippet for text-to-video models.
 * Only emitted for providers that support the task here ("fal-ai", "replicate");
 * returns an empty list for any other provider.
 */
export const snippetTextToVideo = (
	model: ModelDataMinimal,
	accessToken: string,
	provider: SnippetInferenceProvider
): InferenceSnippet[] => {
	return ["fal-ai", "replicate"].includes(provider)
		? [
				{
					client: "huggingface_hub",
					content: `\
${snippetImportInferenceClient(accessToken, provider)}

video = client.text_to_video(
	${getModelInputSnippet(model)},
	model="${model.id}"
)`,
				},
		  ]
		: [];
};
335
+
336
/**
 * Python `requests` snippet for tabular classification/regression: the rows are
 * wrapped under "inputs.data". `API_URL` / `headers` are prepended later by
 * `getPythonInferenceSnippet`.
 * NOTE(review): the generated query returns `response.content` (raw bytes)
 * rather than `response.json()` — presumably intentional, but verify against
 * the tabular endpoints' response format.
 */
export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet[] => {
	return [
		{
			client: "requests",
			content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.content

response = query({
	"inputs": {"data": ${getModelInputSnippet(model)}},
})`,
		},
	];
};
351
+
352
/**
 * Python `requests` snippets for text-to-speech / text-to-audio models.
 * The response shape depends on the backing library: transformers models return
 * raw wav bytes, while api-inference-community models return (audio, sampling_rate).
 * `API_URL` / `headers` are prepended later by `getPythonInferenceSnippet`.
 */
export const snippetTextToAudio = (model: ModelDataMinimal): InferenceSnippet[] => {
	// Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
	// with the latest update to inference-api (IA).
	// Transformers IA returns a byte object (wav file), whereas AIC returns wav and sampling_rate.
	if (model.library_name === "transformers") {
		return [
			{
				client: "requests",
				content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.content

audio_bytes = query({
	"inputs": ${getModelInputSnippet(model)},
})
# You can access the audio with IPython.display for example
from IPython.display import Audio
Audio(audio_bytes)`,
			},
		];
	} else {
		return [
			{
				client: "requests",
				content: `\
def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

audio, sampling_rate = query({
	"inputs": ${getModelInputSnippet(model)},
})
# You can access the audio with IPython.display for example
from IPython.display import Audio
Audio(audio, rate=sampling_rate)`,
			},
		];
	}
};
392
+
393
/**
 * Python `requests` snippet for document question answering: the referenced
 * image file is read from disk and replaced in-place by its base64 encoding
 * before the payload is POSTed.
 * `API_URL` / `headers` are prepended later by `getPythonInferenceSnippet`.
 */
export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): InferenceSnippet[] => {
	return [
		{
			client: "requests",
			content: `\
def query(payload):
	with open(payload["image"], "rb") as f:
		img = f.read()
	payload["image"] = base64.b64encode(img).decode("utf-8")
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"inputs": ${getModelInputSnippet(model)},
})`,
		},
	];
};
411
+
412
/**
 * Snippet generator per pipeline task. Generators are invoked with
 * (model, accessToken, provider, providerModelId); the trailing `opts`
 * parameter is only used by the conversational path in
 * `getPythonInferenceSnippet`. Tasks absent from this map produce no snippet.
 */
export const pythonSnippets: Partial<
	Record<
		PipelineType,
		(
			model: ModelDataMinimal,
			accessToken: string,
			provider: SnippetInferenceProvider,
			providerModelId?: string,
			opts?: Record<string, unknown>
		) => InferenceSnippet[]
	>
> = {
	// Same order as in tasks/src/pipelines.ts
	"text-classification": snippetBasic,
	"token-classification": snippetBasic,
	"table-question-answering": snippetBasic,
	"question-answering": snippetBasic,
	"zero-shot-classification": snippetZeroShotClassification,
	translation: snippetBasic,
	summarization: snippetBasic,
	"feature-extraction": snippetBasic,
	"text-generation": snippetBasic,
	"text2text-generation": snippetBasic,
	"image-text-to-text": snippetConversational,
	"fill-mask": snippetBasic,
	"sentence-similarity": snippetBasic,
	"automatic-speech-recognition": snippetFile,
	"text-to-image": snippetTextToImage,
	"text-to-video": snippetTextToVideo,
	"text-to-speech": snippetTextToAudio,
	"text-to-audio": snippetTextToAudio,
	"audio-to-audio": snippetFile,
	"audio-classification": snippetFile,
	"image-classification": snippetFile,
	"tabular-regression": snippetTabular,
	"tabular-classification": snippetTabular,
	"object-detection": snippetFile,
	"image-segmentation": snippetFile,
	"document-question-answering": snippetDocumentQuestionAnswering,
	"image-to-text": snippetFile,
	"zero-shot-image-classification": snippetZeroShotImageClassification,
};
454
+
455
/**
 * Entry point: build the Python inference snippets for a model.
 * Conversational models (tag "conversational") always use the Messages API
 * snippets; all other models are dispatched through `pythonSnippets` by
 * pipeline_tag. For snippets with client === "requests", the common
 * `import requests` / API_URL / headers preamble is prepended here.
 *
 * @param opts forwarded only to the conversational snippet generator
 */
export function getPythonInferenceSnippet(
	model: ModelDataMinimal,
	accessToken: string,
	provider: SnippetInferenceProvider,
	providerModelId?: string,
	opts?: Record<string, unknown>
): InferenceSnippet[] {
	if (model.tags.includes("conversational")) {
		// Conversational model detected, so we display a code snippet that features the Messages API
		return snippetConversational(model, accessToken, provider, providerModelId, opts);
	} else {
		const snippets =
			model.pipeline_tag && model.pipeline_tag in pythonSnippets
				? pythonSnippets[model.pipeline_tag]?.(model, accessToken, provider, providerModelId) ?? []
				: [];

		return snippets.map((snippet) => {
			return {
				...snippet,
				content:
					snippet.client === "requests"
						? `\
import requests

API_URL = "${openAIbaseUrl(provider)}"
headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}

${snippet.content}`
						: snippet.content,
			};
		});
	}
}
package/src/types.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ChatCompletionInput, FeatureExtractionInput, PipelineType } from "@huggingface/tasks";
1
+ import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
2
 
3
3
  /**
4
4
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
@@ -30,6 +30,7 @@ export type InferenceTask = Exclude<PipelineType, "other">;
30
30
 
31
31
  export const INFERENCE_PROVIDERS = [
32
32
  "black-forest-labs",
33
+ "cerebras",
33
34
  "cohere",
34
35
  "fal-ai",
35
36
  "fireworks-ai",
@@ -37,6 +38,7 @@ export const INFERENCE_PROVIDERS = [
37
38
  "hyperbolic",
38
39
  "nebius",
39
40
  "novita",
41
+ "openai",
40
42
  "replicate",
41
43
  "sambanova",
42
44
  "together",
@@ -87,7 +89,6 @@ export type RequestArgs = BaseArgs &
87
89
  | { text: string }
88
90
  | { audio_url: string }
89
91
  | ChatCompletionInput
90
- | FeatureExtractionInput
91
92
  ) & {
92
93
  parameters?: Record<string, unknown>;
93
94
  };
@@ -97,6 +98,7 @@ export interface ProviderConfig {
97
98
  makeBody: (params: BodyParams) => Record<string, unknown>;
98
99
  makeHeaders: (params: HeaderParams) => Record<string, string>;
99
100
  makeUrl: (params: UrlParams) => string;
101
+ clientSideRoutingOnly?: boolean;
100
102
  }
101
103
 
102
104
  export interface HeaderParams {