@huggingface/inference 3.13.0 → 3.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,7 @@ import type {
36
36
  import { HF_ROUTER_URL } from "../config";
37
37
  import { InferenceOutputError } from "../lib/InferenceOutputError";
38
38
  import type { TabularClassificationOutput } from "../tasks/tabular/tabularClassification";
39
- import type { BodyParams, UrlParams } from "../types";
39
+ import type { BodyParams, RequestArgs, UrlParams } from "../types";
40
40
  import { toArray } from "../utils/toArray";
41
41
  import type {
42
42
  AudioClassificationTaskHelper,
@@ -70,7 +70,10 @@ import type {
70
70
  } from "./providerHelper";
71
71
 
72
72
  import { TaskProviderHelper } from "./providerHelper";
73
-
73
+ import { base64FromBytes } from "../utils/base64FromBytes";
74
+ import type { ImageToImageArgs } from "../tasks/cv/imageToImage";
75
+ import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition";
76
+ import { omit } from "../utils/omit";
74
77
  interface Base64ImageGeneration {
75
78
  data: Array<{
76
79
  b64_json: string;
@@ -221,6 +224,15 @@ export class HFInferenceAutomaticSpeechRecognitionTask
221
224
  override async getResponse(response: AutomaticSpeechRecognitionOutput): Promise<AutomaticSpeechRecognitionOutput> {
222
225
  return response;
223
226
  }
227
+
228
+ async preparePayloadAsync(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs> {
229
+ return "data" in args
230
+ ? args
231
+ : {
232
+ ...omit(args, "inputs"),
233
+ data: args.inputs,
234
+ };
235
+ }
224
236
  }
225
237
 
226
238
  export class HFInferenceAudioToAudioTask extends HFInferenceTask implements AudioToAudioTaskHelper {
@@ -326,6 +338,23 @@ export class HFInferenceImageToTextTask extends HFInferenceTask implements Image
326
338
  }
327
339
 
328
340
  export class HFInferenceImageToImageTask extends HFInferenceTask implements ImageToImageTaskHelper {
341
+ async preparePayloadAsync(args: ImageToImageArgs): Promise<RequestArgs> {
342
+ if (!args.parameters) {
343
+ return {
344
+ ...args,
345
+ model: args.model,
346
+ data: args.inputs,
347
+ };
348
+ } else {
349
+ return {
350
+ ...args,
351
+ inputs: base64FromBytes(
352
+ new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await (args.inputs as Blob).arrayBuffer())
353
+ ),
354
+ };
355
+ }
356
+ }
357
+
329
358
  override async getResponse(response: Blob): Promise<Blob> {
330
359
  if (response instanceof Blob) {
331
360
  return response;
@@ -48,8 +48,10 @@ import type {
48
48
  import { HF_ROUTER_URL } from "../config";
49
49
  import { InferenceOutputError } from "../lib/InferenceOutputError";
50
50
  import type { AudioToAudioOutput } from "../tasks/audio/audioToAudio";
51
- import type { BaseArgs, BodyParams, HeaderParams, InferenceProvider, UrlParams } from "../types";
51
+ import type { BaseArgs, BodyParams, HeaderParams, InferenceProvider, RequestArgs, UrlParams } from "../types";
52
52
  import { toArray } from "../utils/toArray";
53
+ import type { ImageToImageArgs } from "../tasks/cv/imageToImage";
54
+ import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition";
53
55
 
54
56
  /**
55
57
  * Base class for task-specific provider helpers
@@ -142,6 +144,7 @@ export interface TextToVideoTaskHelper {
142
144
  export interface ImageToImageTaskHelper {
143
145
  getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<Blob>;
144
146
  preparePayload(params: BodyParams<ImageToImageInput & BaseArgs>): Record<string, unknown>;
147
+ preparePayloadAsync(args: ImageToImageArgs): Promise<RequestArgs>;
145
148
  }
146
149
 
147
150
  export interface ImageSegmentationTaskHelper {
@@ -245,6 +248,7 @@ export interface AudioToAudioTaskHelper {
245
248
  export interface AutomaticSpeechRecognitionTaskHelper {
246
249
  getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<AutomaticSpeechRecognitionOutput>;
247
250
  preparePayload(params: BodyParams<AutomaticSpeechRecognitionInput & BaseArgs>): Record<string, unknown> | BodyInit;
251
+ preparePayloadAsync(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs>;
248
252
  }
249
253
 
250
254
  export interface AudioClassificationTaskHelper {
@@ -144,8 +144,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
144
144
  }
145
145
  let providerHelper: ReturnType<typeof getProviderHelper>;
146
146
  try {
147
- /// For the "auto" provider policy we use hf-inference snippets
148
- providerHelper = getProviderHelper(provider === "auto" ? "hf-inference" : provider, task);
147
+ providerHelper = getProviderHelper(provider, task);
149
148
  } catch (e) {
150
149
  console.error(`Failed to get provider helper for ${provider} (${task})`, e);
151
150
  return [];
@@ -285,6 +284,16 @@ const prepareConversationalInput = (
285
284
  };
286
285
  };
287
286
 
287
+ const prepareQuestionAnsweringInput = (model: ModelDataMinimal): object => {
288
+ const data = JSON.parse(getModelInputSnippet(model) as string);
289
+ return { question: data.question, context: data.context };
290
+ };
291
+
292
+ const prepareTableQuestionAnsweringInput = (model: ModelDataMinimal): object => {
293
+ const data = JSON.parse(getModelInputSnippet(model) as string);
294
+ return { query: data.query, table: JSON.stringify(data.table) };
295
+ };
296
+
288
297
  const snippets: Partial<
289
298
  Record<
290
299
  PipelineType,
@@ -309,12 +318,12 @@ const snippets: Partial<
309
318
  "image-to-image": snippetGenerator("imageToImage", prepareImageToImageInput),
310
319
  "image-to-text": snippetGenerator("basicImage"),
311
320
  "object-detection": snippetGenerator("basicImage"),
312
- "question-answering": snippetGenerator("basic"),
321
+ "question-answering": snippetGenerator("questionAnswering", prepareQuestionAnsweringInput),
313
322
  "sentence-similarity": snippetGenerator("basic"),
314
323
  summarization: snippetGenerator("basic"),
315
324
  "tabular-classification": snippetGenerator("tabular"),
316
325
  "tabular-regression": snippetGenerator("tabular"),
317
- "table-question-answering": snippetGenerator("basic"),
326
+ "table-question-answering": snippetGenerator("tableQuestionAnswering", prepareTableQuestionAnsweringInput),
318
327
  "text-classification": snippetGenerator("basic"),
319
328
  "text-generation": snippetGenerator("basic"),
320
329
  "text-to-audio": snippetGenerator("textToAudio"),
@@ -30,7 +30,7 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
30
30
  "textToImage": "{% if provider == \"fal-ai\" %}\nimport fal_client\n\n{% if providerInputs.asObj.loras is defined and providerInputs.asObj.loras != none %}\nresult = fal_client.subscribe(\n \"{{ providerModelId }}\",\n arguments={\n \"prompt\": {{ inputs.asObj.inputs }},\n \"loras\":{{ providerInputs.asObj.loras | tojson }},\n },\n)\n{% else %}\nresult = fal_client.subscribe(\n \"{{ providerModelId }}\",\n arguments={\n \"prompt\": {{ inputs.asObj.inputs }},\n },\n)\n{% endif %} \nprint(result)\n{% endif %} "
31
31
  },
32
32
  "huggingface_hub": {
33
- "basic": "result = client.{{ methodName }}(\n inputs={{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n)",
33
+ "basic": "result = client.{{ methodName }}(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n)",
34
34
  "basicAudio": "output = client.{{ methodName }}({{ inputs.asObj.inputs }}, model=\"{{ model.id }}\")",
35
35
  "basicImage": "output = client.{{ methodName }}({{ inputs.asObj.inputs }}, model=\"{{ model.id }}\")",
36
36
  "conversational": "completion = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n)\n\nprint(completion.choices[0].message) ",
@@ -38,6 +38,8 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
38
38
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
39
39
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
40
40
  "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
41
+ "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
42
+ "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
41
43
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
42
44
  "textToSpeech": "# audio is returned as bytes\naudio = client.text_to_speech(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) \n",
43
45
  "textToVideo": "video = client.text_to_video(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) "
@@ -2,13 +2,9 @@ import type { AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput
2
2
  import { resolveProvider } from "../../lib/getInferenceProviderMapping";
3
3
  import { getProviderHelper } from "../../lib/getProviderHelper";
4
4
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
5
- import { FAL_AI_SUPPORTED_BLOB_TYPES } from "../../providers/fal-ai";
6
- import type { BaseArgs, Options, RequestArgs } from "../../types";
7
- import { base64FromBytes } from "../../utils/base64FromBytes";
8
- import { omit } from "../../utils/omit";
5
+ import type { BaseArgs, Options } from "../../types";
9
6
  import { innerRequest } from "../../utils/request";
10
7
  import type { LegacyAudioInput } from "./utils";
11
- import { preparePayload } from "./utils";
12
8
 
13
9
  export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput);
14
10
  /**
@@ -21,7 +17,7 @@ export async function automaticSpeechRecognition(
21
17
  ): Promise<AutomaticSpeechRecognitionOutput> {
22
18
  const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
23
19
  const providerHelper = getProviderHelper(provider, "automatic-speech-recognition");
24
- const payload = await buildPayload(args);
20
+ const payload = await providerHelper.preparePayloadAsync(args);
25
21
  const { data: res } = await innerRequest<AutomaticSpeechRecognitionOutput>(payload, providerHelper, {
26
22
  ...options,
27
23
  task: "automatic-speech-recognition",
@@ -32,29 +28,3 @@ export async function automaticSpeechRecognition(
32
28
  }
33
29
  return providerHelper.getResponse(res);
34
30
  }
35
-
36
- async function buildPayload(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs> {
37
- if (args.provider === "fal-ai") {
38
- const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : undefined;
39
- const contentType = blob?.type;
40
- if (!contentType) {
41
- throw new Error(
42
- `Unable to determine the input's content-type. Make sure your are passing a Blob when using provider fal-ai.`
43
- );
44
- }
45
- if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
46
- throw new Error(
47
- `Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
48
- ", "
49
- )}`
50
- );
51
- }
52
- const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
53
- return {
54
- ...("data" in args ? omit(args, "data") : omit(args, "inputs")),
55
- audio_url: `data:${contentType};base64,${base64audio}`,
56
- };
57
- } else {
58
- return preparePayload(args);
59
- }
60
- }
@@ -1,8 +1,7 @@
1
1
  import type { ImageToImageInput } from "@huggingface/tasks";
2
2
  import { resolveProvider } from "../../lib/getInferenceProviderMapping";
3
3
  import { getProviderHelper } from "../../lib/getProviderHelper";
4
- import type { BaseArgs, Options, RequestArgs } from "../../types";
5
- import { base64FromBytes } from "../../utils/base64FromBytes";
4
+ import type { BaseArgs, Options } from "../../types";
6
5
  import { innerRequest } from "../../utils/request";
7
6
 
8
7
  export type ImageToImageArgs = BaseArgs & ImageToImageInput;
@@ -14,22 +13,8 @@ export type ImageToImageArgs = BaseArgs & ImageToImageInput;
14
13
  export async function imageToImage(args: ImageToImageArgs, options?: Options): Promise<Blob> {
15
14
  const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
16
15
  const providerHelper = getProviderHelper(provider, "image-to-image");
17
- let reqArgs: RequestArgs;
18
- if (!args.parameters) {
19
- reqArgs = {
20
- accessToken: args.accessToken,
21
- model: args.model,
22
- data: args.inputs,
23
- };
24
- } else {
25
- reqArgs = {
26
- ...args,
27
- inputs: base64FromBytes(
28
- new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
29
- ),
30
- };
31
- }
32
- const { data: res } = await innerRequest<Blob>(reqArgs, providerHelper, {
16
+ const payload = await providerHelper.preparePayloadAsync(args);
17
+ const { data: res } = await innerRequest<Blob>(payload, providerHelper, {
33
18
  ...options,
34
19
  task: "image-to-image",
35
20
  });