@huggingface/inference 3.12.1 → 3.13.1

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -235,6 +235,43 @@ var BaseTextGenerationTask = class extends TaskProviderHelper {
  }
  };
 
+ // src/utils/base64FromBytes.ts
+ function base64FromBytes(arr) {
+ if (globalThis.Buffer) {
+ return globalThis.Buffer.from(arr).toString("base64");
+ } else {
+ const bin = [];
+ arr.forEach((byte) => {
+ bin.push(String.fromCharCode(byte));
+ });
+ return globalThis.btoa(bin.join(""));
+ }
+ }
+
+ // src/utils/pick.ts
+ function pick(o, props) {
+ return Object.assign(
+ {},
+ ...props.map((prop) => {
+ if (o[prop] !== void 0) {
+ return { [prop]: o[prop] };
+ }
+ })
+ );
+ }
+
+ // src/utils/typedInclude.ts
+ function typedInclude(arr, v) {
+ return arr.includes(v);
+ }
+
+ // src/utils/omit.ts
+ function omit(o, props) {
+ const propsArr = Array.isArray(props) ? props : [props];
+ const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+ return pick(o, letsKeep);
+ }
+
  // src/providers/hf-inference.ts
  var EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS = ["feature-extraction", "sentence-similarity"];
  var HFInferenceTask = class extends TaskProviderHelper {
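The utility helpers (`base64FromBytes`, `pick`, `typedInclude`, `omit`) are hoisted to this point in the bundle so that the provider helpers below can use them for payload shaping; the same definitions are removed further down in this diff. Roughly how `pick`/`omit` behave, as a standalone sketch (these are internal helpers, not part of the package's public API):

```ts
// Standalone sketch mirroring the bundled helpers above (internal, not exported):
function pick(o: Record<string, unknown>, props: string[]) {
  // keep only the listed properties that are defined
  return Object.assign({}, ...props.map((prop) => (o[prop] !== undefined ? { [prop]: o[prop] } : undefined)));
}
function omit(o: Record<string, unknown>, props: string | string[]) {
  // drop the listed properties, keep the rest
  const propsArr = Array.isArray(props) ? props : [props];
  return pick(o, Object.keys(o).filter((prop) => !propsArr.includes(prop)));
}

const args = { model: "my-model", inputs: "hello", accessToken: "hf_xxx" };
console.log(pick(args, ["model", "inputs"])); // { model: "my-model", inputs: "hello" }
console.log(omit(args, "inputs"));            // { model: "my-model", accessToken: "hf_xxx" }
```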
@@ -342,6 +379,12 @@ var HFInferenceAutomaticSpeechRecognitionTask = class extends HFInferenceTask {
  async getResponse(response) {
  return response;
  }
+ async preparePayloadAsync(args) {
+ return "data" in args ? args : {
+ ...omit(args, "inputs"),
+ data: args.inputs
+ };
+ }
  };
  var HFInferenceAudioToAudioTask = class extends HFInferenceTask {
  async getResponse(response) {
@@ -393,7 +436,9 @@ var HFInferenceImageClassificationTask = class extends HFInferenceTask {
  };
  var HFInferenceImageSegmentationTask = class extends HFInferenceTask {
  async getResponse(response) {
- if (Array.isArray(response) && response.every((x) => typeof x.label === "string" && typeof x.mask === "string" && typeof x.score === "number")) {
+ if (Array.isArray(response) && response.every(
+ (x) => typeof x.label === "string" && typeof x.mask === "string" && (x.score === void 0 || typeof x.score === "number")
+ )) {
  return response;
  }
  throw new InferenceOutputError("Expected Array<{label: string, mask: string, score: number}>");
@@ -408,6 +453,22 @@ var HFInferenceImageToTextTask = class extends HFInferenceTask {
  }
  };
  var HFInferenceImageToImageTask = class extends HFInferenceTask {
+ async preparePayloadAsync(args) {
+ if (!args.parameters) {
+ return {
+ ...args,
+ model: args.model,
+ data: args.inputs
+ };
+ } else {
+ return {
+ ...args,
+ inputs: base64FromBytes(
+ new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
+ )
+ };
+ }
+ }
  async getResponse(response) {
  if (response instanceof Blob) {
  return response;
@@ -564,11 +625,6 @@ var HFInferenceTextToAudioTask = class extends HFInferenceTask {
  }
  };
 
- // src/utils/typedInclude.ts
- function typedInclude(arr, v) {
- return arr.includes(v);
- }
-
  // src/lib/getInferenceProviderMapping.ts
  var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
  async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
@@ -654,25 +710,6 @@ function delay(ms) {
  });
  }
 
- // src/utils/pick.ts
- function pick(o, props) {
- return Object.assign(
- {},
- ...props.map((prop) => {
- if (o[prop] !== void 0) {
- return { [prop]: o[prop] };
- }
- })
- );
- }
-
- // src/utils/omit.ts
- function omit(o, props) {
- const propsArr = Array.isArray(props) ? props : [props];
- const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
- return pick(o, letsKeep);
- }
-
  // src/providers/black-forest-labs.ts
  var BLACK_FOREST_LABS_AI_API_BASE_URL = "https://api.us1.bfl.ai";
  var BlackForestLabsTextToImageTask = class extends TaskProviderHelper {
@@ -879,6 +916,27 @@ var FalAIAutomaticSpeechRecognitionTask = class extends FalAITask {
  }
  return { text: res.text };
  }
+ async preparePayloadAsync(args) {
+ const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : void 0;
+ const contentType = blob?.type;
+ if (!contentType) {
+ throw new Error(
+ `Unable to determine the input's content-type. Make sure your are passing a Blob when using provider fal-ai.`
+ );
+ }
+ if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
+ throw new Error(
+ `Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
+ ", "
+ )}`
+ );
+ }
+ const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
+ return {
+ ..."data" in args ? omit(args, "data") : omit(args, "inputs"),
+ audio_url: `data:${contentType};base64,${base64audio}`
+ };
+ }
  };
  var FalAITextToSpeechTask = class extends FalAITask {
  preparePayload(params) {
@@ -1054,7 +1112,7 @@ var NebiusTextToImageTask = class extends TaskProviderHelper {
  model: params.model
  };
  }
- makeRoute(params) {
+ makeRoute() {
  return "v1/images/generations";
  }
  async getResponse(response, url, headers, outputType) {
@@ -1068,6 +1126,23 @@ var NebiusTextToImageTask = class extends TaskProviderHelper {
  throw new InferenceOutputError("Expected Nebius text-to-image response format");
  }
  };
+ var NebiusFeatureExtractionTask = class extends TaskProviderHelper {
+ constructor() {
+ super("nebius", NEBIUS_API_BASE_URL);
+ }
+ preparePayload(params) {
+ return {
+ input: params.args.inputs,
+ model: params.model
+ };
+ }
+ makeRoute() {
+ return "v1/embeddings";
+ }
+ async getResponse(response) {
+ return response.data.map((item) => item.embedding);
+ }
+ };
 
  // src/providers/novita.ts
  var NOVITA_API_BASE_URL = "https://api.novita.ai";
@@ -1414,7 +1489,8 @@ var PROVIDERS = {
  nebius: {
  "text-to-image": new NebiusTextToImageTask(),
  conversational: new NebiusConversationalTask(),
- "text-generation": new NebiusTextGenerationTask()
+ "text-generation": new NebiusTextGenerationTask(),
+ "feature-extraction": new NebiusFeatureExtractionTask()
  },
  novita: {
  conversational: new NovitaConversationalTask(),
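With `NebiusFeatureExtractionTask` now registered for the `nebius` provider, embedding requests can be routed to Nebius's `v1/embeddings` route. A minimal usage sketch, assuming a Nebius-served embedding model (the model id and token below are placeholders):

```ts
import { featureExtraction } from "@huggingface/inference";

const embeddings = await featureExtraction({
  provider: "nebius",
  accessToken: process.env.HF_TOKEN, // placeholder token
  model: "BAAI/bge-en-icl",          // placeholder; use an embedding model actually served by Nebius
  inputs: "Today is a sunny day and I will get some ice cream.",
});
// Per getResponse above, the raw { data: [{ embedding }] } payload is mapped to an array of vectors.
```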
@@ -1469,7 +1545,7 @@ function getProviderHelper(provider, task) {
 
  // package.json
  var name = "@huggingface/inference";
- var version = "3.12.1";
+ var version = "3.13.1";
 
  // src/lib/makeRequestOptions.ts
  var tasks = null;
@@ -1870,24 +1946,11 @@ async function audioToAudio(args, options) {
  return providerHelper.getResponse(res);
  }
 
- // src/utils/base64FromBytes.ts
- function base64FromBytes(arr) {
- if (globalThis.Buffer) {
- return globalThis.Buffer.from(arr).toString("base64");
- } else {
- const bin = [];
- arr.forEach((byte) => {
- bin.push(String.fromCharCode(byte));
- });
- return globalThis.btoa(bin.join(""));
- }
- }
-
  // src/tasks/audio/automaticSpeechRecognition.ts
  async function automaticSpeechRecognition(args, options) {
  const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
  const providerHelper = getProviderHelper(provider, "automatic-speech-recognition");
- const payload = await buildPayload(args);
+ const payload = await providerHelper.preparePayloadAsync(args);
  const { data: res } = await innerRequest(payload, providerHelper, {
  ...options,
  task: "automatic-speech-recognition"
@@ -1898,31 +1961,6 @@ async function automaticSpeechRecognition(args, options) {
  }
  return providerHelper.getResponse(res);
  }
- async function buildPayload(args) {
- if (args.provider === "fal-ai") {
- const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : void 0;
- const contentType = blob?.type;
- if (!contentType) {
- throw new Error(
- `Unable to determine the input's content-type. Make sure your are passing a Blob when using provider fal-ai.`
- );
- }
- if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
- throw new Error(
- `Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
- ", "
- )}`
- );
- }
- const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
- return {
- ..."data" in args ? omit(args, "data") : omit(args, "inputs"),
- audio_url: `data:${contentType};base64,${base64audio}`
- };
- } else {
- return preparePayload(args);
- }
- }
 
  // src/tasks/audio/textToSpeech.ts
  async function textToSpeech(args, options) {
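The fal-ai-specific `buildPayload` branch is removed from the task module; the same logic now lives in `FalAIAutomaticSpeechRecognitionTask.preparePayloadAsync` above, and the task function delegates to whichever provider helper is resolved. The public call is unchanged; a minimal sketch, assuming Node 18+ and placeholder file name, model id, and token:

```ts
import { readFile } from "node:fs/promises";
import { automaticSpeechRecognition } from "@huggingface/inference";

// Placeholder file; the Blob type must be one of FAL_AI_SUPPORTED_BLOB_TYPES.
const audio = new Blob([await readFile("sample.flac")], { type: "audio/flac" });

const { text } = await automaticSpeechRecognition({
  provider: "fal-ai",
  accessToken: process.env.HF_TOKEN, // placeholder token
  model: "openai/whisper-large-v3",  // placeholder; use a model mapped to fal-ai
  data: audio,                       // the fal-ai helper turns this Blob into a base64 audio_url
});
console.log(text);
```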
@@ -1968,22 +2006,8 @@ async function imageSegmentation(args, options) {
  async function imageToImage(args, options) {
  const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
  const providerHelper = getProviderHelper(provider, "image-to-image");
- let reqArgs;
- if (!args.parameters) {
- reqArgs = {
- accessToken: args.accessToken,
- model: args.model,
- data: args.inputs
- };
- } else {
- reqArgs = {
- ...args,
- inputs: base64FromBytes(
- new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
- )
- };
- }
- const { data: res } = await innerRequest(reqArgs, providerHelper, {
+ const payload = await providerHelper.preparePayloadAsync(args);
+ const { data: res } = await innerRequest(payload, providerHelper, {
  ...options,
  task: "image-to-image"
  });
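The same refactor applies to image-to-image: the inline `reqArgs` branching moves into `HFInferenceImageToImageTask.preparePayloadAsync` (added earlier in this diff), so the task function just forwards the prepared payload. Calling code is unaffected; a sketch with placeholder values:

```ts
import { readFile } from "node:fs/promises";
import { imageToImage } from "@huggingface/inference";

const cat = new Blob([await readFile("cat.png")], { type: "image/png" });

// With `parameters` set, the helper base64-encodes `inputs`; without it, the Blob is sent as raw `data`.
const edited = await imageToImage({
  accessToken: process.env.HF_TOKEN,         // placeholder token
  model: "placeholder/image-to-image-model", // placeholder model id
  inputs: cat,
  parameters: { prompt: "Turn the cat into a tiger." },
});
// `edited` is a Blob containing the generated image.
```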
@@ -2448,7 +2472,7 @@ const video = await client.textToVideo({
  "textToImage": '{% if provider == "fal-ai" %}\nimport fal_client\n\n{% if providerInputs.asObj.loras is defined and providerInputs.asObj.loras != none %}\nresult = fal_client.subscribe(\n "{{ providerModelId }}",\n arguments={\n "prompt": {{ inputs.asObj.inputs }},\n "loras":{{ providerInputs.asObj.loras | tojson }},\n },\n)\n{% else %}\nresult = fal_client.subscribe(\n "{{ providerModelId }}",\n arguments={\n "prompt": {{ inputs.asObj.inputs }},\n },\n)\n{% endif %} \nprint(result)\n{% endif %} '
  },
  "huggingface_hub": {
- "basic": 'result = client.{{ methodName }}(\n inputs={{ inputs.asObj.inputs }},\n model="{{ model.id }}",\n)',
+ "basic": 'result = client.{{ methodName }}(\n {{ inputs.asObj.inputs }},\n model="{{ model.id }}",\n)',
  "basicAudio": 'output = client.{{ methodName }}({{ inputs.asObj.inputs }}, model="{{ model.id }}")',
  "basicImage": 'output = client.{{ methodName }}({{ inputs.asObj.inputs }}, model="{{ model.id }}")',
  "conversational": 'completion = client.chat.completions.create(\n model="{{ model.id }}",\n{{ inputs.asPythonString }}\n)\n\nprint(completion.choices[0].message) ',
@@ -2456,6 +2480,8 @@ const video = await client.textToVideo({
  "documentQuestionAnswering": 'output = client.document_question_answering(\n "{{ inputs.asObj.image }}",\n question="{{ inputs.asObj.question }}",\n model="{{ model.id }}",\n) ',
  "imageToImage": '# output is a PIL.Image object\nimage = client.image_to_image(\n "{{ inputs.asObj.inputs }}",\n prompt="{{ inputs.asObj.parameters.prompt }}",\n model="{{ model.id }}",\n) ',
  "importInferenceClient": 'from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider="{{ provider }}",\n api_key="{{ accessToken }}",\n{% if billTo %}\n bill_to="{{ billTo }}",\n{% endif %}\n)',
+ "questionAnswering": 'answer = client.question_answering(\n question="{{ inputs.asObj.question }}",\n context="{{ inputs.asObj.context }}",\n model="{{ model.id }}",\n) ',
+ "tableQuestionAnswering": 'answer = client.question_answering(\n query="{{ inputs.asObj.query }}",\n table={{ inputs.asObj.table }},\n model="{{ model.id }}",\n) ',
  "textToImage": '# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model="{{ model.id }}",\n) ',
  "textToSpeech": '# audio is returned as bytes\naudio = client.text_to_speech(\n {{ inputs.asObj.inputs }},\n model="{{ model.id }}",\n) \n',
  "textToVideo": 'video = client.text_to_video(\n {{ inputs.asObj.inputs }},\n model="{{ model.id }}",\n) '
@@ -2527,6 +2553,10 @@ var CLIENTS = {
  python: [...PYTHON_CLIENTS],
  sh: [...SH_CLIENTS]
  };
+ var CLIENTS_AUTO_POLICY = {
+ js: ["huggingface.js"],
+ python: ["huggingface_hub"]
+ };
  var hasTemplate = (language, client, templateName) => templates[language]?.[client]?.[templateName] !== void 0;
  var loadTemplate = (language, client, templateName) => {
  const template = templates[language]?.[client]?.[templateName];
@@ -2593,7 +2623,7 @@ var snippetGenerator = (templateName, inputPreparationFn) => {
  }
  let providerHelper;
  try {
- providerHelper = getProviderHelper(provider, task);
+ providerHelper = getProviderHelper(provider === "auto" ? "hf-inference" : provider, task);
  } catch (e) {
  console.error(`Failed to get provider helper for ${provider} (${task})`, e);
  return [];
@@ -2646,8 +2676,10 @@ var snippetGenerator = (templateName, inputPreparationFn) => {
  providerModelId: providerModelId ?? model.id,
  billTo: opts?.billTo
  };
+ const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
  return import_tasks.inferenceSnippetLanguages.map((language) => {
- return CLIENTS[language].map((client) => {
+ const langClients = clients[language] ?? [];
+ return langClients.map((client) => {
  if (!hasTemplate(language, client, templateName)) {
  return;
  }
@@ -2703,6 +2735,14 @@ var prepareConversationalInput = (model, opts) => {
  ...opts?.top_p ? { top_p: opts?.top_p } : void 0
  };
  };
+ var prepareQuestionAnsweringInput = (model) => {
+ const data = JSON.parse((0, import_tasks.getModelInputSnippet)(model));
+ return { question: data.question, context: data.context };
+ };
+ var prepareTableQuestionAnsweringInput = (model) => {
+ const data = JSON.parse((0, import_tasks.getModelInputSnippet)(model));
+ return { query: data.query, table: JSON.stringify(data.table) };
+ };
  var snippets = {
  "audio-classification": snippetGenerator("basicAudio"),
  "audio-to-audio": snippetGenerator("basicAudio"),
@@ -2716,12 +2756,12 @@ var snippets = {
  "image-to-image": snippetGenerator("imageToImage", prepareImageToImageInput),
  "image-to-text": snippetGenerator("basicImage"),
  "object-detection": snippetGenerator("basicImage"),
- "question-answering": snippetGenerator("basic"),
+ "question-answering": snippetGenerator("questionAnswering", prepareQuestionAnsweringInput),
  "sentence-similarity": snippetGenerator("basic"),
  summarization: snippetGenerator("basic"),
  "tabular-classification": snippetGenerator("tabular"),
  "tabular-regression": snippetGenerator("tabular"),
- "table-question-answering": snippetGenerator("basic"),
+ "table-question-answering": snippetGenerator("tableQuestionAnswering", prepareTableQuestionAnsweringInput),
  "text-classification": snippetGenerator("basic"),
  "text-generation": snippetGenerator("basic"),
  "text-to-audio": snippetGenerator("textToAudio"),