@huggingface/inference 3.1.6 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +13 -7
  2. package/dist/index.cjs +89 -155
  3. package/dist/index.js +89 -151
  4. package/dist/src/config.d.ts +1 -0
  5. package/dist/src/config.d.ts.map +1 -1
  6. package/dist/src/index.d.ts +0 -5
  7. package/dist/src/index.d.ts.map +1 -1
  8. package/dist/src/lib/getProviderModelId.d.ts +10 -0
  9. package/dist/src/lib/getProviderModelId.d.ts.map +1 -0
  10. package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
  11. package/dist/src/providers/consts.d.ts +13 -0
  12. package/dist/src/providers/consts.d.ts.map +1 -0
  13. package/dist/src/providers/fal-ai.d.ts +16 -4
  14. package/dist/src/providers/fal-ai.d.ts.map +1 -1
  15. package/dist/src/providers/fireworks-ai.d.ts +18 -0
  16. package/dist/src/providers/fireworks-ai.d.ts.map +1 -0
  17. package/dist/src/providers/replicate.d.ts +16 -4
  18. package/dist/src/providers/replicate.d.ts.map +1 -1
  19. package/dist/src/providers/sambanova.d.ts +16 -4
  20. package/dist/src/providers/sambanova.d.ts.map +1 -1
  21. package/dist/src/providers/together.d.ts +14 -8
  22. package/dist/src/providers/together.d.ts.map +1 -1
  23. package/dist/src/types.d.ts +1 -1
  24. package/dist/src/types.d.ts.map +1 -1
  25. package/package.json +2 -2
  26. package/src/config.ts +1 -0
  27. package/src/index.ts +0 -5
  28. package/src/lib/getProviderModelId.ts +74 -0
  29. package/src/lib/makeRequestOptions.ts +26 -51
  30. package/src/providers/consts.ts +25 -0
  31. package/src/providers/fal-ai.ts +16 -29
  32. package/src/providers/fireworks-ai.ts +18 -0
  33. package/src/providers/replicate.ts +16 -28
  34. package/src/providers/sambanova.ts +16 -22
  35. package/src/providers/together.ts +14 -55
  36. package/src/types.ts +8 -1
  37. package/dist/src/providers/types.d.ts +0 -4
  38. package/dist/src/providers/types.d.ts.map +0 -1
  39. package/src/providers/types.ts +0 -6
package/README.md CHANGED
@@ -46,7 +46,12 @@ Your access token should be kept private. If you need to protect it in front-end
46
46
 
47
47
  You can send inference requests to third-party providers with the inference client.
48
48
 
49
- Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
49
+ Currently, we support the following providers:
50
+ - [Fal.ai](https://fal.ai)
51
+ - [Fireworks AI](https://fireworks.ai)
52
+ - [Replicate](https://replicate.com)
53
+ - [Sambanova](https://sambanova.ai)
54
+ - [Together](https://together.xyz)
50
55
 
51
56
  To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
52
57
  ```ts
@@ -64,10 +69,11 @@ When authenticated with a Hugging Face access token, the request is routed throu
64
69
  When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
65
70
 
66
71
  Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
67
- - [Fal.ai supported models](./src/providers/fal-ai.ts)
68
- - [Replicate supported models](./src/providers/replicate.ts)
69
- - [Sambanova supported models](./src/providers/sambanova.ts)
70
- - [Together supported models](./src/providers/together.ts)
72
+ - [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
73
+ - [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
74
+ - [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
75
+ - [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)
76
+ - [Together supported models](https://huggingface.co/api/partners/together/models)
71
77
  - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
72
78
 
73
79
  ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
@@ -117,7 +123,7 @@ for await (const output of hf.textGenerationStream({
117
123
 
118
124
  ### Text Generation (Chat Completion API Compatible)
119
125
 
120
- Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
126
+ Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support Messages API.
121
127
 
122
128
  [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
123
129
 
@@ -611,7 +617,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the
611
617
 
612
618
  // Chat Completion Example
613
619
  const ep = hf.endpoint(
614
- "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
620
+ "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
615
621
  );
616
622
  const stream = ep.chatCompletionStream({
617
623
  model: "tgi",
package/dist/index.cjs CHANGED
@@ -20,14 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var src_exports = {};
22
22
  __export(src_exports, {
23
- FAL_AI_SUPPORTED_MODEL_IDS: () => FAL_AI_SUPPORTED_MODEL_IDS,
24
23
  HfInference: () => HfInference,
25
24
  HfInferenceEndpoint: () => HfInferenceEndpoint,
26
25
  INFERENCE_PROVIDERS: () => INFERENCE_PROVIDERS,
27
26
  InferenceOutputError: () => InferenceOutputError,
28
- REPLICATE_SUPPORTED_MODEL_IDS: () => REPLICATE_SUPPORTED_MODEL_IDS,
29
- SAMBANOVA_SUPPORTED_MODEL_IDS: () => SAMBANOVA_SUPPORTED_MODEL_IDS,
30
- TOGETHER_SUPPORTED_MODEL_IDS: () => TOGETHER_SUPPORTED_MODEL_IDS,
31
27
  audioClassification: () => audioClassification,
32
28
  audioToAudio: () => audioToAudio,
33
29
  automaticSpeechRecognition: () => automaticSpeechRecognition,
@@ -102,128 +98,22 @@ __export(tasks_exports, {
102
98
 
103
99
  // src/config.ts
104
100
  var HF_HUB_URL = "https://huggingface.co";
101
+ var HF_ROUTER_URL = "https://router.huggingface.co";
105
102
 
106
103
  // src/providers/fal-ai.ts
107
104
  var FAL_AI_API_BASE_URL = "https://fal.run";
108
- var FAL_AI_SUPPORTED_MODEL_IDS = {
109
- "text-to-image": {
110
- "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
111
- "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
112
- "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
113
- "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
114
- "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
115
- "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
116
- "Warlord-K/Sana-1024": "fal-ai/sana",
117
- "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
118
- "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
119
- "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
120
- "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
121
- "Kwai-Kolors/Kolors": "fal-ai/kolors"
122
- },
123
- "automatic-speech-recognition": {
124
- "openai/whisper-large-v3": "fal-ai/whisper"
125
- },
126
- "text-to-video": {
127
- "genmo/mochi-1-preview": "fal-ai/mochi-v1",
128
- "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
129
- "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
130
- "Lightricks/LTX-Video": "fal-ai/ltx-video"
131
- }
132
- };
133
105
 
134
106
  // src/providers/replicate.ts
135
107
  var REPLICATE_API_BASE_URL = "https://api.replicate.com";
136
- var REPLICATE_SUPPORTED_MODEL_IDS = {
137
- "text-to-image": {
138
- "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
139
- "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
140
- "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
141
- "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
142
- "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
143
- "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
144
- "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
145
- "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
146
- "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
147
- },
148
- "text-to-speech": {
149
- "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
150
- "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
151
- },
152
- "text-to-video": {
153
- "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
154
- }
155
- };
156
108
 
157
109
  // src/providers/sambanova.ts
158
110
  var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
159
- var SAMBANOVA_SUPPORTED_MODEL_IDS = {
160
- /** Chat completion / conversational */
161
- conversational: {
162
- "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
163
- "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
164
- "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
165
- "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
166
- "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
167
- "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
168
- "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
169
- "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
170
- "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
171
- "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
172
- "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
173
- "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
174
- "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
175
- }
176
- };
177
111
 
178
112
  // src/providers/together.ts
179
113
  var TOGETHER_API_BASE_URL = "https://api.together.xyz";
180
- var TOGETHER_SUPPORTED_MODEL_IDS = {
181
- "text-to-image": {
182
- "black-forest-labs/FLUX.1-Canny-dev": "black-forest-labs/FLUX.1-canny",
183
- "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
184
- "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
185
- "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
186
- "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
187
- "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
188
- },
189
- conversational: {
190
- "databricks/dbrx-instruct": "databricks/dbrx-instruct",
191
- "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
192
- "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
193
- "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
194
- "google/gemma-2-9b-it": "google/gemma-2-9b-it",
195
- "google/gemma-2b-it": "google/gemma-2-27b-it",
196
- "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
197
- "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
198
- "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
199
- "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
200
- "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
201
- "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
202
- "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
203
- "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
204
- "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
205
- "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
206
- "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
207
- "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
208
- "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
209
- "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
210
- "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
211
- "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
212
- "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
213
- "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
214
- "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
215
- "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
216
- "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
217
- "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
218
- "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
219
- "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
220
- "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
221
- },
222
- "text-generation": {
223
- "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
224
- "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
225
- }
226
- };
114
+
115
+ // src/providers/fireworks-ai.ts
116
+ var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
227
117
 
228
118
  // src/lib/isUrl.ts
229
119
  function isUrl(modelOrUrl) {
@@ -232,10 +122,70 @@ function isUrl(modelOrUrl) {
232
122
 
233
123
  // package.json
234
124
  var name = "@huggingface/inference";
235
- var version = "3.1.6";
125
+ var version = "3.3.0";
126
+
127
+ // src/providers/consts.ts
128
+ var HARDCODED_MODEL_ID_MAPPING = {
129
+ /**
130
+ * "HF model ID" => "Model ID on Inference Provider's side"
131
+ *
132
+ * Example:
133
+ * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
134
+ */
135
+ "fal-ai": {},
136
+ "fireworks-ai": {},
137
+ "hf-inference": {},
138
+ replicate: {},
139
+ sambanova: {},
140
+ together: {}
141
+ };
142
+
143
+ // src/lib/getProviderModelId.ts
144
+ var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
145
+ async function getProviderModelId(params, args, options = {}) {
146
+ if (params.provider === "hf-inference") {
147
+ return params.model;
148
+ }
149
+ if (!options.taskHint) {
150
+ throw new Error("taskHint must be specified when using a third-party provider");
151
+ }
152
+ const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
153
+ if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
154
+ return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
155
+ }
156
+ let inferenceProviderMapping;
157
+ if (inferenceProviderMappingCache.has(params.model)) {
158
+ inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
159
+ } else {
160
+ inferenceProviderMapping = await (options?.fetch ?? fetch)(
161
+ `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
162
+ {
163
+ headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
164
+ }
165
+ ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
166
+ }
167
+ if (!inferenceProviderMapping) {
168
+ throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
169
+ }
170
+ const providerMapping = inferenceProviderMapping[params.provider];
171
+ if (providerMapping) {
172
+ if (providerMapping.task !== task) {
173
+ throw new Error(
174
+ `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
175
+ );
176
+ }
177
+ if (providerMapping.status === "staging") {
178
+ console.warn(
179
+ `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
180
+ );
181
+ }
182
+ return providerMapping.providerId;
183
+ }
184
+ throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
185
+ }
236
186
 
237
187
  // src/lib/makeRequestOptions.ts
238
- var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
188
+ var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
239
189
  var tasks = null;
240
190
  async function makeRequestOptions(args, options) {
241
191
  const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -251,16 +201,15 @@ async function makeRequestOptions(args, options) {
251
201
  if (maybeModel && isUrl(maybeModel)) {
252
202
  throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
253
203
  }
254
- let model;
255
- if (!maybeModel) {
256
- if (taskHint) {
257
- model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
258
- } else {
259
- throw new Error("No model provided, and no default model found for this task");
260
- }
261
- } else {
262
- model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
204
+ if (!maybeModel && !taskHint) {
205
+ throw new Error("No model provided, and no task has been specified.");
263
206
  }
207
+ const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
208
+ const model = await getProviderModelId({ model: hfModel, provider }, args, {
209
+ taskHint,
210
+ chatCompletion: chatCompletion2,
211
+ fetch: options?.fetch
212
+ });
264
213
  const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
265
214
  const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
266
215
  authMethod,
@@ -316,31 +265,6 @@ async function makeRequestOptions(args, options) {
316
265
  };
317
266
  return { url, info };
318
267
  }
319
- function mapModel(params) {
320
- if (params.provider === "hf-inference") {
321
- return params.model;
322
- }
323
- if (!params.taskHint) {
324
- throw new Error("taskHint must be specified when using a third-party provider");
325
- }
326
- const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
327
- const model = (() => {
328
- switch (params.provider) {
329
- case "fal-ai":
330
- return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
331
- case "replicate":
332
- return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
333
- case "sambanova":
334
- return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
335
- case "together":
336
- return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
337
- }
338
- })();
339
- if (!model) {
340
- throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
341
- }
342
- return model;
343
- }
344
268
  function makeUrl(params) {
345
269
  if (params.authMethod === "none" && params.provider !== "hf-inference") {
346
270
  throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -378,6 +302,13 @@ function makeUrl(params) {
378
302
  }
379
303
  return baseUrl;
380
304
  }
305
+ case "fireworks-ai": {
306
+ const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
307
+ if (params.taskHint === "text-generation" && params.chatCompletion) {
308
+ return `${baseUrl}/v1/chat/completions`;
309
+ }
310
+ return baseUrl;
311
+ }
381
312
  default: {
382
313
  const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
383
314
  const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1356,17 +1287,20 @@ var HfInferenceEndpoint = class {
1356
1287
  };
1357
1288
 
1358
1289
  // src/types.ts
1359
- var INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"];
1290
+ var INFERENCE_PROVIDERS = [
1291
+ "fal-ai",
1292
+ "fireworks-ai",
1293
+ "hf-inference",
1294
+ "replicate",
1295
+ "sambanova",
1296
+ "together"
1297
+ ];
1360
1298
  // Annotate the CommonJS export names for ESM import in node:
1361
1299
  0 && (module.exports = {
1362
- FAL_AI_SUPPORTED_MODEL_IDS,
1363
1300
  HfInference,
1364
1301
  HfInferenceEndpoint,
1365
1302
  INFERENCE_PROVIDERS,
1366
1303
  InferenceOutputError,
1367
- REPLICATE_SUPPORTED_MODEL_IDS,
1368
- SAMBANOVA_SUPPORTED_MODEL_IDS,
1369
- TOGETHER_SUPPORTED_MODEL_IDS,
1370
1304
  audioClassification,
1371
1305
  audioToAudio,
1372
1306
  automaticSpeechRecognition,
package/dist/index.js CHANGED
@@ -43,128 +43,22 @@ __export(tasks_exports, {
43
43
 
44
44
  // src/config.ts
45
45
  var HF_HUB_URL = "https://huggingface.co";
46
+ var HF_ROUTER_URL = "https://router.huggingface.co";
46
47
 
47
48
  // src/providers/fal-ai.ts
48
49
  var FAL_AI_API_BASE_URL = "https://fal.run";
49
- var FAL_AI_SUPPORTED_MODEL_IDS = {
50
- "text-to-image": {
51
- "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
52
- "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
53
- "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
54
- "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
55
- "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
56
- "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
57
- "Warlord-K/Sana-1024": "fal-ai/sana",
58
- "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
59
- "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
60
- "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
61
- "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
62
- "Kwai-Kolors/Kolors": "fal-ai/kolors"
63
- },
64
- "automatic-speech-recognition": {
65
- "openai/whisper-large-v3": "fal-ai/whisper"
66
- },
67
- "text-to-video": {
68
- "genmo/mochi-1-preview": "fal-ai/mochi-v1",
69
- "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
70
- "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
71
- "Lightricks/LTX-Video": "fal-ai/ltx-video"
72
- }
73
- };
74
50
 
75
51
  // src/providers/replicate.ts
76
52
  var REPLICATE_API_BASE_URL = "https://api.replicate.com";
77
- var REPLICATE_SUPPORTED_MODEL_IDS = {
78
- "text-to-image": {
79
- "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
80
- "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
81
- "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
82
- "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
83
- "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
84
- "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
85
- "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
86
- "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
87
- "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
88
- },
89
- "text-to-speech": {
90
- "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
91
- "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
92
- },
93
- "text-to-video": {
94
- "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
95
- }
96
- };
97
53
 
98
54
  // src/providers/sambanova.ts
99
55
  var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
100
- var SAMBANOVA_SUPPORTED_MODEL_IDS = {
101
- /** Chat completion / conversational */
102
- conversational: {
103
- "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
104
- "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
105
- "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
106
- "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
107
- "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
108
- "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
109
- "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
110
- "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
111
- "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
112
- "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
113
- "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
114
- "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
115
- "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
116
- }
117
- };
118
56
 
119
57
  // src/providers/together.ts
120
58
  var TOGETHER_API_BASE_URL = "https://api.together.xyz";
121
- var TOGETHER_SUPPORTED_MODEL_IDS = {
122
- "text-to-image": {
123
- "black-forest-labs/FLUX.1-Canny-dev": "black-forest-labs/FLUX.1-canny",
124
- "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
125
- "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
126
- "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
127
- "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
128
- "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
129
- },
130
- conversational: {
131
- "databricks/dbrx-instruct": "databricks/dbrx-instruct",
132
- "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
133
- "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
134
- "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
135
- "google/gemma-2-9b-it": "google/gemma-2-9b-it",
136
- "google/gemma-2b-it": "google/gemma-2-27b-it",
137
- "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
138
- "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
139
- "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
140
- "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
141
- "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
142
- "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
143
- "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
144
- "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
145
- "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
146
- "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
147
- "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
148
- "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
149
- "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
150
- "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
151
- "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
152
- "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
153
- "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
154
- "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
155
- "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
156
- "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
157
- "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
158
- "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
159
- "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
160
- "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
161
- "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
162
- },
163
- "text-generation": {
164
- "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
165
- "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
166
- }
167
- };
59
+
60
+ // src/providers/fireworks-ai.ts
61
+ var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
168
62
 
169
63
  // src/lib/isUrl.ts
170
64
  function isUrl(modelOrUrl) {
@@ -173,10 +67,70 @@ function isUrl(modelOrUrl) {
173
67
 
174
68
  // package.json
175
69
  var name = "@huggingface/inference";
176
- var version = "3.1.6";
70
+ var version = "3.3.0";
71
+
72
+ // src/providers/consts.ts
73
+ var HARDCODED_MODEL_ID_MAPPING = {
74
+ /**
75
+ * "HF model ID" => "Model ID on Inference Provider's side"
76
+ *
77
+ * Example:
78
+ * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
79
+ */
80
+ "fal-ai": {},
81
+ "fireworks-ai": {},
82
+ "hf-inference": {},
83
+ replicate: {},
84
+ sambanova: {},
85
+ together: {}
86
+ };
87
+
88
+ // src/lib/getProviderModelId.ts
89
+ var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
90
+ async function getProviderModelId(params, args, options = {}) {
91
+ if (params.provider === "hf-inference") {
92
+ return params.model;
93
+ }
94
+ if (!options.taskHint) {
95
+ throw new Error("taskHint must be specified when using a third-party provider");
96
+ }
97
+ const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
98
+ if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
99
+ return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
100
+ }
101
+ let inferenceProviderMapping;
102
+ if (inferenceProviderMappingCache.has(params.model)) {
103
+ inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
104
+ } else {
105
+ inferenceProviderMapping = await (options?.fetch ?? fetch)(
106
+ `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
107
+ {
108
+ headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
109
+ }
110
+ ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
111
+ }
112
+ if (!inferenceProviderMapping) {
113
+ throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
114
+ }
115
+ const providerMapping = inferenceProviderMapping[params.provider];
116
+ if (providerMapping) {
117
+ if (providerMapping.task !== task) {
118
+ throw new Error(
119
+ `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
120
+ );
121
+ }
122
+ if (providerMapping.status === "staging") {
123
+ console.warn(
124
+ `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
125
+ );
126
+ }
127
+ return providerMapping.providerId;
128
+ }
129
+ throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
130
+ }
177
131
 
178
132
  // src/lib/makeRequestOptions.ts
179
- var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
133
+ var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
180
134
  var tasks = null;
181
135
  async function makeRequestOptions(args, options) {
182
136
  const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -192,16 +146,15 @@ async function makeRequestOptions(args, options) {
192
146
  if (maybeModel && isUrl(maybeModel)) {
193
147
  throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
194
148
  }
195
- let model;
196
- if (!maybeModel) {
197
- if (taskHint) {
198
- model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
199
- } else {
200
- throw new Error("No model provided, and no default model found for this task");
201
- }
202
- } else {
203
- model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
149
+ if (!maybeModel && !taskHint) {
150
+ throw new Error("No model provided, and no task has been specified.");
204
151
  }
152
+ const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
153
+ const model = await getProviderModelId({ model: hfModel, provider }, args, {
154
+ taskHint,
155
+ chatCompletion: chatCompletion2,
156
+ fetch: options?.fetch
157
+ });
205
158
  const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
206
159
  const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
207
160
  authMethod,
@@ -257,31 +210,6 @@ async function makeRequestOptions(args, options) {
257
210
  };
258
211
  return { url, info };
259
212
  }
260
- function mapModel(params) {
261
- if (params.provider === "hf-inference") {
262
- return params.model;
263
- }
264
- if (!params.taskHint) {
265
- throw new Error("taskHint must be specified when using a third-party provider");
266
- }
267
- const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
268
- const model = (() => {
269
- switch (params.provider) {
270
- case "fal-ai":
271
- return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
272
- case "replicate":
273
- return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
274
- case "sambanova":
275
- return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
276
- case "together":
277
- return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
278
- }
279
- })();
280
- if (!model) {
281
- throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
282
- }
283
- return model;
284
- }
285
213
  function makeUrl(params) {
286
214
  if (params.authMethod === "none" && params.provider !== "hf-inference") {
287
215
  throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -319,6 +247,13 @@ function makeUrl(params) {
319
247
  }
320
248
  return baseUrl;
321
249
  }
250
+ case "fireworks-ai": {
251
+ const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
252
+ if (params.taskHint === "text-generation" && params.chatCompletion) {
253
+ return `${baseUrl}/v1/chat/completions`;
254
+ }
255
+ return baseUrl;
256
+ }
322
257
  default: {
323
258
  const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
324
259
  const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1297,16 +1232,19 @@ var HfInferenceEndpoint = class {
1297
1232
  };
1298
1233
 
1299
1234
  // src/types.ts
1300
- var INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"];
1235
+ var INFERENCE_PROVIDERS = [
1236
+ "fal-ai",
1237
+ "fireworks-ai",
1238
+ "hf-inference",
1239
+ "replicate",
1240
+ "sambanova",
1241
+ "together"
1242
+ ];
1301
1243
  export {
1302
- FAL_AI_SUPPORTED_MODEL_IDS,
1303
1244
  HfInference,
1304
1245
  HfInferenceEndpoint,
1305
1246
  INFERENCE_PROVIDERS,
1306
1247
  InferenceOutputError,
1307
- REPLICATE_SUPPORTED_MODEL_IDS,
1308
- SAMBANOVA_SUPPORTED_MODEL_IDS,
1309
- TOGETHER_SUPPORTED_MODEL_IDS,
1310
1248
  audioClassification,
1311
1249
  audioToAudio,
1312
1250
  automaticSpeechRecognition,