npm - @huggingface/inference - Versions diffs - 3.1.6 → 3.3.0 - Mend

@huggingface/inference 3.1.6 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +13 -7
package/dist/index.cjs +89 -155
package/dist/index.js +89 -151
package/dist/src/config.d.ts +1 -0
package/dist/src/config.d.ts.map +1 -1
package/dist/src/index.d.ts +0 -5
package/dist/src/index.d.ts.map +1 -1
package/dist/src/lib/getProviderModelId.d.ts +10 -0
package/dist/src/lib/getProviderModelId.d.ts.map +1 -0
package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
package/dist/src/providers/consts.d.ts +13 -0
package/dist/src/providers/consts.d.ts.map +1 -0
package/dist/src/providers/fal-ai.d.ts +16 -4
package/dist/src/providers/fal-ai.d.ts.map +1 -1
package/dist/src/providers/fireworks-ai.d.ts +18 -0
package/dist/src/providers/fireworks-ai.d.ts.map +1 -0
package/dist/src/providers/replicate.d.ts +16 -4
package/dist/src/providers/replicate.d.ts.map +1 -1
package/dist/src/providers/sambanova.d.ts +16 -4
package/dist/src/providers/sambanova.d.ts.map +1 -1
package/dist/src/providers/together.d.ts +14 -8
package/dist/src/providers/together.d.ts.map +1 -1
package/dist/src/types.d.ts +1 -1
package/dist/src/types.d.ts.map +1 -1
package/package.json +2 -2
package/src/config.ts +1 -0
package/src/index.ts +0 -5
package/src/lib/getProviderModelId.ts +74 -0
package/src/lib/makeRequestOptions.ts +26 -51
package/src/providers/consts.ts +25 -0
package/src/providers/fal-ai.ts +16 -29
package/src/providers/fireworks-ai.ts +18 -0
package/src/providers/replicate.ts +16 -28
package/src/providers/sambanova.ts +16 -22
package/src/providers/together.ts +14 -55
package/src/types.ts +8 -1
package/dist/src/providers/types.d.ts +0 -4
package/dist/src/providers/types.d.ts.map +0 -1
package/src/providers/types.ts +0 -6

package/README.md CHANGED

@@ -46,7 +46,12 @@ Your access token should be kept private. If you need to protect it in front-end
 You can send inference requests to third-party providers with the inference client.
-Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
+Currently, we support the following providers:
+- [Fal.ai](https://fal.ai)
+- [Fireworks AI](https://fireworks.ai)
+- [Replicate](https://replicate.com)
+- [Sambanova](https://sambanova.ai)
+- [Together](https://together.xyz)
 To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
 ```ts
@@ -64,10 +69,11 @@ When authenticated with a Hugging Face access token, the request is routed throu
 When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
 Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
-- [Fal.ai supported models](./src/providers/fal-ai.ts)
-- [Replicate supported models](./src/providers/replicate.ts)
-- [Sambanova supported models](./src/providers/sambanova.ts)
-- [Together supported models](./src/providers/together.ts)
+- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
+- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
+- [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
+- [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)
+- [Together supported models](https://huggingface.co/api/partners/together/models)
 - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
 ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
@@ -117,7 +123,7 @@ for await (const output of hf.textGenerationStream({
 ### Text Generation (Chat Completion API Compatible)
-Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
+Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support Messages API.
 [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
@@ -611,7 +617,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the
 // Chat Completion Example
 const ep = hf.endpoint(
-  "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
+  "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const stream = ep.chatCompletionStream({
   model: "tgi",

package/dist/index.cjs CHANGED

@@ -20,14 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-  FAL_AI_SUPPORTED_MODEL_IDS: () => FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference: () => HfInference,
   HfInferenceEndpoint: () => HfInferenceEndpoint,
   INFERENCE_PROVIDERS: () => INFERENCE_PROVIDERS,
   InferenceOutputError: () => InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS: () => REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS: () => SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS: () => TOGETHER_SUPPORTED_MODEL_IDS,
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
@@ -102,128 +98,22 @@ __export(tasks_exports, {
 // src/config.ts
 var HF_HUB_URL = "https://huggingface.co";
+var HF_ROUTER_URL = "https://router.huggingface.co";
 // src/providers/fal-ai.ts
 var FAL_AI_API_BASE_URL = "https://fal.run";
-var FAL_AI_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
-    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
-    "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
-    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
-    "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
-    "Warlord-K/Sana-1024": "fal-ai/sana",
-    "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
-    "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
-    "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
-    "Kwai-Kolors/Kolors": "fal-ai/kolors"
-  },
-  "automatic-speech-recognition": {
-    "openai/whisper-large-v3": "fal-ai/whisper"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "fal-ai/mochi-v1",
-    "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
-    "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
-    "Lightricks/LTX-Video": "fal-ai/ltx-video"
-  }
-};
 // src/providers/replicate.ts
 var REPLICATE_API_BASE_URL = "https://api.replicate.com";
-var REPLICATE_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
-    "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
-    "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
-    "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
-    "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
-  },
-  "text-to-speech": {
-    "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
-    "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
-  }
-};
 // src/providers/sambanova.ts
 var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
-var SAMBANOVA_SUPPORTED_MODEL_IDS = {
-  /** Chat completion / conversational */
-  conversational: {
-    "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
-    "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
-    "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
-    "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
-  }
-};
 // src/providers/together.ts
 var TOGETHER_API_BASE_URL = "https://api.together.xyz";
-var TOGETHER_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-Canny-dev": "black-forest-labs/FLUX.1-canny",
-    "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
-    "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
-  },
-  conversational: {
-    "databricks/dbrx-instruct": "databricks/dbrx-instruct",
-    "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
-    "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
-    "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
-    "google/gemma-2-9b-it": "google/gemma-2-9b-it",
-    "google/gemma-2b-it": "google/gemma-2-27b-it",
-    "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-    "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
-    "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
-    "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
-    "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
-    "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
-    "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-    "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
-    "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
-    "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
-  },
-  "text-generation": {
-    "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
-    "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
-  }
-};
+// src/providers/fireworks-ai.ts
+var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
@@ -232,10 +122,70 @@ function isUrl(modelOrUrl) {
 // package.json
 var name = "@huggingface/inference";
-var version = "3.1.6";
+var version = "3.3.0";
+// src/providers/consts.ts
+var HARDCODED_MODEL_ID_MAPPING = {
+  /**
+   * "HF model ID" => "Model ID on Inference Provider's side"
+   *
+   * Example:
+   * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+   */
+  "fal-ai": {},
+  "fireworks-ai": {},
+  "hf-inference": {},
+  replicate: {},
+  sambanova: {},
+  together: {}
+};
+// src/lib/getProviderModelId.ts
+var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
+async function getProviderModelId(params, args, options = {}) {
+  if (params.provider === "hf-inference") {
+    return params.model;
+  }
+  if (!options.taskHint) {
+    throw new Error("taskHint must be specified when using a third-party provider");
+  }
+  const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
+  if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
+    return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
+  }
+  let inferenceProviderMapping;
+  if (inferenceProviderMappingCache.has(params.model)) {
+    inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
+  } else {
+    inferenceProviderMapping = await (options?.fetch ?? fetch)(
+      `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
+      {
+        headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
+      }
+    ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
+  }
+  if (!inferenceProviderMapping) {
+    throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
+  }
+  const providerMapping = inferenceProviderMapping[params.provider];
+  if (providerMapping) {
+    if (providerMapping.task !== task) {
+      throw new Error(
+        `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
+      );
+    }
+    if (providerMapping.status === "staging") {
+      console.warn(
+        `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
+      );
+    }
+    return providerMapping.providerId;
+  }
+  throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
+}
 // src/lib/makeRequestOptions.ts
-var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
+var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
 var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -251,16 +201,15 @@ async function makeRequestOptions(args, options) {
   if (maybeModel && isUrl(maybeModel)) {
     throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
   }
-  let model;
-  if (!maybeModel) {
-    if (taskHint) {
-      model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
-    } else {
-      throw new Error("No model provided, and no default model found for this task");
-    }
-  } else {
-    model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
+  if (!maybeModel && !taskHint) {
+    throw new Error("No model provided, and no task has been specified.");
   }
+  const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
+  const model = await getProviderModelId({ model: hfModel, provider }, args, {
+    taskHint,
+    chatCompletion: chatCompletion2,
+    fetch: options?.fetch
+  });
   const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
   const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
     authMethod,
@@ -316,31 +265,6 @@ async function makeRequestOptions(args, options) {
   };
   return { url, info };
 }
-function mapModel(params) {
-  if (params.provider === "hf-inference") {
-    return params.model;
-  }
-  if (!params.taskHint) {
-    throw new Error("taskHint must be specified when using a third-party provider");
-  }
-  const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
-  const model = (() => {
-    switch (params.provider) {
-      case "fal-ai":
-        return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "replicate":
-        return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "sambanova":
-        return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "together":
-        return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
-    }
-  })();
-  if (!model) {
-    throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
-  }
-  return model;
-}
 function makeUrl(params) {
   if (params.authMethod === "none" && params.provider !== "hf-inference") {
     throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -378,6 +302,13 @@ function makeUrl(params) {
       }
       return baseUrl;
     }
+    case "fireworks-ai": {
+      const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
+      if (params.taskHint === "text-generation" && params.chatCompletion) {
+        return `${baseUrl}/v1/chat/completions`;
+      }
+      return baseUrl;
+    }
     default: {
       const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
       const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1356,17 +1287,20 @@ var HfInferenceEndpoint = class {
 };
 // src/types.ts
-var INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"];
+var INFERENCE_PROVIDERS = [
+  "fal-ai",
+  "fireworks-ai",
+  "hf-inference",
+  "replicate",
+  "sambanova",
+  "together"
+];
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference,
   HfInferenceEndpoint,
   INFERENCE_PROVIDERS,
   InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS,
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,

package/dist/index.js CHANGED

@@ -43,128 +43,22 @@ __export(tasks_exports, {
 // src/config.ts
 var HF_HUB_URL = "https://huggingface.co";
+var HF_ROUTER_URL = "https://router.huggingface.co";
 // src/providers/fal-ai.ts
 var FAL_AI_API_BASE_URL = "https://fal.run";
-var FAL_AI_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
-    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
-    "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
-    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
-    "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
-    "Warlord-K/Sana-1024": "fal-ai/sana",
-    "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
-    "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
-    "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
-    "Kwai-Kolors/Kolors": "fal-ai/kolors"
-  },
-  "automatic-speech-recognition": {
-    "openai/whisper-large-v3": "fal-ai/whisper"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "fal-ai/mochi-v1",
-    "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
-    "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
-    "Lightricks/LTX-Video": "fal-ai/ltx-video"
-  }
-};
 // src/providers/replicate.ts
 var REPLICATE_API_BASE_URL = "https://api.replicate.com";
-var REPLICATE_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
-    "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
-    "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
-    "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
-    "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
-  },
-  "text-to-speech": {
-    "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
-    "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
-  }
-};
 // src/providers/sambanova.ts
 var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
-var SAMBANOVA_SUPPORTED_MODEL_IDS = {
-  /** Chat completion / conversational */
-  conversational: {
-    "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
-    "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
-    "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
-    "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
-  }
-};
 // src/providers/together.ts
 var TOGETHER_API_BASE_URL = "https://api.together.xyz";
-var TOGETHER_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-Canny-dev": "black-forest-labs/FLUX.1-canny",
-    "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
-    "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
-  },
-  conversational: {
-    "databricks/dbrx-instruct": "databricks/dbrx-instruct",
-    "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
-    "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
-    "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
-    "google/gemma-2-9b-it": "google/gemma-2-9b-it",
-    "google/gemma-2b-it": "google/gemma-2-27b-it",
-    "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-    "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
-    "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
-    "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
-    "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
-    "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
-    "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-    "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
-    "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
-    "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
-  },
-  "text-generation": {
-    "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
-    "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
-  }
-};
+// src/providers/fireworks-ai.ts
+var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
@@ -173,10 +67,70 @@ function isUrl(modelOrUrl) {
 // package.json
 var name = "@huggingface/inference";
-var version = "3.1.6";
+var version = "3.3.0";
+// src/providers/consts.ts
+var HARDCODED_MODEL_ID_MAPPING = {
+  /**
+   * "HF model ID" => "Model ID on Inference Provider's side"
+   *
+   * Example:
+   * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+   */
+  "fal-ai": {},
+  "fireworks-ai": {},
+  "hf-inference": {},
+  replicate: {},
+  sambanova: {},
+  together: {}
+};
+// src/lib/getProviderModelId.ts
+var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
+async function getProviderModelId(params, args, options = {}) {
+  if (params.provider === "hf-inference") {
+    return params.model;
+  }
+  if (!options.taskHint) {
+    throw new Error("taskHint must be specified when using a third-party provider");
+  }
+  const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
+  if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
+    return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
+  }
+  let inferenceProviderMapping;
+  if (inferenceProviderMappingCache.has(params.model)) {
+    inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
+  } else {
+    inferenceProviderMapping = await (options?.fetch ?? fetch)(
+      `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
+      {
+        headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
+      }
+    ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
+  }
+  if (!inferenceProviderMapping) {
+    throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
+  }
+  const providerMapping = inferenceProviderMapping[params.provider];
+  if (providerMapping) {
+    if (providerMapping.task !== task) {
+      throw new Error(
+        `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
+      );
+    }
+    if (providerMapping.status === "staging") {
+      console.warn(
+        `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
+      );
+    }
+    return providerMapping.providerId;
+  }
+  throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
+}
 // src/lib/makeRequestOptions.ts
-var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
+var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
 var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -192,16 +146,15 @@ async function makeRequestOptions(args, options) {
   if (maybeModel && isUrl(maybeModel)) {
     throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
   }
-  let model;
-  if (!maybeModel) {
-    if (taskHint) {
-      model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
-    } else {
-      throw new Error("No model provided, and no default model found for this task");
-    }
-  } else {
-    model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
+  if (!maybeModel && !taskHint) {
+    throw new Error("No model provided, and no task has been specified.");
   }
+  const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
+  const model = await getProviderModelId({ model: hfModel, provider }, args, {
+    taskHint,
+    chatCompletion: chatCompletion2,
+    fetch: options?.fetch
+  });
   const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
   const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
     authMethod,
@@ -257,31 +210,6 @@ async function makeRequestOptions(args, options) {
   };
   return { url, info };
 }
-function mapModel(params) {
-  if (params.provider === "hf-inference") {
-    return params.model;
-  }
-  if (!params.taskHint) {
-    throw new Error("taskHint must be specified when using a third-party provider");
-  }
-  const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
-  const model = (() => {
-    switch (params.provider) {
-      case "fal-ai":
-        return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "replicate":
-        return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "sambanova":
-        return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "together":
-        return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
-    }
-  })();
-  if (!model) {
-    throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
-  }
-  return model;
-}
 function makeUrl(params) {
   if (params.authMethod === "none" && params.provider !== "hf-inference") {
     throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -319,6 +247,13 @@ function makeUrl(params) {
       }
       return baseUrl;
     }
+    case "fireworks-ai": {
+      const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
+      if (params.taskHint === "text-generation" && params.chatCompletion) {
+        return `${baseUrl}/v1/chat/completions`;
+      }
+      return baseUrl;
+    }
     default: {
       const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
       const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1297,16 +1232,19 @@ var HfInferenceEndpoint = class {
 };
 // src/types.ts
-var INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"];
+var INFERENCE_PROVIDERS = [
+  "fal-ai",
+  "fireworks-ai",
+  "hf-inference",
+  "replicate",
+  "sambanova",
+  "together"
+];
 export {
-  FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference,
   HfInferenceEndpoint,
   INFERENCE_PROVIDERS,
   InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS,
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,