@huggingface/inference 3.1.6 → 3.3.0
- package/README.md +13 -7
- package/dist/index.cjs +89 -155
- package/dist/index.js +89 -151
- package/dist/src/config.d.ts +1 -0
- package/dist/src/config.d.ts.map +1 -1
- package/dist/src/index.d.ts +0 -5
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/lib/getProviderModelId.d.ts +10 -0
- package/dist/src/lib/getProviderModelId.d.ts.map +1 -0
- package/dist/src/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/src/providers/consts.d.ts +13 -0
- package/dist/src/providers/consts.d.ts.map +1 -0
- package/dist/src/providers/fal-ai.d.ts +16 -4
- package/dist/src/providers/fal-ai.d.ts.map +1 -1
- package/dist/src/providers/fireworks-ai.d.ts +18 -0
- package/dist/src/providers/fireworks-ai.d.ts.map +1 -0
- package/dist/src/providers/replicate.d.ts +16 -4
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/sambanova.d.ts +16 -4
- package/dist/src/providers/sambanova.d.ts.map +1 -1
- package/dist/src/providers/together.d.ts +14 -8
- package/dist/src/providers/together.d.ts.map +1 -1
- package/dist/src/types.d.ts +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/config.ts +1 -0
- package/src/index.ts +0 -5
- package/src/lib/getProviderModelId.ts +74 -0
- package/src/lib/makeRequestOptions.ts +26 -51
- package/src/providers/consts.ts +25 -0
- package/src/providers/fal-ai.ts +16 -29
- package/src/providers/fireworks-ai.ts +18 -0
- package/src/providers/replicate.ts +16 -28
- package/src/providers/sambanova.ts +16 -22
- package/src/providers/together.ts +14 -55
- package/src/types.ts +8 -1
- package/dist/src/providers/types.d.ts +0 -4
- package/dist/src/providers/types.d.ts.map +0 -1
- package/src/providers/types.ts +0 -6
package/README.md
CHANGED

@@ -46,7 +46,12 @@ Your access token should be kept private. If you need to protect it in front-end
 
 You can send inference requests to third-party providers with the inference client.
 
-Currently, we support the following providers:
+Currently, we support the following providers:
+- [Fal.ai](https://fal.ai)
+- [Fireworks AI](https://fireworks.ai)
+- [Replicate](https://replicate.com)
+- [Sambanova](https://sambanova.ai)
+- [Together](https://together.xyz)
 
 To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
 ```ts
@@ -64,10 +69,11 @@ When authenticated with a Hugging Face access token, the request is routed throu
 When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
 
 Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
-- [Fal.ai supported models](
-- [
-- [
-- [
+- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
+- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
+- [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
+- [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)
+- [Together supported models](https://huggingface.co/api/partners/together/models)
 - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
 
 ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
@@ -117,7 +123,7 @@ for await (const output of hf.textGenerationStream({
 
 ### Text Generation (Chat Completion API Compatible)
 
-Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://
+Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://huggingface.co/docs/text-generation-inference/) on Hugging Face support Messages API.
 
 [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
 
@@ -611,7 +617,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the
 
 // Chat Completion Example
 const ep = hf.endpoint(
-  "https://
+  "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const stream = ep.chatCompletionStream({
   model: "tgi",
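The README changes above document the new multi-provider surface end to end. As a minimal sketch of the documented call pattern (the provider and model below are illustrative picks from the lists above, not prescribed defaults):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // Hugging Face access token; a provider's own key also works

// With an hf_ token the request is routed through router.huggingface.co;
// with a provider key it goes directly to that provider's inference API.
const res = await hf.chatCompletion({
  model: "meta-llama/Llama-3.3-70B-Instruct", // HF model ID, resolved to the provider's own ID
  provider: "sambanova", // any of: fal-ai, fireworks-ai, replicate, sambanova, together
  messages: [{ role: "user", content: "What is the capital of France?" }],
  max_tokens: 128,
});
console.log(res.choices[0].message.content);
```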
package/dist/index.cjs
CHANGED

@@ -20,14 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-  FAL_AI_SUPPORTED_MODEL_IDS: () => FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference: () => HfInference,
   HfInferenceEndpoint: () => HfInferenceEndpoint,
   INFERENCE_PROVIDERS: () => INFERENCE_PROVIDERS,
   InferenceOutputError: () => InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS: () => REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS: () => SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS: () => TOGETHER_SUPPORTED_MODEL_IDS,
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
@@ -102,128 +98,22 @@ __export(tasks_exports, {
 
 // src/config.ts
 var HF_HUB_URL = "https://huggingface.co";
+var HF_ROUTER_URL = "https://router.huggingface.co";
 
 // src/providers/fal-ai.ts
 var FAL_AI_API_BASE_URL = "https://fal.run";
-var FAL_AI_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
-    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
-    "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
-    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
-    "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
-    "Warlord-K/Sana-1024": "fal-ai/sana",
-    "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
-    "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
-    "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
-    "Kwai-Kolors/Kolors": "fal-ai/kolors"
-  },
-  "automatic-speech-recognition": {
-    "openai/whisper-large-v3": "fal-ai/whisper"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "fal-ai/mochi-v1",
-    "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
-    "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
-    "Lightricks/LTX-Video": "fal-ai/ltx-video"
-  }
-};
 
 // src/providers/replicate.ts
 var REPLICATE_API_BASE_URL = "https://api.replicate.com";
-var REPLICATE_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
-    "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
-    "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
-    "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
-    "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
-  },
-  "text-to-speech": {
-    "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
-    "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
-  }
-};
 
 // src/providers/sambanova.ts
 var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
-var SAMBANOVA_SUPPORTED_MODEL_IDS = {
-  /** Chat completion / conversational */
-  conversational: {
-    "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
-    "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
-    "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
-    "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
-  }
-};
 
 // src/providers/together.ts
 var TOGETHER_API_BASE_URL = "https://api.together.xyz";
-var TOGETHER_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-
-    "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
-    "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
-  },
-  conversational: {
-    "databricks/dbrx-instruct": "databricks/dbrx-instruct",
-    "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
-    "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
-    "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
-    "google/gemma-2-9b-it": "google/gemma-2-9b-it",
-    "google/gemma-2b-it": "google/gemma-2-27b-it",
-    "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-    "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
-    "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
-    "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
-    "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
-    "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
-    "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-    "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
-    "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
-    "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
-  },
-  "text-generation": {
-    "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
-    "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
-  }
-};
+
+// src/providers/fireworks-ai.ts
+var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
 
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
@@ -232,10 +122,70 @@ function isUrl(modelOrUrl) {
 
 // package.json
 var name = "@huggingface/inference";
-var version = "3.
+var version = "3.3.0";
+
+// src/providers/consts.ts
+var HARDCODED_MODEL_ID_MAPPING = {
+  /**
+   * "HF model ID" => "Model ID on Inference Provider's side"
+   *
+   * Example:
+   * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+   */
+  "fal-ai": {},
+  "fireworks-ai": {},
+  "hf-inference": {},
+  replicate: {},
+  sambanova: {},
+  together: {}
+};
+
+// src/lib/getProviderModelId.ts
+var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
+async function getProviderModelId(params, args, options = {}) {
+  if (params.provider === "hf-inference") {
+    return params.model;
+  }
+  if (!options.taskHint) {
+    throw new Error("taskHint must be specified when using a third-party provider");
+  }
+  const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
+  if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
+    return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
+  }
+  let inferenceProviderMapping;
+  if (inferenceProviderMappingCache.has(params.model)) {
+    inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
+  } else {
+    inferenceProviderMapping = await (options?.fetch ?? fetch)(
+      `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
+      {
+        headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
+      }
+    ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
+  }
+  if (!inferenceProviderMapping) {
+    throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
+  }
+  const providerMapping = inferenceProviderMapping[params.provider];
+  if (providerMapping) {
+    if (providerMapping.task !== task) {
+      throw new Error(
+        `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
+      );
+    }
+    if (providerMapping.status === "staging") {
+      console.warn(
+        `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
+      );
+    }
+    return providerMapping.providerId;
+  }
+  throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
+}
 
 // src/lib/makeRequestOptions.ts
-var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${
+var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
 var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -251,16 +201,15 @@ async function makeRequestOptions(args, options) {
   if (maybeModel && isUrl(maybeModel)) {
     throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
   }
-  let model;
-  if (!maybeModel) {
-    if (taskHint) {
-      model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
-    } else {
-      throw new Error("No model provided, and no default model found for this task");
-    }
-  } else {
-    model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
+  if (!maybeModel && !taskHint) {
+    throw new Error("No model provided, and no task has been specified.");
   }
+  const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
+  const model = await getProviderModelId({ model: hfModel, provider }, args, {
+    taskHint,
+    chatCompletion: chatCompletion2,
+    fetch: options?.fetch
+  });
   const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
   const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
     authMethod,
@@ -316,31 +265,6 @@ async function makeRequestOptions(args, options) {
   };
   return { url, info };
 }
-function mapModel(params) {
-  if (params.provider === "hf-inference") {
-    return params.model;
-  }
-  if (!params.taskHint) {
-    throw new Error("taskHint must be specified when using a third-party provider");
-  }
-  const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
-  const model = (() => {
-    switch (params.provider) {
-      case "fal-ai":
-        return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "replicate":
-        return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "sambanova":
-        return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "together":
-        return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
-    }
-  })();
-  if (!model) {
-    throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
-  }
-  return model;
-}
 function makeUrl(params) {
   if (params.authMethod === "none" && params.provider !== "hf-inference") {
     throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -378,6 +302,13 @@ function makeUrl(params) {
       }
       return baseUrl;
     }
+    case "fireworks-ai": {
+      const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
+      if (params.taskHint === "text-generation" && params.chatCompletion) {
+        return `${baseUrl}/v1/chat/completions`;
+      }
+      return baseUrl;
+    }
     default: {
       const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
       const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1356,17 +1287,20 @@ var HfInferenceEndpoint = class {
 };
 
 // src/types.ts
-var INFERENCE_PROVIDERS = [
+var INFERENCE_PROVIDERS = [
+  "fal-ai",
+  "fireworks-ai",
+  "hf-inference",
+  "replicate",
+  "sambanova",
+  "together"
+];
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference,
   HfInferenceEndpoint,
   INFERENCE_PROVIDERS,
   InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS,
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,
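The functional heart of this bundle change is `getProviderModelId`: the static `*_SUPPORTED_MODEL_IDS` tables are gone, and provider model IDs are now resolved at request time from the Hub's `inferenceProviderMapping` metadata, with `HARDCODED_MODEL_ID_MAPPING` as an escape hatch and a per-model cache. A standalone sketch of that lookup, with the endpoint and response shape taken from the bundled code above (the helper and type names below are ours, not part of the package's public API, and error handling is trimmed):

```ts
// Hypothetical helper mirroring getProviderModelId's Hub lookup.
type ProviderMappingEntry = {
  providerId: string; // model ID on the provider's side
  task: string;       // e.g. "conversational", "text-to-image"
  status?: string;    // "staging" entries only trigger a console.warn in the bundled code
};

async function lookupProviderModelId(hfModelId: string, provider: string, accessToken?: string): Promise<string> {
  const url = `https://huggingface.co/api/models/${hfModelId}?expand[]=inferenceProviderMapping`;
  const headers: Record<string, string> = accessToken?.startsWith("hf_")
    ? { Authorization: `Bearer ${accessToken}` }
    : {};
  const json = (await (await fetch(url, { headers })).json()) as {
    inferenceProviderMapping?: Record<string, ProviderMappingEntry>;
  };
  const entry = json.inferenceProviderMapping?.[provider];
  if (!entry) {
    throw new Error(`Model ${hfModelId} is not supported by provider ${provider}.`);
  }
  return entry.providerId;
}

// e.g. await lookupProviderModelId("deepseek-ai/DeepSeek-R1", "together") would return
// Together's own ID for that model, replacing the old TOGETHER_SUPPORTED_MODEL_IDS table.
```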
package/dist/index.js
CHANGED

@@ -43,128 +43,22 @@ __export(tasks_exports, {
 
 // src/config.ts
 var HF_HUB_URL = "https://huggingface.co";
+var HF_ROUTER_URL = "https://router.huggingface.co";
 
 // src/providers/fal-ai.ts
 var FAL_AI_API_BASE_URL = "https://fal.run";
-var FAL_AI_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
-    "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
-    "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
-    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
-    "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
-    "Warlord-K/Sana-1024": "fal-ai/sana",
-    "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
-    "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "fal-ai/stable-diffusion-v35-large/turbo",
-    "stabilityai/stable-diffusion-3.5-medium": "fal-ai/stable-diffusion-v35-medium",
-    "Kwai-Kolors/Kolors": "fal-ai/kolors"
-  },
-  "automatic-speech-recognition": {
-    "openai/whisper-large-v3": "fal-ai/whisper"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "fal-ai/mochi-v1",
-    "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
-    "THUDM/CogVideoX-5b": "fal-ai/cogvideox-5b",
-    "Lightricks/LTX-Video": "fal-ai/ltx-video"
-  }
-};
 
 // src/providers/replicate.ts
 var REPLICATE_API_BASE_URL = "https://api.replicate.com";
-var REPLICATE_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/flux-dev",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
-    "ByteDance/Hyper-SD": "bytedance/hyper-flux-16step:382cf8959fb0f0d665b26e7e80b8d6dc3faaef1510f14ce017e8c732bb3d1eb7",
-    "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
-    "playgroundai/playground-v2.5-1024px-aesthetic": "playgroundai/playground-v2.5-1024px-aesthetic:a45f82a1382bed5c7aeb861dac7c7d191b0fdf74d8d57c4a0e6ed7d4d0bf7d24",
-    "stabilityai/stable-diffusion-3.5-large-turbo": "stability-ai/stable-diffusion-3.5-large-turbo",
-    "stabilityai/stable-diffusion-3.5-large": "stability-ai/stable-diffusion-3.5-large",
-    "stabilityai/stable-diffusion-3.5-medium": "stability-ai/stable-diffusion-3.5-medium",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stability-ai/sdxl:7762fd07cf82c948538e41f63f77d685e02b063e37e496e96eefd46c929f9bdc"
-  },
-  "text-to-speech": {
-    "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:3c645149db020c85d080e2f8cfe482a0e68189a922cde964fa9e80fb179191f3",
-    "hexgrad/Kokoro-82M": "jaaari/kokoro-82m:dfdf537ba482b029e0a761699e6f55e9162cfd159270bfe0e44857caa5f275a6"
-  },
-  "text-to-video": {
-    "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
-  }
-};
 
 // src/providers/sambanova.ts
 var SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai";
-var SAMBANOVA_SUPPORTED_MODEL_IDS = {
-  /** Chat completion / conversational */
-  conversational: {
-    "deepseek-ai/DeepSeek-Distill-R1-Llama-70B": "DeepSeek-Distill-R1-Llama-70B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
-    "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview",
-    "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct": "Meta-Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct": "Meta-Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct",
-    "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B"
-  }
-};
 
 // src/providers/together.ts
 var TOGETHER_API_BASE_URL = "https://api.together.xyz";
-var TOGETHER_SUPPORTED_MODEL_IDS = {
-  "text-to-image": {
-
-    "black-forest-labs/FLUX.1-Depth-dev": "black-forest-labs/FLUX.1-depth",
-    "black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
-    "black-forest-labs/FLUX.1-Redux-dev": "black-forest-labs/FLUX.1-redux",
-    "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-pro",
-    "stabilityai/stable-diffusion-xl-base-1.0": "stabilityai/stable-diffusion-xl-base-1.0"
-  },
-  conversational: {
-    "databricks/dbrx-instruct": "databricks/dbrx-instruct",
-    "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
-    "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
-    "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
-    "google/gemma-2-9b-it": "google/gemma-2-9b-it",
-    "google/gemma-2b-it": "google/gemma-2-27b-it",
-    "meta-llama/Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-    "meta-llama/Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-Vision-Free",
-    "meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    "meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-chat-hf",
-    "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-405B-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K",
-    "microsoft/WizardLM-2-8x22B": "microsoft/WizardLM-2-8x22B",
-    "mistralai/Mistral-7B-Instruct-v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
-    "mistralai/Mixtral-8x22B-Instruct-v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-    "Qwen/Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
-    "Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "Qwen/Qwen2.5-7B-Instruct": "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
-    "scb10x/llama-3-typhoon-v1.5-8b-instruct": "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
-    "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": "scb10x/scb10x-llama3-typhoon-v1-5x-4f316"
-  },
-  "text-generation": {
-    "meta-llama/Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
-    "mistralai/Mixtral-8x7B-v0.1": "mistralai/Mixtral-8x7B-v0.1"
-  }
-};
+
+// src/providers/fireworks-ai.ts
+var FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
 
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
@@ -173,10 +67,70 @@ function isUrl(modelOrUrl) {
 
 // package.json
 var name = "@huggingface/inference";
-var version = "3.
+var version = "3.3.0";
+
+// src/providers/consts.ts
+var HARDCODED_MODEL_ID_MAPPING = {
+  /**
+   * "HF model ID" => "Model ID on Inference Provider's side"
+   *
+   * Example:
+   * "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+   */
+  "fal-ai": {},
+  "fireworks-ai": {},
+  "hf-inference": {},
+  replicate: {},
+  sambanova: {},
+  together: {}
+};
+
+// src/lib/getProviderModelId.ts
+var inferenceProviderMappingCache = /* @__PURE__ */ new Map();
+async function getProviderModelId(params, args, options = {}) {
+  if (params.provider === "hf-inference") {
+    return params.model;
+  }
+  if (!options.taskHint) {
+    throw new Error("taskHint must be specified when using a third-party provider");
+  }
+  const task = options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
+  if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
+    return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
+  }
+  let inferenceProviderMapping;
+  if (inferenceProviderMappingCache.has(params.model)) {
+    inferenceProviderMapping = inferenceProviderMappingCache.get(params.model);
+  } else {
+    inferenceProviderMapping = await (options?.fetch ?? fetch)(
+      `${HF_HUB_URL}/api/models/${params.model}?expand[]=inferenceProviderMapping`,
+      {
+        headers: args.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${args.accessToken}` } : {}
+      }
+    ).then((resp) => resp.json()).then((json) => json.inferenceProviderMapping).catch(() => null);
+  }
+  if (!inferenceProviderMapping) {
+    throw new Error(`We have not been able to find inference provider information for model ${params.model}.`);
+  }
+  const providerMapping = inferenceProviderMapping[params.provider];
+  if (providerMapping) {
+    if (providerMapping.task !== task) {
+      throw new Error(
+        `Model ${params.model} is not supported for task ${task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
+      );
+    }
+    if (providerMapping.status === "staging") {
+      console.warn(
+        `Model ${params.model} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
+      );
+    }
+    return providerMapping.providerId;
+  }
+  throw new Error(`Model ${params.model} is not supported provider ${params.provider}.`);
+}
 
 // src/lib/makeRequestOptions.ts
-var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${
+var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_ROUTER_URL}/{{PROVIDER}}`;
 var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
@@ -192,16 +146,15 @@ async function makeRequestOptions(args, options) {
   if (maybeModel && isUrl(maybeModel)) {
     throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
   }
-  let model;
-  if (!maybeModel) {
-    if (taskHint) {
-      model = mapModel({ model: await loadDefaultModel(taskHint), provider, taskHint, chatCompletion: chatCompletion2 });
-    } else {
-      throw new Error("No model provided, and no default model found for this task");
-    }
-  } else {
-    model = mapModel({ model: maybeModel, provider, taskHint, chatCompletion: chatCompletion2 });
+  if (!maybeModel && !taskHint) {
+    throw new Error("No model provided, and no task has been specified.");
   }
+  const hfModel = maybeModel ?? await loadDefaultModel(taskHint);
+  const model = await getProviderModelId({ model: hfModel, provider }, args, {
+    taskHint,
+    chatCompletion: chatCompletion2,
+    fetch: options?.fetch
+  });
   const authMethod = accessToken ? accessToken.startsWith("hf_") ? "hf-token" : "provider-key" : includeCredentials === "include" ? "credentials-include" : "none";
   const url = endpointUrl ? chatCompletion2 ? endpointUrl + `/v1/chat/completions` : endpointUrl : makeUrl({
     authMethod,
@@ -257,31 +210,6 @@ async function makeRequestOptions(args, options) {
   };
   return { url, info };
 }
-function mapModel(params) {
-  if (params.provider === "hf-inference") {
-    return params.model;
-  }
-  if (!params.taskHint) {
-    throw new Error("taskHint must be specified when using a third-party provider");
-  }
-  const task = params.taskHint === "text-generation" && params.chatCompletion ? "conversational" : params.taskHint;
-  const model = (() => {
-    switch (params.provider) {
-      case "fal-ai":
-        return FAL_AI_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "replicate":
-        return REPLICATE_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "sambanova":
-        return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
-      case "together":
-        return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
-    }
-  })();
-  if (!model) {
-    throw new Error(`Model ${params.model} is not supported for task ${task} and provider ${params.provider}`);
-  }
-  return model;
-}
 function makeUrl(params) {
   if (params.authMethod === "none" && params.provider !== "hf-inference") {
     throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");
@@ -319,6 +247,13 @@ function makeUrl(params) {
       }
       return baseUrl;
     }
+    case "fireworks-ai": {
+      const baseUrl = shouldProxy ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider) : FIREWORKS_AI_API_BASE_URL;
+      if (params.taskHint === "text-generation" && params.chatCompletion) {
+        return `${baseUrl}/v1/chat/completions`;
+      }
+      return baseUrl;
+    }
     default: {
       const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
       const url = params.forceTask ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}` : `${baseUrl}/models/${params.model}`;
@@ -1297,16 +1232,19 @@ var HfInferenceEndpoint = class {
 };
 
 // src/types.ts
-var INFERENCE_PROVIDERS = [
+var INFERENCE_PROVIDERS = [
+  "fal-ai",
+  "fireworks-ai",
+  "hf-inference",
+  "replicate",
+  "sambanova",
+  "together"
+];
 export {
-  FAL_AI_SUPPORTED_MODEL_IDS,
   HfInference,
   HfInferenceEndpoint,
   INFERENCE_PROVIDERS,
   InferenceOutputError,
-  REPLICATE_SUPPORTED_MODEL_IDS,
-  SAMBANOVA_SUPPORTED_MODEL_IDS,
-  TOGETHER_SUPPORTED_MODEL_IDS,
  audioClassification,
  audioToAudio,
  automaticSpeechRecognition,
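Both bundles also share the new URL routing: when a request is proxied through Hugging Face, the base URL comes from `HF_HUB_INFERENCE_PROXY_TEMPLATE` (`https://router.huggingface.co/{{PROVIDER}}`); otherwise the provider's own base URL is used. A condensed sketch of the `fireworks-ai` branch of `makeUrl` (the `shouldProxy` rule here is an assumption read off the auth-method logic above, not the bundle's exact flag):

```ts
const HF_ROUTER_URL = "https://router.huggingface.co";
const FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";

type AuthMethod = "hf-token" | "provider-key" | "credentials-include" | "none";

// Assumption: requests are proxied unless the caller brought the provider's own key.
function fireworksUrl(authMethod: AuthMethod, chatCompletion: boolean): string {
  const shouldProxy = authMethod !== "provider-key";
  const baseUrl = shouldProxy
    ? `${HF_ROUTER_URL}/fireworks-ai` // HF_HUB_INFERENCE_PROXY_TEMPLATE with {{PROVIDER}} filled in
    : FIREWORKS_AI_API_BASE_URL;
  return chatCompletion ? `${baseUrl}/v1/chat/completions` : baseUrl;
}

// fireworksUrl("hf-token", true)     → "https://router.huggingface.co/fireworks-ai/v1/chat/completions"
// fireworksUrl("provider-key", true) → "https://api.fireworks.ai/inference/v1/chat/completions"
```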