@huggingface/inference 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,15 +42,15 @@ const hf = new HfInference('your access token')

  Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.

- ### Requesting third-party inference providers
+ ### Third-party inference providers

- You can request inference from third-party providers with the inference client.
+ You can send inference requests to third-party providers with the inference client.

  Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).

- To make request to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
+ To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
  ```ts
- const accessToken = "hf_..."; // Either a HF access token, or an API key from the 3rd party provider (Replicate in this example)
+ const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)

  const client = new HfInference(accessToken);
  await client.textToImage({
@@ -63,14 +63,19 @@ await client.textToImage({
  When authenticated with a Hugging Face access token, the request is routed through https://huggingface.co.
  When authenticated with a third-party provider key, the request is made directly against that provider's inference API.

- Only a subset of models are supported when requesting 3rd party providers. You can check the list of supported models per pipeline tasks here:
+ Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
  - [Fal.ai supported models](./src/providers/fal-ai.ts)
  - [Replicate supported models](./src/providers/replicate.ts)
  - [Sambanova supported models](./src/providers/sambanova.ts)
  - [Together supported models](./src/providers/together.ts)
  - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)

- #### Tree-shaking
+ ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
+ This is not an issue for LLMs as everyone converged on the OpenAI API anyways, but can be more tricky for other tasks like "text-to-image" or "automatic-speech-recognition" where there exists no standard API. Let us know if any help is needed or if we can make things easier for you!
+
+ 👋**Want to add another provider?** Get in touch if you'd like to add support for another Inference provider, and/or request it on https://huggingface.co/spaces/huggingface/HuggingDiscussions/discussions/49
+
+ ### Tree-shaking

  You can import the functions you need directly from the module instead of using the `HfInference` class.
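For the tree-shaking note in the README above, here is a minimal usage sketch (the access token and prompt are placeholders; the model id comes from the fal.ai mapping added in this version):

```ts
import { textToImage } from "@huggingface/inference";

// Tree-shaken import: call the task function directly instead of going
// through the HfInference class; `provider` routes the request to fal.ai.
const image: Blob = await textToImage({
  accessToken: "hf_...", // placeholder
  provider: "fal-ai",
  model: "black-forest-labs/FLUX.1-schnell",
  inputs: "a picture of a green bird",
});
```

Bundlers can then drop the task functions you never import.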
 
package/dist/index.cjs CHANGED
@@ -107,7 +107,15 @@ var FAL_AI_API_BASE_URL = "https://fal.run";
  var FAL_AI_SUPPORTED_MODEL_IDS = {
  "text-to-image": {
  "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
- "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev"
+ "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
+ "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
+ "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
+ "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
+ "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
+ "Warlord-K/Sana-1024": "fal-ai/sana",
+ "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
+ "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
+ "Kwai-Kolors/Kolors": "fal-ai/kolors"
  },
  "automatic-speech-recognition": {
  "openai/whisper-large-v3": "fal-ai/whisper"
@@ -120,10 +128,10 @@ var REPLICATE_SUPPORTED_MODEL_IDS = {
  "text-to-image": {
  "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
  "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637"
+ },
+ "text-to-speech": {
+ "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26"
  }
- // "text-to-speech": {
- // "SWivid/F5-TTS": "x-lance/f5-tts:87faf6dd7a692dd82043f662e76369cab126a2cf1937e25a9d41e0b834fd230e"
- // },
  };

  // src/providers/sambanova.ts
@@ -159,6 +167,8 @@ var TOGETHER_SUPPORTED_MODEL_IDS = {
  },
  conversational: {
  "databricks/dbrx-instruct": "databricks/dbrx-instruct",
+ "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
+ "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
  "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
  "google/gemma-2-9b-it": "google/gemma-2-9b-it",
  "google/gemma-2b-it": "google/gemma-2-27b-it",
@@ -204,7 +214,8 @@ function isUrl(modelOrUrl) {
  var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
  var tasks = null;
  async function makeRequestOptions(args, options) {
- const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...otherArgs } = args;
+ const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
+ let otherArgs = remainingArgs;
  const provider = maybeProvider ?? "hf-inference";
  const { forceTask, includeCredentials, taskHint, wait_for_model, use_cache, dont_load_model, chatCompletion: chatCompletion2 } = options ?? {};
  if (endpointUrl && provider !== "hf-inference") {
@@ -263,9 +274,9 @@ async function makeRequestOptions(args, options) {
  } else if (includeCredentials === true) {
  credentials = "include";
  }
- if (provider === "replicate" && model.includes(":")) {
- const version = model.split(":")[1];
- otherArgs.version = version;
+ if (provider === "replicate") {
+ const version = model.includes(":") ? model.split(":")[1] : void 0;
+ otherArgs = { input: otherArgs, version };
  }
  const info = {
  headers,
@@ -638,6 +649,19 @@ async function textToSpeech(args, options) {
  ...options,
  taskHint: "text-to-speech"
  });
+ if (res && typeof res === "object") {
+ if ("output" in res) {
+ if (typeof res.output === "string") {
+ const urlResponse = await fetch(res.output);
+ const blob = await urlResponse.blob();
+ return blob;
+ } else if (Array.isArray(res.output)) {
+ const urlResponse = await fetch(res.output[0]);
+ const blob = await urlResponse.blob();
+ return blob;
+ }
+ }
+ }
  const isValidOutput = res && res instanceof Blob;
  if (!isValidOutput) {
  throw new InferenceOutputError("Expected Blob");
@@ -719,10 +743,10 @@ async function objectDetection(args, options) {
  async function textToImage(args, options) {
  if (args.provider === "together" || args.provider === "fal-ai") {
  args.prompt = args.inputs;
- args.inputs = "";
+ delete args.inputs;
  args.response_format = "base64";
  } else if (args.provider === "replicate") {
- args.input = { prompt: args.inputs };
+ args.prompt = args.inputs;
  delete args.inputs;
  }
  const res = await request(args, {
package/dist/index.js CHANGED
@@ -49,7 +49,15 @@ var FAL_AI_API_BASE_URL = "https://fal.run";
  var FAL_AI_SUPPORTED_MODEL_IDS = {
  "text-to-image": {
  "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
- "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev"
+ "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
+ "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
+ "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
+ "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
+ "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
+ "Warlord-K/Sana-1024": "fal-ai/sana",
+ "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
+ "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
+ "Kwai-Kolors/Kolors": "fal-ai/kolors"
  },
  "automatic-speech-recognition": {
  "openai/whisper-large-v3": "fal-ai/whisper"
@@ -62,10 +70,10 @@ var REPLICATE_SUPPORTED_MODEL_IDS = {
  "text-to-image": {
  "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell",
  "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637"
+ },
+ "text-to-speech": {
+ "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26"
  }
- // "text-to-speech": {
- // "SWivid/F5-TTS": "x-lance/f5-tts:87faf6dd7a692dd82043f662e76369cab126a2cf1937e25a9d41e0b834fd230e"
- // },
  };

  // src/providers/sambanova.ts
@@ -101,6 +109,8 @@ var TOGETHER_SUPPORTED_MODEL_IDS = {
  },
  conversational: {
  "databricks/dbrx-instruct": "databricks/dbrx-instruct",
+ "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
+ "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
  "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
  "google/gemma-2-9b-it": "google/gemma-2-9b-it",
  "google/gemma-2b-it": "google/gemma-2-27b-it",
@@ -146,7 +156,8 @@ function isUrl(modelOrUrl) {
  var HF_HUB_INFERENCE_PROXY_TEMPLATE = `${HF_HUB_URL}/api/inference-proxy/{{PROVIDER}}`;
  var tasks = null;
  async function makeRequestOptions(args, options) {
- const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...otherArgs } = args;
+ const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
+ let otherArgs = remainingArgs;
  const provider = maybeProvider ?? "hf-inference";
  const { forceTask, includeCredentials, taskHint, wait_for_model, use_cache, dont_load_model, chatCompletion: chatCompletion2 } = options ?? {};
  if (endpointUrl && provider !== "hf-inference") {
@@ -205,9 +216,9 @@ async function makeRequestOptions(args, options) {
  } else if (includeCredentials === true) {
  credentials = "include";
  }
- if (provider === "replicate" && model.includes(":")) {
- const version = model.split(":")[1];
- otherArgs.version = version;
+ if (provider === "replicate") {
+ const version = model.includes(":") ? model.split(":")[1] : void 0;
+ otherArgs = { input: otherArgs, version };
  }
  const info = {
  headers,
@@ -580,6 +591,19 @@ async function textToSpeech(args, options) {
  ...options,
  taskHint: "text-to-speech"
  });
+ if (res && typeof res === "object") {
+ if ("output" in res) {
+ if (typeof res.output === "string") {
+ const urlResponse = await fetch(res.output);
+ const blob = await urlResponse.blob();
+ return blob;
+ } else if (Array.isArray(res.output)) {
+ const urlResponse = await fetch(res.output[0]);
+ const blob = await urlResponse.blob();
+ return blob;
+ }
+ }
+ }
  const isValidOutput = res && res instanceof Blob;
  if (!isValidOutput) {
  throw new InferenceOutputError("Expected Blob");
@@ -661,10 +685,10 @@ async function objectDetection(args, options) {
  async function textToImage(args, options) {
  if (args.provider === "together" || args.provider === "fal-ai") {
  args.prompt = args.inputs;
- args.inputs = "";
+ delete args.inputs;
  args.response_format = "base64";
  } else if (args.provider === "replicate") {
- args.input = { prompt: args.inputs };
+ args.prompt = args.inputs;
  delete args.inputs;
  }
  const res = await request(args, {
@@ -1 +1 @@
- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAWpE;;GAEG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;IACnC,sCAAsC;IACtC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CA+G7C"}
+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAWpE;;GAEG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;IACnC,sCAAsC;IACtC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAiH7C"}
@@ -1 +1 @@
- {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,CAQ/D,CAAC"}
+ {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,CAgB/D,CAAC"}
@@ -1 +1 @@
- {"version":3,"file":"together.d.ts","sourceRoot":"","sources":["../../../src/providers/together.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,qBAAqB,6BAA6B,CAAC;AAEhE;;GAEG;AACH,KAAK,UAAU,GAAG,MAAM,CAAC;AAEzB;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,eAAe,CAAC,UAAU,CA6CpE,CAAC"}
+ {"version":3,"file":"together.d.ts","sourceRoot":"","sources":["../../../src/providers/together.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,qBAAqB,6BAA6B,CAAC;AAEhE;;GAEG;AACH,KAAK,UAAU,GAAG,MAAM,CAAC;AAEzB;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,eAAe,CAAC,UAAU,CA+CpE,CAAC"}
@@ -1 +1 @@
- {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG;IACzC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAEtC;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAUzG"}
+ {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG;IACzC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAItC;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAuBzG"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@huggingface/inference",
- "version": "3.0.0",
+ "version": "3.0.1",
  "packageManager": "pnpm@8.10.5",
  "license": "MIT",
  "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
@@ -39,7 +39,7 @@
  },
  "type": "module",
  "dependencies": {
- "@huggingface/tasks": "^0.13.16"
+ "@huggingface/tasks": "^0.13.17"
  },
  "devDependencies": {
  "@types/node": "18.13.0"
package/src/lib/makeRequestOptions.ts CHANGED
@@ -32,7 +32,8 @@ export async function makeRequestOptions(
  chatCompletion?: boolean;
  }
  ): Promise<{ url: string; info: RequestInit }> {
- const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...otherArgs } = args;
+ const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
+ let otherArgs = remainingArgs;
  const provider = maybeProvider ?? "hf-inference";

  const { forceTask, includeCredentials, taskHint, wait_for_model, use_cache, dont_load_model, chatCompletion } =
@@ -120,12 +121,13 @@ export async function makeRequestOptions(
  credentials = "include";
  }

- /*
+ /**
+ * Replicate models wrap all inputs inside { input: ... }
  * Versioned Replicate models in the format `owner/model:version` expect the version in the body
  */
- if (provider === "replicate" && model.includes(":")) {
- const version = model.split(":")[1];
- (otherArgs as typeof otherArgs & { version: string }).version = version;
+ if (provider === "replicate") {
+ const version = model.includes(":") ? model.split(":")[1] : undefined;
+ (otherArgs as unknown) = { input: otherArgs, version };
  }

  const info: RequestInit = {
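A short sketch of what the new Replicate branch produces (illustrative values only; the versioned model id is taken from the Replicate mapping above):

```ts
// Sketch of the wrapping logic above, not code shipped in the package.
const model = "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637";
const otherArgs = { prompt: "An astronaut riding a horse" }; // textToImage maps `inputs` to `prompt` for Replicate

const version = model.includes(":") ? model.split(":")[1] : undefined;
const body = { input: otherArgs, version };
// => { input: { prompt: "An astronaut riding a horse" }, version: "5599ed30…" }
// In 3.0.0, `version` was added next to the other args and nothing was wrapped in `input`.
```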
package/src/providers/fal-ai.ts CHANGED
@@ -8,6 +8,14 @@ export const FAL_AI_SUPPORTED_MODEL_IDS: ProviderMapping<FalAiId> = {
  "text-to-image": {
  "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
  "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
+ "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
+ "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
+ "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
+ "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
+ "Warlord-K/Sana-1024": "fal-ai/sana",
+ "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
+ "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
+ "Kwai-Kolors/Kolors": "fal-ai/kolors",
  },
  "automatic-speech-recognition": {
  "openai/whisper-large-v3": "fal-ai/whisper",
package/src/providers/replicate.ts CHANGED
@@ -10,7 +10,7 @@ export const REPLICATE_SUPPORTED_MODEL_IDS: ProviderMapping<ReplicateId> = {
  "ByteDance/SDXL-Lightning":
  "bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637",
  },
- // "text-to-speech": {
- // "SWivid/F5-TTS": "x-lance/f5-tts:87faf6dd7a692dd82043f662e76369cab126a2cf1937e25a9d41e0b834fd230e"
- // },
+ "text-to-speech": {
+ "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26",
+ },
  };
package/src/providers/together.ts CHANGED
@@ -21,6 +21,8 @@ export const TOGETHER_SUPPORTED_MODEL_IDS: ProviderMapping<TogetherId> = {
  },
  conversational: {
  "databricks/dbrx-instruct": "databricks/dbrx-instruct",
+ "deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
+ "deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
  "deepseek-ai/deepseek-llm-67b-chat": "deepseek-ai/deepseek-llm-67b-chat",
  "google/gemma-2-9b-it": "google/gemma-2-9b-it",
  "google/gemma-2b-it": "google/gemma-2-27b-it",
package/src/tasks/audio/textToSpeech.ts CHANGED
@@ -10,16 +10,31 @@ export type TextToSpeechArgs = BaseArgs & {
  };

  export type TextToSpeechOutput = Blob;
-
+ interface OutputUrlTextToSpeechGeneration {
+ output: string | string[];
+ }
  /**
  * This task synthesize an audio of a voice pronouncing a given text.
  * Recommended model: espnet/kan-bayashi_ljspeech_vits
  */
  export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
- const res = await request<TextToSpeechOutput>(args, {
+ const res = await request<TextToSpeechOutput | OutputUrlTextToSpeechGeneration>(args, {
  ...options,
  taskHint: "text-to-speech",
  });
+ if (res && typeof res === "object") {
+ if ("output" in res) {
+ if (typeof res.output === "string") {
+ const urlResponse = await fetch(res.output);
+ const blob = await urlResponse.blob();
+ return blob;
+ } else if (Array.isArray(res.output)) {
+ const urlResponse = await fetch(res.output[0]);
+ const blob = await urlResponse.blob();
+ return blob;
+ }
+ }
+ }
  const isValidOutput = res && res instanceof Blob;
  if (!isValidOutput) {
  throw new InferenceOutputError("Expected Blob");
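A usage sketch for the new Replicate text-to-speech path (the access token is a placeholder; the model id comes from the Replicate mapping added in this version):

```ts
import { textToSpeech } from "@huggingface/inference";

// With a Replicate-backed model, the provider may respond with
// { output: "<audio url>" }; textToSpeech now fetches that URL and
// returns the audio as a Blob, same as the hf-inference path.
const audio: Blob = await textToSpeech({
  accessToken: "hf_...", // placeholder
  provider: "replicate",
  model: "OuteAI/OuteTTS-0.3-500M",
  inputs: "Hello world!",
});
```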
package/src/tasks/cv/textToImage.ts CHANGED
@@ -59,10 +59,10 @@ interface OutputUrlImageGeneration {
  export async function textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageOutput> {
  if (args.provider === "together" || args.provider === "fal-ai") {
  args.prompt = args.inputs;
- args.inputs = "";
+ delete (args as unknown as { inputs: unknown }).inputs;
  args.response_format = "base64";
  } else if (args.provider === "replicate") {
- args.input = { prompt: args.inputs };
+ args.prompt = args.inputs;
  delete (args as unknown as { inputs: unknown }).inputs;
  }
  const res = await request<TextToImageOutput | Base64ImageGeneration | OutputUrlImageGeneration>(args, {
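Finally, a client-level sketch of the Replicate text-to-image path affected by the change above (the token and prompt are placeholders; the model id comes from the Replicate mapping in this version):

```ts
import { HfInference } from "@huggingface/inference";

const client = new HfInference("hf_..."); // placeholder token

// For Replicate, `inputs` now becomes a top-level `prompt`, and
// makeRequestOptions wraps the payload as { input: { prompt }, version }.
const image: Blob = await client.textToImage({
  provider: "replicate",
  model: "black-forest-labs/FLUX.1-schnell",
  inputs: "a watercolor fox in a pine forest",
});
```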