@huggingface/inference 4.0.0 → 4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.0";
1
+ export declare const PACKAGE_VERSION = "4.0.2";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -2,5 +2,5 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
4
4
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
5
- exports.PACKAGE_VERSION = "4.0.0";
5
+ exports.PACKAGE_VERSION = "4.0.2";
6
6
  exports.PACKAGE_NAME = "@huggingface/inference";
@@ -4,6 +4,8 @@ import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
+ accessToken?: string;
8
+ directRequest?: boolean;
7
9
  } & Record<string, unknown>;
8
- export declare function getInferenceSnippets(model: ModelDataMinimal, accessToken: string, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
10
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
9
11
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IAAE,SAAS,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAmUzG,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -76,9 +76,13 @@ const HF_JS_METHODS = {
76
76
  "text-to-speech": "textToSpeech",
77
77
  translation: "translation",
78
78
  };
79
+ // Placeholders to replace with env variable in snippets
80
+ // little hack to support both direct requests and routing => routed requests should start with "hf_"
81
+ const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
82
+ const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
79
83
  // Snippet generators
80
84
  const snippetGenerator = (templateName, inputPreparationFn) => {
81
- return (model, accessToken, provider, inferenceProviderMapping, opts) => {
85
+ return (model, provider, inferenceProviderMapping, opts) => {
82
86
  const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
83
87
  /// Hacky: hard-code conversational templates here
84
88
  let task = model.pipeline_tag;
@@ -97,10 +101,14 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
97
101
  console.error(`Failed to get provider helper for ${provider} (${task})`, e);
98
102
  return [];
99
103
  }
104
+ const placeholder = opts?.directRequest
105
+ ? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
106
+ : ACCESS_TOKEN_ROUTING_PLACEHOLDER;
107
+ const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
100
108
  /// Prepare inputs + make request
101
109
  const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: (0, tasks_1.getModelInputSnippet)(model) };
102
110
  const request = (0, makeRequestOptions_js_1.makeRequestOptionsFromResolvedModel)(providerModelId, providerHelper, {
103
- accessToken,
111
+ accessToken: accessTokenOrPlaceholder,
104
112
  provider,
105
113
  ...inputs,
106
114
  }, inferenceProviderMapping, {
@@ -121,7 +129,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
121
129
  }
122
130
  /// Prepare template injection data
123
131
  const params = {
124
- accessToken,
132
+ accessToken: accessTokenOrPlaceholder,
125
133
  authorizationHeader: request.info.headers?.Authorization,
126
134
  baseUrl: removeSuffix(request.url, "/chat/completions"),
127
135
  fullUrl: request.url,
@@ -185,6 +193,10 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
185
193
  });
186
194
  snippet = `${importSection}\n\n${snippet}`;
187
195
  }
196
+ /// Replace access token placeholder
197
+ if (snippet.includes(placeholder)) {
198
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
199
+ }
188
200
  /// Snippet is ready!
189
201
  return { language, client: client, content: snippet };
190
202
  })
@@ -247,9 +259,9 @@ const snippets = {
247
259
  "zero-shot-classification": snippetGenerator("zeroShotClassification"),
248
260
  "zero-shot-image-classification": snippetGenerator("zeroShotImageClassification"),
249
261
  };
250
- function getInferenceSnippets(model, accessToken, provider, inferenceProviderMapping, opts) {
262
+ function getInferenceSnippets(model, provider, inferenceProviderMapping, opts) {
251
263
  return model.pipeline_tag && model.pipeline_tag in snippets
252
- ? snippets[model.pipeline_tag]?.(model, accessToken, provider, inferenceProviderMapping, opts) ?? []
264
+ ? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
253
265
  : [];
254
266
  }
255
267
  // String manipulation helpers
@@ -310,3 +322,42 @@ function indentString(str) {
310
322
  function removeSuffix(str, suffix) {
311
323
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
312
324
  }
325
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
326
+ // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
327
+ // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
328
+ // Determine if HF_TOKEN or specific provider token should be used
329
+ const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
330
+ (!directRequest && // if explicit directRequest => use provider-specific token
331
+ (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
332
+ snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
333
+ const accessTokenEnvVar = useHfToken
334
+ ? "HF_TOKEN" // e.g. routed request or hf-inference
335
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
336
+ // Replace the placeholder with the env variable
337
+ if (language === "sh") {
338
+ snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
339
+ );
340
+ }
341
+ else if (language === "python") {
342
+ snippet = "import os\n" + snippet;
343
+ snippet = snippet.replace(`"${placeholder}"`, `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
344
+ );
345
+ snippet = snippet.replace(`"Bearer ${placeholder}"`, `f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
346
+ );
347
+ snippet = snippet.replace(`"Key ${placeholder}"`, `f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}"
348
+ );
349
+ snippet = snippet.replace(`"X-Key ${placeholder}"`, `f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}"
350
+ );
351
+ }
352
+ else if (language === "js") {
353
+ snippet = snippet.replace(`"${placeholder}"`, `process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
354
+ );
355
+ snippet = snippet.replace(`Authorization: "Bearer ${placeholder}",`, `Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
356
+ );
357
+ snippet = snippet.replace(`Authorization: "Key ${placeholder}",`, `Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`,
358
+ );
359
+ snippet = snippet.replace(`Authorization: "X-Key ${placeholder}",`, `Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_API_KEY}`,
360
+ );
361
+ }
362
+ return snippet;
363
+ }
@@ -35,8 +35,8 @@ async function innerRequest(args, providerHelper, options) {
35
35
  body: requestArgsToJson(args),
36
36
  }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
37
37
  }
38
- if (typeof output.error === "string" || typeof output.detail === "string") {
39
- throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail}`, {
38
+ if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
39
+ throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, {
40
40
  url,
41
41
  method: info.method ?? "GET",
42
42
  headers: info.headers,
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.0";
1
+ export declare const PACKAGE_VERSION = "4.0.2";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.0";
2
+ export const PACKAGE_VERSION = "4.0.2";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -4,6 +4,8 @@ import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
+ accessToken?: string;
8
+ directRequest?: boolean;
7
9
  } & Record<string, unknown>;
8
- export declare function getInferenceSnippets(model: ModelDataMinimal, accessToken: string, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
10
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
9
11
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IAAE,SAAS,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAmUzG,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -73,9 +73,13 @@ const HF_JS_METHODS = {
73
73
  "text-to-speech": "textToSpeech",
74
74
  translation: "translation",
75
75
  };
76
+ // Placeholders to replace with env variable in snippets
77
+ // little hack to support both direct requests and routing => routed requests should start with "hf_"
78
+ const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
79
+ const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
76
80
  // Snippet generators
77
81
  const snippetGenerator = (templateName, inputPreparationFn) => {
78
- return (model, accessToken, provider, inferenceProviderMapping, opts) => {
82
+ return (model, provider, inferenceProviderMapping, opts) => {
79
83
  const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
80
84
  /// Hacky: hard-code conversational templates here
81
85
  let task = model.pipeline_tag;
@@ -94,10 +98,14 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
94
98
  console.error(`Failed to get provider helper for ${provider} (${task})`, e);
95
99
  return [];
96
100
  }
101
+ const placeholder = opts?.directRequest
102
+ ? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
103
+ : ACCESS_TOKEN_ROUTING_PLACEHOLDER;
104
+ const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
97
105
  /// Prepare inputs + make request
98
106
  const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
99
107
  const request = makeRequestOptionsFromResolvedModel(providerModelId, providerHelper, {
100
- accessToken,
108
+ accessToken: accessTokenOrPlaceholder,
101
109
  provider,
102
110
  ...inputs,
103
111
  }, inferenceProviderMapping, {
@@ -118,7 +126,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
118
126
  }
119
127
  /// Prepare template injection data
120
128
  const params = {
121
- accessToken,
129
+ accessToken: accessTokenOrPlaceholder,
122
130
  authorizationHeader: request.info.headers?.Authorization,
123
131
  baseUrl: removeSuffix(request.url, "/chat/completions"),
124
132
  fullUrl: request.url,
@@ -182,6 +190,10 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
182
190
  });
183
191
  snippet = `${importSection}\n\n${snippet}`;
184
192
  }
193
+ /// Replace access token placeholder
194
+ if (snippet.includes(placeholder)) {
195
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
196
+ }
185
197
  /// Snippet is ready!
186
198
  return { language, client: client, content: snippet };
187
199
  })
@@ -244,9 +256,9 @@ const snippets = {
244
256
  "zero-shot-classification": snippetGenerator("zeroShotClassification"),
245
257
  "zero-shot-image-classification": snippetGenerator("zeroShotImageClassification"),
246
258
  };
247
- export function getInferenceSnippets(model, accessToken, provider, inferenceProviderMapping, opts) {
259
+ export function getInferenceSnippets(model, provider, inferenceProviderMapping, opts) {
248
260
  return model.pipeline_tag && model.pipeline_tag in snippets
249
- ? snippets[model.pipeline_tag]?.(model, accessToken, provider, inferenceProviderMapping, opts) ?? []
261
+ ? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
250
262
  : [];
251
263
  }
252
264
  // String manipulation helpers
@@ -307,3 +319,42 @@ function indentString(str) {
307
319
  function removeSuffix(str, suffix) {
308
320
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
309
321
  }
322
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
323
+ // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
324
+ // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
325
+ // Determine if HF_TOKEN or specific provider token should be used
326
+ const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
327
+ (!directRequest && // if explicit directRequest => use provider-specific token
328
+ (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
329
+ snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
330
+ const accessTokenEnvVar = useHfToken
331
+ ? "HF_TOKEN" // e.g. routed request or hf-inference
332
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
333
+ // Replace the placeholder with the env variable
334
+ if (language === "sh") {
335
+ snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
336
+ );
337
+ }
338
+ else if (language === "python") {
339
+ snippet = "import os\n" + snippet;
340
+ snippet = snippet.replace(`"${placeholder}"`, `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
341
+ );
342
+ snippet = snippet.replace(`"Bearer ${placeholder}"`, `f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
343
+ );
344
+ snippet = snippet.replace(`"Key ${placeholder}"`, `f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}"
345
+ );
346
+ snippet = snippet.replace(`"X-Key ${placeholder}"`, `f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}"
347
+ );
348
+ }
349
+ else if (language === "js") {
350
+ snippet = snippet.replace(`"${placeholder}"`, `process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
351
+ );
352
+ snippet = snippet.replace(`Authorization: "Bearer ${placeholder}",`, `Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
353
+ );
354
+ snippet = snippet.replace(`Authorization: "Key ${placeholder}",`, `Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`,
355
+ );
356
+ snippet = snippet.replace(`Authorization: "X-Key ${placeholder}",`, `Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_API_KEY}`,
357
+ );
358
+ }
359
+ return snippet;
360
+ }
@@ -31,8 +31,8 @@ export async function innerRequest(args, providerHelper, options) {
31
31
  body: requestArgsToJson(args),
32
32
  }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
33
33
  }
34
- if (typeof output.error === "string" || typeof output.detail === "string") {
35
- throw new InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail}`, {
34
+ if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
35
+ throw new InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, {
36
36
  url,
37
37
  method: info.method ?? "GET",
38
38
  headers: info.headers,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/inference",
3
- "version": "4.0.0",
3
+ "version": "4.0.2",
4
4
  "license": "MIT",
5
5
  "author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
6
6
  "description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
package/src/package.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.0";
2
+ export const PACKAGE_VERSION = "4.0.2";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -14,7 +14,12 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.j
14
14
  import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
15
15
  import { templates } from "./templates.exported.js";
16
16
 
17
- export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string } & Record<string, unknown>;
17
+ export type InferenceSnippetOptions = {
18
+ streaming?: boolean;
19
+ billTo?: string;
20
+ accessToken?: string;
21
+ directRequest?: boolean;
22
+ } & Record<string, unknown>;
18
23
 
19
24
  const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
20
25
  const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const;
@@ -121,11 +126,15 @@ const HF_JS_METHODS: Partial<Record<WidgetType, string>> = {
121
126
  translation: "translation",
122
127
  };
123
128
 
129
+ // Placeholders to replace with env variable in snippets
130
+ // little hack to support both direct requests and routing => routed requests should start with "hf_"
131
+ const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
132
+ const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
133
+
124
134
  // Snippet generators
125
135
  const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
126
136
  return (
127
137
  model: ModelDataMinimal,
128
- accessToken: string,
129
138
  provider: InferenceProviderOrPolicy,
130
139
  inferenceProviderMapping?: InferenceProviderModelMapping,
131
140
  opts?: InferenceSnippetOptions
@@ -149,13 +158,19 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
149
158
  console.error(`Failed to get provider helper for ${provider} (${task})`, e);
150
159
  return [];
151
160
  }
161
+
162
+ const placeholder = opts?.directRequest
163
+ ? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
164
+ : ACCESS_TOKEN_ROUTING_PLACEHOLDER;
165
+ const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
166
+
152
167
  /// Prepare inputs + make request
153
168
  const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
154
169
  const request = makeRequestOptionsFromResolvedModel(
155
170
  providerModelId,
156
171
  providerHelper,
157
172
  {
158
- accessToken,
173
+ accessToken: accessTokenOrPlaceholder,
159
174
  provider,
160
175
  ...inputs,
161
176
  } as RequestArgs,
@@ -180,7 +195,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
180
195
 
181
196
  /// Prepare template injection data
182
197
  const params: TemplateParams = {
183
- accessToken,
198
+ accessToken: accessTokenOrPlaceholder,
184
199
  authorizationHeader: (request.info.headers as Record<string, string>)?.Authorization,
185
200
  baseUrl: removeSuffix(request.url, "/chat/completions"),
186
201
  fullUrl: request.url,
@@ -248,6 +263,11 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
248
263
  snippet = `${importSection}\n\n${snippet}`;
249
264
  }
250
265
 
266
+ /// Replace access token placeholder
267
+ if (snippet.includes(placeholder)) {
268
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
269
+ }
270
+
251
271
  /// Snippet is ready!
252
272
  return { language, client: client as string, content: snippet };
253
273
  })
@@ -299,7 +319,6 @@ const snippets: Partial<
299
319
  PipelineType,
300
320
  (
301
321
  model: ModelDataMinimal,
302
- accessToken: string,
303
322
  provider: InferenceProviderOrPolicy,
304
323
  inferenceProviderMapping?: InferenceProviderModelMapping,
305
324
  opts?: InferenceSnippetOptions
@@ -339,13 +358,12 @@ const snippets: Partial<
339
358
 
340
359
  export function getInferenceSnippets(
341
360
  model: ModelDataMinimal,
342
- accessToken: string,
343
361
  provider: InferenceProviderOrPolicy,
344
362
  inferenceProviderMapping?: InferenceProviderModelMapping,
345
363
  opts?: Record<string, unknown>
346
364
  ): InferenceSnippet[] {
347
365
  return model.pipeline_tag && model.pipeline_tag in snippets
348
- ? snippets[model.pipeline_tag]?.(model, accessToken, provider, inferenceProviderMapping, opts) ?? []
366
+ ? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
349
367
  : [];
350
368
  }
351
369
 
@@ -420,3 +438,69 @@ function indentString(str: string): string {
420
438
  function removeSuffix(str: string, suffix: string) {
421
439
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
422
440
  }
441
+
442
+ function replaceAccessTokenPlaceholder(
443
+ directRequest: boolean | undefined,
444
+ placeholder: string,
445
+ snippet: string,
446
+ language: InferenceSnippetLanguage,
447
+ provider: InferenceProviderOrPolicy
448
+ ): string {
449
+ // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
450
+ // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
451
+
452
+ // Determine if HF_TOKEN or specific provider token should be used
453
+ const useHfToken =
454
+ provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
455
+ (!directRequest && // if explicit directRequest => use provider-specific token
456
+ (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
457
+ snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
458
+
459
+ const accessTokenEnvVar = useHfToken
460
+ ? "HF_TOKEN" // e.g. routed request or hf-inference
461
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
462
+
463
+ // Replace the placeholder with the env variable
464
+ if (language === "sh") {
465
+ snippet = snippet.replace(
466
+ `'Authorization: Bearer ${placeholder}'`,
467
+ `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
468
+ );
469
+ } else if (language === "python") {
470
+ snippet = "import os\n" + snippet;
471
+ snippet = snippet.replace(
472
+ `"${placeholder}"`,
473
+ `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
474
+ );
475
+ snippet = snippet.replace(
476
+ `"Bearer ${placeholder}"`,
477
+ `f"Bearer {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
478
+ );
479
+ snippet = snippet.replace(
480
+ `"Key ${placeholder}"`,
481
+ `f"Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Key {os.environ['FAL_AI_API_KEY']}"
482
+ );
483
+ snippet = snippet.replace(
484
+ `"X-Key ${placeholder}"`,
485
+ `f"X-Key {os.environ['${accessTokenEnvVar}']}"` // e.g. f"X-Key {os.environ['BLACK_FOREST_LABS_API_KEY']}"
486
+ );
487
+ } else if (language === "js") {
488
+ snippet = snippet.replace(
489
+ `"${placeholder}"`,
490
+ `process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
491
+ );
492
+ snippet = snippet.replace(
493
+ `Authorization: "Bearer ${placeholder}",`,
494
+ `Authorization: \`Bearer $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
495
+ );
496
+ snippet = snippet.replace(
497
+ `Authorization: "Key ${placeholder}",`,
498
+ `Authorization: \`Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Key ${process.env.FAL_AI_API_KEY}`,
499
+ );
500
+ snippet = snippet.replace(
501
+ `Authorization: "X-Key ${placeholder}",`,
502
+ `Authorization: \`X-Key $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `X-Key ${process.env.BLACK_FOREST_LABS_API_KEY}`,
503
+ );
504
+ }
505
+ return snippet;
506
+ }
@@ -65,9 +65,9 @@ export async function innerRequest<T>(
65
65
  { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }
66
66
  );
67
67
  }
68
- if (typeof output.error === "string" || typeof output.detail === "string") {
68
+ if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
69
69
  throw new InferenceClientProviderApiError(
70
- `Failed to perform inference: ${output.error ?? output.detail}`,
70
+ `Failed to perform inference: ${output.error ?? output.detail ?? output.message}`,
71
71
  {
72
72
  url,
73
73
  method: info.method ?? "GET",