@huggingface/inference 4.0.0 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/package.d.ts +1 -1
- package/dist/commonjs/package.js +1 -1
- package/dist/commonjs/snippets/getInferenceSnippets.d.ts +3 -1
- package/dist/commonjs/snippets/getInferenceSnippets.d.ts.map +1 -1
- package/dist/commonjs/snippets/getInferenceSnippets.js +56 -5
- package/dist/commonjs/utils/request.js +2 -2
- package/dist/esm/package.d.ts +1 -1
- package/dist/esm/package.js +1 -1
- package/dist/esm/snippets/getInferenceSnippets.d.ts +3 -1
- package/dist/esm/snippets/getInferenceSnippets.d.ts.map +1 -1
- package/dist/esm/snippets/getInferenceSnippets.js +56 -5
- package/dist/esm/utils/request.js +2 -2
- package/package.json +1 -1
- package/src/package.ts +1 -1
- package/src/snippets/getInferenceSnippets.ts +91 -7
- package/src/utils/request.ts +2 -2
package/dist/commonjs/package.js
CHANGED
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
|
|
4
4
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
5
|
-
exports.PACKAGE_VERSION = "4.0.0";
|
|
5
|
+
exports.PACKAGE_VERSION = "4.0.2";
|
|
6
6
|
exports.PACKAGE_NAME = "@huggingface/inference";
|
|
@@ -4,6 +4,8 @@ import type { InferenceProviderOrPolicy } from "../types.js";
|
|
|
4
4
|
export type InferenceSnippetOptions = {
|
|
5
5
|
streaming?: boolean;
|
|
6
6
|
billTo?: string;
|
|
7
|
+
accessToken?: string;
|
|
8
|
+
directRequest?: boolean;
|
|
7
9
|
} & Record<string, unknown>;
|
|
8
|
-
export declare function getInferenceSnippets(model: ModelDataMinimal,
|
|
10
|
+
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
|
|
9
11
|
//# sourceMappingURL=getInferenceSnippets.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;
|
|
1
|
+
{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
|
|
@@ -76,9 +76,13 @@ const HF_JS_METHODS = {
|
|
|
76
76
|
"text-to-speech": "textToSpeech",
|
|
77
77
|
translation: "translation",
|
|
78
78
|
};
|
|
79
|
+
// Placeholders to replace with env variable in snippets
|
|
80
|
+
// little hack to support both direct requests and routing => routed requests should start with "hf_"
|
|
81
|
+
const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
|
|
82
|
+
const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
|
|
79
83
|
// Snippet generators
|
|
80
84
|
const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
81
|
-
return (model,
|
|
85
|
+
return (model, provider, inferenceProviderMapping, opts) => {
|
|
82
86
|
const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
|
|
83
87
|
/// Hacky: hard-code conversational templates here
|
|
84
88
|
let task = model.pipeline_tag;
|
|
@@ -97,10 +101,14 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
97
101
|
console.error(`Failed to get provider helper for ${provider} (${task})`, e);
|
|
98
102
|
return [];
|
|
99
103
|
}
|
|
104
|
+
const placeholder = opts?.directRequest
|
|
105
|
+
? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
|
|
106
|
+
: ACCESS_TOKEN_ROUTING_PLACEHOLDER;
|
|
107
|
+
const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
|
|
100
108
|
/// Prepare inputs + make request
|
|
101
109
|
const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: (0, tasks_1.getModelInputSnippet)(model) };
|
|
102
110
|
const request = (0, makeRequestOptions_js_1.makeRequestOptionsFromResolvedModel)(providerModelId, providerHelper, {
|
|
103
|
-
accessToken,
|
|
111
|
+
accessToken: accessTokenOrPlaceholder,
|
|
104
112
|
provider,
|
|
105
113
|
...inputs,
|
|
106
114
|
}, inferenceProviderMapping, {
|
|
@@ -121,7 +129,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
121
129
|
}
|
|
122
130
|
/// Prepare template injection data
|
|
123
131
|
const params = {
|
|
124
|
-
accessToken,
|
|
132
|
+
accessToken: accessTokenOrPlaceholder,
|
|
125
133
|
authorizationHeader: request.info.headers?.Authorization,
|
|
126
134
|
baseUrl: removeSuffix(request.url, "/chat/completions"),
|
|
127
135
|
fullUrl: request.url,
|
|
@@ -185,6 +193,10 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
185
193
|
});
|
|
186
194
|
snippet = `${importSection}\n\n${snippet}`;
|
|
187
195
|
}
|
|
196
|
+
/// Replace access token placeholder
|
|
197
|
+
if (snippet.includes(placeholder)) {
|
|
198
|
+
snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
|
|
199
|
+
}
|
|
188
200
|
/// Snippet is ready!
|
|
189
201
|
return { language, client: client, content: snippet };
|
|
190
202
|
})
|
|
@@ -247,9 +259,9 @@ const snippets = {
|
|
|
247
259
|
"zero-shot-classification": snippetGenerator("zeroShotClassification"),
|
|
248
260
|
"zero-shot-image-classification": snippetGenerator("zeroShotImageClassification"),
|
|
249
261
|
};
|
|
250
|
-
function getInferenceSnippets(model,
|
|
262
|
+
function getInferenceSnippets(model, provider, inferenceProviderMapping, opts) {
|
|
251
263
|
return model.pipeline_tag && model.pipeline_tag in snippets
|
|
252
|
-
? snippets[model.pipeline_tag]?.(model,
|
|
264
|
+
? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
|
|
253
265
|
: [];
|
|
254
266
|
}
|
|
255
267
|
// String manipulation helpers
|
|
@@ -310,3 +322,42 @@ function indentString(str) {
|
|
|
310
322
|
function removeSuffix(str, suffix) {
|
|
311
323
|
return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
|
|
312
324
|
}
|
|
325
|
+
/**
 * Rewrite the access-token placeholder in a rendered snippet so that the
 * snippet reads the token from an environment variable instead.
 *
 * If "opts.accessToken" is not set, snippets are generated with a placeholder;
 * once rendered, this function swaps that placeholder for language-specific
 * code that fetches the token from the environment.
 *
 * @param directRequest when truthy, a provider-specific key is used (except for "hf-inference")
 * @param placeholder the placeholder string that was injected as the access token
 * @param snippet the rendered snippet text
 * @param language "sh", "python" and "js" are rewritten; any other value returns the snippet unchanged
 * @param provider provider id, used to derive the `<PROVIDER>_API_KEY` variable name
 * @returns the snippet with the first occurrence of each known placeholder pattern replaced
 */
function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
    // Determine if HF_TOKEN or a provider-specific token should be used
    const useHfToken = provider === "hf-inference" || // hf-inference provider => use $HF_TOKEN
        (!directRequest && // if explicit directRequest => use provider-specific token
            (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
                snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
    // Replace EVERY hyphen: a plain-string pattern would only replace the first one and
    // turn "black-forest-labs" into the invalid identifier "BLACK_FOREST-LABS_API_KEY".
    const accessTokenEnvVar = useHfToken
        ? "HF_TOKEN" // e.g. routed request or hf-inference
        : provider.toUpperCase().replace(/-/g, "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
    // Authorization schemes that may prefix the token in rendered headers
    const authSchemes = ["Bearer", "Key", "X-Key"];
    // Replace the placeholder with the env variable
    if (language === "sh") {
        // Single quotes become double quotes so the shell expands the variable
        snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
        );
    }
    else if (language === "python") {
        // The replacements below reference os.environ, so the import is prepended first
        snippet = "import os\n" + snippet;
        snippet = snippet.replace(`"${placeholder}"`, `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
        );
        for (const scheme of authSchemes) {
            snippet = snippet.replace(`"${scheme} ${placeholder}"`, `f"${scheme} {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
            );
        }
    }
    else if (language === "js") {
        snippet = snippet.replace(`"${placeholder}"`, `process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
        );
        for (const scheme of authSchemes) {
            snippet = snippet.replace(`Authorization: "${scheme} ${placeholder}",`, `Authorization: \`${scheme} $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
            );
        }
    }
    return snippet;
}
|
|
@@ -35,8 +35,8 @@ async function innerRequest(args, providerHelper, options) {
|
|
|
35
35
|
body: requestArgsToJson(args),
|
|
36
36
|
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
|
|
37
37
|
}
|
|
38
|
-
if (typeof output.error === "string" || typeof output.detail === "string") {
|
|
39
|
-
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail}`, {
|
|
38
|
+
if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
|
|
39
|
+
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, {
|
|
40
40
|
url,
|
|
41
41
|
method: info.method ?? "GET",
|
|
42
42
|
headers: info.headers,
|
package/dist/esm/package.d.ts
CHANGED
package/dist/esm/package.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
2
|
-
export const PACKAGE_VERSION = "4.0.0";
|
|
2
|
+
export const PACKAGE_VERSION = "4.0.2";
|
|
3
3
|
export const PACKAGE_NAME = "@huggingface/inference";
|
|
@@ -4,6 +4,8 @@ import type { InferenceProviderOrPolicy } from "../types.js";
|
|
|
4
4
|
export type InferenceSnippetOptions = {
|
|
5
5
|
streaming?: boolean;
|
|
6
6
|
billTo?: string;
|
|
7
|
+
accessToken?: string;
|
|
8
|
+
directRequest?: boolean;
|
|
7
9
|
} & Record<string, unknown>;
|
|
8
|
-
export declare function getInferenceSnippets(model: ModelDataMinimal,
|
|
10
|
+
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
|
|
9
11
|
//# sourceMappingURL=getInferenceSnippets.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;
|
|
1
|
+
{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
|
|
@@ -73,9 +73,13 @@ const HF_JS_METHODS = {
|
|
|
73
73
|
"text-to-speech": "textToSpeech",
|
|
74
74
|
translation: "translation",
|
|
75
75
|
};
|
|
76
|
+
// Placeholders to replace with env variable in snippets
|
|
77
|
+
// little hack to support both direct requests and routing => routed requests should start with "hf_"
|
|
78
|
+
const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
|
|
79
|
+
const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
|
|
76
80
|
// Snippet generators
|
|
77
81
|
const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
78
|
-
return (model,
|
|
82
|
+
return (model, provider, inferenceProviderMapping, opts) => {
|
|
79
83
|
const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
|
|
80
84
|
/// Hacky: hard-code conversational templates here
|
|
81
85
|
let task = model.pipeline_tag;
|
|
@@ -94,10 +98,14 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
94
98
|
console.error(`Failed to get provider helper for ${provider} (${task})`, e);
|
|
95
99
|
return [];
|
|
96
100
|
}
|
|
101
|
+
const placeholder = opts?.directRequest
|
|
102
|
+
? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
|
|
103
|
+
: ACCESS_TOKEN_ROUTING_PLACEHOLDER;
|
|
104
|
+
const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
|
|
97
105
|
/// Prepare inputs + make request
|
|
98
106
|
const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
|
|
99
107
|
const request = makeRequestOptionsFromResolvedModel(providerModelId, providerHelper, {
|
|
100
|
-
accessToken,
|
|
108
|
+
accessToken: accessTokenOrPlaceholder,
|
|
101
109
|
provider,
|
|
102
110
|
...inputs,
|
|
103
111
|
}, inferenceProviderMapping, {
|
|
@@ -118,7 +126,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
118
126
|
}
|
|
119
127
|
/// Prepare template injection data
|
|
120
128
|
const params = {
|
|
121
|
-
accessToken,
|
|
129
|
+
accessToken: accessTokenOrPlaceholder,
|
|
122
130
|
authorizationHeader: request.info.headers?.Authorization,
|
|
123
131
|
baseUrl: removeSuffix(request.url, "/chat/completions"),
|
|
124
132
|
fullUrl: request.url,
|
|
@@ -182,6 +190,10 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
|
|
|
182
190
|
});
|
|
183
191
|
snippet = `${importSection}\n\n${snippet}`;
|
|
184
192
|
}
|
|
193
|
+
/// Replace access token placeholder
|
|
194
|
+
if (snippet.includes(placeholder)) {
|
|
195
|
+
snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
|
|
196
|
+
}
|
|
185
197
|
/// Snippet is ready!
|
|
186
198
|
return { language, client: client, content: snippet };
|
|
187
199
|
})
|
|
@@ -244,9 +256,9 @@ const snippets = {
|
|
|
244
256
|
"zero-shot-classification": snippetGenerator("zeroShotClassification"),
|
|
245
257
|
"zero-shot-image-classification": snippetGenerator("zeroShotImageClassification"),
|
|
246
258
|
};
|
|
247
|
-
export function getInferenceSnippets(model,
|
|
259
|
+
export function getInferenceSnippets(model, provider, inferenceProviderMapping, opts) {
|
|
248
260
|
return model.pipeline_tag && model.pipeline_tag in snippets
|
|
249
|
-
? snippets[model.pipeline_tag]?.(model,
|
|
261
|
+
? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
|
|
250
262
|
: [];
|
|
251
263
|
}
|
|
252
264
|
// String manipulation helpers
|
|
@@ -307,3 +319,42 @@ function indentString(str) {
|
|
|
307
319
|
function removeSuffix(str, suffix) {
|
|
308
320
|
return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
|
|
309
321
|
}
|
|
322
|
+
/**
 * Rewrite the access-token placeholder in a rendered snippet so that the
 * snippet reads the token from an environment variable instead.
 *
 * If "opts.accessToken" is not set, snippets are generated with a placeholder;
 * once rendered, this function swaps that placeholder for language-specific
 * code that fetches the token from the environment.
 *
 * @param directRequest when truthy, a provider-specific key is used (except for "hf-inference")
 * @param placeholder the placeholder string that was injected as the access token
 * @param snippet the rendered snippet text
 * @param language "sh", "python" and "js" are rewritten; any other value returns the snippet unchanged
 * @param provider provider id, used to derive the `<PROVIDER>_API_KEY` variable name
 * @returns the snippet with the first occurrence of each known placeholder pattern replaced
 */
function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
    // Determine if HF_TOKEN or a provider-specific token should be used
    const useHfToken = provider === "hf-inference" || // hf-inference provider => use $HF_TOKEN
        (!directRequest && // if explicit directRequest => use provider-specific token
            (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
                snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
    // Replace EVERY hyphen: a plain-string pattern would only replace the first one and
    // turn "black-forest-labs" into the invalid identifier "BLACK_FOREST-LABS_API_KEY".
    const accessTokenEnvVar = useHfToken
        ? "HF_TOKEN" // e.g. routed request or hf-inference
        : provider.toUpperCase().replace(/-/g, "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
    // Authorization schemes that may prefix the token in rendered headers
    const authSchemes = ["Bearer", "Key", "X-Key"];
    // Replace the placeholder with the env variable
    if (language === "sh") {
        // Single quotes become double quotes so the shell expands the variable
        snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
        );
    }
    else if (language === "python") {
        // The replacements below reference os.environ, so the import is prepended first
        snippet = "import os\n" + snippet;
        snippet = snippet.replace(`"${placeholder}"`, `os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
        );
        for (const scheme of authSchemes) {
            snippet = snippet.replace(`"${scheme} ${placeholder}"`, `f"${scheme} {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
            );
        }
    }
    else if (language === "js") {
        snippet = snippet.replace(`"${placeholder}"`, `process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
        );
        for (const scheme of authSchemes) {
            snippet = snippet.replace(`Authorization: "${scheme} ${placeholder}",`, `Authorization: \`${scheme} $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
            );
        }
    }
    return snippet;
}
|
|
@@ -31,8 +31,8 @@ export async function innerRequest(args, providerHelper, options) {
|
|
|
31
31
|
body: requestArgsToJson(args),
|
|
32
32
|
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
|
|
33
33
|
}
|
|
34
|
-
if (typeof output.error === "string" || typeof output.detail === "string") {
|
|
35
|
-
throw new InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail}`, {
|
|
34
|
+
if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
|
|
35
|
+
throw new InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, {
|
|
36
36
|
url,
|
|
37
37
|
method: info.method ?? "GET",
|
|
38
38
|
headers: info.headers,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/inference",
|
|
3
|
-
"version": "4.0.0",
|
|
3
|
+
"version": "4.0.2",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
|
|
6
6
|
"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
|
package/src/package.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
2
|
-
export const PACKAGE_VERSION = "4.0.0";
|
|
2
|
+
export const PACKAGE_VERSION = "4.0.2";
|
|
3
3
|
export const PACKAGE_NAME = "@huggingface/inference";
|
|
@@ -14,7 +14,12 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.j
|
|
|
14
14
|
import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
|
|
15
15
|
import { templates } from "./templates.exported.js";
|
|
16
16
|
|
|
17
|
-
export type InferenceSnippetOptions = {
|
|
17
|
+
export type InferenceSnippetOptions = {
|
|
18
|
+
streaming?: boolean;
|
|
19
|
+
billTo?: string;
|
|
20
|
+
accessToken?: string;
|
|
21
|
+
directRequest?: boolean;
|
|
22
|
+
} & Record<string, unknown>;
|
|
18
23
|
|
|
19
24
|
const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
|
|
20
25
|
const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const;
|
|
@@ -121,11 +126,15 @@ const HF_JS_METHODS: Partial<Record<WidgetType, string>> = {
|
|
|
121
126
|
translation: "translation",
|
|
122
127
|
};
|
|
123
128
|
|
|
129
|
+
// Placeholders to replace with env variable in snippets
|
|
130
|
+
// little hack to support both direct requests and routing => routed requests should start with "hf_"
|
|
131
|
+
const ACCESS_TOKEN_ROUTING_PLACEHOLDER = "hf_token_placeholder";
|
|
132
|
+
const ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER = "not_hf_token_placeholder";
|
|
133
|
+
|
|
124
134
|
// Snippet generators
|
|
125
135
|
const snippetGenerator = (templateName: string, inputPreparationFn?: InputPreparationFn) => {
|
|
126
136
|
return (
|
|
127
137
|
model: ModelDataMinimal,
|
|
128
|
-
accessToken: string,
|
|
129
138
|
provider: InferenceProviderOrPolicy,
|
|
130
139
|
inferenceProviderMapping?: InferenceProviderModelMapping,
|
|
131
140
|
opts?: InferenceSnippetOptions
|
|
@@ -149,13 +158,19 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
|
|
|
149
158
|
console.error(`Failed to get provider helper for ${provider} (${task})`, e);
|
|
150
159
|
return [];
|
|
151
160
|
}
|
|
161
|
+
|
|
162
|
+
const placeholder = opts?.directRequest
|
|
163
|
+
? ACCESS_TOKEN_DIRECT_REQUEST_PLACEHOLDER
|
|
164
|
+
: ACCESS_TOKEN_ROUTING_PLACEHOLDER;
|
|
165
|
+
const accessTokenOrPlaceholder = opts?.accessToken ?? placeholder;
|
|
166
|
+
|
|
152
167
|
/// Prepare inputs + make request
|
|
153
168
|
const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
|
|
154
169
|
const request = makeRequestOptionsFromResolvedModel(
|
|
155
170
|
providerModelId,
|
|
156
171
|
providerHelper,
|
|
157
172
|
{
|
|
158
|
-
accessToken,
|
|
173
|
+
accessToken: accessTokenOrPlaceholder,
|
|
159
174
|
provider,
|
|
160
175
|
...inputs,
|
|
161
176
|
} as RequestArgs,
|
|
@@ -180,7 +195,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
|
|
|
180
195
|
|
|
181
196
|
/// Prepare template injection data
|
|
182
197
|
const params: TemplateParams = {
|
|
183
|
-
accessToken,
|
|
198
|
+
accessToken: accessTokenOrPlaceholder,
|
|
184
199
|
authorizationHeader: (request.info.headers as Record<string, string>)?.Authorization,
|
|
185
200
|
baseUrl: removeSuffix(request.url, "/chat/completions"),
|
|
186
201
|
fullUrl: request.url,
|
|
@@ -248,6 +263,11 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
|
|
|
248
263
|
snippet = `${importSection}\n\n${snippet}`;
|
|
249
264
|
}
|
|
250
265
|
|
|
266
|
+
/// Replace access token placeholder
|
|
267
|
+
if (snippet.includes(placeholder)) {
|
|
268
|
+
snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
|
|
269
|
+
}
|
|
270
|
+
|
|
251
271
|
/// Snippet is ready!
|
|
252
272
|
return { language, client: client as string, content: snippet };
|
|
253
273
|
})
|
|
@@ -299,7 +319,6 @@ const snippets: Partial<
|
|
|
299
319
|
PipelineType,
|
|
300
320
|
(
|
|
301
321
|
model: ModelDataMinimal,
|
|
302
|
-
accessToken: string,
|
|
303
322
|
provider: InferenceProviderOrPolicy,
|
|
304
323
|
inferenceProviderMapping?: InferenceProviderModelMapping,
|
|
305
324
|
opts?: InferenceSnippetOptions
|
|
@@ -339,13 +358,12 @@ const snippets: Partial<
|
|
|
339
358
|
|
|
340
359
|
export function getInferenceSnippets(
|
|
341
360
|
model: ModelDataMinimal,
|
|
342
|
-
accessToken: string,
|
|
343
361
|
provider: InferenceProviderOrPolicy,
|
|
344
362
|
inferenceProviderMapping?: InferenceProviderModelMapping,
|
|
345
363
|
opts?: Record<string, unknown>
|
|
346
364
|
): InferenceSnippet[] {
|
|
347
365
|
return model.pipeline_tag && model.pipeline_tag in snippets
|
|
348
|
-
? snippets[model.pipeline_tag]?.(model,
|
|
366
|
+
? snippets[model.pipeline_tag]?.(model, provider, inferenceProviderMapping, opts) ?? []
|
|
349
367
|
: [];
|
|
350
368
|
}
|
|
351
369
|
|
|
@@ -420,3 +438,69 @@ function indentString(str: string): string {
|
|
|
420
438
|
function removeSuffix(str: string, suffix: string) {
|
|
421
439
|
return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
|
|
422
440
|
}
|
|
441
|
+
|
|
442
|
+
/**
 * Rewrite the access-token placeholder in a rendered snippet so that the
 * snippet reads the token from an environment variable instead.
 *
 * If "opts.accessToken" is not set, snippets are generated with a placeholder;
 * once rendered, this function swaps that placeholder for language-specific
 * code that fetches the token from the environment.
 *
 * @param directRequest - when truthy, a provider-specific key is used (except for "hf-inference")
 * @param placeholder - the placeholder string that was injected as the access token
 * @param snippet - the rendered snippet text
 * @param language - "sh", "python" and "js" are rewritten; any other value returns the snippet unchanged
 * @param provider - provider id, used to derive the `<PROVIDER>_API_KEY` variable name
 * @returns the snippet with the first occurrence of each known placeholder pattern replaced
 */
function replaceAccessTokenPlaceholder(
	directRequest: boolean | undefined,
	placeholder: string,
	snippet: string,
	language: InferenceSnippetLanguage,
	provider: InferenceProviderOrPolicy
): string {
	// Determine if HF_TOKEN or a provider-specific token should be used
	const useHfToken =
		provider === "hf-inference" || // hf-inference provider => use $HF_TOKEN
		(!directRequest && // if explicit directRequest => use provider-specific token
			(!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
				snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN

	// Replace EVERY hyphen: a plain-string pattern would only replace the first one and
	// turn "black-forest-labs" into the invalid identifier "BLACK_FOREST-LABS_API_KEY".
	const accessTokenEnvVar = useHfToken
		? "HF_TOKEN" // e.g. routed request or hf-inference
		: provider.toUpperCase().replace(/-/g, "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"

	// Authorization schemes that may prefix the token in rendered headers
	const authSchemes = ["Bearer", "Key", "X-Key"];

	// Replace the placeholder with the env variable
	if (language === "sh") {
		// Single quotes become double quotes so the shell expands the variable
		snippet = snippet.replace(
			`'Authorization: Bearer ${placeholder}'`,
			`"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
		);
	} else if (language === "python") {
		// The replacements below reference os.environ, so the import is prepended first
		snippet = "import os\n" + snippet;
		snippet = snippet.replace(
			`"${placeholder}"`,
			`os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
		);
		for (const scheme of authSchemes) {
			snippet = snippet.replace(
				`"${scheme} ${placeholder}"`,
				`f"${scheme} {os.environ['${accessTokenEnvVar}']}"` // e.g. f"Bearer {os.environ['HF_TOKEN']}"
			);
		}
	} else if (language === "js") {
		snippet = snippet.replace(
			`"${placeholder}"`,
			`process.env.${accessTokenEnvVar}` // e.g. process.env.HF_TOKEN
		);
		for (const scheme of authSchemes) {
			snippet = snippet.replace(
				`Authorization: "${scheme} ${placeholder}",`,
				`Authorization: \`${scheme} $\{process.env.${accessTokenEnvVar}}\`,` // e.g. Authorization: `Bearer ${process.env.HF_TOKEN}`,
			);
		}
	}
	return snippet;
}
|
package/src/utils/request.ts
CHANGED
|
@@ -65,9 +65,9 @@ export async function innerRequest<T>(
|
|
|
65
65
|
{ requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }
|
|
66
66
|
);
|
|
67
67
|
}
|
|
68
|
-
if (typeof output.error === "string" || typeof output.detail === "string") {
|
|
68
|
+
if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
|
|
69
69
|
throw new InferenceClientProviderApiError(
|
|
70
|
-
`Failed to perform inference: ${output.error ?? output.detail}`,
|
|
70
|
+
`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`,
|
|
71
71
|
{
|
|
72
72
|
url,
|
|
73
73
|
method: info.method ?? "GET",
|