@huggingface/inference 4.0.4 → 4.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/lib/getInferenceProviderMapping.d.ts +6 -5
- package/dist/commonjs/lib/getInferenceProviderMapping.d.ts.map +1 -1
- package/dist/commonjs/lib/getInferenceProviderMapping.js +30 -6
- package/dist/commonjs/lib/makeRequestOptions.d.ts +2 -2
- package/dist/commonjs/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/commonjs/lib/makeRequestOptions.js +1 -0
- package/dist/commonjs/package.d.ts +1 -1
- package/dist/commonjs/package.js +1 -1
- package/dist/commonjs/providers/consts.d.ts +2 -2
- package/dist/commonjs/snippets/getInferenceSnippets.d.ts +2 -2
- package/dist/commonjs/snippets/getInferenceSnippets.js +1 -1
- package/dist/commonjs/types.d.ts +2 -2
- package/dist/esm/lib/getInferenceProviderMapping.d.ts +6 -5
- package/dist/esm/lib/getInferenceProviderMapping.d.ts.map +1 -1
- package/dist/esm/lib/getInferenceProviderMapping.js +30 -6
- package/dist/esm/lib/makeRequestOptions.d.ts +2 -2
- package/dist/esm/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/esm/lib/makeRequestOptions.js +1 -0
- package/dist/esm/package.d.ts +1 -1
- package/dist/esm/package.js +1 -1
- package/dist/esm/providers/consts.d.ts +2 -2
- package/dist/esm/snippets/getInferenceSnippets.d.ts +2 -2
- package/dist/esm/snippets/getInferenceSnippets.js +1 -1
- package/dist/esm/types.d.ts +2 -2
- package/package.json +2 -2
- package/src/lib/getInferenceProviderMapping.ts +50 -20
- package/src/lib/makeRequestOptions.ts +4 -3
- package/src/package.ts +1 -1
- package/src/providers/consts.ts +2 -2
- package/src/snippets/getInferenceSnippets.ts +5 -5
- package/src/types.ts +2 -2
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import type { WidgetType } from "@huggingface/tasks";
|
|
2
2
|
import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
|
|
3
|
-
export declare const inferenceProviderMappingCache: Map<string,
|
|
4
|
-
export
|
|
5
|
-
export interface InferenceProviderModelMapping {
|
|
3
|
+
export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
|
|
4
|
+
export interface InferenceProviderMappingEntry {
|
|
6
5
|
adapter?: string;
|
|
7
6
|
adapterWeightsPath?: string;
|
|
8
7
|
hfModelId: ModelId;
|
|
8
|
+
provider: string;
|
|
9
9
|
providerId: string;
|
|
10
10
|
status: "live" | "staging";
|
|
11
11
|
task: WidgetType;
|
|
12
|
+
type?: "single-model" | "tag-filter";
|
|
12
13
|
}
|
|
13
14
|
export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
|
|
14
15
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
15
|
-
}): Promise<
|
|
16
|
+
}): Promise<InferenceProviderMappingEntry[]>;
|
|
16
17
|
export declare function getInferenceProviderMapping(params: {
|
|
17
18
|
accessToken?: string;
|
|
18
19
|
modelId: ModelId;
|
|
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
|
|
|
20
21
|
task: WidgetType;
|
|
21
22
|
}, options: {
|
|
22
23
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
23
|
-
}): Promise<
|
|
24
|
+
}): Promise<InferenceProviderMappingEntry | null>;
|
|
24
25
|
export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
|
|
25
26
|
//# sourceMappingURL=getInferenceProviderMapping.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,
|
|
1
|
+
{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
|
|
@@ -10,6 +10,29 @@ const hf_inference_js_1 = require("../providers/hf-inference.js");
|
|
|
10
10
|
const typedInclude_js_1 = require("../utils/typedInclude.js");
|
|
11
11
|
const errors_js_1 = require("../errors.js");
|
|
12
12
|
exports.inferenceProviderMappingCache = new Map();
|
|
13
|
+
/**
|
|
14
|
+
* Normalize inferenceProviderMapping to always return an array format.
|
|
15
|
+
* This provides backward and forward compatibility for the API changes.
|
|
16
|
+
*
|
|
17
|
+
* Vendored from @huggingface/hub to avoid extra dependency.
|
|
18
|
+
*/
|
|
19
|
+
function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
|
|
20
|
+
if (!inferenceProviderMapping) {
|
|
21
|
+
return [];
|
|
22
|
+
}
|
|
23
|
+
// If it's already an array, return it as is
|
|
24
|
+
if (Array.isArray(inferenceProviderMapping)) {
|
|
25
|
+
return inferenceProviderMapping;
|
|
26
|
+
}
|
|
27
|
+
// Convert mapping to array format
|
|
28
|
+
return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
|
|
29
|
+
provider,
|
|
30
|
+
hfModelId: modelId,
|
|
31
|
+
providerId: mapping.providerId,
|
|
32
|
+
status: mapping.status,
|
|
33
|
+
task: mapping.task,
|
|
34
|
+
}));
|
|
35
|
+
}
|
|
13
36
|
async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
|
|
14
37
|
let inferenceProviderMapping;
|
|
15
38
|
if (exports.inferenceProviderMappingCache.has(modelId)) {
|
|
@@ -42,7 +65,8 @@ async function fetchInferenceProviderMappingForModel(modelId, accessToken, optio
|
|
|
42
65
|
if (!payload?.inferenceProviderMapping) {
|
|
43
66
|
throw new errors_js_1.InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
|
|
44
67
|
}
|
|
45
|
-
inferenceProviderMapping = payload.inferenceProviderMapping;
|
|
68
|
+
inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
|
|
69
|
+
exports.inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
|
|
46
70
|
}
|
|
47
71
|
return inferenceProviderMapping;
|
|
48
72
|
}
|
|
@@ -50,8 +74,8 @@ async function getInferenceProviderMapping(params, options) {
|
|
|
50
74
|
if (consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
|
|
51
75
|
return consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
|
|
52
76
|
}
|
|
53
|
-
const
|
|
54
|
-
const providerMapping =
|
|
77
|
+
const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
|
|
78
|
+
const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
|
|
55
79
|
if (providerMapping) {
|
|
56
80
|
const equivalentTasks = params.provider === "hf-inference" && (0, typedInclude_js_1.typedInclude)(hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
|
|
57
81
|
? hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
|
|
@@ -62,7 +86,7 @@ async function getInferenceProviderMapping(params, options) {
|
|
|
62
86
|
if (providerMapping.status === "staging") {
|
|
63
87
|
console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
|
|
64
88
|
}
|
|
65
|
-
return
|
|
89
|
+
return providerMapping;
|
|
66
90
|
}
|
|
67
91
|
return null;
|
|
68
92
|
}
|
|
@@ -82,8 +106,8 @@ async function resolveProvider(provider, modelId, endpointUrl) {
|
|
|
82
106
|
if (!modelId) {
|
|
83
107
|
throw new errors_js_1.InferenceClientInputError("Specifying a model is required when provider is 'auto'");
|
|
84
108
|
}
|
|
85
|
-
const
|
|
86
|
-
provider =
|
|
109
|
+
const mappings = await fetchInferenceProviderMappingForModel(modelId);
|
|
110
|
+
provider = mappings[0]?.provider;
|
|
87
111
|
}
|
|
88
112
|
if (!provider) {
|
|
89
113
|
throw new errors_js_1.InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { InferenceTask, Options, RequestArgs } from "../types.js";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
|
|
3
3
|
import type { getProviderHelper } from "./getProviderHelper.js";
|
|
4
4
|
/**
|
|
5
5
|
* Helper that prepares request arguments.
|
|
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
|
|
|
22
22
|
export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
|
|
23
23
|
data?: Blob | ArrayBuffer;
|
|
24
24
|
stream?: boolean;
|
|
25
|
-
}, mapping:
|
|
25
|
+
}, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
|
|
26
26
|
task?: InferenceTask;
|
|
27
27
|
}): {
|
|
28
28
|
url: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
|
|
@@ -41,6 +41,7 @@ async function makeRequestOptions(args, providerHelper, options) {
|
|
|
41
41
|
}
|
|
42
42
|
const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
|
|
43
43
|
? {
|
|
44
|
+
provider: provider,
|
|
44
45
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
45
46
|
providerId: removeProviderPrefix(maybeModel, provider),
|
|
46
47
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
package/dist/commonjs/package.js
CHANGED
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
|
|
4
4
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
5
|
-
exports.PACKAGE_VERSION = "4.0.4";
|
|
5
|
+
exports.PACKAGE_VERSION = "4.0.5";
|
|
6
6
|
exports.PACKAGE_NAME = "@huggingface/inference";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
2
2
|
import type { InferenceProvider } from "../types.js";
|
|
3
3
|
import { type ModelId } from "../types.js";
|
|
4
4
|
/**
|
|
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
|
|
|
8
8
|
*
|
|
9
9
|
* We also inject into this dictionary from tests.
|
|
10
10
|
*/
|
|
11
|
-
export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId,
|
|
11
|
+
export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
|
|
12
12
|
//# sourceMappingURL=consts.d.ts.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
3
3
|
import type { InferenceProviderOrPolicy } from "../types.js";
|
|
4
4
|
export type InferenceSnippetOptions = {
|
|
5
5
|
streaming?: boolean;
|
|
@@ -8,5 +8,5 @@ export type InferenceSnippetOptions = {
|
|
|
8
8
|
directRequest?: boolean;
|
|
9
9
|
endpointUrl?: string;
|
|
10
10
|
} & Record<string, unknown>;
|
|
11
|
-
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?:
|
|
11
|
+
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
|
|
12
12
|
//# sourceMappingURL=getInferenceSnippets.d.ts.map
|
|
@@ -331,7 +331,7 @@ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, lang
|
|
|
331
331
|
const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
|
|
332
332
|
(provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
|
|
333
333
|
(!directRequest && // if explicit directRequest => use provider-specific token
|
|
334
|
-
(
|
|
334
|
+
(snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
|
|
335
335
|
snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
|
|
336
336
|
const accessTokenEnvVar = useHfToken
|
|
337
337
|
? "HF_TOKEN" // e.g. routed request or hf-inference
|
package/dist/commonjs/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
|
|
3
3
|
/**
|
|
4
4
|
* HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
|
|
5
5
|
*/
|
|
@@ -91,7 +91,7 @@ export interface UrlParams {
|
|
|
91
91
|
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
|
|
92
92
|
args: T;
|
|
93
93
|
model: string;
|
|
94
|
-
mapping?:
|
|
94
|
+
mapping?: InferenceProviderMappingEntry | undefined;
|
|
95
95
|
task?: InferenceTask;
|
|
96
96
|
}
|
|
97
97
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import type { WidgetType } from "@huggingface/tasks";
|
|
2
2
|
import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
|
|
3
|
-
export declare const inferenceProviderMappingCache: Map<string,
|
|
4
|
-
export
|
|
5
|
-
export interface InferenceProviderModelMapping {
|
|
3
|
+
export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
|
|
4
|
+
export interface InferenceProviderMappingEntry {
|
|
6
5
|
adapter?: string;
|
|
7
6
|
adapterWeightsPath?: string;
|
|
8
7
|
hfModelId: ModelId;
|
|
8
|
+
provider: string;
|
|
9
9
|
providerId: string;
|
|
10
10
|
status: "live" | "staging";
|
|
11
11
|
task: WidgetType;
|
|
12
|
+
type?: "single-model" | "tag-filter";
|
|
12
13
|
}
|
|
13
14
|
export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
|
|
14
15
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
15
|
-
}): Promise<
|
|
16
|
+
}): Promise<InferenceProviderMappingEntry[]>;
|
|
16
17
|
export declare function getInferenceProviderMapping(params: {
|
|
17
18
|
accessToken?: string;
|
|
18
19
|
modelId: ModelId;
|
|
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
|
|
|
20
21
|
task: WidgetType;
|
|
21
22
|
}, options: {
|
|
22
23
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
23
|
-
}): Promise<
|
|
24
|
+
}): Promise<InferenceProviderMappingEntry | null>;
|
|
24
25
|
export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
|
|
25
26
|
//# sourceMappingURL=getInferenceProviderMapping.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,
|
|
1
|
+
{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
|
|
@@ -4,6 +4,29 @@ import { EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS } from "../providers/hf-inferenc
|
|
|
4
4
|
import { typedInclude } from "../utils/typedInclude.js";
|
|
5
5
|
import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
|
|
6
6
|
export const inferenceProviderMappingCache = new Map();
|
|
7
|
+
/**
|
|
8
|
+
* Normalize inferenceProviderMapping to always return an array format.
|
|
9
|
+
* This provides backward and forward compatibility for the API changes.
|
|
10
|
+
*
|
|
11
|
+
* Vendored from @huggingface/hub to avoid extra dependency.
|
|
12
|
+
*/
|
|
13
|
+
function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
|
|
14
|
+
if (!inferenceProviderMapping) {
|
|
15
|
+
return [];
|
|
16
|
+
}
|
|
17
|
+
// If it's already an array, return it as is
|
|
18
|
+
if (Array.isArray(inferenceProviderMapping)) {
|
|
19
|
+
return inferenceProviderMapping;
|
|
20
|
+
}
|
|
21
|
+
// Convert mapping to array format
|
|
22
|
+
return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
|
|
23
|
+
provider,
|
|
24
|
+
hfModelId: modelId,
|
|
25
|
+
providerId: mapping.providerId,
|
|
26
|
+
status: mapping.status,
|
|
27
|
+
task: mapping.task,
|
|
28
|
+
}));
|
|
29
|
+
}
|
|
7
30
|
export async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
|
|
8
31
|
let inferenceProviderMapping;
|
|
9
32
|
if (inferenceProviderMappingCache.has(modelId)) {
|
|
@@ -36,7 +59,8 @@ export async function fetchInferenceProviderMappingForModel(modelId, accessToken
|
|
|
36
59
|
if (!payload?.inferenceProviderMapping) {
|
|
37
60
|
throw new InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
|
|
38
61
|
}
|
|
39
|
-
inferenceProviderMapping = payload.inferenceProviderMapping;
|
|
62
|
+
inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
|
|
63
|
+
inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
|
|
40
64
|
}
|
|
41
65
|
return inferenceProviderMapping;
|
|
42
66
|
}
|
|
@@ -44,8 +68,8 @@ export async function getInferenceProviderMapping(params, options) {
|
|
|
44
68
|
if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
|
|
45
69
|
return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
|
|
46
70
|
}
|
|
47
|
-
const
|
|
48
|
-
const providerMapping =
|
|
71
|
+
const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
|
|
72
|
+
const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
|
|
49
73
|
if (providerMapping) {
|
|
50
74
|
const equivalentTasks = params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
|
|
51
75
|
? EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
|
|
@@ -56,7 +80,7 @@ export async function getInferenceProviderMapping(params, options) {
|
|
|
56
80
|
if (providerMapping.status === "staging") {
|
|
57
81
|
console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
|
|
58
82
|
}
|
|
59
|
-
return
|
|
83
|
+
return providerMapping;
|
|
60
84
|
}
|
|
61
85
|
return null;
|
|
62
86
|
}
|
|
@@ -76,8 +100,8 @@ export async function resolveProvider(provider, modelId, endpointUrl) {
|
|
|
76
100
|
if (!modelId) {
|
|
77
101
|
throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
|
|
78
102
|
}
|
|
79
|
-
const
|
|
80
|
-
provider =
|
|
103
|
+
const mappings = await fetchInferenceProviderMappingForModel(modelId);
|
|
104
|
+
provider = mappings[0]?.provider;
|
|
81
105
|
}
|
|
82
106
|
if (!provider) {
|
|
83
107
|
throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { InferenceTask, Options, RequestArgs } from "../types.js";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
|
|
3
3
|
import type { getProviderHelper } from "./getProviderHelper.js";
|
|
4
4
|
/**
|
|
5
5
|
* Helper that prepares request arguments.
|
|
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
|
|
|
22
22
|
export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
|
|
23
23
|
data?: Blob | ArrayBuffer;
|
|
24
24
|
stream?: boolean;
|
|
25
|
-
}, mapping:
|
|
25
|
+
}, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
|
|
26
26
|
task?: InferenceTask;
|
|
27
27
|
}): {
|
|
28
28
|
url: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
|
|
@@ -37,6 +37,7 @@ export async function makeRequestOptions(args, providerHelper, options) {
|
|
|
37
37
|
}
|
|
38
38
|
const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
|
|
39
39
|
? {
|
|
40
|
+
provider: provider,
|
|
40
41
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
41
42
|
providerId: removeProviderPrefix(maybeModel, provider),
|
|
42
43
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
package/dist/esm/package.d.ts
CHANGED
package/dist/esm/package.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
2
|
-
export const PACKAGE_VERSION = "4.0.4";
|
|
2
|
+
export const PACKAGE_VERSION = "4.0.5";
|
|
3
3
|
export const PACKAGE_NAME = "@huggingface/inference";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
2
2
|
import type { InferenceProvider } from "../types.js";
|
|
3
3
|
import { type ModelId } from "../types.js";
|
|
4
4
|
/**
|
|
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
|
|
|
8
8
|
*
|
|
9
9
|
* We also inject into this dictionary from tests.
|
|
10
10
|
*/
|
|
11
|
-
export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId,
|
|
11
|
+
export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
|
|
12
12
|
//# sourceMappingURL=consts.d.ts.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
3
3
|
import type { InferenceProviderOrPolicy } from "../types.js";
|
|
4
4
|
export type InferenceSnippetOptions = {
|
|
5
5
|
streaming?: boolean;
|
|
@@ -8,5 +8,5 @@ export type InferenceSnippetOptions = {
|
|
|
8
8
|
directRequest?: boolean;
|
|
9
9
|
endpointUrl?: string;
|
|
10
10
|
} & Record<string, unknown>;
|
|
11
|
-
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?:
|
|
11
|
+
export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
|
|
12
12
|
//# sourceMappingURL=getInferenceSnippets.d.ts.map
|
|
@@ -328,7 +328,7 @@ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, lang
|
|
|
328
328
|
const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
|
|
329
329
|
(provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
|
|
330
330
|
(!directRequest && // if explicit directRequest => use provider-specific token
|
|
331
|
-
(
|
|
331
|
+
(snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
|
|
332
332
|
snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
|
|
333
333
|
const accessTokenEnvVar = useHfToken
|
|
334
334
|
? "HF_TOKEN" // e.g. routed request or hf-inference
|
package/dist/esm/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
|
|
2
|
-
import type {
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
|
|
3
3
|
/**
|
|
4
4
|
* HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
|
|
5
5
|
*/
|
|
@@ -91,7 +91,7 @@ export interface UrlParams {
|
|
|
91
91
|
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
|
|
92
92
|
args: T;
|
|
93
93
|
model: string;
|
|
94
|
-
mapping?:
|
|
94
|
+
mapping?: InferenceProviderMappingEntry | undefined;
|
|
95
95
|
task?: InferenceTask;
|
|
96
96
|
}
|
|
97
97
|
//# sourceMappingURL=types.d.ts.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/inference",
|
|
3
|
-
"version": "4.0.4",
|
|
3
|
+
"version": "4.0.5",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
|
|
6
6
|
"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
},
|
|
41
41
|
"type": "module",
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@huggingface/tasks": "^0.19.
|
|
43
|
+
"@huggingface/tasks": "^0.19.15",
|
|
44
44
|
"@huggingface/jinja": "^0.5.0"
|
|
45
45
|
},
|
|
46
46
|
"devDependencies": {
|
|
@@ -6,19 +6,48 @@ import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../t
|
|
|
6
6
|
import { typedInclude } from "../utils/typedInclude.js";
|
|
7
7
|
import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
|
|
8
8
|
|
|
9
|
-
export const inferenceProviderMappingCache = new Map<ModelId,
|
|
9
|
+
export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMappingEntry[]>();
|
|
10
10
|
|
|
11
|
-
export
|
|
12
|
-
Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>
|
|
13
|
-
>;
|
|
14
|
-
|
|
15
|
-
export interface InferenceProviderModelMapping {
|
|
11
|
+
export interface InferenceProviderMappingEntry {
|
|
16
12
|
adapter?: string;
|
|
17
13
|
adapterWeightsPath?: string;
|
|
18
14
|
hfModelId: ModelId;
|
|
15
|
+
provider: string;
|
|
19
16
|
providerId: string;
|
|
20
17
|
status: "live" | "staging";
|
|
21
18
|
task: WidgetType;
|
|
19
|
+
type?: "single-model" | "tag-filter";
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Normalize inferenceProviderMapping to always return an array format.
|
|
24
|
+
* This provides backward and forward compatibility for the API changes.
|
|
25
|
+
*
|
|
26
|
+
* Vendored from @huggingface/hub to avoid extra dependency.
|
|
27
|
+
*/
|
|
28
|
+
function normalizeInferenceProviderMapping(
|
|
29
|
+
modelId: ModelId,
|
|
30
|
+
inferenceProviderMapping?:
|
|
31
|
+
| InferenceProviderMappingEntry[]
|
|
32
|
+
| Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>
|
|
33
|
+
): InferenceProviderMappingEntry[] {
|
|
34
|
+
if (!inferenceProviderMapping) {
|
|
35
|
+
return [];
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// If it's already an array, return it as is
|
|
39
|
+
if (Array.isArray(inferenceProviderMapping)) {
|
|
40
|
+
return inferenceProviderMapping;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Convert mapping to array format
|
|
44
|
+
return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
|
|
45
|
+
provider,
|
|
46
|
+
hfModelId: modelId,
|
|
47
|
+
providerId: mapping.providerId,
|
|
48
|
+
status: mapping.status,
|
|
49
|
+
task: mapping.task,
|
|
50
|
+
}));
|
|
22
51
|
}
|
|
23
52
|
|
|
24
53
|
export async function fetchInferenceProviderMappingForModel(
|
|
@@ -27,8 +56,8 @@ export async function fetchInferenceProviderMappingForModel(
|
|
|
27
56
|
options?: {
|
|
28
57
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
29
58
|
}
|
|
30
|
-
): Promise<
|
|
31
|
-
let inferenceProviderMapping:
|
|
59
|
+
): Promise<InferenceProviderMappingEntry[]> {
|
|
60
|
+
let inferenceProviderMapping: InferenceProviderMappingEntry[] | null;
|
|
32
61
|
if (inferenceProviderMappingCache.has(modelId)) {
|
|
33
62
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
34
63
|
inferenceProviderMapping = inferenceProviderMappingCache.get(modelId)!;
|
|
@@ -55,7 +84,11 @@ export async function fetchInferenceProviderMappingForModel(
|
|
|
55
84
|
);
|
|
56
85
|
}
|
|
57
86
|
}
|
|
58
|
-
let payload: {
|
|
87
|
+
let payload: {
|
|
88
|
+
inferenceProviderMapping?:
|
|
89
|
+
| InferenceProviderMappingEntry[]
|
|
90
|
+
| Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>;
|
|
91
|
+
} | null = null;
|
|
59
92
|
try {
|
|
60
93
|
payload = await resp.json();
|
|
61
94
|
} catch {
|
|
@@ -72,7 +105,8 @@ export async function fetchInferenceProviderMappingForModel(
|
|
|
72
105
|
{ requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() }
|
|
73
106
|
);
|
|
74
107
|
}
|
|
75
|
-
inferenceProviderMapping = payload.inferenceProviderMapping;
|
|
108
|
+
inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
|
|
109
|
+
inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
|
|
76
110
|
}
|
|
77
111
|
return inferenceProviderMapping;
|
|
78
112
|
}
|
|
@@ -87,16 +121,12 @@ export async function getInferenceProviderMapping(
|
|
|
87
121
|
options: {
|
|
88
122
|
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
|
|
89
123
|
}
|
|
90
|
-
): Promise<
|
|
124
|
+
): Promise<InferenceProviderMappingEntry | null> {
|
|
91
125
|
if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
|
|
92
126
|
return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
|
|
93
127
|
}
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
params.accessToken,
|
|
97
|
-
options
|
|
98
|
-
);
|
|
99
|
-
const providerMapping = inferenceProviderMapping[params.provider];
|
|
128
|
+
const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
|
|
129
|
+
const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
|
|
100
130
|
if (providerMapping) {
|
|
101
131
|
const equivalentTasks =
|
|
102
132
|
params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
|
|
@@ -112,7 +142,7 @@ export async function getInferenceProviderMapping(
|
|
|
112
142
|
`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
|
|
113
143
|
);
|
|
114
144
|
}
|
|
115
|
-
return
|
|
145
|
+
return providerMapping;
|
|
116
146
|
}
|
|
117
147
|
return null;
|
|
118
148
|
}
|
|
@@ -139,8 +169,8 @@ export async function resolveProvider(
|
|
|
139
169
|
if (!modelId) {
|
|
140
170
|
throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
|
|
141
171
|
}
|
|
142
|
-
const
|
|
143
|
-
provider =
|
|
172
|
+
const mappings = await fetchInferenceProviderMappingForModel(modelId);
|
|
173
|
+
provider = mappings[0]?.provider as InferenceProvider | undefined;
|
|
144
174
|
}
|
|
145
175
|
if (!provider) {
|
|
146
176
|
throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { HF_HEADER_X_BILL_TO, HF_HUB_URL } from "../config.js";
|
|
2
2
|
import { PACKAGE_NAME, PACKAGE_VERSION } from "../package.js";
|
|
3
3
|
import type { InferenceTask, Options, RequestArgs } from "../types.js";
|
|
4
|
-
import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
|
|
4
|
+
import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
|
|
5
5
|
import { getInferenceProviderMapping } from "./getInferenceProviderMapping.js";
|
|
6
6
|
import type { getProviderHelper } from "./getProviderHelper.js";
|
|
7
7
|
import { isUrl } from "./isUrl.js";
|
|
@@ -64,6 +64,7 @@ export async function makeRequestOptions(
|
|
|
64
64
|
|
|
65
65
|
const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
|
|
66
66
|
? ({
|
|
67
|
+
provider: provider,
|
|
67
68
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
68
69
|
providerId: removeProviderPrefix(maybeModel!, provider),
|
|
69
70
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
@@ -71,7 +72,7 @@ export async function makeRequestOptions(
|
|
|
71
72
|
status: "live",
|
|
72
73
|
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
73
74
|
task: task!,
|
|
74
|
-
} satisfies
|
|
75
|
+
} satisfies InferenceProviderMappingEntry)
|
|
75
76
|
: await getInferenceProviderMapping(
|
|
76
77
|
{
|
|
77
78
|
modelId: hfModel,
|
|
@@ -109,7 +110,7 @@ export function makeRequestOptionsFromResolvedModel(
|
|
|
109
110
|
data?: Blob | ArrayBuffer;
|
|
110
111
|
stream?: boolean;
|
|
111
112
|
},
|
|
112
|
-
mapping:
|
|
113
|
+
mapping: InferenceProviderMappingEntry | undefined,
|
|
113
114
|
options?: Options & {
|
|
114
115
|
task?: InferenceTask;
|
|
115
116
|
}
|
package/src/package.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
|
|
2
|
-
export const PACKAGE_VERSION = "4.0.4";
|
|
2
|
+
export const PACKAGE_VERSION = "4.0.5";
|
|
3
3
|
export const PACKAGE_NAME = "@huggingface/inference";
|
package/src/providers/consts.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
|
|
1
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
2
2
|
import type { InferenceProvider } from "../types.js";
|
|
3
3
|
import { type ModelId } from "../types.js";
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ import { type ModelId } from "../types.js";
|
|
|
11
11
|
*/
|
|
12
12
|
export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
|
|
13
13
|
InferenceProvider,
|
|
14
|
-
Record<ModelId,
|
|
14
|
+
Record<ModelId, InferenceProviderMappingEntry>
|
|
15
15
|
> = {
|
|
16
16
|
/**
|
|
17
17
|
* "HF model ID" => "Model ID on Inference Provider's side"
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "@huggingface/tasks";
|
|
9
9
|
import type { PipelineType, WidgetType } from "@huggingface/tasks";
|
|
10
10
|
import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks";
|
|
11
|
-
import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
|
|
11
|
+
import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
|
|
12
12
|
import { getProviderHelper } from "../lib/getProviderHelper.js";
|
|
13
13
|
import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.js";
|
|
14
14
|
import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
|
|
@@ -138,7 +138,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
|
|
|
138
138
|
return (
|
|
139
139
|
model: ModelDataMinimal,
|
|
140
140
|
provider: InferenceProviderOrPolicy,
|
|
141
|
-
inferenceProviderMapping?:
|
|
141
|
+
inferenceProviderMapping?: InferenceProviderMappingEntry,
|
|
142
142
|
opts?: InferenceSnippetOptions
|
|
143
143
|
): InferenceSnippet[] => {
|
|
144
144
|
const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
|
|
@@ -331,7 +331,7 @@ const snippets: Partial<
|
|
|
331
331
|
(
|
|
332
332
|
model: ModelDataMinimal,
|
|
333
333
|
provider: InferenceProviderOrPolicy,
|
|
334
|
-
inferenceProviderMapping?:
|
|
334
|
+
inferenceProviderMapping?: InferenceProviderMappingEntry,
|
|
335
335
|
opts?: InferenceSnippetOptions
|
|
336
336
|
) => InferenceSnippet[]
|
|
337
337
|
>
|
|
@@ -370,7 +370,7 @@ const snippets: Partial<
|
|
|
370
370
|
export function getInferenceSnippets(
|
|
371
371
|
model: ModelDataMinimal,
|
|
372
372
|
provider: InferenceProviderOrPolicy,
|
|
373
|
-
inferenceProviderMapping?:
|
|
373
|
+
inferenceProviderMapping?: InferenceProviderMappingEntry,
|
|
374
374
|
opts?: Record<string, unknown>
|
|
375
375
|
): InferenceSnippet[] {
|
|
376
376
|
return model.pipeline_tag && model.pipeline_tag in snippets
|
|
@@ -466,7 +466,7 @@ function replaceAccessTokenPlaceholder(
|
|
|
466
466
|
!endpointUrl && // custom endpointUrl => use a generic API_TOKEN
|
|
467
467
|
(provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
|
|
468
468
|
(!directRequest && // if explicit directRequest => use provider-specific token
|
|
469
|
-
(
|
|
469
|
+
(snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
|
|
470
470
|
snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
|
|
471
471
|
const accessTokenEnvVar = useHfToken
|
|
472
472
|
? "HF_TOKEN" // e.g. routed request or hf-inference
|
package/src/types.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
|
|
2
|
-
import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
|
|
2
|
+
import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
|
|
@@ -126,6 +126,6 @@ export interface UrlParams {
|
|
|
126
126
|
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
|
|
127
127
|
args: T;
|
|
128
128
|
model: string;
|
|
129
|
-
mapping?:
|
|
129
|
+
mapping?: InferenceProviderMappingEntry | undefined;
|
|
130
130
|
task?: InferenceTask;
|
|
131
131
|
}
|