@huggingface/inference 4.0.3 → 4.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/commonjs/lib/getInferenceProviderMapping.d.ts +6 -5
- package/dist/commonjs/lib/getInferenceProviderMapping.d.ts.map +1 -1
- package/dist/commonjs/lib/getInferenceProviderMapping.js +30 -6
- package/dist/commonjs/lib/makeRequestOptions.d.ts +2 -2
- package/dist/commonjs/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/commonjs/lib/makeRequestOptions.js +1 -0
- package/dist/commonjs/package.d.ts +1 -1
- package/dist/commonjs/package.js +1 -1
- package/dist/commonjs/providers/consts.d.ts +2 -2
- package/dist/commonjs/snippets/getInferenceSnippets.d.ts +3 -2
- package/dist/commonjs/snippets/getInferenceSnippets.d.ts.map +1 -1
- package/dist/commonjs/snippets/getInferenceSnippets.js +12 -7
- package/dist/commonjs/snippets/templates.exported.js +9 -9
- package/dist/commonjs/types.d.ts +2 -2
- package/dist/esm/lib/getInferenceProviderMapping.d.ts +6 -5
- package/dist/esm/lib/getInferenceProviderMapping.d.ts.map +1 -1
- package/dist/esm/lib/getInferenceProviderMapping.js +30 -6
- package/dist/esm/lib/makeRequestOptions.d.ts +2 -2
- package/dist/esm/lib/makeRequestOptions.d.ts.map +1 -1
- package/dist/esm/lib/makeRequestOptions.js +1 -0
- package/dist/esm/package.d.ts +1 -1
- package/dist/esm/package.js +1 -1
- package/dist/esm/providers/consts.d.ts +2 -2
- package/dist/esm/snippets/getInferenceSnippets.d.ts +3 -2
- package/dist/esm/snippets/getInferenceSnippets.d.ts.map +1 -1
- package/dist/esm/snippets/getInferenceSnippets.js +12 -7
- package/dist/esm/snippets/templates.exported.js +9 -9
- package/dist/esm/types.d.ts +2 -2
- package/package.json +2 -2
- package/src/lib/getInferenceProviderMapping.ts +50 -20
- package/src/lib/makeRequestOptions.ts +4 -3
- package/src/package.ts +1 -1
- package/src/providers/consts.ts +2 -2
- package/src/snippets/getInferenceSnippets.ts +27 -13
- package/src/snippets/templates.exported.ts +9 -9
- package/src/types.ts +2 -2
package/README.md
CHANGED
````diff
@@ -651,9 +651,10 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
 ```typescript
 // Chat Completion Example
 const MISTRAL_KEY = process.env.MISTRAL_KEY;
-const hf = new InferenceClient(MISTRAL_KEY
-
-
+const hf = new InferenceClient(MISTRAL_KEY, {
+	endpointUrl: "https://api.mistral.ai",
+});
+const stream = hf.chatCompletionStream({
 	model: "mistral-tiny",
 	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
 });
````
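The updated example wires the client to an OpenAI-compatible endpoint via `endpointUrl`. As a companion sketch, the same setup with the non-streaming `chatCompletion` call (same key and endpoint as the README example; purely illustrative):

```typescript
import { InferenceClient } from "@huggingface/inference";

// Assumes MISTRAL_KEY is set in the environment, as in the README example above.
const client = new InferenceClient(process.env.MISTRAL_KEY, {
	endpointUrl: "https://api.mistral.ai",
});

// Non-streaming variant of the streaming example shown in the diff.
const completion = await client.chatCompletion({
	model: "mistral-tiny",
	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
});

console.log(completion.choices[0].message);
```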
package/dist/commonjs/lib/getInferenceProviderMapping.d.ts
CHANGED
```diff
@@ -1,18 +1,19 @@
 import type { WidgetType } from "@huggingface/tasks";
 import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
-export declare const inferenceProviderMappingCache: Map<string,
-export
-export interface InferenceProviderModelMapping {
+export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
+export interface InferenceProviderMappingEntry {
     adapter?: string;
     adapterWeightsPath?: string;
     hfModelId: ModelId;
+    provider: string;
     providerId: string;
     status: "live" | "staging";
     task: WidgetType;
+    type?: "single-model" | "tag-filter";
 }
 export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
     fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
-}): Promise<
+}): Promise<InferenceProviderMappingEntry[]>;
 export declare function getInferenceProviderMapping(params: {
     accessToken?: string;
     modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
     task: WidgetType;
 }, options: {
     fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
-}): Promise<
+}): Promise<InferenceProviderMappingEntry | null>;
 export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
 //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
```
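The interface above replaces the old `InferenceProviderModelMapping` shape: each entry now carries its `provider` name (the mapping is a flat array rather than a provider-keyed record) plus an optional `type` discriminator. A minimal sketch of one entry, with illustrative values:

```typescript
// Illustrative entry; the shape mirrors the InferenceProviderMappingEntry declaration above.
const entry = {
	provider: "fireworks-ai",                   // new: provider name now lives on the entry itself
	hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
	providerId: "fireworks-model-id-goes-here", // provider-side model id (placeholder)
	status: "live" as const,
	task: "text-generation" as const,
	type: "single-model" as const,              // new optional field
};

// A model now maps to an array of such entries (one per provider),
// which is what fetchInferenceProviderMappingForModel resolves to.
const mapping = [entry];
```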
package/dist/commonjs/lib/getInferenceProviderMapping.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,
+{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
```
package/dist/commonjs/lib/getInferenceProviderMapping.js
CHANGED
```diff
@@ -10,6 +10,29 @@ const hf_inference_js_1 = require("../providers/hf-inference.js");
 const typedInclude_js_1 = require("../utils/typedInclude.js");
 const errors_js_1 = require("../errors.js");
 exports.inferenceProviderMappingCache = new Map();
+/**
+ * Normalize inferenceProviderMapping to always return an array format.
+ * This provides backward and forward compatibility for the API changes.
+ *
+ * Vendored from @huggingface/hub to avoid extra dependency.
+ */
+function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
+    if (!inferenceProviderMapping) {
+        return [];
+    }
+    // If it's already an array, return it as is
+    if (Array.isArray(inferenceProviderMapping)) {
+        return inferenceProviderMapping;
+    }
+    // Convert mapping to array format
+    return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
+        provider,
+        hfModelId: modelId,
+        providerId: mapping.providerId,
+        status: mapping.status,
+        task: mapping.task,
+    }));
+}
 async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
     let inferenceProviderMapping;
     if (exports.inferenceProviderMappingCache.has(modelId)) {
@@ -42,7 +65,8 @@ async function fetchInferenceProviderMappingForModel(modelId, accessToken, optio
         if (!payload?.inferenceProviderMapping) {
             throw new errors_js_1.InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
         }
-        inferenceProviderMapping = payload.inferenceProviderMapping;
+        inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
+        exports.inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
     }
     return inferenceProviderMapping;
 }
@@ -50,8 +74,8 @@ async function getInferenceProviderMapping(params, options) {
     if (consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
         return consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
     }
-    const
-    const providerMapping =
+    const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
+    const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
     if (providerMapping) {
         const equivalentTasks = params.provider === "hf-inference" && (0, typedInclude_js_1.typedInclude)(hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
             ? hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -62,7 +86,7 @@ async function getInferenceProviderMapping(params, options) {
         if (providerMapping.status === "staging") {
             console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
         }
-        return
+        return providerMapping;
     }
     return null;
 }
@@ -82,8 +106,8 @@ async function resolveProvider(provider, modelId, endpointUrl) {
         if (!modelId) {
             throw new errors_js_1.InferenceClientInputError("Specifying a model is required when provider is 'auto'");
         }
-        const
-        provider =
+        const mappings = await fetchInferenceProviderMappingForModel(modelId);
+        provider = mappings[0]?.provider;
     }
     if (!provider) {
         throw new errors_js_1.InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
```
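The new `normalizeInferenceProviderMapping` helper is the heart of this release: the Hub may return the provider mapping either as the legacy provider-keyed object or as the new array form, and both are normalized to `InferenceProviderMappingEntry[]` before being cached. A standalone sketch of the same idea, with simplified types:

```typescript
// Standalone sketch of the normalization step above; types are simplified for illustration.
type Entry = { provider: string; hfModelId: string; providerId: string; status: "live" | "staging"; task: string };

function normalize(
	modelId: string,
	raw: Entry[] | Record<string, Omit<Entry, "provider" | "hfModelId">> | undefined
): Entry[] {
	if (!raw) return [];
	if (Array.isArray(raw)) return raw; // new API shape: already an array
	// legacy API shape: provider-keyed record => convert to array entries
	return Object.entries(raw).map(([provider, m]) => ({ provider, hfModelId: modelId, ...m }));
}

// Both shapes normalize to the same array:
normalize("org/model", { "fireworks-ai": { providerId: "fw-model", status: "live", task: "text-generation" } });
normalize("org/model", [{ provider: "fireworks-ai", hfModelId: "org/model", providerId: "fw-model", status: "live", task: "text-generation" }]);
```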
package/dist/commonjs/lib/makeRequestOptions.d.ts
CHANGED
```diff
@@ -1,5 +1,5 @@
 import type { InferenceTask, Options, RequestArgs } from "../types.js";
-import type {
+import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
 import type { getProviderHelper } from "./getProviderHelper.js";
 /**
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
 export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
     data?: Blob | ArrayBuffer;
     stream?: boolean;
-}, mapping:
+}, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
     task?: InferenceTask;
 }): {
     url: string;
```
package/dist/commonjs/lib/makeRequestOptions.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,
+{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
```
package/dist/commonjs/lib/makeRequestOptions.js
CHANGED
```diff
@@ -41,6 +41,7 @@ async function makeRequestOptions(args, providerHelper, options) {
     }
     const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
         ? {
+            provider: provider,
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
             providerId: removeProviderPrefix(maybeModel, provider),
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
```
package/dist/commonjs/package.js
CHANGED
```diff
@@ -2,5 +2,5 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
 // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
-exports.PACKAGE_VERSION = "4.0.3";
+exports.PACKAGE_VERSION = "4.0.5";
 exports.PACKAGE_NAME = "@huggingface/inference";
```
package/dist/commonjs/providers/consts.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
 import type { InferenceProvider } from "../types.js";
 import { type ModelId } from "../types.js";
 /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
  *
  * We also inject into this dictionary from tests.
  */
-export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId,
+export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
 //# sourceMappingURL=consts.d.ts.map
```
package/dist/commonjs/snippets/getInferenceSnippets.d.ts
CHANGED
```diff
@@ -1,11 +1,12 @@
 import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
 import type { InferenceProviderOrPolicy } from "../types.js";
 export type InferenceSnippetOptions = {
     streaming?: boolean;
     billTo?: string;
     accessToken?: string;
     directRequest?: boolean;
+    endpointUrl?: string;
 } & Record<string, unknown>;
-export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?:
+export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
 //# sourceMappingURL=getInferenceSnippets.d.ts.map
```
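`getInferenceSnippets` now accepts an `endpointUrl` option that is threaded through to the request builder and the templates. A hedged usage sketch (the `snippets` export path and the minimal model fields are assumptions; adjust to your setup):

```typescript
import { snippets } from "@huggingface/inference"; // assumed namespace export; adjust to how you consume the package

// Minimal, illustrative model data (only the fields the generator needs).
const model = {
	id: "meta-llama/Llama-3.3-70B-Instruct",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
} as Parameters<typeof snippets.getInferenceSnippets>[0];

// With endpointUrl set, generated snippets target the custom endpoint and use a
// generic API_TOKEN placeholder instead of a provider-specific key.
const generated = snippets.getInferenceSnippets(model, "hf-inference", undefined, {
	endpointUrl: "https://my-endpoint.example.com/v1",
	streaming: false,
});

for (const s of generated) {
	console.log(`--- ${s.client} (${s.language}) ---\n${s.content}`);
}
```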
package/dist/commonjs/snippets/getInferenceSnippets.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;
+{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
```
package/dist/commonjs/snippets/getInferenceSnippets.js
CHANGED
```diff
@@ -110,6 +110,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
     const request = (0, makeRequestOptions_js_1.makeRequestOptionsFromResolvedModel)(providerModelId, providerHelper, {
         accessToken: accessTokenOrPlaceholder,
         provider,
+        endpointUrl: opts?.endpointUrl,
         ...inputs,
     }, inferenceProviderMapping, {
         task,
@@ -151,6 +152,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
         provider,
         providerModelId: providerModelId ?? model.id,
         billTo: opts?.billTo,
+        endpointUrl: opts?.endpointUrl,
     };
     /// Iterate over clients => check if a snippet exists => generate
     const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -195,7 +197,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
         }
         /// Replace access token placeholder
         if (snippet.includes(placeholder)) {
-            snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
+            snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
         }
         /// Snippet is ready!
         return { language, client: client, content: snippet };
@@ -322,17 +324,20 @@ function indentString(str) {
 function removeSuffix(str, suffix) {
     return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
 }
-function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
+function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
     // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
     // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
     // Determine if HF_TOKEN or specific provider token should be used
-    const useHfToken =
-    (
-    (!
-    snippet.includes("
+    const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
+        (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
+            (!directRequest && // if explicit directRequest => use provider-specific token
+                (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
+                    snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
     const accessTokenEnvVar = useHfToken
         ? "HF_TOKEN" // e.g. routed request or hf-inference
-        :
+        : endpointUrl
+            ? "API_TOKEN"
+            : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
     // Replace the placeholder with the env variable
     if (language === "sh") {
         snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
```
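The rebuilt branch above picks which environment variable the rendered snippet reads its token from: a custom `endpointUrl` forces a generic `API_TOKEN`, `hf-inference` and routed requests keep `HF_TOKEN`, and direct provider calls fall back to a provider-specific key. Restated as a small pure function (illustrative only):

```typescript
// Sketch of the selection logic above, restated standalone (not the package's actual export).
function pickAccessTokenEnvVar(opts: {
	provider: string;
	snippet: string;
	directRequest?: boolean;
	endpointUrl?: string;
}): string {
	const { provider, snippet, directRequest, endpointUrl } = opts;
	const useHfToken =
		!endpointUrl && // custom endpoint => generic API_TOKEN
		(provider === "hf-inference" || // hf-inference => HF_TOKEN
			(!directRequest && (snippet.includes("InferenceClient") || snippet.includes("https://router.huggingface.co"))));
	if (useHfToken) return "HF_TOKEN";
	if (endpointUrl) return "API_TOKEN";
	return provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
}

// e.g. pickAccessTokenEnvVar({ provider: "replicate", snippet: "fetch(...)", directRequest: true }) === "REPLICATE_API_KEY"
```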
package/dist/commonjs/snippets/templates.exported.js
CHANGED
```diff
@@ -14,14 +14,14 @@ exports.templates = {
         "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
     },
     "huggingface.js": {
-        "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-        "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-        "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-        "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
-        "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
-        "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
-        "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
-        "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
+        "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+        "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+        "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+        "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
+        "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
+        "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
+        "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
+        "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
     },
     "openai": {
         "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -40,7 +40,7 @@ exports.templates = {
         "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
         "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
         "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
-        "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
+        "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
         "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
         "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
         "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
```
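With the `{% if endpointUrl %}` blocks in place, the huggingface.js templates emit an `endpointUrl` field in the call arguments. Roughly what the rendered "conversational" snippet looks like when an endpoint URL is supplied (model, URL, and prompt are illustrative):

```typescript
// Approximate render of the updated "conversational" template with endpointUrl set;
// API_TOKEN is the generic env var chosen when a custom endpoint is used.
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient(process.env.API_TOKEN);

const chatCompletion = await client.chatCompletion({
	endpointUrl: "https://my-endpoint.example.com/v1",
	provider: "hf-inference",
	model: "meta-llama/Llama-3.3-70B-Instruct",
	messages: [{ role: "user", content: "What is the capital of France?" }],
});

console.log(chatCompletion.choices[0].message);
```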
package/dist/commonjs/types.d.ts
CHANGED
```diff
@@ -1,5 +1,5 @@
 import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
 /**
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
  */
@@ -91,7 +91,7 @@ export interface UrlParams {
 export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
     args: T;
     model: string;
-    mapping?:
+    mapping?: InferenceProviderMappingEntry | undefined;
     task?: InferenceTask;
 }
 //# sourceMappingURL=types.d.ts.map
```
package/dist/esm/lib/getInferenceProviderMapping.d.ts
CHANGED
```diff
@@ -1,18 +1,19 @@
 import type { WidgetType } from "@huggingface/tasks";
 import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
-export declare const inferenceProviderMappingCache: Map<string,
-export
-export interface InferenceProviderModelMapping {
+export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
+export interface InferenceProviderMappingEntry {
     adapter?: string;
     adapterWeightsPath?: string;
     hfModelId: ModelId;
+    provider: string;
     providerId: string;
     status: "live" | "staging";
     task: WidgetType;
+    type?: "single-model" | "tag-filter";
 }
 export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
     fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
-}): Promise<
+}): Promise<InferenceProviderMappingEntry[]>;
 export declare function getInferenceProviderMapping(params: {
     accessToken?: string;
     modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
     task: WidgetType;
 }, options: {
     fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
-}): Promise<
+}): Promise<InferenceProviderMappingEntry | null>;
 export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
 //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
```
package/dist/esm/lib/getInferenceProviderMapping.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,
+{"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
```
package/dist/esm/lib/getInferenceProviderMapping.js
CHANGED
```diff
@@ -4,6 +4,29 @@ import { EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS } from "../providers/hf-inferenc
 import { typedInclude } from "../utils/typedInclude.js";
 import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
 export const inferenceProviderMappingCache = new Map();
+/**
+ * Normalize inferenceProviderMapping to always return an array format.
+ * This provides backward and forward compatibility for the API changes.
+ *
+ * Vendored from @huggingface/hub to avoid extra dependency.
+ */
+function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
+    if (!inferenceProviderMapping) {
+        return [];
+    }
+    // If it's already an array, return it as is
+    if (Array.isArray(inferenceProviderMapping)) {
+        return inferenceProviderMapping;
+    }
+    // Convert mapping to array format
+    return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
+        provider,
+        hfModelId: modelId,
+        providerId: mapping.providerId,
+        status: mapping.status,
+        task: mapping.task,
+    }));
+}
 export async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
     let inferenceProviderMapping;
     if (inferenceProviderMappingCache.has(modelId)) {
@@ -36,7 +59,8 @@ export async function fetchInferenceProviderMappingForModel(modelId, accessToken
         if (!payload?.inferenceProviderMapping) {
             throw new InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
         }
-        inferenceProviderMapping = payload.inferenceProviderMapping;
+        inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
+        inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
     }
     return inferenceProviderMapping;
 }
@@ -44,8 +68,8 @@ export async function getInferenceProviderMapping(params, options) {
     if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
         return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
     }
-    const
-    const providerMapping =
+    const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
+    const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
     if (providerMapping) {
         const equivalentTasks = params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
             ? EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -56,7 +80,7 @@ export async function getInferenceProviderMapping(params, options) {
         if (providerMapping.status === "staging") {
             console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
         }
-        return
+        return providerMapping;
     }
     return null;
 }
@@ -76,8 +100,8 @@ export async function resolveProvider(provider, modelId, endpointUrl) {
         if (!modelId) {
             throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
         }
-        const
-        provider =
+        const mappings = await fetchInferenceProviderMappingForModel(modelId);
+        provider = mappings[0]?.provider;
     }
     if (!provider) {
         throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
```
package/dist/esm/lib/makeRequestOptions.d.ts
CHANGED
```diff
@@ -1,5 +1,5 @@
 import type { InferenceTask, Options, RequestArgs } from "../types.js";
-import type {
+import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
 import type { getProviderHelper } from "./getProviderHelper.js";
 /**
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
 export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
     data?: Blob | ArrayBuffer;
     stream?: boolean;
-}, mapping:
+}, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
     task?: InferenceTask;
 }): {
     url: string;
```
package/dist/esm/lib/makeRequestOptions.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,
+{"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
```
package/dist/esm/lib/makeRequestOptions.js
CHANGED
```diff
@@ -37,6 +37,7 @@ export async function makeRequestOptions(args, providerHelper, options) {
     }
     const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
         ? {
+            provider: provider,
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
             providerId: removeProviderPrefix(maybeModel, provider),
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
```
package/dist/esm/package.d.ts
CHANGED
package/dist/esm/package.js
CHANGED
```diff
@@ -1,3 +1,3 @@
 // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
-export const PACKAGE_VERSION = "4.0.3";
+export const PACKAGE_VERSION = "4.0.5";
 export const PACKAGE_NAME = "@huggingface/inference";
```
package/dist/esm/providers/consts.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
 import type { InferenceProvider } from "../types.js";
 import { type ModelId } from "../types.js";
 /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
  *
  * We also inject into this dictionary from tests.
  */
-export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId,
+export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
 //# sourceMappingURL=consts.d.ts.map
```
package/dist/esm/snippets/getInferenceSnippets.d.ts
CHANGED
```diff
@@ -1,11 +1,12 @@
 import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
 import type { InferenceProviderOrPolicy } from "../types.js";
 export type InferenceSnippetOptions = {
     streaming?: boolean;
     billTo?: string;
     accessToken?: string;
     directRequest?: boolean;
+    endpointUrl?: string;
 } & Record<string, unknown>;
-export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?:
+export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
 //# sourceMappingURL=getInferenceSnippets.d.ts.map
```
package/dist/esm/snippets/getInferenceSnippets.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;
+{"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
```
package/dist/esm/snippets/getInferenceSnippets.js
CHANGED
```diff
@@ -107,6 +107,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
     const request = makeRequestOptionsFromResolvedModel(providerModelId, providerHelper, {
         accessToken: accessTokenOrPlaceholder,
         provider,
+        endpointUrl: opts?.endpointUrl,
         ...inputs,
     }, inferenceProviderMapping, {
         task,
@@ -148,6 +149,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
         provider,
         providerModelId: providerModelId ?? model.id,
         billTo: opts?.billTo,
+        endpointUrl: opts?.endpointUrl,
     };
     /// Iterate over clients => check if a snippet exists => generate
     const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -192,7 +194,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
         }
         /// Replace access token placeholder
         if (snippet.includes(placeholder)) {
-            snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
+            snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
         }
         /// Snippet is ready!
         return { language, client: client, content: snippet };
@@ -319,17 +321,20 @@ function indentString(str) {
 function removeSuffix(str, suffix) {
     return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
 }
-function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
+function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
     // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
     // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
     // Determine if HF_TOKEN or specific provider token should be used
-    const useHfToken =
-    (
-    (!
-    snippet.includes("
+    const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
+        (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
+            (!directRequest && // if explicit directRequest => use provider-specific token
+                (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
+                    snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
     const accessTokenEnvVar = useHfToken
         ? "HF_TOKEN" // e.g. routed request or hf-inference
-        :
+        : endpointUrl
+            ? "API_TOKEN"
+            : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
     // Replace the placeholder with the env variable
     if (language === "sh") {
         snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
```
@@ -11,14 +11,14 @@ export const templates = {
|
|
|
11
11
|
"zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
|
|
12
12
|
},
|
|
13
13
|
"huggingface.js": {
|
|
14
|
-
"basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
15
|
-
"basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
16
|
-
"basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
17
|
-
"conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
|
|
18
|
-
"conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
|
|
19
|
-
"textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
|
|
20
|
-
"textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
|
|
21
|
-
"textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
|
|
14
|
+
"basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
15
|
+
"basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
16
|
+
"basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
|
|
17
|
+
"conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
|
|
18
|
+
"conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
|
|
19
|
+
"textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
|
|
20
|
+
"textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
|
|
21
|
+
"textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
|
|
22
22
|
},
|
|
23
23
|
"openai": {
|
|
24
24
|
"conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates = {
"conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
"documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
"imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
-"importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
+"importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
"questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
"tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
"textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
package/dist/esm/types.d.ts
CHANGED
@@ -1,5 +1,5 @@
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
/**
* HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
*/
@@ -91,7 +91,7 @@ export interface UrlParams {
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
args: T;
model: string;
-mapping?:
+mapping?: InferenceProviderMappingEntry | undefined;
task?: InferenceTask;
}
//# sourceMappingURL=types.d.ts.map
package/package.json
CHANGED
@@ -1,6 +1,6 @@
{
"name": "@huggingface/inference",
-"version": "4.0.3",
+"version": "4.0.5",
"license": "MIT",
"author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
@@ -40,7 +40,7 @@
},
"type": "module",
"dependencies": {
-"@huggingface/tasks": "^0.19.
+"@huggingface/tasks": "^0.19.15",
"@huggingface/jinja": "^0.5.0"
},
"devDependencies": {
package/src/lib/getInferenceProviderMapping.ts
CHANGED
@@ -6,19 +6,48 @@ import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../t
import { typedInclude } from "../utils/typedInclude.js";
import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";

-export const inferenceProviderMappingCache = new Map<ModelId,
+export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMappingEntry[]>();

-export
-Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>
->;
-
-export interface InferenceProviderModelMapping {
+export interface InferenceProviderMappingEntry {
adapter?: string;
adapterWeightsPath?: string;
hfModelId: ModelId;
+provider: string;
providerId: string;
status: "live" | "staging";
task: WidgetType;
+type?: "single-model" | "tag-filter";
+}
+
+/**
+ * Normalize inferenceProviderMapping to always return an array format.
+ * This provides backward and forward compatibility for the API changes.
+ *
+ * Vendored from @huggingface/hub to avoid extra dependency.
+ */
+function normalizeInferenceProviderMapping(
+modelId: ModelId,
+inferenceProviderMapping?:
+| InferenceProviderMappingEntry[]
+| Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>
+): InferenceProviderMappingEntry[] {
+if (!inferenceProviderMapping) {
+return [];
+}
+
+// If it's already an array, return it as is
+if (Array.isArray(inferenceProviderMapping)) {
+return inferenceProviderMapping;
+}
+
+// Convert mapping to array format
+return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
+provider,
+hfModelId: modelId,
+providerId: mapping.providerId,
+status: mapping.status,
+task: mapping.task,
+}));
}

export async function fetchInferenceProviderMappingForModel(
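For reference, a minimal sketch (not part of the diff) of the two payload shapes this vendored helper reconciles; the model id, provider, and providerId values below are made up for illustration:

```typescript
// Hypothetical Hub payloads, shown only to illustrate normalizeInferenceProviderMapping.
// Legacy shape: a record keyed by provider name.
const legacyPayload = {
	"fal-ai": { providerId: "fal-ai/flux/dev", status: "live", task: "text-to-image" },
};

// Normalized shape: one array entry per provider, with the HF model id carried along.
const normalized = [
	{
		provider: "fal-ai",
		hfModelId: "black-forest-labs/FLUX.1-dev",
		providerId: "fal-ai/flux/dev",
		status: "live",
		task: "text-to-image",
	},
];

console.log(legacyPayload, normalized);
```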
@@ -27,8 +56,8 @@ export async function fetchInferenceProviderMappingForModel(
options?: {
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
}
-): Promise<
-let inferenceProviderMapping:
+): Promise<InferenceProviderMappingEntry[]> {
+let inferenceProviderMapping: InferenceProviderMappingEntry[] | null;
if (inferenceProviderMappingCache.has(modelId)) {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
inferenceProviderMapping = inferenceProviderMappingCache.get(modelId)!;
@@ -55,7 +84,11 @@ export async function fetchInferenceProviderMappingForModel(
);
}
}
-let payload: {
+let payload: {
+inferenceProviderMapping?:
+| InferenceProviderMappingEntry[]
+| Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>;
+} | null = null;
try {
payload = await resp.json();
} catch {
@@ -72,7 +105,8 @@ export async function fetchInferenceProviderMappingForModel(
{ requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() }
);
}
-inferenceProviderMapping = payload.inferenceProviderMapping;
+inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
+inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
}
return inferenceProviderMapping;
}
@@ -87,16 +121,12 @@ export async function getInferenceProviderMapping(
options: {
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
}
-): Promise<
+): Promise<InferenceProviderMappingEntry | null> {
if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
}
-const
-
-params.accessToken,
-options
-);
-const providerMapping = inferenceProviderMapping[params.provider];
+const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
+const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
if (providerMapping) {
const equivalentTasks =
params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
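A small sketch (illustrative data only, not from the package) of how the lookup changes once the mapping is an array rather than a provider-keyed record:

```typescript
// Before: the per-provider entry was read with inferenceProviderMapping["fal-ai"].
// After: the normalized array is searched with Array.prototype.find.
const mappings = [
	{ provider: "replicate", hfModelId: "some-org/some-model", providerId: "some-org/some-model-on-replicate", status: "live", task: "text-to-image" },
	{ provider: "fal-ai", hfModelId: "some-org/some-model", providerId: "some-org/some-model-on-fal", status: "live", task: "text-to-image" },
];

const providerMapping = mappings.find((mapping) => mapping.provider === "fal-ai");
console.log(providerMapping?.providerId); // "some-org/some-model-on-fal" in this made-up example
```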
@@ -112,7 +142,7 @@ export async function getInferenceProviderMapping(
`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
);
}
-return
+return providerMapping;
}
return null;
}
@@ -139,8 +169,8 @@ export async function resolveProvider(
if (!modelId) {
throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
}
-const
-provider =
+const mappings = await fetchInferenceProviderMappingForModel(modelId);
+provider = mappings[0]?.provider as InferenceProvider | undefined;
}
if (!provider) {
throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
package/src/lib/makeRequestOptions.ts
CHANGED
@@ -1,7 +1,7 @@
import { HF_HEADER_X_BILL_TO, HF_HUB_URL } from "../config.js";
import { PACKAGE_NAME, PACKAGE_VERSION } from "../package.js";
import type { InferenceTask, Options, RequestArgs } from "../types.js";
-import type {
+import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
import { getInferenceProviderMapping } from "./getInferenceProviderMapping.js";
import type { getProviderHelper } from "./getProviderHelper.js";
import { isUrl } from "./isUrl.js";
@@ -64,6 +64,7 @@ export async function makeRequestOptions(
const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
? ({
+provider: provider,
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
providerId: removeProviderPrefix(maybeModel!, provider),
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -71,7 +72,7 @@ export async function makeRequestOptions(
status: "live",
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
task: task!,
-} satisfies
+} satisfies InferenceProviderMappingEntry)
: await getInferenceProviderMapping(
{
modelId: hfModel,
@@ -109,7 +110,7 @@ export function makeRequestOptionsFromResolvedModel(
data?: Blob | ArrayBuffer;
stream?: boolean;
},
-mapping:
+mapping: InferenceProviderMappingEntry | undefined,
options?: Options & {
task?: InferenceTask;
}
package/src/package.ts
CHANGED
@@ -1,3 +1,3 @@
// Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
-export const PACKAGE_VERSION = "4.0.3";
+export const PACKAGE_VERSION = "4.0.5";
export const PACKAGE_NAME = "@huggingface/inference";
package/src/providers/consts.ts
CHANGED
@@ -1,4 +1,4 @@
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
import type { InferenceProvider } from "../types.js";
import { type ModelId } from "../types.js";

@@ -11,7 +11,7 @@ import { type ModelId } from "../types.js";
*/
export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
InferenceProvider,
-Record<ModelId,
+Record<ModelId, InferenceProviderMappingEntry>
> = {
/**
* "HF model ID" => "Model ID on Inference Provider's side"
package/src/snippets/getInferenceSnippets.ts
CHANGED
@@ -8,7 +8,7 @@ import {
} from "@huggingface/tasks";
import type { PipelineType, WidgetType } from "@huggingface/tasks";
import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
import { getProviderHelper } from "../lib/getProviderHelper.js";
import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.js";
import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
@@ -18,7 +18,8 @@ export type InferenceSnippetOptions = {
streaming?: boolean;
billTo?: string;
accessToken?: string;
-directRequest?: boolean;
+directRequest?: boolean; // to bypass HF routing and call the provider directly
+endpointUrl?: string; // to call a local endpoint directly
} & Record<string, unknown>;

const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
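A hedged usage sketch of the new option. The call site below is assumed, not taken from the package; the model object is abridged and cast for brevity, and the endpoint URL is a placeholder.

```typescript
import { getInferenceSnippets } from "./getInferenceSnippets.js";

// Assumed call site: generate snippets that target a locally running endpoint
// instead of going through HF routing. Only illustrative fields are filled in.
const model = {
	id: "meta-llama/Llama-3.3-70B-Instruct",
	pipeline_tag: "text-generation",
	tags: [],
} as Parameters<typeof getInferenceSnippets>[0];

const generated = getInferenceSnippets(model, "hf-inference", undefined, {
	streaming: true,
	endpointUrl: "http://localhost:8080/v1",
});

console.log(generated);
```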
@@ -53,6 +54,7 @@ interface TemplateParams {
methodName?: string; // specific to snippetBasic
importBase64?: boolean; // specific to snippetImportRequests
importJson?: boolean; // specific to snippetImportRequests
+endpointUrl?: string;
}

// Helpers to find + load templates
@@ -136,7 +138,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
return (
model: ModelDataMinimal,
provider: InferenceProviderOrPolicy,
-inferenceProviderMapping?:
+inferenceProviderMapping?: InferenceProviderMappingEntry,
opts?: InferenceSnippetOptions
): InferenceSnippet[] => {
const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
@@ -172,6 +174,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
{
accessToken: accessTokenOrPlaceholder,
provider,
+endpointUrl: opts?.endpointUrl,
...inputs,
} as RequestArgs,
inferenceProviderMapping,
@@ -217,6 +220,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
provider,
providerModelId: providerModelId ?? model.id,
billTo: opts?.billTo,
+endpointUrl: opts?.endpointUrl,
};

/// Iterate over clients => check if a snippet exists => generate
@@ -265,7 +269,14 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
/// Replace access token placeholder
if (snippet.includes(placeholder)) {
-snippet = replaceAccessTokenPlaceholder(
+snippet = replaceAccessTokenPlaceholder(
+opts?.directRequest,
+placeholder,
+snippet,
+language,
+provider,
+opts?.endpointUrl
+);
}

/// Snippet is ready!
@@ -320,7 +331,7 @@ const snippets: Partial<
(
model: ModelDataMinimal,
provider: InferenceProviderOrPolicy,
-inferenceProviderMapping?:
+inferenceProviderMapping?: InferenceProviderMappingEntry,
opts?: InferenceSnippetOptions
) => InferenceSnippet[]
>
@@ -359,7 +370,7 @@ const snippets: Partial<
export function getInferenceSnippets(
model: ModelDataMinimal,
provider: InferenceProviderOrPolicy,
-inferenceProviderMapping?:
+inferenceProviderMapping?: InferenceProviderMappingEntry,
opts?: Record<string, unknown>
): InferenceSnippet[] {
return model.pipeline_tag && model.pipeline_tag in snippets
@@ -444,21 +455,24 @@ function replaceAccessTokenPlaceholder(
placeholder: string,
snippet: string,
language: InferenceSnippetLanguage,
-provider: InferenceProviderOrPolicy
+provider: InferenceProviderOrPolicy,
+endpointUrl?: string
): string {
// If "opts.accessToken" is not set, the snippets are generated with a placeholder.
// Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.

// Determine if HF_TOKEN or specific provider token should be used
const useHfToken =
-
-(
-(!
-snippet.includes("
-
+!endpointUrl && // custom endpointUrl => use a generic API_TOKEN
+(provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
+(!directRequest && // if explicit directRequest => use provider-specific token
+(snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
+snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
const accessTokenEnvVar = useHfToken
? "HF_TOKEN" // e.g. routed request or hf-inference
-:
+: endpointUrl
+? "API_TOKEN"
+: provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"

// Replace the placeholder with the env variable
if (language === "sh") {
package/src/snippets/templates.exported.ts
CHANGED
@@ -11,14 +11,14 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
"zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
},
"huggingface.js": {
-"basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-"basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-"basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
-"conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
-"conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
-"textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
-"textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
-"textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
+"basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+"basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+"basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
+"conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
+"conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
+"textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
+"textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
+"textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
},
"openai": {
"conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
"conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
"documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
"imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
-"importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
+"importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
"questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
"tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
"textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
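For context, roughly what the updated huggingface.js "conversational" template renders to when an endpointUrl is supplied; the token, model id, and URL below are placeholders, not values from the diff:

```typescript
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient("hf_xxx");

// endpointUrl routes the call to a self-hosted endpoint; provider and model come
// from the template context as usual.
const chatCompletion = await client.chatCompletion({
	endpointUrl: "http://localhost:8080/v1",
	provider: "hf-inference",
	model: "meta-llama/Llama-3.3-70B-Instruct",
	messages: [{ role: "user", content: "Hello!" }],
});

console.log(chatCompletion.choices[0].message);
```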
package/src/types.ts
CHANGED
@@ -1,5 +1,5 @@
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
-import type {
+import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";

/**
* HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
@@ -126,6 +126,6 @@ export interface UrlParams {
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
args: T;
model: string;
-mapping?:
+mapping?: InferenceProviderMappingEntry | undefined;
task?: InferenceTask;
}