@huggingface/inference 4.0.3 → 4.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +4 -3
  2. package/dist/commonjs/lib/getInferenceProviderMapping.d.ts +6 -5
  3. package/dist/commonjs/lib/getInferenceProviderMapping.d.ts.map +1 -1
  4. package/dist/commonjs/lib/getInferenceProviderMapping.js +30 -6
  5. package/dist/commonjs/lib/makeRequestOptions.d.ts +2 -2
  6. package/dist/commonjs/lib/makeRequestOptions.d.ts.map +1 -1
  7. package/dist/commonjs/lib/makeRequestOptions.js +1 -0
  8. package/dist/commonjs/package.d.ts +1 -1
  9. package/dist/commonjs/package.js +1 -1
  10. package/dist/commonjs/providers/consts.d.ts +2 -2
  11. package/dist/commonjs/snippets/getInferenceSnippets.d.ts +3 -2
  12. package/dist/commonjs/snippets/getInferenceSnippets.d.ts.map +1 -1
  13. package/dist/commonjs/snippets/getInferenceSnippets.js +12 -7
  14. package/dist/commonjs/snippets/templates.exported.js +9 -9
  15. package/dist/commonjs/types.d.ts +2 -2
  16. package/dist/esm/lib/getInferenceProviderMapping.d.ts +6 -5
  17. package/dist/esm/lib/getInferenceProviderMapping.d.ts.map +1 -1
  18. package/dist/esm/lib/getInferenceProviderMapping.js +30 -6
  19. package/dist/esm/lib/makeRequestOptions.d.ts +2 -2
  20. package/dist/esm/lib/makeRequestOptions.d.ts.map +1 -1
  21. package/dist/esm/lib/makeRequestOptions.js +1 -0
  22. package/dist/esm/package.d.ts +1 -1
  23. package/dist/esm/package.js +1 -1
  24. package/dist/esm/providers/consts.d.ts +2 -2
  25. package/dist/esm/snippets/getInferenceSnippets.d.ts +3 -2
  26. package/dist/esm/snippets/getInferenceSnippets.d.ts.map +1 -1
  27. package/dist/esm/snippets/getInferenceSnippets.js +12 -7
  28. package/dist/esm/snippets/templates.exported.js +9 -9
  29. package/dist/esm/types.d.ts +2 -2
  30. package/package.json +2 -2
  31. package/src/lib/getInferenceProviderMapping.ts +50 -20
  32. package/src/lib/makeRequestOptions.ts +4 -3
  33. package/src/package.ts +1 -1
  34. package/src/providers/consts.ts +2 -2
  35. package/src/snippets/getInferenceSnippets.ts +27 -13
  36. package/src/snippets/templates.exported.ts +9 -9
  37. package/src/types.ts +2 -2
package/README.md CHANGED
@@ -651,9 +651,10 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
  ```typescript
  // Chat Completion Example
  const MISTRAL_KEY = process.env.MISTRAL_KEY;
- const hf = new InferenceClient(MISTRAL_KEY);
- const ep = hf.endpoint("https://api.mistral.ai");
- const stream = ep.chatCompletionStream({
+ const hf = new InferenceClient(MISTRAL_KEY, {
+ 	endpointUrl: "https://api.mistral.ai",
+ });
+ const stream = hf.chatCompletionStream({
  	model: "mistral-tiny",
  	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
  });
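For orientation (not part of the diff): the README change configures the custom Mistral endpoint through the `endpointUrl` constructor option instead of the previous `hf.endpoint(...)` helper. A minimal sketch of the full updated example, with stream consumption modeled on the `conversationalStream` template further down in this diff:

```ts
import { InferenceClient } from "@huggingface/inference";

const MISTRAL_KEY = process.env.MISTRAL_KEY;

// endpointUrl is now passed as a constructor option
const hf = new InferenceClient(MISTRAL_KEY, {
	endpointUrl: "https://api.mistral.ai",
});

const stream = hf.chatCompletionStream({
	model: "mistral-tiny",
	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
});

for await (const chunk of stream) {
	if (chunk.choices && chunk.choices.length > 0) {
		// Print tokens as they arrive (same pattern as the conversationalStream template below)
		console.log(chunk.choices[0].delta.content);
	}
}
```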
@@ -1,18 +1,19 @@
1
1
  import type { WidgetType } from "@huggingface/tasks";
2
2
  import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
3
- export declare const inferenceProviderMappingCache: Map<string, Partial<Record<"black-forest-labs" | "cerebras" | "cohere" | "fal-ai" | "featherless-ai" | "fireworks-ai" | "groq" | "hf-inference" | "hyperbolic" | "nebius" | "novita" | "nscale" | "openai" | "ovhcloud" | "replicate" | "sambanova" | "together", Omit<InferenceProviderModelMapping, "hfModelId">>>>;
4
- export type InferenceProviderMapping = Partial<Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>>;
5
- export interface InferenceProviderModelMapping {
3
+ export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
4
+ export interface InferenceProviderMappingEntry {
6
5
  adapter?: string;
7
6
  adapterWeightsPath?: string;
8
7
  hfModelId: ModelId;
8
+ provider: string;
9
9
  providerId: string;
10
10
  status: "live" | "staging";
11
11
  task: WidgetType;
12
+ type?: "single-model" | "tag-filter";
12
13
  }
13
14
  export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
14
15
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
15
- }): Promise<InferenceProviderMapping>;
16
+ }): Promise<InferenceProviderMappingEntry[]>;
16
17
  export declare function getInferenceProviderMapping(params: {
17
18
  accessToken?: string;
18
19
  modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
20
21
  task: WidgetType;
21
22
  }, options: {
22
23
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
23
- }): Promise<InferenceProviderModelMapping | null>;
24
+ }): Promise<InferenceProviderMappingEntry | null>;
24
25
  export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
25
26
  //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
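The declaration change above replaces the per-provider `Partial<Record<...>>` map with a flat `InferenceProviderMappingEntry[]`, where each entry carries its own `provider` name and an optional `type` discriminator. A purely illustrative sketch of what such an array might look like (model and provider ids are invented):

```ts
import type { WidgetType } from "@huggingface/tasks";

// Local mirror of the InferenceProviderMappingEntry interface declared above,
// repeated here only so the example is self-contained.
interface InferenceProviderMappingEntry {
	adapter?: string;
	adapterWeightsPath?: string;
	hfModelId: string;
	provider: string;
	providerId: string;
	status: "live" | "staging";
	task: WidgetType;
	type?: "single-model" | "tag-filter";
}

// Hypothetical mapping for one model served by two providers.
const mapping: InferenceProviderMappingEntry[] = [
	{
		provider: "together", // each entry now names its provider explicitly
		hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
		providerId: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		status: "live",
		task: "text-generation",
		type: "single-model",
	},
	{
		provider: "hf-inference",
		hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
		providerId: "meta-llama/Llama-3.3-70B-Instruct",
		status: "live",
		task: "text-generation",
	},
];

console.log(mapping.find((m) => m.provider === "together")?.providerId);
```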
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,uTAA+C,CAAC;AAE1F,MAAM,MAAM,wBAAwB,GAAG,OAAO,CAC7C,MAAM,CAAC,iBAAiB,EAAE,IAAI,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC,CAC3E,CAAC;AAEF,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;CACjB;AAED,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,wBAAwB,CAAC,CAgDnC;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CA4B/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
1
+ {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
@@ -10,6 +10,29 @@ const hf_inference_js_1 = require("../providers/hf-inference.js");
  const typedInclude_js_1 = require("../utils/typedInclude.js");
  const errors_js_1 = require("../errors.js");
  exports.inferenceProviderMappingCache = new Map();
+ /**
+ * Normalize inferenceProviderMapping to always return an array format.
+ * This provides backward and forward compatibility for the API changes.
+ *
+ * Vendored from @huggingface/hub to avoid extra dependency.
+ */
+ function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
+ if (!inferenceProviderMapping) {
+ return [];
+ }
+ // If it's already an array, return it as is
+ if (Array.isArray(inferenceProviderMapping)) {
+ return inferenceProviderMapping;
+ }
+ // Convert mapping to array format
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
+ provider,
+ hfModelId: modelId,
+ providerId: mapping.providerId,
+ status: mapping.status,
+ task: mapping.task,
+ }));
+ }
  async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
  let inferenceProviderMapping;
  if (exports.inferenceProviderMappingCache.has(modelId)) {
@@ -42,7 +65,8 @@ async function fetchInferenceProviderMappingForModel(modelId, accessToken, optio
  if (!payload?.inferenceProviderMapping) {
  throw new errors_js_1.InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
  }
- inferenceProviderMapping = payload.inferenceProviderMapping;
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
+ exports.inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
  }
  return inferenceProviderMapping;
  }
@@ -50,8 +74,8 @@ async function getInferenceProviderMapping(params, options) {
  if (consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
  return consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
  }
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
- const providerMapping = inferenceProviderMapping[params.provider];
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
  if (providerMapping) {
  const equivalentTasks = params.provider === "hf-inference" && (0, typedInclude_js_1.typedInclude)(hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
  ? hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -62,7 +86,7 @@ async function getInferenceProviderMapping(params, options) {
  if (providerMapping.status === "staging") {
  console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
  }
- return { ...providerMapping, hfModelId: params.modelId };
+ return providerMapping;
  }
  return null;
  }
@@ -82,8 +106,8 @@ async function resolveProvider(provider, modelId, endpointUrl) {
  if (!modelId) {
  throw new errors_js_1.InferenceClientInputError("Specifying a model is required when provider is 'auto'");
  }
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
- provider = Object.keys(inferenceProviderMapping)[0];
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
+ provider = mappings[0]?.provider;
  }
  if (!provider) {
  throw new errors_js_1.InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
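Taken together, the compiled changes above do three things: Hub responses are normalized into the array shape whether the server returns the legacy per-provider object or the new array, the normalized result is written back to the cache, and provider lookup becomes a `find()` over that array (with `auto` resolution taking the first entry). A standalone sketch of that normalization and lookup, simplified from the vendored helper and with invented payload values:

```ts
type Entry = {
	provider: string;
	hfModelId: string;
	providerId: string;
	status: "live" | "staging";
	task: string;
};

// Simplified restatement of normalizeInferenceProviderMapping(): accept either
// the new array payload or the legacy { [provider]: {...} } object payload.
function normalize(modelId: string, payload: unknown): Entry[] {
	if (!payload) return [];
	if (Array.isArray(payload)) return payload as Entry[]; // new shape: already entries
	return Object.entries(payload as Record<string, Omit<Entry, "provider" | "hfModelId">>).map(
		([provider, m]) => ({ provider, hfModelId: modelId, ...m }) // legacy shape: lift into entries
	);
}

// Legacy object payload (values invented for the example)…
const legacy = {
	"fal-ai": { providerId: "fal-ai/flux/dev", status: "live" as const, task: "text-to-image" },
};
const entries = normalize("black-forest-labs/FLUX.1-dev", legacy);

// …provider selection is now an array lookup instead of an object index:
const falEntry = entries.find((e) => e.provider === "fal-ai");
// …and provider: "auto" simply resolves to the first entry's provider:
const autoProvider = entries[0]?.provider;
console.log(falEntry, autoProvider);
```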
@@ -1,5 +1,5 @@
1
1
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
2
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
3
3
  import type { getProviderHelper } from "./getProviderHelper.js";
4
4
  /**
5
5
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
22
22
  export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
23
23
  data?: Blob | ArrayBuffer;
24
24
  stream?: boolean;
25
- }, mapping: InferenceProviderModelMapping | undefined, options?: Options & {
25
+ }, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
26
26
  task?: InferenceTask;
27
27
  }): {
28
28
  url: string;
@@ -1 +1 @@
1
- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAqE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
1
+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
@@ -41,6 +41,7 @@ async function makeRequestOptions(args, providerHelper, options) {
41
41
  }
42
42
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
43
43
  ? {
44
+ provider: provider,
44
45
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
45
46
  providerId: removeProviderPrefix(maybeModel, provider),
46
47
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.3";
1
+ export declare const PACKAGE_VERSION = "4.0.5";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -2,5 +2,5 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
4
4
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
5
- exports.PACKAGE_VERSION = "4.0.3";
5
+ exports.PACKAGE_VERSION = "4.0.5";
6
6
  exports.PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
  /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
8
8
  *
9
9
  * We also inject into this dictionary from tests.
10
10
  */
11
- export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderModelMapping>>;
11
+ export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
12
12
  //# sourceMappingURL=consts.d.ts.map
@@ -1,11 +1,12 @@
1
1
  import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
3
3
  import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
7
  accessToken?: string;
8
8
  directRequest?: boolean;
9
+ endpointUrl?: string;
9
10
  } & Record<string, unknown>;
10
- export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
11
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
11
12
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
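The new `endpointUrl` member of `InferenceSnippetOptions` lets callers generate snippets that target a custom URL (e.g. a dedicated Inference Endpoint or any OpenAI-compatible server). A hedged sketch of a call, assuming the package's `snippets` namespace export and with the model object reduced to plausible `ModelDataMinimal` fields:

```ts
import { snippets } from "@huggingface/inference";
import type { ModelDataMinimal } from "@huggingface/tasks";

// Assumed-shape model descriptor; adjust fields to whatever ModelDataMinimal
// actually requires in your @huggingface/tasks version.
const model = {
	id: "mistralai/Mistral-7B-Instruct-v0.3",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
} as ModelDataMinimal;

const generated = snippets.getInferenceSnippets(model, "hf-inference", undefined, {
	streaming: true,
	endpointUrl: "https://my-endpoint.example.com", // new option in this release
});

for (const s of generated) {
	console.log(`--- ${s.client} (${s.language}) ---\n${s.content}`);
}
```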
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -110,6 +110,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
110
110
  const request = (0, makeRequestOptions_js_1.makeRequestOptionsFromResolvedModel)(providerModelId, providerHelper, {
111
111
  accessToken: accessTokenOrPlaceholder,
112
112
  provider,
113
+ endpointUrl: opts?.endpointUrl,
113
114
  ...inputs,
114
115
  }, inferenceProviderMapping, {
115
116
  task,
@@ -151,6 +152,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
151
152
  provider,
152
153
  providerModelId: providerModelId ?? model.id,
153
154
  billTo: opts?.billTo,
155
+ endpointUrl: opts?.endpointUrl,
154
156
  };
155
157
  /// Iterate over clients => check if a snippet exists => generate
156
158
  const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -195,7 +197,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
195
197
  }
196
198
  /// Replace access token placeholder
197
199
  if (snippet.includes(placeholder)) {
198
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
200
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
199
201
  }
200
202
  /// Snippet is ready!
201
203
  return { language, client: client, content: snippet };
@@ -322,17 +324,20 @@ function indentString(str) {
322
324
  function removeSuffix(str, suffix) {
323
325
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
324
326
  }
325
- function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
327
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
326
328
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
327
329
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
328
330
  // Determine if HF_TOKEN or specific provider token should be used
329
- const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
330
- (!directRequest && // if explicit directRequest => use provider-specific token
331
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
332
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
331
+ const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
332
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
333
+ (!directRequest && // if explicit directRequest => use provider-specific token
334
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
335
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
333
336
  const accessTokenEnvVar = useHfToken
334
337
  ? "HF_TOKEN" // e.g. routed request or hf-inference
335
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
338
+ : endpointUrl
339
+ ? "API_TOKEN"
340
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
336
341
  // Replace the placeholder with the env variable
337
342
  if (language === "sh") {
338
343
  snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
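The net effect of this rewrite is the environment variable chosen for access tokens in rendered snippets: a custom `endpointUrl` now yields a generic `API_TOKEN`, hf-inference / routed / client-based snippets keep `HF_TOKEN`, and everything else falls back to a provider-specific key. A condensed, standalone restatement of that decision (not the library's exported API):

```ts
// Mirrors the useHfToken / accessTokenEnvVar logic in the hunk above.
function tokenEnvVar(opts: {
	provider: string;
	endpointUrl?: string;
	directRequest?: boolean;
	snippet: string;
}): string {
	const { provider, endpointUrl, directRequest, snippet } = opts;
	const useHfToken =
		!endpointUrl && // custom endpoint => generic token below
		(provider === "hf-inference" ||
			(!directRequest &&
				(snippet.includes("InferenceClient") || // client-based snippets route through HF
					snippet.includes("https://router.huggingface.co")))); // explicit routed request
	if (useHfToken) return "HF_TOKEN";
	if (endpointUrl) return "API_TOKEN";
	return provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
}

// Examples (hypothetical inputs):
console.log(tokenEnvVar({ provider: "replicate", directRequest: true, snippet: 'fetch("https://api.replicate.com/…")' })); // REPLICATE_API_KEY
console.log(tokenEnvVar({ provider: "fal-ai", endpointUrl: "https://example.com/v1", snippet: "new InferenceClient(…)" })); // API_TOKEN
console.log(tokenEnvVar({ provider: "together", snippet: "new InferenceClient(…)" })); // HF_TOKEN
```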
@@ -14,14 +14,14 @@ exports.templates = {
14
14
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
15
15
  },
16
16
  "huggingface.js": {
17
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
18
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
19
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
20
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
21
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
22
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
23
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
24
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
17
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
18
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
19
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
20
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
21
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
22
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
23
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
24
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
25
25
  },
26
26
  "openai": {
27
27
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -40,7 +40,7 @@ exports.templates = {
40
40
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
41
41
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
43
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
43
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
44
44
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
45
45
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
46
46
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
  /**
4
4
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
5
5
  */
@@ -91,7 +91,7 @@ export interface UrlParams {
91
91
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
92
92
  args: T;
93
93
  model: string;
94
- mapping?: InferenceProviderModelMapping | undefined;
94
+ mapping?: InferenceProviderMappingEntry | undefined;
95
95
  task?: InferenceTask;
96
96
  }
97
97
  //# sourceMappingURL=types.d.ts.map
@@ -1,18 +1,19 @@
1
1
  import type { WidgetType } from "@huggingface/tasks";
2
2
  import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
3
- export declare const inferenceProviderMappingCache: Map<string, Partial<Record<"black-forest-labs" | "cerebras" | "cohere" | "fal-ai" | "featherless-ai" | "fireworks-ai" | "groq" | "hf-inference" | "hyperbolic" | "nebius" | "novita" | "nscale" | "openai" | "ovhcloud" | "replicate" | "sambanova" | "together", Omit<InferenceProviderModelMapping, "hfModelId">>>>;
4
- export type InferenceProviderMapping = Partial<Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>>;
5
- export interface InferenceProviderModelMapping {
3
+ export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
4
+ export interface InferenceProviderMappingEntry {
6
5
  adapter?: string;
7
6
  adapterWeightsPath?: string;
8
7
  hfModelId: ModelId;
8
+ provider: string;
9
9
  providerId: string;
10
10
  status: "live" | "staging";
11
11
  task: WidgetType;
12
+ type?: "single-model" | "tag-filter";
12
13
  }
13
14
  export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
14
15
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
15
- }): Promise<InferenceProviderMapping>;
16
+ }): Promise<InferenceProviderMappingEntry[]>;
16
17
  export declare function getInferenceProviderMapping(params: {
17
18
  accessToken?: string;
18
19
  modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
20
21
  task: WidgetType;
21
22
  }, options: {
22
23
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
23
- }): Promise<InferenceProviderModelMapping | null>;
24
+ }): Promise<InferenceProviderMappingEntry | null>;
24
25
  export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
25
26
  //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,uTAA+C,CAAC;AAE1F,MAAM,MAAM,wBAAwB,GAAG,OAAO,CAC7C,MAAM,CAAC,iBAAiB,EAAE,IAAI,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC,CAC3E,CAAC;AAEF,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;CACjB;AAED,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,wBAAwB,CAAC,CAgDnC;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CA4B/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
1
+ {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
@@ -4,6 +4,29 @@ import { EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS } from "../providers/hf-inferenc
4
4
  import { typedInclude } from "../utils/typedInclude.js";
5
5
  import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
6
6
  export const inferenceProviderMappingCache = new Map();
7
+ /**
8
+ * Normalize inferenceProviderMapping to always return an array format.
9
+ * This provides backward and forward compatibility for the API changes.
10
+ *
11
+ * Vendored from @huggingface/hub to avoid extra dependency.
12
+ */
13
+ function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
14
+ if (!inferenceProviderMapping) {
15
+ return [];
16
+ }
17
+ // If it's already an array, return it as is
18
+ if (Array.isArray(inferenceProviderMapping)) {
19
+ return inferenceProviderMapping;
20
+ }
21
+ // Convert mapping to array format
22
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
23
+ provider,
24
+ hfModelId: modelId,
25
+ providerId: mapping.providerId,
26
+ status: mapping.status,
27
+ task: mapping.task,
28
+ }));
29
+ }
7
30
  export async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
8
31
  let inferenceProviderMapping;
9
32
  if (inferenceProviderMappingCache.has(modelId)) {
@@ -36,7 +59,8 @@ export async function fetchInferenceProviderMappingForModel(modelId, accessToken
36
59
  if (!payload?.inferenceProviderMapping) {
37
60
  throw new InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
38
61
  }
39
- inferenceProviderMapping = payload.inferenceProviderMapping;
62
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
63
+ inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
40
64
  }
41
65
  return inferenceProviderMapping;
42
66
  }
@@ -44,8 +68,8 @@ export async function getInferenceProviderMapping(params, options) {
44
68
  if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
45
69
  return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
46
70
  }
47
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
48
- const providerMapping = inferenceProviderMapping[params.provider];
71
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
72
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
49
73
  if (providerMapping) {
50
74
  const equivalentTasks = params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
51
75
  ? EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -56,7 +80,7 @@ export async function getInferenceProviderMapping(params, options) {
56
80
  if (providerMapping.status === "staging") {
57
81
  console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
58
82
  }
59
- return { ...providerMapping, hfModelId: params.modelId };
83
+ return providerMapping;
60
84
  }
61
85
  return null;
62
86
  }
@@ -76,8 +100,8 @@ export async function resolveProvider(provider, modelId, endpointUrl) {
76
100
  if (!modelId) {
77
101
  throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
78
102
  }
79
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
80
- provider = Object.keys(inferenceProviderMapping)[0];
103
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
104
+ provider = mappings[0]?.provider;
81
105
  }
82
106
  if (!provider) {
83
107
  throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
@@ -1,5 +1,5 @@
1
1
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
2
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
3
3
  import type { getProviderHelper } from "./getProviderHelper.js";
4
4
  /**
5
5
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
22
22
  export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
23
23
  data?: Blob | ArrayBuffer;
24
24
  stream?: boolean;
25
- }, mapping: InferenceProviderModelMapping | undefined, options?: Options & {
25
+ }, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
26
26
  task?: InferenceTask;
27
27
  }): {
28
28
  url: string;
@@ -1 +1 @@
1
- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAqE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
1
+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
@@ -37,6 +37,7 @@ export async function makeRequestOptions(args, providerHelper, options) {
37
37
  }
38
38
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
39
39
  ? {
40
+ provider: provider,
40
41
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
41
42
  providerId: removeProviderPrefix(maybeModel, provider),
42
43
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.3";
1
+ export declare const PACKAGE_VERSION = "4.0.5";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.3";
2
+ export const PACKAGE_VERSION = "4.0.5";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
  /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
8
8
  *
9
9
  * We also inject into this dictionary from tests.
10
10
  */
11
- export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderModelMapping>>;
11
+ export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
12
12
  //# sourceMappingURL=consts.d.ts.map
@@ -1,11 +1,12 @@
1
1
  import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
3
3
  import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
7
  accessToken?: string;
8
8
  directRequest?: boolean;
9
+ endpointUrl?: string;
9
10
  } & Record<string, unknown>;
10
- export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
11
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
11
12
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -107,6 +107,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
107
107
  const request = makeRequestOptionsFromResolvedModel(providerModelId, providerHelper, {
108
108
  accessToken: accessTokenOrPlaceholder,
109
109
  provider,
110
+ endpointUrl: opts?.endpointUrl,
110
111
  ...inputs,
111
112
  }, inferenceProviderMapping, {
112
113
  task,
@@ -148,6 +149,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
148
149
  provider,
149
150
  providerModelId: providerModelId ?? model.id,
150
151
  billTo: opts?.billTo,
152
+ endpointUrl: opts?.endpointUrl,
151
153
  };
152
154
  /// Iterate over clients => check if a snippet exists => generate
153
155
  const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -192,7 +194,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
192
194
  }
193
195
  /// Replace access token placeholder
194
196
  if (snippet.includes(placeholder)) {
195
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
197
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
196
198
  }
197
199
  /// Snippet is ready!
198
200
  return { language, client: client, content: snippet };
@@ -319,17 +321,20 @@ function indentString(str) {
319
321
  function removeSuffix(str, suffix) {
320
322
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
321
323
  }
322
- function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
324
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
323
325
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
324
326
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
325
327
  // Determine if HF_TOKEN or specific provider token should be used
326
- const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
327
- (!directRequest && // if explicit directRequest => use provider-specific token
328
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
329
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
328
+ const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
329
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
330
+ (!directRequest && // if explicit directRequest => use provider-specific token
331
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
332
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
330
333
  const accessTokenEnvVar = useHfToken
331
334
  ? "HF_TOKEN" // e.g. routed request or hf-inference
332
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
335
+ : endpointUrl
336
+ ? "API_TOKEN"
337
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
333
338
  // Replace the placeholder with the env variable
334
339
  if (language === "sh") {
335
340
  snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
@@ -11,14 +11,14 @@ export const templates = {
11
11
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
12
12
  },
13
13
  "huggingface.js": {
14
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
14
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
22
22
  },
23
23
  "openai": {
24
24
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates = {
37
37
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
38
38
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
39
39
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
40
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
40
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
41
41
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
43
43
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
  /**
4
4
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
5
5
  */
@@ -91,7 +91,7 @@ export interface UrlParams {
91
91
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
92
92
  args: T;
93
93
  model: string;
94
- mapping?: InferenceProviderModelMapping | undefined;
94
+ mapping?: InferenceProviderMappingEntry | undefined;
95
95
  task?: InferenceTask;
96
96
  }
97
97
  //# sourceMappingURL=types.d.ts.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/inference",
3
- "version": "4.0.3",
3
+ "version": "4.0.5",
4
4
  "license": "MIT",
5
5
  "author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
6
6
  "description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
@@ -40,7 +40,7 @@
40
40
  },
41
41
  "type": "module",
42
42
  "dependencies": {
43
- "@huggingface/tasks": "^0.19.12",
43
+ "@huggingface/tasks": "^0.19.15",
44
44
  "@huggingface/jinja": "^0.5.0"
45
45
  },
46
46
  "devDependencies": {
@@ -6,19 +6,48 @@ import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../t
6
6
  import { typedInclude } from "../utils/typedInclude.js";
7
7
  import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
8
8
 
9
- export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMapping>();
9
+ export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMappingEntry[]>();
10
10
 
11
- export type InferenceProviderMapping = Partial<
12
- Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>
13
- >;
14
-
15
- export interface InferenceProviderModelMapping {
11
+ export interface InferenceProviderMappingEntry {
16
12
  adapter?: string;
17
13
  adapterWeightsPath?: string;
18
14
  hfModelId: ModelId;
15
+ provider: string;
19
16
  providerId: string;
20
17
  status: "live" | "staging";
21
18
  task: WidgetType;
19
+ type?: "single-model" | "tag-filter";
20
+ }
21
+
22
+ /**
23
+ * Normalize inferenceProviderMapping to always return an array format.
24
+ * This provides backward and forward compatibility for the API changes.
25
+ *
26
+ * Vendored from @huggingface/hub to avoid extra dependency.
27
+ */
28
+ function normalizeInferenceProviderMapping(
29
+ modelId: ModelId,
30
+ inferenceProviderMapping?:
31
+ | InferenceProviderMappingEntry[]
32
+ | Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>
33
+ ): InferenceProviderMappingEntry[] {
34
+ if (!inferenceProviderMapping) {
35
+ return [];
36
+ }
37
+
38
+ // If it's already an array, return it as is
39
+ if (Array.isArray(inferenceProviderMapping)) {
40
+ return inferenceProviderMapping;
41
+ }
42
+
43
+ // Convert mapping to array format
44
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
45
+ provider,
46
+ hfModelId: modelId,
47
+ providerId: mapping.providerId,
48
+ status: mapping.status,
49
+ task: mapping.task,
50
+ }));
22
51
  }
23
52
 
24
53
  export async function fetchInferenceProviderMappingForModel(
@@ -27,8 +56,8 @@ export async function fetchInferenceProviderMappingForModel(
27
56
  options?: {
28
57
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
29
58
  }
30
- ): Promise<InferenceProviderMapping> {
31
- let inferenceProviderMapping: InferenceProviderMapping | null;
59
+ ): Promise<InferenceProviderMappingEntry[]> {
60
+ let inferenceProviderMapping: InferenceProviderMappingEntry[] | null;
32
61
  if (inferenceProviderMappingCache.has(modelId)) {
33
62
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
34
63
  inferenceProviderMapping = inferenceProviderMappingCache.get(modelId)!;
@@ -55,7 +84,11 @@ export async function fetchInferenceProviderMappingForModel(
55
84
  );
56
85
  }
57
86
  }
58
- let payload: { inferenceProviderMapping?: InferenceProviderMapping } | null = null;
87
+ let payload: {
88
+ inferenceProviderMapping?:
89
+ | InferenceProviderMappingEntry[]
90
+ | Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>;
91
+ } | null = null;
59
92
  try {
60
93
  payload = await resp.json();
61
94
  } catch {
@@ -72,7 +105,8 @@ export async function fetchInferenceProviderMappingForModel(
72
105
  { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() }
73
106
  );
74
107
  }
75
- inferenceProviderMapping = payload.inferenceProviderMapping;
108
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
109
+ inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
76
110
  }
77
111
  return inferenceProviderMapping;
78
112
  }
@@ -87,16 +121,12 @@ export async function getInferenceProviderMapping(
87
121
  options: {
88
122
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
89
123
  }
90
- ): Promise<InferenceProviderModelMapping | null> {
124
+ ): Promise<InferenceProviderMappingEntry | null> {
91
125
  if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
92
126
  return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
93
127
  }
94
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(
95
- params.modelId,
96
- params.accessToken,
97
- options
98
- );
99
- const providerMapping = inferenceProviderMapping[params.provider];
128
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
129
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
100
130
  if (providerMapping) {
101
131
  const equivalentTasks =
102
132
  params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
@@ -112,7 +142,7 @@ export async function getInferenceProviderMapping(
112
142
  `Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
113
143
  );
114
144
  }
115
- return { ...providerMapping, hfModelId: params.modelId };
145
+ return providerMapping;
116
146
  }
117
147
  return null;
118
148
  }
@@ -139,8 +169,8 @@ export async function resolveProvider(
139
169
  if (!modelId) {
140
170
  throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
141
171
  }
142
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
143
- provider = Object.keys(inferenceProviderMapping)[0] as InferenceProvider | undefined;
172
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
173
+ provider = mappings[0]?.provider as InferenceProvider | undefined;
144
174
  }
145
175
  if (!provider) {
146
176
  throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
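Taken together, the hunks above replace the provider-keyed record with a flat `InferenceProviderMappingEntry[]`, normalize whichever shape the Hub returns, and let `resolveProvider` simply pick `mappings[0]?.provider` under the `"auto"` policy. A minimal standalone sketch of that normalization, with invented model and provider data (the real implementation is the vendored `normalizeInferenceProviderMapping` shown in the diff above):

```typescript
// Illustration only: mirrors the normalization logic from the diff above.
// The model id and provider entries below are placeholders, not real Hub data.
interface MappingEntry {
  provider: string;
  hfModelId: string;
  providerId: string;
  status: "live" | "staging";
  task: string;
}

type LegacyMapping = Record<string, { providerId: string; status: "live" | "staging"; task: string }>;

function toEntries(modelId: string, mapping?: MappingEntry[] | LegacyMapping): MappingEntry[] {
  if (!mapping) return [];
  // New API shape: already an array of entries.
  if (Array.isArray(mapping)) return mapping;
  // Legacy shape: record keyed by provider name.
  return Object.entries(mapping).map(([provider, m]) => ({
    provider,
    hfModelId: modelId,
    providerId: m.providerId,
    status: m.status,
    task: m.task,
  }));
}

// Legacy record as older Hub responses returned it (hypothetical values):
const legacy: LegacyMapping = {
  "fal-ai": { providerId: "fal-ai/some-model", status: "live", task: "text-to-image" },
};
const entries = toEntries("some-org/some-model", legacy);
console.log(entries[0]?.provider); // "fal-ai" — what resolveProvider would pick under "auto"
```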
@@ -1,7 +1,7 @@
1
1
  import { HF_HEADER_X_BILL_TO, HF_HUB_URL } from "../config.js";
2
2
  import { PACKAGE_NAME, PACKAGE_VERSION } from "../package.js";
3
3
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
4
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
4
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
5
5
  import { getInferenceProviderMapping } from "./getInferenceProviderMapping.js";
6
6
  import type { getProviderHelper } from "./getProviderHelper.js";
7
7
  import { isUrl } from "./isUrl.js";
@@ -64,6 +64,7 @@ export async function makeRequestOptions(
64
64
 
65
65
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
66
66
  ? ({
67
+ provider: provider,
67
68
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
68
69
  providerId: removeProviderPrefix(maybeModel!, provider),
69
70
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -71,7 +72,7 @@ export async function makeRequestOptions(
71
72
  status: "live",
72
73
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
73
74
  task: task!,
74
- } satisfies InferenceProviderModelMapping)
75
+ } satisfies InferenceProviderMappingEntry)
75
76
  : await getInferenceProviderMapping(
76
77
  {
77
78
  modelId: hfModel,
@@ -109,7 +110,7 @@ export function makeRequestOptionsFromResolvedModel(
109
110
  data?: Blob | ArrayBuffer;
110
111
  stream?: boolean;
111
112
  },
112
- mapping: InferenceProviderModelMapping | undefined,
113
+ mapping: InferenceProviderMappingEntry | undefined,
113
114
  options?: Options & {
114
115
  task?: InferenceTask;
115
116
  }
package/src/package.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.3";
2
+ export const PACKAGE_VERSION = "4.0.5";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
 
@@ -11,7 +11,7 @@ import { type ModelId } from "../types.js";
11
11
  */
12
12
  export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
13
13
  InferenceProvider,
14
- Record<ModelId, InferenceProviderModelMapping>
14
+ Record<ModelId, InferenceProviderMappingEntry>
15
15
  > = {
16
16
  /**
17
17
  * "HF model ID" => "Model ID on Inference Provider's side"
@@ -8,7 +8,7 @@ import {
8
8
  } from "@huggingface/tasks";
9
9
  import type { PipelineType, WidgetType } from "@huggingface/tasks";
10
10
  import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks";
11
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
11
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
12
12
  import { getProviderHelper } from "../lib/getProviderHelper.js";
13
13
  import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.js";
14
14
  import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
@@ -18,7 +18,8 @@ export type InferenceSnippetOptions = {
18
18
  streaming?: boolean;
19
19
  billTo?: string;
20
20
  accessToken?: string;
21
- directRequest?: boolean;
21
+ directRequest?: boolean; // to bypass HF routing and call the provider directly
22
+ endpointUrl?: string; // to call a local endpoint directly
22
23
  } & Record<string, unknown>;
23
24
 
24
25
  const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
@@ -53,6 +54,7 @@ interface TemplateParams {
53
54
  methodName?: string; // specific to snippetBasic
54
55
  importBase64?: boolean; // specific to snippetImportRequests
55
56
  importJson?: boolean; // specific to snippetImportRequests
57
+ endpointUrl?: string;
56
58
  }
57
59
 
58
60
  // Helpers to find + load templates
@@ -136,7 +138,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
136
138
  return (
137
139
  model: ModelDataMinimal,
138
140
  provider: InferenceProviderOrPolicy,
139
- inferenceProviderMapping?: InferenceProviderModelMapping,
141
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
140
142
  opts?: InferenceSnippetOptions
141
143
  ): InferenceSnippet[] => {
142
144
  const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
@@ -172,6 +174,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
172
174
  {
173
175
  accessToken: accessTokenOrPlaceholder,
174
176
  provider,
177
+ endpointUrl: opts?.endpointUrl,
175
178
  ...inputs,
176
179
  } as RequestArgs,
177
180
  inferenceProviderMapping,
@@ -217,6 +220,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
217
220
  provider,
218
221
  providerModelId: providerModelId ?? model.id,
219
222
  billTo: opts?.billTo,
223
+ endpointUrl: opts?.endpointUrl,
220
224
  };
221
225
 
222
226
  /// Iterate over clients => check if a snippet exists => generate
@@ -265,7 +269,14 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
265
269
 
266
270
  /// Replace access token placeholder
267
271
  if (snippet.includes(placeholder)) {
268
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
272
+ snippet = replaceAccessTokenPlaceholder(
273
+ opts?.directRequest,
274
+ placeholder,
275
+ snippet,
276
+ language,
277
+ provider,
278
+ opts?.endpointUrl
279
+ );
269
280
  }
270
281
 
271
282
  /// Snippet is ready!
@@ -320,7 +331,7 @@ const snippets: Partial<
320
331
  (
321
332
  model: ModelDataMinimal,
322
333
  provider: InferenceProviderOrPolicy,
323
- inferenceProviderMapping?: InferenceProviderModelMapping,
334
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
324
335
  opts?: InferenceSnippetOptions
325
336
  ) => InferenceSnippet[]
326
337
  >
@@ -359,7 +370,7 @@ const snippets: Partial<
359
370
  export function getInferenceSnippets(
360
371
  model: ModelDataMinimal,
361
372
  provider: InferenceProviderOrPolicy,
362
- inferenceProviderMapping?: InferenceProviderModelMapping,
373
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
363
374
  opts?: Record<string, unknown>
364
375
  ): InferenceSnippet[] {
365
376
  return model.pipeline_tag && model.pipeline_tag in snippets
@@ -444,21 +455,24 @@ function replaceAccessTokenPlaceholder(
444
455
  placeholder: string,
445
456
  snippet: string,
446
457
  language: InferenceSnippetLanguage,
447
- provider: InferenceProviderOrPolicy
458
+ provider: InferenceProviderOrPolicy,
459
+ endpointUrl?: string
448
460
  ): string {
449
461
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
450
462
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
451
463
 
452
464
  // Determine if HF_TOKEN or specific provider token should be used
453
465
  const useHfToken =
454
- provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
455
- (!directRequest && // if explicit directRequest => use provider-specific token
456
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
457
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
458
-
466
+ !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
467
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
468
+ (!directRequest && // if explicit directRequest => use provider-specific token
469
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
470
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
459
471
  const accessTokenEnvVar = useHfToken
460
472
  ? "HF_TOKEN" // e.g. routed request or hf-inference
461
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
473
+ : endpointUrl
474
+ ? "API_TOKEN"
475
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
462
476
 
463
477
  // Replace the placeholder with the env variable
464
478
  if (language === "sh") {
@@ -11,14 +11,14 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
11
11
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
12
12
  },
13
13
  "huggingface.js": {
14
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
14
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
22
22
  },
23
23
  "openai": {
24
24
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
37
37
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
38
38
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
39
39
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
40
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
40
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
41
41
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
43
43
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
package/src/types.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
 
4
4
  /**
5
5
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
@@ -126,6 +126,6 @@ export interface UrlParams {
126
126
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
127
127
  args: T;
128
128
  model: string;
129
- mapping?: InferenceProviderModelMapping | undefined;
129
+ mapping?: InferenceProviderMappingEntry | undefined;
130
130
  task?: InferenceTask;
131
131
  }