@huggingface/inference 4.0.3 → 4.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +4 -3
  2. package/dist/commonjs/lib/getInferenceProviderMapping.d.ts +6 -5
  3. package/dist/commonjs/lib/getInferenceProviderMapping.d.ts.map +1 -1
  4. package/dist/commonjs/lib/getInferenceProviderMapping.js +30 -6
  5. package/dist/commonjs/lib/makeRequestOptions.d.ts +2 -2
  6. package/dist/commonjs/lib/makeRequestOptions.d.ts.map +1 -1
  7. package/dist/commonjs/lib/makeRequestOptions.js +1 -0
  8. package/dist/commonjs/package.d.ts +1 -1
  9. package/dist/commonjs/package.js +1 -1
  10. package/dist/commonjs/providers/consts.d.ts +2 -2
  11. package/dist/commonjs/snippets/getInferenceSnippets.d.ts +3 -2
  12. package/dist/commonjs/snippets/getInferenceSnippets.d.ts.map +1 -1
  13. package/dist/commonjs/snippets/getInferenceSnippets.js +12 -7
  14. package/dist/commonjs/snippets/templates.exported.js +9 -9
  15. package/dist/commonjs/types.d.ts +2 -2
  16. package/dist/esm/lib/getInferenceProviderMapping.d.ts +6 -5
  17. package/dist/esm/lib/getInferenceProviderMapping.d.ts.map +1 -1
  18. package/dist/esm/lib/getInferenceProviderMapping.js +30 -6
  19. package/dist/esm/lib/makeRequestOptions.d.ts +2 -2
  20. package/dist/esm/lib/makeRequestOptions.d.ts.map +1 -1
  21. package/dist/esm/lib/makeRequestOptions.js +1 -0
  22. package/dist/esm/package.d.ts +1 -1
  23. package/dist/esm/package.js +1 -1
  24. package/dist/esm/providers/consts.d.ts +2 -2
  25. package/dist/esm/snippets/getInferenceSnippets.d.ts +3 -2
  26. package/dist/esm/snippets/getInferenceSnippets.d.ts.map +1 -1
  27. package/dist/esm/snippets/getInferenceSnippets.js +12 -7
  28. package/dist/esm/snippets/templates.exported.js +9 -9
  29. package/dist/esm/types.d.ts +2 -2
  30. package/package.json +2 -2
  31. package/src/lib/getInferenceProviderMapping.ts +50 -20
  32. package/src/lib/makeRequestOptions.ts +4 -3
  33. package/src/package.ts +1 -1
  34. package/src/providers/consts.ts +2 -2
  35. package/src/snippets/getInferenceSnippets.ts +27 -13
  36. package/src/snippets/templates.exported.ts +9 -9
  37. package/src/types.ts +2 -2
package/README.md CHANGED
@@ -651,9 +651,10 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
  ```typescript
  // Chat Completion Example
  const MISTRAL_KEY = process.env.MISTRAL_KEY;
- const hf = new InferenceClient(MISTRAL_KEY);
- const ep = hf.endpoint("https://api.mistral.ai");
- const stream = ep.chatCompletionStream({
+ const hf = new InferenceClient(MISTRAL_KEY, {
+ 	endpointUrl: "https://api.mistral.ai",
+ });
+ const stream = hf.chatCompletionStream({
  	model: "mistral-tiny",
  	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
  });
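For orientation (not part of the diff): the README change configures the custom Mistral endpoint through the `endpointUrl` constructor option instead of the previous `hf.endpoint(...)` helper. A minimal sketch of the full updated example, with stream consumption modeled on the `conversationalStream` template further down in this diff:

```ts
import { InferenceClient } from "@huggingface/inference";

const MISTRAL_KEY = process.env.MISTRAL_KEY;

// endpointUrl is now passed as a constructor option
const hf = new InferenceClient(MISTRAL_KEY, {
	endpointUrl: "https://api.mistral.ai",
});

const stream = hf.chatCompletionStream({
	model: "mistral-tiny",
	messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
});

for await (const chunk of stream) {
	if (chunk.choices && chunk.choices.length > 0) {
		// Print tokens as they arrive (same pattern as the conversationalStream template below)
		console.log(chunk.choices[0].delta.content);
	}
}
```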
@@ -1,18 +1,19 @@
1
1
  import type { WidgetType } from "@huggingface/tasks";
2
2
  import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
3
- export declare const inferenceProviderMappingCache: Map<string, Partial<Record<"black-forest-labs" | "cerebras" | "cohere" | "fal-ai" | "featherless-ai" | "fireworks-ai" | "groq" | "hf-inference" | "hyperbolic" | "nebius" | "novita" | "nscale" | "openai" | "ovhcloud" | "replicate" | "sambanova" | "together", Omit<InferenceProviderModelMapping, "hfModelId">>>>;
4
- export type InferenceProviderMapping = Partial<Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>>;
5
- export interface InferenceProviderModelMapping {
3
+ export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
4
+ export interface InferenceProviderMappingEntry {
6
5
  adapter?: string;
7
6
  adapterWeightsPath?: string;
8
7
  hfModelId: ModelId;
8
+ provider: string;
9
9
  providerId: string;
10
10
  status: "live" | "staging";
11
11
  task: WidgetType;
12
+ type?: "single-model" | "tag-filter";
12
13
  }
13
14
  export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
14
15
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
15
- }): Promise<InferenceProviderMapping>;
16
+ }): Promise<InferenceProviderMappingEntry[]>;
16
17
  export declare function getInferenceProviderMapping(params: {
17
18
  accessToken?: string;
18
19
  modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
20
21
  task: WidgetType;
21
22
  }, options: {
22
23
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
23
- }): Promise<InferenceProviderModelMapping | null>;
24
+ }): Promise<InferenceProviderMappingEntry | null>;
24
25
  export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
25
26
  //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
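The declaration change above replaces the per-provider `Partial<Record<...>>` map with a flat `InferenceProviderMappingEntry[]`, where each entry carries its own `provider` name and an optional `type` discriminator. A purely illustrative sketch of what such an array might look like (model and provider ids are invented):

```ts
import type { WidgetType } from "@huggingface/tasks";

// Local mirror of the InferenceProviderMappingEntry interface declared above,
// repeated here only so the example is self-contained.
interface InferenceProviderMappingEntry {
	adapter?: string;
	adapterWeightsPath?: string;
	hfModelId: string;
	provider: string;
	providerId: string;
	status: "live" | "staging";
	task: WidgetType;
	type?: "single-model" | "tag-filter";
}

// Hypothetical mapping for one model served by two providers.
const mapping: InferenceProviderMappingEntry[] = [
	{
		provider: "together", // each entry now names its provider explicitly
		hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
		providerId: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		status: "live",
		task: "text-generation",
		type: "single-model",
	},
	{
		provider: "hf-inference",
		hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
		providerId: "meta-llama/Llama-3.3-70B-Instruct",
		status: "live",
		task: "text-generation",
	},
];

console.log(mapping.find((m) => m.provider === "together")?.providerId);
```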
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,uTAA+C,CAAC;AAE1F,MAAM,MAAM,wBAAwB,GAAG,OAAO,CAC7C,MAAM,CAAC,iBAAiB,EAAE,IAAI,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC,CAC3E,CAAC;AAEF,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;CACjB;AAED,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,wBAAwB,CAAC,CAgDnC;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CA4B/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
1
+ {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
@@ -10,6 +10,29 @@ const hf_inference_js_1 = require("../providers/hf-inference.js");
  const typedInclude_js_1 = require("../utils/typedInclude.js");
  const errors_js_1 = require("../errors.js");
  exports.inferenceProviderMappingCache = new Map();
+ /**
+ * Normalize inferenceProviderMapping to always return an array format.
+ * This provides backward and forward compatibility for the API changes.
+ *
+ * Vendored from @huggingface/hub to avoid extra dependency.
+ */
+ function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
+ if (!inferenceProviderMapping) {
+ return [];
+ }
+ // If it's already an array, return it as is
+ if (Array.isArray(inferenceProviderMapping)) {
+ return inferenceProviderMapping;
+ }
+ // Convert mapping to array format
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
+ provider,
+ hfModelId: modelId,
+ providerId: mapping.providerId,
+ status: mapping.status,
+ task: mapping.task,
+ }));
+ }
  async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
  let inferenceProviderMapping;
  if (exports.inferenceProviderMappingCache.has(modelId)) {
@@ -42,7 +65,8 @@ async function fetchInferenceProviderMappingForModel(modelId, accessToken, optio
  if (!payload?.inferenceProviderMapping) {
  throw new errors_js_1.InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
  }
- inferenceProviderMapping = payload.inferenceProviderMapping;
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
+ exports.inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
  }
  return inferenceProviderMapping;
  }
@@ -50,8 +74,8 @@ async function getInferenceProviderMapping(params, options) {
  if (consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
  return consts_js_1.HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
  }
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
- const providerMapping = inferenceProviderMapping[params.provider];
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
  if (providerMapping) {
  const equivalentTasks = params.provider === "hf-inference" && (0, typedInclude_js_1.typedInclude)(hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
  ? hf_inference_js_1.EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -62,7 +86,7 @@ async function getInferenceProviderMapping(params, options) {
  if (providerMapping.status === "staging") {
  console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
  }
- return { ...providerMapping, hfModelId: params.modelId };
+ return providerMapping;
  }
  return null;
  }
@@ -82,8 +106,8 @@ async function resolveProvider(provider, modelId, endpointUrl) {
  if (!modelId) {
  throw new errors_js_1.InferenceClientInputError("Specifying a model is required when provider is 'auto'");
  }
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
- provider = Object.keys(inferenceProviderMapping)[0];
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
+ provider = mappings[0]?.provider;
  }
  if (!provider) {
  throw new errors_js_1.InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
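Taken together, the compiled changes above do three things: Hub responses are normalized into the array shape whether the server returns the legacy per-provider object or the new array, the normalized result is written back to the cache, and provider lookup becomes a `find()` over that array (with `auto` resolution taking the first entry). A standalone sketch of that normalization and lookup, simplified from the vendored helper and with invented payload values:

```ts
type Entry = {
	provider: string;
	hfModelId: string;
	providerId: string;
	status: "live" | "staging";
	task: string;
};

// Simplified restatement of normalizeInferenceProviderMapping(): accept either
// the new array payload or the legacy { [provider]: {...} } object payload.
function normalize(modelId: string, payload: unknown): Entry[] {
	if (!payload) return [];
	if (Array.isArray(payload)) return payload as Entry[]; // new shape: already entries
	return Object.entries(payload as Record<string, Omit<Entry, "provider" | "hfModelId">>).map(
		([provider, m]) => ({ provider, hfModelId: modelId, ...m }) // legacy shape: lift into entries
	);
}

// Legacy object payload (values invented for the example)…
const legacy = {
	"fal-ai": { providerId: "fal-ai/flux/dev", status: "live" as const, task: "text-to-image" },
};
const entries = normalize("black-forest-labs/FLUX.1-dev", legacy);

// …provider selection is now an array lookup instead of an object index:
const falEntry = entries.find((e) => e.provider === "fal-ai");
// …and provider: "auto" simply resolves to the first entry's provider:
const autoProvider = entries[0]?.provider;
console.log(falEntry, autoProvider);
```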
@@ -1,5 +1,5 @@
1
1
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
2
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
3
3
  import type { getProviderHelper } from "./getProviderHelper.js";
4
4
  /**
5
5
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
22
22
  export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
23
23
  data?: Blob | ArrayBuffer;
24
24
  stream?: boolean;
25
- }, mapping: InferenceProviderModelMapping | undefined, options?: Options & {
25
+ }, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
26
26
  task?: InferenceTask;
27
27
  }): {
28
28
  url: string;
@@ -1 +1 @@
1
- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAqE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
1
+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
@@ -41,6 +41,7 @@ async function makeRequestOptions(args, providerHelper, options) {
41
41
  }
42
42
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
43
43
  ? {
44
+ provider: provider,
44
45
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
45
46
  providerId: removeProviderPrefix(maybeModel, provider),
46
47
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.3";
1
+ export declare const PACKAGE_VERSION = "4.0.5";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -2,5 +2,5 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PACKAGE_NAME = exports.PACKAGE_VERSION = void 0;
4
4
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
5
- exports.PACKAGE_VERSION = "4.0.3";
5
+ exports.PACKAGE_VERSION = "4.0.5";
6
6
  exports.PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
  /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
8
8
  *
9
9
  * We also inject into this dictionary from tests.
10
10
  */
11
- export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderModelMapping>>;
11
+ export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
12
12
  //# sourceMappingURL=consts.d.ts.map
@@ -1,11 +1,12 @@
1
1
  import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
3
3
  import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
7
  accessToken?: string;
8
8
  directRequest?: boolean;
9
+ endpointUrl?: string;
9
10
  } & Record<string, unknown>;
10
- export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
11
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
11
12
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
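The new `endpointUrl` member of `InferenceSnippetOptions` lets callers generate snippets that target a custom URL (e.g. a dedicated Inference Endpoint or any OpenAI-compatible server). A hedged sketch of a call, assuming the package's `snippets` namespace export and with the model object reduced to plausible `ModelDataMinimal` fields:

```ts
import { snippets } from "@huggingface/inference";
import type { ModelDataMinimal } from "@huggingface/tasks";

// Assumed-shape model descriptor; adjust fields to whatever ModelDataMinimal
// actually requires in your @huggingface/tasks version.
const model = {
	id: "mistralai/Mistral-7B-Instruct-v0.3",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
} as ModelDataMinimal;

const generated = snippets.getInferenceSnippets(model, "hf-inference", undefined, {
	streaming: true,
	endpointUrl: "https://my-endpoint.example.com", // new option in this release
});

for (const s of generated) {
	console.log(`--- ${s.client} (${s.language}) ---\n${s.content}`);
}
```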
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -110,6 +110,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
110
110
  const request = (0, makeRequestOptions_js_1.makeRequestOptionsFromResolvedModel)(providerModelId, providerHelper, {
111
111
  accessToken: accessTokenOrPlaceholder,
112
112
  provider,
113
+ endpointUrl: opts?.endpointUrl,
113
114
  ...inputs,
114
115
  }, inferenceProviderMapping, {
115
116
  task,
@@ -151,6 +152,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
151
152
  provider,
152
153
  providerModelId: providerModelId ?? model.id,
153
154
  billTo: opts?.billTo,
155
+ endpointUrl: opts?.endpointUrl,
154
156
  };
155
157
  /// Iterate over clients => check if a snippet exists => generate
156
158
  const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -195,7 +197,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
195
197
  }
196
198
  /// Replace access token placeholder
197
199
  if (snippet.includes(placeholder)) {
198
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
200
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
199
201
  }
200
202
  /// Snippet is ready!
201
203
  return { language, client: client, content: snippet };
@@ -322,17 +324,20 @@ function indentString(str) {
322
324
  function removeSuffix(str, suffix) {
323
325
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
324
326
  }
325
- function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
327
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
326
328
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
327
329
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
328
330
  // Determine if HF_TOKEN or specific provider token should be used
329
- const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
330
- (!directRequest && // if explicit directRequest => use provider-specific token
331
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
332
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
331
+ const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
332
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
333
+ (!directRequest && // if explicit directRequest => use provider-specific token
334
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
335
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
333
336
  const accessTokenEnvVar = useHfToken
334
337
  ? "HF_TOKEN" // e.g. routed request or hf-inference
335
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
338
+ : endpointUrl
339
+ ? "API_TOKEN"
340
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
336
341
  // Replace the placeholder with the env variable
337
342
  if (language === "sh") {
338
343
  snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
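The net effect of this rewrite is the environment variable chosen for access tokens in rendered snippets: a custom `endpointUrl` now yields a generic `API_TOKEN`, hf-inference / routed / client-based snippets keep `HF_TOKEN`, and everything else falls back to a provider-specific key. A condensed, standalone restatement of that decision (not the library's exported API):

```ts
// Mirrors the useHfToken / accessTokenEnvVar logic in the hunk above.
function tokenEnvVar(opts: {
	provider: string;
	endpointUrl?: string;
	directRequest?: boolean;
	snippet: string;
}): string {
	const { provider, endpointUrl, directRequest, snippet } = opts;
	const useHfToken =
		!endpointUrl && // custom endpoint => generic token below
		(provider === "hf-inference" ||
			(!directRequest &&
				(snippet.includes("InferenceClient") || // client-based snippets route through HF
					snippet.includes("https://router.huggingface.co")))); // explicit routed request
	if (useHfToken) return "HF_TOKEN";
	if (endpointUrl) return "API_TOKEN";
	return provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
}

// Examples (hypothetical inputs):
console.log(tokenEnvVar({ provider: "replicate", directRequest: true, snippet: 'fetch("https://api.replicate.com/…")' })); // REPLICATE_API_KEY
console.log(tokenEnvVar({ provider: "fal-ai", endpointUrl: "https://example.com/v1", snippet: "new InferenceClient(…)" })); // API_TOKEN
console.log(tokenEnvVar({ provider: "together", snippet: "new InferenceClient(…)" })); // HF_TOKEN
```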
@@ -14,14 +14,14 @@ exports.templates = {
14
14
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
15
15
  },
16
16
  "huggingface.js": {
17
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
18
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
19
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
20
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
21
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
22
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
23
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
24
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
17
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
18
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
19
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
20
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
21
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
22
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
23
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
24
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
25
25
  },
26
26
  "openai": {
27
27
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -40,7 +40,7 @@ exports.templates = {
40
40
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
41
41
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
43
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
43
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
44
44
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
45
45
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
46
46
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
  /**
4
4
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
5
5
  */
@@ -91,7 +91,7 @@ export interface UrlParams {
91
91
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
92
92
  args: T;
93
93
  model: string;
94
- mapping?: InferenceProviderModelMapping | undefined;
94
+ mapping?: InferenceProviderMappingEntry | undefined;
95
95
  task?: InferenceTask;
96
96
  }
97
97
  //# sourceMappingURL=types.d.ts.map
@@ -1,18 +1,19 @@
1
1
  import type { WidgetType } from "@huggingface/tasks";
2
2
  import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../types.js";
3
- export declare const inferenceProviderMappingCache: Map<string, Partial<Record<"black-forest-labs" | "cerebras" | "cohere" | "fal-ai" | "featherless-ai" | "fireworks-ai" | "groq" | "hf-inference" | "hyperbolic" | "nebius" | "novita" | "nscale" | "openai" | "ovhcloud" | "replicate" | "sambanova" | "together", Omit<InferenceProviderModelMapping, "hfModelId">>>>;
4
- export type InferenceProviderMapping = Partial<Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>>;
5
- export interface InferenceProviderModelMapping {
3
+ export declare const inferenceProviderMappingCache: Map<string, InferenceProviderMappingEntry[]>;
4
+ export interface InferenceProviderMappingEntry {
6
5
  adapter?: string;
7
6
  adapterWeightsPath?: string;
8
7
  hfModelId: ModelId;
8
+ provider: string;
9
9
  providerId: string;
10
10
  status: "live" | "staging";
11
11
  task: WidgetType;
12
+ type?: "single-model" | "tag-filter";
12
13
  }
13
14
  export declare function fetchInferenceProviderMappingForModel(modelId: ModelId, accessToken?: string, options?: {
14
15
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
15
- }): Promise<InferenceProviderMapping>;
16
+ }): Promise<InferenceProviderMappingEntry[]>;
16
17
  export declare function getInferenceProviderMapping(params: {
17
18
  accessToken?: string;
18
19
  modelId: ModelId;
@@ -20,6 +21,6 @@ export declare function getInferenceProviderMapping(params: {
20
21
  task: WidgetType;
21
22
  }, options: {
22
23
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
23
- }): Promise<InferenceProviderModelMapping | null>;
24
+ }): Promise<InferenceProviderMappingEntry | null>;
24
25
  export declare function resolveProvider(provider?: InferenceProviderOrPolicy, modelId?: string, endpointUrl?: string): Promise<InferenceProvider>;
25
26
  //# sourceMappingURL=getInferenceProviderMapping.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,uTAA+C,CAAC;AAE1F,MAAM,MAAM,wBAAwB,GAAG,OAAO,CAC7C,MAAM,CAAC,iBAAiB,EAAE,IAAI,CAAC,6BAA6B,EAAE,WAAW,CAAC,CAAC,CAC3E,CAAC;AAEF,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;CACjB;AAED,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,wBAAwB,CAAC,CAgDnC;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CA4B/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
1
+ {"version":3,"file":"getInferenceProviderMapping.d.ts","sourceRoot":"","sources":["../../../src/lib/getInferenceProviderMapping.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAIrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAIzF,eAAO,MAAM,6BAA6B,8CAAsD,CAAC;AAEjG,MAAM,WAAW,6BAA6B;IAC7C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,IAAI,EAAE,UAAU,CAAC;IACjB,IAAI,CAAC,EAAE,cAAc,GAAG,YAAY,CAAC;CACrC;AAiCD,wBAAsB,qCAAqC,CAC1D,OAAO,EAAE,OAAO,EAChB,WAAW,CAAC,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE;IACT,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,EAAE,CAAC,CAqD1C;AAED,wBAAsB,2BAA2B,CAChD,MAAM,EAAE;IACP,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,IAAI,EAAE,UAAU,CAAC;CACjB,EACD,OAAO,EAAE;IACR,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CACtE,GACC,OAAO,CAAC,6BAA6B,GAAG,IAAI,CAAC,CAwB/C;AAED,wBAAsB,eAAe,CACpC,QAAQ,CAAC,EAAE,yBAAyB,EACpC,OAAO,CAAC,EAAE,MAAM,EAChB,WAAW,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAyB5B"}
@@ -4,6 +4,29 @@ import { EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS } from "../providers/hf-inferenc
4
4
  import { typedInclude } from "../utils/typedInclude.js";
5
5
  import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
6
6
  export const inferenceProviderMappingCache = new Map();
7
+ /**
8
+ * Normalize inferenceProviderMapping to always return an array format.
9
+ * This provides backward and forward compatibility for the API changes.
10
+ *
11
+ * Vendored from @huggingface/hub to avoid extra dependency.
12
+ */
13
+ function normalizeInferenceProviderMapping(modelId, inferenceProviderMapping) {
14
+ if (!inferenceProviderMapping) {
15
+ return [];
16
+ }
17
+ // If it's already an array, return it as is
18
+ if (Array.isArray(inferenceProviderMapping)) {
19
+ return inferenceProviderMapping;
20
+ }
21
+ // Convert mapping to array format
22
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
23
+ provider,
24
+ hfModelId: modelId,
25
+ providerId: mapping.providerId,
26
+ status: mapping.status,
27
+ task: mapping.task,
28
+ }));
29
+ }
7
30
  export async function fetchInferenceProviderMappingForModel(modelId, accessToken, options) {
8
31
  let inferenceProviderMapping;
9
32
  if (inferenceProviderMappingCache.has(modelId)) {
@@ -36,7 +59,8 @@ export async function fetchInferenceProviderMappingForModel(modelId, accessToken
36
59
  if (!payload?.inferenceProviderMapping) {
37
60
  throw new InferenceClientHubApiError(`We have not been able to find inference provider information for model ${modelId}.`, { url, method: "GET" }, { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() });
38
61
  }
39
- inferenceProviderMapping = payload.inferenceProviderMapping;
62
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
63
+ inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
40
64
  }
41
65
  return inferenceProviderMapping;
42
66
  }
@@ -44,8 +68,8 @@ export async function getInferenceProviderMapping(params, options) {
44
68
  if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
45
69
  return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
46
70
  }
47
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
48
- const providerMapping = inferenceProviderMapping[params.provider];
71
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
72
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
49
73
  if (providerMapping) {
50
74
  const equivalentTasks = params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
51
75
  ? EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
@@ -56,7 +80,7 @@ export async function getInferenceProviderMapping(params, options) {
56
80
  if (providerMapping.status === "staging") {
57
81
  console.warn(`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`);
58
82
  }
59
- return { ...providerMapping, hfModelId: params.modelId };
83
+ return providerMapping;
60
84
  }
61
85
  return null;
62
86
  }
@@ -76,8 +100,8 @@ export async function resolveProvider(provider, modelId, endpointUrl) {
76
100
  if (!modelId) {
77
101
  throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
78
102
  }
79
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
80
- provider = Object.keys(inferenceProviderMapping)[0];
103
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
104
+ provider = mappings[0]?.provider;
81
105
  }
82
106
  if (!provider) {
83
107
  throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
@@ -1,5 +1,5 @@
1
1
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
2
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
3
3
  import type { getProviderHelper } from "./getProviderHelper.js";
4
4
  /**
5
5
  * Helper that prepares request arguments.
@@ -22,7 +22,7 @@ export declare function makeRequestOptions(args: RequestArgs & {
22
22
  export declare function makeRequestOptionsFromResolvedModel(resolvedModel: string, providerHelper: ReturnType<typeof getProviderHelper>, args: RequestArgs & {
23
23
  data?: Blob | ArrayBuffer;
24
24
  stream?: boolean;
25
- }, mapping: InferenceProviderModelMapping | undefined, options?: Options & {
25
+ }, mapping: InferenceProviderMappingEntry | undefined, options?: Options & {
26
26
  task?: InferenceTask;
27
27
  }): {
28
28
  url: string;
@@ -1 +1 @@
1
- {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAqE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
1
+ {"version":3,"file":"makeRequestOptions.d.ts","sourceRoot":"","sources":["../../../src/lib/makeRequestOptions.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvE,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,kCAAkC,CAAC;AAEtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAUhE;;;GAGG;AACH,wBAAsB,kBAAkB,CACvC,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,oEAAoE;IACpE,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,CAsE7C;AAED;;;GAGG;AACH,wBAAgB,mCAAmC,CAClD,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAAC,EACpD,IAAI,EAAE,WAAW,GAAG;IACnB,IAAI,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB,EACD,OAAO,EAAE,6BAA6B,GAAG,SAAS,EAClD,OAAO,CAAC,EAAE,OAAO,GAAG;IACnB,IAAI,CAAC,EAAE,aAAa,CAAC;CACrB,GACC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,WAAW,CAAA;CAAE,CA8EpC"}
@@ -37,6 +37,7 @@ export async function makeRequestOptions(args, providerHelper, options) {
37
37
  }
38
38
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
39
39
  ? {
40
+ provider: provider,
40
41
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
41
42
  providerId: removeProviderPrefix(maybeModel, provider),
42
43
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -1,3 +1,3 @@
1
- export declare const PACKAGE_VERSION = "4.0.3";
1
+ export declare const PACKAGE_VERSION = "4.0.5";
2
2
  export declare const PACKAGE_NAME = "@huggingface/inference";
3
3
  //# sourceMappingURL=package.d.ts.map
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.3";
2
+ export const PACKAGE_VERSION = "4.0.5";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
  /**
@@ -8,5 +8,5 @@ import { type ModelId } from "../types.js";
8
8
  *
9
9
  * We also inject into this dictionary from tests.
10
10
  */
11
- export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderModelMapping>>;
11
+ export declare const HARDCODED_MODEL_INFERENCE_MAPPING: Record<InferenceProvider, Record<ModelId, InferenceProviderMappingEntry>>;
12
12
  //# sourceMappingURL=consts.d.ts.map
@@ -1,11 +1,12 @@
1
1
  import { type InferenceSnippet, type ModelDataMinimal } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
3
3
  import type { InferenceProviderOrPolicy } from "../types.js";
4
4
  export type InferenceSnippetOptions = {
5
5
  streaming?: boolean;
6
6
  billTo?: string;
7
7
  accessToken?: string;
8
8
  directRequest?: boolean;
9
+ endpointUrl?: string;
9
10
  } & Record<string, unknown>;
10
- export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderModelMapping, opts?: Record<string, unknown>): InferenceSnippet[];
11
+ export declare function getInferenceSnippets(model: ModelDataMinimal, provider: InferenceProviderOrPolicy, inferenceProviderMapping?: InferenceProviderMappingEntry, opts?: Record<string, unknown>): InferenceSnippet[];
11
12
  //# sourceMappingURL=getInferenceSnippets.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;CACxB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAiV5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
1
+ {"version":3,"file":"getInferenceSnippets.d.ts","sourceRoot":"","sources":["../../../src/snippets/getInferenceSnippets.ts"],"names":[],"mappings":"AACA,OAAO,EACN,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAGrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,uCAAuC,CAAC;AAG3F,OAAO,KAAK,EAAE,yBAAyB,EAA8B,MAAM,aAAa,CAAC;AAGzF,MAAM,MAAM,uBAAuB,GAAG;IACrC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AA2V5B,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,gBAAgB,EACvB,QAAQ,EAAE,yBAAyB,EACnC,wBAAwB,CAAC,EAAE,6BAA6B,EACxD,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC5B,gBAAgB,EAAE,CAIpB"}
@@ -107,6 +107,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
107
107
  const request = makeRequestOptionsFromResolvedModel(providerModelId, providerHelper, {
108
108
  accessToken: accessTokenOrPlaceholder,
109
109
  provider,
110
+ endpointUrl: opts?.endpointUrl,
110
111
  ...inputs,
111
112
  }, inferenceProviderMapping, {
112
113
  task,
@@ -148,6 +149,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
148
149
  provider,
149
150
  providerModelId: providerModelId ?? model.id,
150
151
  billTo: opts?.billTo,
152
+ endpointUrl: opts?.endpointUrl,
151
153
  };
152
154
  /// Iterate over clients => check if a snippet exists => generate
153
155
  const clients = provider === "auto" ? CLIENTS_AUTO_POLICY : CLIENTS;
@@ -192,7 +194,7 @@ const snippetGenerator = (templateName, inputPreparationFn) => {
192
194
  }
193
195
  /// Replace access token placeholder
194
196
  if (snippet.includes(placeholder)) {
195
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
197
+ snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider, opts?.endpointUrl);
196
198
  }
197
199
  /// Snippet is ready!
198
200
  return { language, client: client, content: snippet };
@@ -319,17 +321,20 @@ function indentString(str) {
319
321
  function removeSuffix(str, suffix) {
320
322
  return str.endsWith(suffix) ? str.slice(0, -suffix.length) : str;
321
323
  }
322
- function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider) {
324
+ function replaceAccessTokenPlaceholder(directRequest, placeholder, snippet, language, provider, endpointUrl) {
323
325
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
324
326
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
325
327
  // Determine if HF_TOKEN or specific provider token should be used
326
- const useHfToken = provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
327
- (!directRequest && // if explicit directRequest => use provider-specific token
328
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
329
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
328
+ const useHfToken = !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
329
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
330
+ (!directRequest && // if explicit directRequest => use provider-specific token
331
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
332
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
330
333
  const accessTokenEnvVar = useHfToken
331
334
  ? "HF_TOKEN" // e.g. routed request or hf-inference
332
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
335
+ : endpointUrl
336
+ ? "API_TOKEN"
337
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
333
338
  // Replace the placeholder with the env variable
334
339
  if (language === "sh") {
335
340
  snippet = snippet.replace(`'Authorization: Bearer ${placeholder}'`, `"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
@@ -11,14 +11,14 @@ export const templates = {
11
11
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
12
12
  },
13
13
  "huggingface.js": {
14
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
14
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
22
22
  },
23
23
  "openai": {
24
24
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates = {
37
37
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
38
38
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
39
39
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
40
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
40
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
41
41
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
43
43
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
  /**
4
4
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
5
5
  */
@@ -91,7 +91,7 @@ export interface UrlParams {
91
91
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
92
92
  args: T;
93
93
  model: string;
94
- mapping?: InferenceProviderModelMapping | undefined;
94
+ mapping?: InferenceProviderMappingEntry | undefined;
95
95
  task?: InferenceTask;
96
96
  }
97
97
  //# sourceMappingURL=types.d.ts.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/inference",
3
- "version": "4.0.3",
3
+ "version": "4.0.5",
4
4
  "license": "MIT",
5
5
  "author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
6
6
  "description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
@@ -40,7 +40,7 @@
40
40
  },
41
41
  "type": "module",
42
42
  "dependencies": {
43
- "@huggingface/tasks": "^0.19.12",
43
+ "@huggingface/tasks": "^0.19.15",
44
44
  "@huggingface/jinja": "^0.5.0"
45
45
  },
46
46
  "devDependencies": {
@@ -6,19 +6,48 @@ import type { InferenceProvider, InferenceProviderOrPolicy, ModelId } from "../t
6
6
  import { typedInclude } from "../utils/typedInclude.js";
7
7
  import { InferenceClientHubApiError, InferenceClientInputError } from "../errors.js";
8
8
 
9
- export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMapping>();
9
+ export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMappingEntry[]>();
10
10
 
11
- export type InferenceProviderMapping = Partial<
12
- Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId">>
13
- >;
14
-
15
- export interface InferenceProviderModelMapping {
11
+ export interface InferenceProviderMappingEntry {
16
12
  adapter?: string;
17
13
  adapterWeightsPath?: string;
18
14
  hfModelId: ModelId;
15
+ provider: string;
19
16
  providerId: string;
20
17
  status: "live" | "staging";
21
18
  task: WidgetType;
19
+ type?: "single-model" | "tag-filter";
20
+ }
21
+
22
+ /**
23
+ * Normalize inferenceProviderMapping to always return an array format.
24
+ * This provides backward and forward compatibility for the API changes.
25
+ *
26
+ * Vendored from @huggingface/hub to avoid extra dependency.
27
+ */
28
+ function normalizeInferenceProviderMapping(
29
+ modelId: ModelId,
30
+ inferenceProviderMapping?:
31
+ | InferenceProviderMappingEntry[]
32
+ | Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>
33
+ ): InferenceProviderMappingEntry[] {
34
+ if (!inferenceProviderMapping) {
35
+ return [];
36
+ }
37
+
38
+ // If it's already an array, return it as is
39
+ if (Array.isArray(inferenceProviderMapping)) {
40
+ return inferenceProviderMapping;
41
+ }
42
+
43
+ // Convert mapping to array format
44
+ return Object.entries(inferenceProviderMapping).map(([provider, mapping]) => ({
45
+ provider,
46
+ hfModelId: modelId,
47
+ providerId: mapping.providerId,
48
+ status: mapping.status,
49
+ task: mapping.task,
50
+ }));
22
51
  }
23
52
 
24
53
  export async function fetchInferenceProviderMappingForModel(
@@ -27,8 +56,8 @@ export async function fetchInferenceProviderMappingForModel(
27
56
  options?: {
28
57
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
29
58
  }
30
- ): Promise<InferenceProviderMapping> {
31
- let inferenceProviderMapping: InferenceProviderMapping | null;
59
+ ): Promise<InferenceProviderMappingEntry[]> {
60
+ let inferenceProviderMapping: InferenceProviderMappingEntry[] | null;
32
61
  if (inferenceProviderMappingCache.has(modelId)) {
33
62
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
34
63
  inferenceProviderMapping = inferenceProviderMappingCache.get(modelId)!;
@@ -55,7 +84,11 @@ export async function fetchInferenceProviderMappingForModel(
55
84
  );
56
85
  }
57
86
  }
58
- let payload: { inferenceProviderMapping?: InferenceProviderMapping } | null = null;
87
+ let payload: {
88
+ inferenceProviderMapping?:
89
+ | InferenceProviderMappingEntry[]
90
+ | Record<string, { providerId: string; status: "live" | "staging"; task: WidgetType }>;
91
+ } | null = null;
59
92
  try {
60
93
  payload = await resp.json();
61
94
  } catch {
@@ -72,7 +105,8 @@ export async function fetchInferenceProviderMappingForModel(
72
105
  { requestId: resp.headers.get("x-request-id") ?? "", status: resp.status, body: await resp.text() }
73
106
  );
74
107
  }
75
- inferenceProviderMapping = payload.inferenceProviderMapping;
108
+ inferenceProviderMapping = normalizeInferenceProviderMapping(modelId, payload.inferenceProviderMapping);
109
+ inferenceProviderMappingCache.set(modelId, inferenceProviderMapping);
76
110
  }
77
111
  return inferenceProviderMapping;
78
112
  }
@@ -87,16 +121,12 @@ export async function getInferenceProviderMapping(
87
121
  options: {
88
122
  fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
89
123
  }
90
- ): Promise<InferenceProviderModelMapping | null> {
124
+ ): Promise<InferenceProviderMappingEntry | null> {
91
125
  if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
92
126
  return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
93
127
  }
94
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(
95
- params.modelId,
96
- params.accessToken,
97
- options
98
- );
99
- const providerMapping = inferenceProviderMapping[params.provider];
128
+ const mappings = await fetchInferenceProviderMappingForModel(params.modelId, params.accessToken, options);
129
+ const providerMapping = mappings.find((mapping) => mapping.provider === params.provider);
100
130
  if (providerMapping) {
101
131
  const equivalentTasks =
102
132
  params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
@@ -112,7 +142,7 @@ export async function getInferenceProviderMapping(
112
142
  `Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
113
143
  );
114
144
  }
115
- return { ...providerMapping, hfModelId: params.modelId };
145
+ return providerMapping;
116
146
  }
117
147
  return null;
118
148
  }
@@ -139,8 +169,8 @@ export async function resolveProvider(
139
169
  if (!modelId) {
140
170
  throw new InferenceClientInputError("Specifying a model is required when provider is 'auto'");
141
171
  }
142
- const inferenceProviderMapping = await fetchInferenceProviderMappingForModel(modelId);
143
- provider = Object.keys(inferenceProviderMapping)[0] as InferenceProvider | undefined;
172
+ const mappings = await fetchInferenceProviderMappingForModel(modelId);
173
+ provider = mappings[0]?.provider as InferenceProvider | undefined;
144
174
  }
145
175
  if (!provider) {
146
176
  throw new InferenceClientInputError(`No Inference Provider available for model ${modelId}.`);
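Taken together, the hunks above replace the provider-keyed record with a flat `InferenceProviderMappingEntry[]`, normalize whichever shape the Hub returns, and let `resolveProvider` simply pick `mappings[0]?.provider` under the `"auto"` policy. A minimal standalone sketch of that normalization, with invented model and provider data (the real implementation is the vendored `normalizeInferenceProviderMapping` shown in the diff above):

```typescript
// Illustration only: mirrors the normalization logic from the diff above.
// The model id and provider entries below are placeholders, not real Hub data.
interface MappingEntry {
  provider: string;
  hfModelId: string;
  providerId: string;
  status: "live" | "staging";
  task: string;
}

type LegacyMapping = Record<string, { providerId: string; status: "live" | "staging"; task: string }>;

function toEntries(modelId: string, mapping?: MappingEntry[] | LegacyMapping): MappingEntry[] {
  if (!mapping) return [];
  // New API shape: already an array of entries.
  if (Array.isArray(mapping)) return mapping;
  // Legacy shape: record keyed by provider name.
  return Object.entries(mapping).map(([provider, m]) => ({
    provider,
    hfModelId: modelId,
    providerId: m.providerId,
    status: m.status,
    task: m.task,
  }));
}

// Legacy record as older Hub responses returned it (hypothetical values):
const legacy: LegacyMapping = {
  "fal-ai": { providerId: "fal-ai/some-model", status: "live", task: "text-to-image" },
};
const entries = toEntries("some-org/some-model", legacy);
console.log(entries[0]?.provider); // "fal-ai" — what resolveProvider would pick under "auto"
```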
@@ -1,7 +1,7 @@
1
1
  import { HF_HEADER_X_BILL_TO, HF_HUB_URL } from "../config.js";
2
2
  import { PACKAGE_NAME, PACKAGE_VERSION } from "../package.js";
3
3
  import type { InferenceTask, Options, RequestArgs } from "../types.js";
4
- import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping.js";
4
+ import type { InferenceProviderMappingEntry } from "./getInferenceProviderMapping.js";
5
5
  import { getInferenceProviderMapping } from "./getInferenceProviderMapping.js";
6
6
  import type { getProviderHelper } from "./getProviderHelper.js";
7
7
  import { isUrl } from "./isUrl.js";
@@ -64,6 +64,7 @@ export async function makeRequestOptions(
64
64
 
65
65
  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
66
66
  ? ({
67
+ provider: provider,
67
68
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
68
69
  providerId: removeProviderPrefix(maybeModel!, provider),
69
70
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
@@ -71,7 +72,7 @@ export async function makeRequestOptions(
71
72
  status: "live",
72
73
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
73
74
  task: task!,
74
- } satisfies InferenceProviderModelMapping)
75
+ } satisfies InferenceProviderMappingEntry)
75
76
  : await getInferenceProviderMapping(
76
77
  {
77
78
  modelId: hfModel,
@@ -109,7 +110,7 @@ export function makeRequestOptionsFromResolvedModel(
109
110
  data?: Blob | ArrayBuffer;
110
111
  stream?: boolean;
111
112
  },
112
- mapping: InferenceProviderModelMapping | undefined,
113
+ mapping: InferenceProviderMappingEntry | undefined,
113
114
  options?: Options & {
114
115
  task?: InferenceTask;
115
116
  }
package/src/package.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  // Generated file from package.json. Issues importing JSON directly when publishing on commonjs/ESM - see https://github.com/microsoft/TypeScript/issues/51783
2
- export const PACKAGE_VERSION = "4.0.3";
2
+ export const PACKAGE_VERSION = "4.0.5";
3
3
  export const PACKAGE_NAME = "@huggingface/inference";
@@ -1,4 +1,4 @@
1
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
1
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
2
2
  import type { InferenceProvider } from "../types.js";
3
3
  import { type ModelId } from "../types.js";
4
4
 
@@ -11,7 +11,7 @@ import { type ModelId } from "../types.js";
11
11
  */
12
12
  export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
13
13
  InferenceProvider,
14
- Record<ModelId, InferenceProviderModelMapping>
14
+ Record<ModelId, InferenceProviderMappingEntry>
15
15
  > = {
16
16
  /**
17
17
  * "HF model ID" => "Model ID on Inference Provider's side"
@@ -8,7 +8,7 @@ import {
8
8
  } from "@huggingface/tasks";
9
9
  import type { PipelineType, WidgetType } from "@huggingface/tasks";
10
10
  import type { ChatCompletionInputMessage, GenerationParameters } from "@huggingface/tasks";
11
- import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping.js";
11
+ import type { InferenceProviderMappingEntry } from "../lib/getInferenceProviderMapping.js";
12
12
  import { getProviderHelper } from "../lib/getProviderHelper.js";
13
13
  import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.js";
14
14
  import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
@@ -18,7 +18,8 @@ export type InferenceSnippetOptions = {
18
18
  streaming?: boolean;
19
19
  billTo?: string;
20
20
  accessToken?: string;
21
- directRequest?: boolean;
21
+ directRequest?: boolean; // to bypass HF routing and call the provider directly
22
+ endpointUrl?: string; // to call a local endpoint directly
22
23
  } & Record<string, unknown>;
23
24
 
24
25
  const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
@@ -53,6 +54,7 @@ interface TemplateParams {
53
54
  methodName?: string; // specific to snippetBasic
54
55
  importBase64?: boolean; // specific to snippetImportRequests
55
56
  importJson?: boolean; // specific to snippetImportRequests
57
+ endpointUrl?: string;
56
58
  }
57
59
 
58
60
  // Helpers to find + load templates
@@ -136,7 +138,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
136
138
  return (
137
139
  model: ModelDataMinimal,
138
140
  provider: InferenceProviderOrPolicy,
139
- inferenceProviderMapping?: InferenceProviderModelMapping,
141
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
140
142
  opts?: InferenceSnippetOptions
141
143
  ): InferenceSnippet[] => {
142
144
  const providerModelId = inferenceProviderMapping?.providerId ?? model.id;
@@ -172,6 +174,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
172
174
  {
173
175
  accessToken: accessTokenOrPlaceholder,
174
176
  provider,
177
+ endpointUrl: opts?.endpointUrl,
175
178
  ...inputs,
176
179
  } as RequestArgs,
177
180
  inferenceProviderMapping,
@@ -217,6 +220,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
217
220
  provider,
218
221
  providerModelId: providerModelId ?? model.id,
219
222
  billTo: opts?.billTo,
223
+ endpointUrl: opts?.endpointUrl,
220
224
  };
221
225
 
222
226
  /// Iterate over clients => check if a snippet exists => generate
@@ -265,7 +269,14 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
265
269
 
266
270
  /// Replace access token placeholder
267
271
  if (snippet.includes(placeholder)) {
268
- snippet = replaceAccessTokenPlaceholder(opts?.directRequest, placeholder, snippet, language, provider);
272
+ snippet = replaceAccessTokenPlaceholder(
273
+ opts?.directRequest,
274
+ placeholder,
275
+ snippet,
276
+ language,
277
+ provider,
278
+ opts?.endpointUrl
279
+ );
269
280
  }
270
281
 
271
282
  /// Snippet is ready!
@@ -320,7 +331,7 @@ const snippets: Partial<
320
331
  (
321
332
  model: ModelDataMinimal,
322
333
  provider: InferenceProviderOrPolicy,
323
- inferenceProviderMapping?: InferenceProviderModelMapping,
334
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
324
335
  opts?: InferenceSnippetOptions
325
336
  ) => InferenceSnippet[]
326
337
  >
@@ -359,7 +370,7 @@ const snippets: Partial<
359
370
  export function getInferenceSnippets(
360
371
  model: ModelDataMinimal,
361
372
  provider: InferenceProviderOrPolicy,
362
- inferenceProviderMapping?: InferenceProviderModelMapping,
373
+ inferenceProviderMapping?: InferenceProviderMappingEntry,
363
374
  opts?: Record<string, unknown>
364
375
  ): InferenceSnippet[] {
365
376
  return model.pipeline_tag && model.pipeline_tag in snippets
@@ -444,21 +455,24 @@ function replaceAccessTokenPlaceholder(
444
455
  placeholder: string,
445
456
  snippet: string,
446
457
  language: InferenceSnippetLanguage,
447
- provider: InferenceProviderOrPolicy
458
+ provider: InferenceProviderOrPolicy,
459
+ endpointUrl?: string
448
460
  ): string {
449
461
  // If "opts.accessToken" is not set, the snippets are generated with a placeholder.
450
462
  // Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
451
463
 
452
464
  // Determine if HF_TOKEN or specific provider token should be used
453
465
  const useHfToken =
454
- provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
455
- (!directRequest && // if explicit directRequest => use provider-specific token
456
- (!snippet.includes("https://") || // no URL provided => using a client => use $HF_TOKEN
457
- snippet.includes("https://router.huggingface.co"))); // explicit routed request => use $HF_TOKEN
458
-
466
+ !endpointUrl && // custom endpointUrl => use a generic API_TOKEN
467
+ (provider == "hf-inference" || // hf-inference provider => use $HF_TOKEN
468
+ (!directRequest && // if explicit directRequest => use provider-specific token
469
+ (snippet.includes("InferenceClient") || // using a client => use $HF_TOKEN
470
+ snippet.includes("https://router.huggingface.co")))); // explicit routed request => use $HF_TOKEN
459
471
  const accessTokenEnvVar = useHfToken
460
472
  ? "HF_TOKEN" // e.g. routed request or hf-inference
461
- : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
473
+ : endpointUrl
474
+ ? "API_TOKEN"
475
+ : provider.toUpperCase().replace("-", "_") + "_API_KEY"; // e.g. "REPLICATE_API_KEY"
462
476
 
463
477
  // Replace the placeholder with the env variable
464
478
  if (language === "sh") {
@@ -11,14 +11,14 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
11
11
  "zeroShotClassification": "async function query(data) {\n const response = await fetch(\n\t\t\"{{ fullUrl }}\",\n {\n headers: {\n\t\t\t\tAuthorization: \"{{ authorizationHeader }}\",\n \"Content-Type\": \"application/json\",\n{% if billTo %}\n \"X-HF-Bill-To\": \"{{ billTo }}\",\n{% endif %} },\n method: \"POST\",\n body: JSON.stringify(data),\n }\n );\n const result = await response.json();\n return result;\n}\n\nquery({\n inputs: {{ providerInputs.asObj.inputs }},\n parameters: { candidate_labels: [\"refund\", \"legal\", \"faq\"] }\n}).then((response) => {\n console.log(JSON.stringify(response));\n});"
12
12
  },
13
13
  "huggingface.js": {
14
- "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
- "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
- "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
- "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
- "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
- "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
- "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
- "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
14
+ "basic": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tmodel: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
15
+ "basicAudio": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
16
+ "basicImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst data = fs.readFileSync({{inputs.asObj.inputs}});\n\nconst output = await client.{{ methodName }}({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n\tdata,\n\tmodel: \"{{ model.id }}\",\n\tprovider: \"{{ provider }}\",\n}{% if billTo %}, {\n\tbillTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(output);",
17
+ "conversational": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst chatCompletion = await client.chatCompletion({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nconsole.log(chatCompletion.choices[0].message);",
18
+ "conversationalStream": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nlet out = \"\";\n\nconst stream = client.chatCompletionStream({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n{{ inputs.asTsString }}\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n\nfor await (const chunk of stream) {\n\tif (chunk.choices && chunk.choices.length > 0) {\n\t\tconst newContent = chunk.choices[0].delta.content;\n\t\tout += newContent;\n\t\tconsole.log(newContent);\n\t}\n}",
19
+ "textToImage": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst image = await client.textToImage({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n\tparameters: { num_inference_steps: 5 },\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n/// Use the generated image (it's a Blob)",
20
+ "textToSpeech": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst audio = await client.textToSpeech({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated audio (it's a Blob)",
21
+ "textToVideo": "import { InferenceClient } from \"@huggingface/inference\";\n\nconst client = new InferenceClient(\"{{ accessToken }}\");\n\nconst video = await client.textToVideo({\n{% if endpointUrl %}\n endpointUrl: \"{{ endpointUrl }}\",\n{% endif %}\n provider: \"{{ provider }}\",\n model: \"{{ model.id }}\",\n\tinputs: {{ inputs.asObj.inputs }},\n}{% if billTo %}, {\n billTo: \"{{ billTo }}\",\n}{% endif %});\n// Use the generated video (it's a Blob)"
22
22
  },
23
23
  "openai": {
24
24
  "conversational": "import { OpenAI } from \"openai\";\n\nconst client = new OpenAI({\n\tbaseURL: \"{{ baseUrl }}\",\n\tapiKey: \"{{ accessToken }}\",\n{% if billTo %}\n\tdefaultHeaders: {\n\t\t\"X-HF-Bill-To\": \"{{ billTo }}\" \n\t}\n{% endif %}\n});\n\nconst chatCompletion = await client.chat.completions.create({\n\tmodel: \"{{ providerModelId }}\",\n{{ inputs.asTsString }}\n});\n\nconsole.log(chatCompletion.choices[0].message);",
@@ -37,7 +37,7 @@ export const templates: Record<string, Record<string, Record<string, string>>> =
37
37
  "conversationalStream": "stream = client.chat.completions.create(\n model=\"{{ model.id }}\",\n{{ inputs.asPythonString }}\n stream=True,\n)\n\nfor chunk in stream:\n print(chunk.choices[0].delta.content, end=\"\") ",
38
38
  "documentQuestionAnswering": "output = client.document_question_answering(\n \"{{ inputs.asObj.image }}\",\n question=\"{{ inputs.asObj.question }}\",\n model=\"{{ model.id }}\",\n) ",
39
39
  "imageToImage": "# output is a PIL.Image object\nimage = client.image_to_image(\n \"{{ inputs.asObj.inputs }}\",\n prompt=\"{{ inputs.asObj.parameters.prompt }}\",\n model=\"{{ model.id }}\",\n) ",
40
- "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
40
+ "importInferenceClient": "from huggingface_hub import InferenceClient\n\nclient = InferenceClient(\n{% if endpointUrl %}\n base_url=\"{{ baseUrl }}\",\n{% endif %}\n provider=\"{{ provider }}\",\n api_key=\"{{ accessToken }}\",\n{% if billTo %}\n bill_to=\"{{ billTo }}\",\n{% endif %}\n)",
41
41
  "questionAnswering": "answer = client.question_answering(\n question=\"{{ inputs.asObj.question }}\",\n context=\"{{ inputs.asObj.context }}\",\n model=\"{{ model.id }}\",\n) ",
42
42
  "tableQuestionAnswering": "answer = client.table_question_answering(\n query=\"{{ inputs.asObj.query }}\",\n table={{ inputs.asObj.table }},\n model=\"{{ model.id }}\",\n) ",
43
43
  "textToImage": "# output is a PIL.Image object\nimage = client.text_to_image(\n {{ inputs.asObj.inputs }},\n model=\"{{ model.id }}\",\n) ",
package/src/types.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
2
- import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping.js";
2
+ import type { InferenceProviderMappingEntry } from "./lib/getInferenceProviderMapping.js";
3
3
 
4
4
  /**
5
5
  * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
@@ -126,6 +126,6 @@ export interface UrlParams {
126
126
  export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
127
127
  args: T;
128
128
  model: string;
129
- mapping?: InferenceProviderModelMapping | undefined;
129
+ mapping?: InferenceProviderMappingEntry | undefined;
130
130
  task?: InferenceTask;
131
131
  }