npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.0 → 3.0.0-alpha.10 - Mend

@huggingface/transformers 3.0.0-alpha.0 → 3.0.0-alpha.10

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/README.md +6 -5
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +317 -235
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +1198 -1035
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +34 -40
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +32 -32
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +168 -0
package/dist/transformers.min.mjs.map +1 -0
package/dist/transformers.mjs +31358 -0
package/dist/transformers.mjs.map +1 -0
package/package.json +16 -7
package/src/backends/onnx.js +86 -35
package/src/env.js +6 -6
package/src/generation/logits_process.js +39 -36
package/src/generation/streamers.js +3 -3
package/src/models.js +23 -10
package/src/processors.js +79 -67
package/src/utils/devices.js +15 -4
package/src/utils/dtypes.js +1 -3
package/src/utils/hub.js +17 -16
package/types/backends/onnx.d.ts +6 -5
package/types/backends/onnx.d.ts.map +1 -1
package/types/env.d.ts +6 -2
package/types/env.d.ts.map +1 -1
package/types/generation/logits_process.d.ts.map +1 -1
package/types/models.d.ts +8 -0
package/types/models.d.ts.map +1 -1
package/types/processors.d.ts +15 -1
package/types/processors.d.ts.map +1 -1
package/types/utils/devices.d.ts +11 -1
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +0 -3
package/types/utils/dtypes.d.ts.map +1 -1
package/types/utils/hub.d.ts +1 -40
package/types/utils/hub.d.ts.map +1 -1
package/types/utils/tensor.d.ts +1 -1

package/package.json CHANGED Viewed

@@ -1,16 +1,25 @@
 {
   "name": "@huggingface/transformers",
-  "version": "3.0.0-alpha.0",
+  "version": "3.0.0-alpha.10",
   "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
   "main": "./src/transformers.js",
   "types": "./types/transformers.d.ts",
   "type": "module",
   "exports": {
     "node": {
-      "import": "./dist/transformers.js",
-      "require": "./dist/transformers.cjs"
+      "import": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.min.mjs"
+      },
+      "require": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.min.cjs"
+      }
     },
-    "default": "./src/transformers.js"
+    "default": {
+      "types": "./types/transformers.d.ts",
+      "default": "./src/transformers.js"
+    }
   },
   "scripts": {
     "format": "prettier --write .",
@@ -48,9 +57,9 @@
   "homepage": "https://github.com/xenova/transformers.js#readme",
   "dependencies": {
     "@huggingface/jinja": "^0.3.0",
-    "onnxruntime-node": "1.18.0",
-    "onnxruntime-web": "1.19.0-dev.20240804-ee2fe87e2d",
-    "sharp": "^0.33.2"
+    "onnxruntime-node": "1.19.0",
+    "onnxruntime-web": "1.20.0-dev.20240827-1d059b8702",
+    "sharp": "^0.33.5"
   },
   "devDependencies": {
     "@types/jest": "^29.5.1",

package/src/backends/onnx.js CHANGED Viewed

@@ -25,23 +25,74 @@ import * as ONNX_WEB from 'onnxruntime-web/webgpu';
 export { Tensor } from 'onnxruntime-common';
-/** @type {import('../utils/devices.js').DeviceType[]} */
-const supportedExecutionProviders = [];
+/**
+ * @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
+ */
+/** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
+const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
+    auto: null, // Auto-detect based on device and environment
+    gpu: null, // Auto-detect GPU
+    cpu: 'cpu', // CPU
+    wasm: 'wasm', // WebAssembly
+    webgpu: 'webgpu', // WebGPU
+    cuda: 'cuda', // CUDA
+    dml: 'dml', // DirectML
+    webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
+    'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
+    'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
+    'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
+});
+/**
+ * The list of supported devices, sorted by priority/performance.
+ * @type {import("../utils/devices.js").DeviceType[]}
+ */
+const supportedDevices = [];
-/** @type {import('../utils/devices.js').DeviceType[]} */
-let defaultExecutionProviders;
+/** @type {ONNXExecutionProviders[]} */
+let defaultDevices;
 let ONNX;
 if (apis.IS_NODE_ENV) {
     ONNX = ONNX_NODE.default ?? ONNX_NODE;
-    supportedExecutionProviders.push('cpu');
-    defaultExecutionProviders = ['cpu'];
+    // Updated as of ONNX Runtime 1.18.0
+    // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
+    // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64         | Linux arm64 | MacOS x64 | MacOS arm64 |
+    // | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
+    // | CPU           | ✔️          | ✔️            | ✔️                | ✔️          | ✔️        | ✔️          |
+    // | DirectML      | ✔️          | ✔️            | ❌                | ❌          | ❌        | ❌          |
+    // | CUDA          | ❌          | ❌            | ✔️ (CUDA v11.8)   | ❌          | ❌        | ❌          |
+    switch (process.platform) {
+        case 'win32': // Windows x64 and Windows arm64
+            supportedDevices.push('dml');
+            break;
+        case 'linux': // Linux x64 and Linux arm64
+            if (process.arch === 'x64') {
+                supportedDevices.push('cuda');
+            }
+            break;
+        case 'darwin': // MacOS x64 and MacOS arm64
+            break;
+    }
+    supportedDevices.push('cpu');
+    defaultDevices = ['cpu'];
 } else {
     ONNX = ONNX_WEB;
+    if (apis.IS_WEBNN_AVAILABLE) {
+        // TODO: Only push supported providers (depending on available hardware)
+        supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
+    }
     if (apis.IS_WEBGPU_AVAILABLE) {
-        supportedExecutionProviders.push('webgpu');
+        supportedDevices.push('webgpu');
     }
-    supportedExecutionProviders.push('wasm');
-    defaultExecutionProviders = ['wasm'];
+    supportedDevices.push('wasm');
+    defaultDevices = ['wasm'];
 }
 // @ts-ignore
@@ -49,19 +100,28 @@ const InferenceSession = ONNX.InferenceSession;
 /**
  * Map a device to the execution providers to use for the given device.
- * @param {import("../utils/devices.js").DeviceType} [device=null] (Optional) The device to run the inference on.
- * @returns {import("../utils/devices.js").DeviceType[]} The execution providers to use for the given device.
+ * @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
+ * @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
  */
-export function deviceToExecutionProviders(device) {
-    // TODO: Use mapping from device to execution providers for overloaded devices (e.g., 'gpu' or 'cpu').
-    let executionProviders = defaultExecutionProviders;
-    if (device) { // User has specified a device
-        if (!supportedExecutionProviders.includes(device)) {
-            throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedExecutionProviders.join(', ')}.`)
-        }
-        executionProviders = [device];
+export function deviceToExecutionProviders(device = null) {
+    // Use the default execution providers if the user hasn't specified anything
+    if (!device) return defaultDevices;
+    // Handle overloaded cases
+    switch (device) {
+        case "auto":
+            return supportedDevices;
+        case "gpu":
+            return supportedDevices.filter(x =>
+                ["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
+            );
+    }
+    if (supportedDevices.includes(device)) {
+        return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
     }
-    return executionProviders;
+    throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
 }
@@ -76,7 +136,7 @@ let wasmInitPromise = null;
 /**
  * Create an ONNX inference session.
  * @param {Uint8Array} buffer The ONNX model buffer.
- * @param {Object} session_options ONNX inference session options.
+ * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options ONNX inference session options.
  * @returns {Promise<import('onnxruntime-common').InferenceSession>} The ONNX inference session.
  */
 export async function createInferenceSession(buffer, session_options) {
@@ -100,6 +160,7 @@ export function isONNXTensor(x) {
     return x instanceof ONNX.Tensor;
 }
+/** @type {import('onnxruntime-common').Env} */
 // @ts-ignore
 const ONNX_ENV = ONNX?.env;
 if (ONNX_ENV?.wasm) {
@@ -109,29 +170,19 @@ if (ONNX_ENV?.wasm) {
     // https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
     // We use remote wasm files by default to make it easier for newer users.
     // In practice, users should probably self-host the necessary .wasm files.
-    // ONNX_ENV.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.0-dev.20240804-ee2fe87e2d/dist/';
+    ONNX_ENV.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`;
     // TODO: Add support for loading WASM files from cached buffer when we upgrade to onnxruntime-web@1.19.0
     // https://github.com/microsoft/onnxruntime/pull/21534
-    // Proxy the WASM backend to prevent the UI from freezing
-    // NOTE: This is only needed when running in a non-worker browser environment.
-    ONNX_ENV.wasm.proxy = !apis.IS_WEBWORKER_ENV;
+    // Users may wish to proxy the WASM backend to prevent the UI from freezing,
+    // However, this is not necessary when using WebGPU, so we default to false.
+    ONNX_ENV.wasm.proxy = false;
     // https://developer.mozilla.org/en-US/docs/Web/API/crossOriginIsolated
     if (typeof crossOriginIsolated === 'undefined' || !crossOriginIsolated) {
         ONNX_ENV.wasm.numThreads = 1;
     }
-    // Running in a browser-environment
-    // TODO: Check if 1.17.1 fixes this issue.
-    // SIMD for WebAssembly does not operate correctly in some recent versions of iOS (16.4.x).
-    // As a temporary fix, we disable it for now.
-    // For more information, see: https://github.com/microsoft/onnxruntime/issues/15644
-    const isIOS = typeof navigator !== 'undefined' && /iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent);
-    if (isIOS) {
-        ONNX_ENV.wasm.simd = false;
-    }
 }
 if (ONNX_ENV?.webgpu) {

package/src/env.js CHANGED Viewed

@@ -26,13 +26,14 @@ import fs from 'fs';
 import path from 'path';
 import url from 'url';
-const VERSION = '3.0.0-alpha.0';
+const VERSION = '3.0.0-alpha.10';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof self !== 'undefined';
 const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
 const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
 const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
+const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
 const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
 const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
@@ -55,6 +56,9 @@ export const apis = Object.freeze({
     /** Whether the WebGPU API is available */
     IS_WEBGPU_AVAILABLE,
+    /** Whether the WebNN API is available */
+    IS_WEBNN_AVAILABLE,
     /** Whether the Node.js process API is available */
     IS_PROCESS_AVAILABLE,
@@ -88,7 +92,7 @@ const localModelPath = RUNNING_LOCALLY
  * Global variable given visible to users to control execution. This provides users a simple way to configure Transformers.js.
  * @typedef {Object} TransformersEnvironment
  * @property {string} version This version of Transformers.js.
- * @property {Object} backends Expose environment variables of different backends,
+ * @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
  * allowing users to set these variables if they want to.
  * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
  * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
@@ -115,12 +119,8 @@ export const env = {
     backends: {
         // onnxruntime-web/onnxruntime-node
         onnx: {},
-        // TensorFlow.js
-        tfjs: {},
     },
     /////////////////// Model settings ///////////////////
     allowRemoteModels: true,
     remoteHost: 'https://huggingface.co/',

package/src/generation/logits_process.js CHANGED Viewed

@@ -156,9 +156,9 @@ export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === 1) {
-                const batch_logits = logits[i];
-                batch_logits.data.fill(-Infinity);
-                batch_logits.data[this.bos_token_id] = 0;
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.bos_token_id] = 0;
             }
         }
         return logits;
@@ -189,11 +189,10 @@ export class ForcedEOSTokenLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === this.max_length - 1) {
-                const batch_logits = logits[i];
-                batch_logits.data.fill(-Infinity);
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits.data[eos_token] = 0;
+                    batch_logits_data[eos_token] = 0;
                 }
             }
         }
@@ -227,9 +226,9 @@ export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === this.begin_index) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const token_id of this.begin_suppress_tokens) {
-                    batch_logits.data[token_id] = -Infinity;
+                    batch_logits_data[token_id] = -Infinity;
                 }
             }
         }
@@ -271,15 +270,14 @@ export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
-            const logitsData = /** @type {Float32Array} */(batch_logits.data);
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             // suppress <|notimestamps|> which is handled by without_timestamps
-            logitsData[this.no_timestamps_token_id] = -Infinity;
+            batch_logits_data[this.no_timestamps_token_id] = -Infinity;
             if (input_ids[i].length === this.begin_index - 1) {
-                logitsData.fill(-Infinity);
-                logitsData[this.timestamp_begin] = 0;
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.timestamp_begin] = 0;
                 continue;
             }
@@ -290,25 +288,25 @@ export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
             if (last_was_timestamp) {
                 if (penultimate_was_timestamp) { // has to be non-timestamp
-                    logitsData.subarray(this.timestamp_begin).fill(-Infinity);
+                    batch_logits_data.subarray(this.timestamp_begin).fill(-Infinity);
                 } else { // cannot be normal text tokens
-                    logitsData.subarray(0, this.eos_token_id).fill(-Infinity);
+                    batch_logits_data.subarray(0, this.eos_token_id).fill(-Infinity);
                 }
             }
             // apply the `max_initial_timestamp` option
             if (input_ids[i].length === this.begin_index && this.max_initial_timestamp_index !== null) {
                 const last_allowed = this.timestamp_begin + this.max_initial_timestamp_index;
-                logitsData.subarray(last_allowed + 1).fill(-Infinity);
+                batch_logits_data.subarray(last_allowed + 1).fill(-Infinity);
             }
             // if sum of probability over timestamps is above any other token, sample timestamp
-            const logprobs = log_softmax(logitsData);
+            const logprobs = log_softmax(batch_logits_data);
             const timestamp_logprob = Math.log(logprobs.subarray(this.timestamp_begin).map(Math.exp).reduce((a, b) => a + b));
             const max_text_token_logprob = max(logprobs.subarray(0, this.timestamp_begin))[0];
             if (timestamp_logprob > max_text_token_logprob) {
-                logitsData.subarray(0, this.timestamp_begin).fill(-Infinity);
+                batch_logits_data.subarray(0, this.timestamp_begin).fill(-Infinity);
             }
         }
@@ -397,10 +395,10 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             const bannedTokens = this.calcBannedNgramTokens(input_ids[i]);
             for (const token of bannedTokens) {
-                batch_logits.data[token] = -Infinity;
+                batch_logits_data[token] = -Infinity;
             }
         }
         return logits;
@@ -432,13 +430,13 @@ export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
         // many times in the output will be penalised more.
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             for (const input_id of input_ids[i]) {
-                if (batch_logits.data[input_id] < 0) {
-                    batch_logits.data[input_id] *= this.penalty;
+                const token = Number(input_id);
+                if (batch_logits_data[token] < 0) {
+                    batch_logits_data[token] *= this.penalty;
                 } else {
-                    batch_logits.data[input_id] /= this.penalty;
+                    batch_logits_data[token] /= this.penalty;
                 }
             }
         }
@@ -471,9 +469,10 @@ export class MinLengthLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length < this.min_length) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits.data[eos_token] = -Infinity;
+                    batch_logits_data[eos_token] = -Infinity;
                 }
             }
         }
@@ -509,9 +508,10 @@ export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
         for (let i = 0; i < input_ids.length; ++i) {
             const new_tokens_length = input_ids[i].length - this.prompt_length_to_skip;
             if (new_tokens_length < this.min_new_tokens) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits[eos_token] = -Infinity;
+                    batch_logits_data[eos_token] = -Infinity;
                 }
             }
         }
@@ -539,7 +539,8 @@ export class NoBadWordsLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             for (const bad_word_ids of this.bad_words_ids) {
                 // Whether to modify the logits of the last token in the bad word id sequence
                 let mark = true;
@@ -548,14 +549,16 @@ export class NoBadWordsLogitsProcessor extends LogitsProcessor {
                 // then we set the logits of the last bad word id to -Infinity.
                 for (let i = 1; i <= bad_word_ids.length - 1 && bad_word_ids.length < input_ids[i].length; ++i) {
-                    if (bad_word_ids.at(-i - 1) !== Number(input_ids[i].at(-i))) {
+                    // NOTE: We use != instead of !== to compare bigint and number
+                    // @ts-ignore
+                    if (bad_word_ids.at(-i - 1) != input_ids[i].at(-i)) {
                         // We have found a mismatch
                         mark = false;
                         break;
                     }
                 }
                 if (mark) {
-                    batch_logits[bad_word_ids.at(-1)] = -Infinity;
+                    batch_logits_data[bad_word_ids.at(-1)] = -Infinity;
                 }
             }
         }
@@ -650,9 +653,9 @@ export class TemperatureLogitsWarper extends LogitsWarper {
      * @returns {Object} The processed logits.
      */
     _call(input_ids, logits) {
-        const logitsData = /** @type {Float32Array} */(logits.data);
-        for (let i = 0; i < logitsData.length; ++i) {
-            logitsData[i] /= this.temperature;
+        const batch_logits_data = /** @type {Float32Array} */(logits.data);
+        for (let i = 0; i < batch_logits_data.length; ++i) {
+            batch_logits_data[i] /= this.temperature;
         }
         return logits;
     }

package/src/generation/streamers.js CHANGED Viewed

@@ -65,14 +65,14 @@ export class TextStreamer extends BaseStreamer {
             throw Error('TextStreamer only supports batch size of 1');
         }
-        const tokens = value[0];
-        this.token_callback_function?.(tokens)
         if (this.skip_prompt && this.next_tokens_are_prompt) {
             this.next_tokens_are_prompt = false;
             return;
         }
+        const tokens = value[0];
+        this.token_callback_function?.(tokens)
         // Add the new token to the cache and decodes the entire thing.
         this.token_cache = mergeArrays(this.token_cache, tokens);
         const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);

package/src/models.js CHANGED Viewed

@@ -157,9 +157,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
     }
     // If the device is not specified, we use the default (supported) execution providers.
-    const executionProviders = deviceToExecutionProviders(
-        /** @type {import("./utils/devices.js").DeviceType|null} */(device)
+    const selectedDevice = /** @type {import("./utils/devices.js").DeviceType} */(
+        device ?? (apis.IS_NODE_ENV ? 'cpu' : 'wasm')
     );
+    const executionProviders = deviceToExecutionProviders(selectedDevice);
     // If options.dtype is specified, we use it to choose the suffix for the model file.
     // Otherwise, we use the default dtype for the device.
@@ -168,19 +169,21 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
         if (dtype && dtype.hasOwnProperty(fileName)) {
             dtype = dtype[fileName];
         } else {
-            dtype = DEFAULT_DEVICE_DTYPE_MAPPING[executionProviders[0]];
-            console.warn(`dtype not specified for "${fileName}". Using the default dtype for this device (${dtype}).`);
+            dtype = DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? DATA_TYPES.fp32;
+            console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
         }
     }
-    if (!DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(dtype)) {
-        throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(DATA_TYPES).join(', ')}`);
-    } else if (dtype === DATA_TYPES.fp16 && device === 'webgpu' && !(await isWebGpuFp16Supported())) {
-        throw new Error(`The device (${device}) does not support fp16.`);
+    const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
+    if (!DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
+        throw new Error(`Invalid dtype: ${selectedDtype}. Should be one of: ${Object.keys(DATA_TYPES).join(', ')}`);
+    } else if (selectedDtype === DATA_TYPES.fp16 && selectedDevice === 'webgpu' && !(await isWebGpuFp16Supported())) {
+        throw new Error(`The device (${selectedDevice}) does not support fp16.`);
     }
     // Construct the model file name
-    const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
+    const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
     const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
     const session_options = { ...options.session_options } ?? {};
@@ -227,7 +230,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
         session_options.externalData = await Promise.all(externalDataPromises);
     }
-    if (device === 'webgpu') {
+    if (selectedDevice === 'webgpu') {
         const shapes = getKeyValueShapes(options.config, {
             prefix: 'present',
         });
@@ -4565,6 +4568,14 @@ export class DepthAnythingForDepthEstimation extends DepthAnythingPreTrainedMode
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+export class SapiensPreTrainedModel extends PreTrainedModel { }
+export class SapiensForSemanticSegmentation extends SapiensPreTrainedModel { }
+export class SapiensForDepthEstimation extends SapiensPreTrainedModel { }
+export class SapiensForNormalEstimation extends SapiensPreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 export class GLPNPreTrainedModel extends PreTrainedModel { }
@@ -6535,6 +6546,7 @@ const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
 const MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = new Map([
     ['segformer', ['SegformerForSemanticSegmentation', SegformerForSemanticSegmentation]],
+    ['sapiens', ['SapiensForSemanticSegmentation', SapiensForSemanticSegmentation]],
 ]);
 const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([
@@ -6583,6 +6595,7 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
     ['dpt', ['DPTForDepthEstimation', DPTForDepthEstimation]],
     ['depth_anything', ['DepthAnythingForDepthEstimation', DepthAnythingForDepthEstimation]],
     ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
+    ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
 ])
 // NOTE: This is custom to Transformers.js, and is necessary because certain models