npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.6 → 3.0.0-alpha.7 - Mend

@huggingface/transformers 3.0.0-alpha.6 → 3.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +2 -2
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +94 -43
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +99 -48
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +12 -12
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +8 -8
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +6 -6
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +94 -43
package/dist/transformers.mjs.map +1 -1
package/package.json +3 -3
package/src/backends/onnx.js +61 -22
package/src/env.js +5 -1
package/src/generation/streamers.js +3 -3
package/src/models.js +13 -10
package/src/utils/devices.js +10 -1
package/src/utils/dtypes.js +1 -5
package/types/backends/onnx.d.ts +4 -3
package/types/backends/onnx.d.ts.map +1 -1
package/types/env.d.ts +2 -0
package/types/env.d.ts.map +1 -1
package/types/models.d.ts.map +1 -1
package/types/utils/devices.d.ts +9 -1
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +0 -5
package/types/utils/dtypes.d.ts.map +1 -1

package/README.md CHANGED Viewed

@@ -101,7 +101,7 @@ npm i @huggingface/transformers
 Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
 ```html
 <script type="module">
-    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6';
+    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.7';
 </script>
 ```
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
-By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6/dist/), which should work out-of-the-box. You can customize this as follows:
+By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.7/dist/), which should work out-of-the-box. You can customize this as follows:
 ### Settings

package/dist/ort-wasm-simd-threaded.jsep.wasm CHANGED Viewed

Binary file

package/dist/transformers.cjs CHANGED Viewed

@@ -3832,11 +3832,34 @@ __webpack_require__.r(__webpack_exports__);
-/** @type {import('../utils/devices.js').DeviceType[]} */
-const supportedExecutionProviders = [];
+/**
+ * @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
+ */
+/** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
+const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
+    auto: null, // Auto-detect based on device and environment
+    gpu: null, // Auto-detect GPU
+    cpu: 'cpu', // CPU
+    wasm: 'wasm', // WebAssembly
+    webgpu: 'webgpu', // WebGPU
+    cuda: 'cuda', // CUDA
+    dml: 'dml', // DirectML
+    webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
+    'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
+    'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
+    'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
+});
+/**
+ * The list of supported devices, sorted by priority/performance.
+ * @type {import("../utils/devices.js").DeviceType[]}
+ */
+const supportedDevices = [];
-/** @type {import('../utils/devices.js').DeviceType[]} */
-let defaultExecutionProviders;
+/** @type {ONNXExecutionProviders[]} */
+let defaultDevices;
 let ONNX;
 if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
     ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
@@ -3850,26 +3873,33 @@ if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
     // | CUDA          | ❌          | ❌            | ✔️ (CUDA v11.8)   | ❌          | ❌        | ❌          |
     switch (process.platform) {
         case 'win32': // Windows x64 and Windows arm64
-            supportedExecutionProviders.push('dml');
+            supportedDevices.push('dml');
             break;
         case 'linux': // Linux x64 and Linux arm64
             if (process.arch === 'x64') {
-                supportedExecutionProviders.push('cuda');
+                supportedDevices.push('cuda');
             }
             break;
         case 'darwin': // MacOS x64 and MacOS arm64
             break;
     }
-    supportedExecutionProviders.push('cpu');
-    defaultExecutionProviders = ['cpu'];
+    supportedDevices.push('cpu');
+    defaultDevices = ['cpu'];
 } else {
     ONNX = /*#__PURE__*/ (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2__, 2)));
+    if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
+        // TODO: Only push supported providers (depending on available hardware)
+        supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
+    }
     if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBGPU_AVAILABLE) {
-        supportedExecutionProviders.push('webgpu');
+        supportedDevices.push('webgpu');
     }
-    supportedExecutionProviders.push('wasm');
-    defaultExecutionProviders = ['wasm'];
+    supportedDevices.push('wasm');
+    defaultDevices = ['wasm'];
 }
 // @ts-ignore
@@ -3877,19 +3907,28 @@ const InferenceSession = ONNX.InferenceSession;
 /**
  * Map a device to the execution providers to use for the given device.
- * @param {import("../utils/devices.js").DeviceType} [device=null] (Optional) The device to run the inference on.
- * @returns {import("../utils/devices.js").DeviceType[]} The execution providers to use for the given device.
- */
-function deviceToExecutionProviders(device) {
-    // TODO: Use mapping from device to execution providers for overloaded devices (e.g., 'gpu' or 'cpu').
-    let executionProviders = defaultExecutionProviders;
-    if (device) { // User has specified a device
-        if (!supportedExecutionProviders.includes(device)) {
-            throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedExecutionProviders.join(', ')}.`)
-        }
-        executionProviders = [device];
+ * @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
+ * @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
+ */
+function deviceToExecutionProviders(device = null) {
+    // Use the default execution providers if the user hasn't specified anything
+    if (!device) return defaultDevices;
+    // Handle overloaded cases
+    switch (device) {
+        case "auto":
+            return supportedDevices;
+        case "gpu":
+            return supportedDevices.filter(x =>
+                ["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
+            );
+    }
+    if (supportedDevices.includes(device)) {
+        return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
     }
-    return executionProviders;
+    throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
 }
@@ -4401,13 +4440,14 @@ __webpack_require__.r(__webpack_exports__);
-const VERSION = '3.0.0-alpha.6';
+const VERSION = '3.0.0-alpha.7';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof self !== 'undefined';
 const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
 const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
 const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
+const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
 const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
 const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
@@ -4430,6 +4470,9 @@ const apis = Object.freeze({
     /** Whether the WebGPU API is available */
     IS_WEBGPU_AVAILABLE,
+    /** Whether the WebNN API is available */
+    IS_WEBNN_AVAILABLE,
     /** Whether the Node.js process API is available */
     IS_PROCESS_AVAILABLE,
@@ -6153,14 +6196,14 @@ class TextStreamer extends BaseStreamer {
             throw Error('TextStreamer only supports batch size of 1');
         }
-        const tokens = value[0];
-        this.token_callback_function?.(tokens)
         if (this.skip_prompt && this.next_tokens_are_prompt) {
             this.next_tokens_are_prompt = false;
             return;
         }
+        const tokens = value[0];
+        this.token_callback_function?.(tokens)
         // Add the new token to the cache and decodes the entire thing.
         this.token_cache = (0,_utils_core_js__WEBPACK_IMPORTED_MODULE_0__.mergeArrays)(this.token_cache, tokens);
         const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
@@ -6832,9 +6875,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
     }
     // If the device is not specified, we use the default (supported) execution providers.
-    const executionProviders = (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.deviceToExecutionProviders)(
-        /** @type {import("./utils/devices.js").DeviceType|null} */(device)
+    const selectedDevice = /** @type {import("./utils/devices.js").DeviceType} */(
+        device ?? (_env_js__WEBPACK_IMPORTED_MODULE_12__.apis.IS_NODE_ENV ? 'cpu' : 'wasm')
     );
+    const executionProviders = (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.deviceToExecutionProviders)(selectedDevice);
     // If options.dtype is specified, we use it to choose the suffix for the model file.
     // Otherwise, we use the default dtype for the device.
@@ -6843,19 +6887,21 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
         if (dtype && dtype.hasOwnProperty(fileName)) {
             dtype = dtype[fileName];
         } else {
-            dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[executionProviders[0]];
-            console.warn(`dtype not specified for "${fileName}". Using the default dtype for this device (${dtype}).`);
+            dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp32;
+            console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
         }
     }
-    if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(dtype)) {
-        throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES).join(', ')}`);
-    } else if (dtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp16 && device === 'webgpu' && !(await (0,_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.isWebGpuFp16Supported)())) {
-        throw new Error(`The device (${device}) does not support fp16.`);
+    const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
+    if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
+        throw new Error(`Invalid dtype: ${selectedDtype}. Should be one of: ${Object.keys(_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES).join(', ')}`);
+    } else if (selectedDtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp16 && selectedDevice === 'webgpu' && !(await (0,_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.isWebGpuFp16Supported)())) {
+        throw new Error(`The device (${selectedDevice}) does not support fp16.`);
     }
     // Construct the model file name
-    const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
+    const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
     const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
     const session_options = { ...options.session_options } ?? {};
@@ -6902,7 +6948,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
         session_options.externalData = await Promise.all(externalDataPromises);
     }
-    if (device === 'webgpu') {
+    if (selectedDevice === 'webgpu') {
         const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(options.config, {
             prefix: 'present',
         });
@@ -25936,13 +25982,22 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   "DEVICE_TYPES": () => (/* binding */ DEVICE_TYPES)
 /* harmony export */ });
+/**
+ * The list of devices supported by Transformers.js
+ */
 const DEVICE_TYPES = Object.freeze({
-    cpu: 'cpu', // CPU
+    auto: 'auto', // Auto-detect based on device and environment
     gpu: 'gpu', // Auto-detect GPU
+    cpu: 'cpu', // CPU
     wasm: 'wasm', // WebAssembly
     webgpu: 'webgpu', // WebGPU
     cuda: 'cuda', // CUDA
     dml: 'dml', // DirectML
+    webnn: 'webnn', // WebNN (default)
+    'webnn-npu': 'webnn-npu', // WebNN NPU
+    'webnn-gpu': 'webnn-gpu', // WebNN GPU
+    'webnn-cpu': 'webnn-cpu', // WebNN CPU
 });
 /**
@@ -26013,12 +26068,8 @@ const DATA_TYPES = Object.freeze({
 /** @typedef {keyof typeof DATA_TYPES} DataType */
 const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
-    [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.cpu]: DATA_TYPES.q8,
-    [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.gpu]: DATA_TYPES.fp32,
+    // NOTE: If not specified, will default to fp32
     [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.wasm]: DATA_TYPES.q8,
-    [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.webgpu]: DATA_TYPES.fp32,
-    [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.cuda]: DATA_TYPES.fp32,
-    [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.dml]: DATA_TYPES.fp32,
 });
 /** @type {Record<DataType, string>} */