@huggingface/transformers 3.0.0-alpha.6 → 3.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,7 +101,7 @@ npm i @huggingface/transformers
101
101
  Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
102
102
  ```html
103
103
  <script type="module">
104
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6';
104
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.7';
105
105
  </script>
106
106
  ```
107
107
 
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
134
134
 
135
135
 
136
136
 
137
- By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6/dist/), which should work out-of-the-box. You can customize this as follows:
137
+ By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.7/dist/), which should work out-of-the-box. You can customize this as follows:
138
138
 
139
139
  ### Settings
140
140
 
@@ -3832,11 +3832,34 @@ __webpack_require__.r(__webpack_exports__);
3832
3832
 
3833
3833
 
3834
3834
 
3835
- /** @type {import('../utils/devices.js').DeviceType[]} */
3836
- const supportedExecutionProviders = [];
3835
+ /**
3836
+ * @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
3837
+ */
3838
+
3839
+ /** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
3840
+ const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
3841
+ auto: null, // Auto-detect based on device and environment
3842
+ gpu: null, // Auto-detect GPU
3843
+ cpu: 'cpu', // CPU
3844
+ wasm: 'wasm', // WebAssembly
3845
+ webgpu: 'webgpu', // WebGPU
3846
+ cuda: 'cuda', // CUDA
3847
+ dml: 'dml', // DirectML
3848
+
3849
+ webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
3850
+ 'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
3851
+ 'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
3852
+ 'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
3853
+ });
3854
+
3855
+ /**
3856
+ * The list of supported devices, sorted by priority/performance.
3857
+ * @type {import("../utils/devices.js").DeviceType[]}
3858
+ */
3859
+ const supportedDevices = [];
3837
3860
 
3838
- /** @type {import('../utils/devices.js').DeviceType[]} */
3839
- let defaultExecutionProviders;
3861
+ /** @type {ONNXExecutionProviders[]} */
3862
+ let defaultDevices;
3840
3863
  let ONNX;
3841
3864
  if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
3842
3865
  ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
@@ -3850,26 +3873,33 @@ if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
3850
3873
  // | CUDA | ❌ | ❌ | ✔️ (CUDA v11.8) | ❌ | ❌ | ❌ |
3851
3874
  switch (process.platform) {
3852
3875
  case 'win32': // Windows x64 and Windows arm64
3853
- supportedExecutionProviders.push('dml');
3876
+ supportedDevices.push('dml');
3854
3877
  break;
3855
3878
  case 'linux': // Linux x64 and Linux arm64
3856
3879
  if (process.arch === 'x64') {
3857
- supportedExecutionProviders.push('cuda');
3880
+ supportedDevices.push('cuda');
3858
3881
  }
3859
3882
  break;
3860
3883
  case 'darwin': // MacOS x64 and MacOS arm64
3861
3884
  break;
3862
3885
  }
3863
3886
 
3864
- supportedExecutionProviders.push('cpu');
3865
- defaultExecutionProviders = ['cpu'];
3887
+ supportedDevices.push('cpu');
3888
+ defaultDevices = ['cpu'];
3866
3889
  } else {
3867
3890
  ONNX = /*#__PURE__*/ (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2__, 2)));
3891
+
3892
+ if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
3893
+ // TODO: Only push supported providers (depending on available hardware)
3894
+ supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
3895
+ }
3896
+
3868
3897
  if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBGPU_AVAILABLE) {
3869
- supportedExecutionProviders.push('webgpu');
3898
+ supportedDevices.push('webgpu');
3870
3899
  }
3871
- supportedExecutionProviders.push('wasm');
3872
- defaultExecutionProviders = ['wasm'];
3900
+
3901
+ supportedDevices.push('wasm');
3902
+ defaultDevices = ['wasm'];
3873
3903
  }
3874
3904
 
3875
3905
  // @ts-ignore
@@ -3877,19 +3907,28 @@ const InferenceSession = ONNX.InferenceSession;
3877
3907
 
3878
3908
  /**
3879
3909
  * Map a device to the execution providers to use for the given device.
3880
- * @param {import("../utils/devices.js").DeviceType} [device=null] (Optional) The device to run the inference on.
3881
- * @returns {import("../utils/devices.js").DeviceType[]} The execution providers to use for the given device.
3882
- */
3883
- function deviceToExecutionProviders(device) {
3884
- // TODO: Use mapping from device to execution providers for overloaded devices (e.g., 'gpu' or 'cpu').
3885
- let executionProviders = defaultExecutionProviders;
3886
- if (device) { // User has specified a device
3887
- if (!supportedExecutionProviders.includes(device)) {
3888
- throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedExecutionProviders.join(', ')}.`)
3889
- }
3890
- executionProviders = [device];
3910
+ * @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
3911
+ * @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
3912
+ */
3913
+ function deviceToExecutionProviders(device = null) {
3914
+ // Use the default execution providers if the user hasn't specified anything
3915
+ if (!device) return defaultDevices;
3916
+
3917
+ // Handle overloaded cases
3918
+ switch (device) {
3919
+ case "auto":
3920
+ return supportedDevices;
3921
+ case "gpu":
3922
+ return supportedDevices.filter(x =>
3923
+ ["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
3924
+ );
3925
+ }
3926
+
3927
+ if (supportedDevices.includes(device)) {
3928
+ return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
3891
3929
  }
3892
- return executionProviders;
3930
+
3931
+ throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
3893
3932
  }
3894
3933
 
3895
3934
 
@@ -4401,13 +4440,14 @@ __webpack_require__.r(__webpack_exports__);
4401
4440
 
4402
4441
 
4403
4442
 
4404
- const VERSION = '3.0.0-alpha.6';
4443
+ const VERSION = '3.0.0-alpha.7';
4405
4444
 
4406
4445
  // Check if various APIs are available (depends on environment)
4407
4446
  const IS_BROWSER_ENV = typeof self !== 'undefined';
4408
4447
  const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
4409
4448
  const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
4410
4449
  const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
4450
+ const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
4411
4451
 
4412
4452
  const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
4413
4453
  const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
@@ -4430,6 +4470,9 @@ const apis = Object.freeze({
4430
4470
  /** Whether the WebGPU API is available */
4431
4471
  IS_WEBGPU_AVAILABLE,
4432
4472
 
4473
+ /** Whether the WebNN API is available */
4474
+ IS_WEBNN_AVAILABLE,
4475
+
4433
4476
  /** Whether the Node.js process API is available */
4434
4477
  IS_PROCESS_AVAILABLE,
4435
4478
 
@@ -6153,14 +6196,14 @@ class TextStreamer extends BaseStreamer {
6153
6196
  throw Error('TextStreamer only supports batch size of 1');
6154
6197
  }
6155
6198
 
6156
- const tokens = value[0];
6157
- this.token_callback_function?.(tokens)
6158
-
6159
6199
  if (this.skip_prompt && this.next_tokens_are_prompt) {
6160
6200
  this.next_tokens_are_prompt = false;
6161
6201
  return;
6162
6202
  }
6163
6203
 
6204
+ const tokens = value[0];
6205
+ this.token_callback_function?.(tokens)
6206
+
6164
6207
  // Add the new token to the cache and decodes the entire thing.
6165
6208
  this.token_cache = (0,_utils_core_js__WEBPACK_IMPORTED_MODULE_0__.mergeArrays)(this.token_cache, tokens);
6166
6209
  const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
@@ -6832,9 +6875,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6832
6875
  }
6833
6876
 
6834
6877
  // If the device is not specified, we use the default (supported) execution providers.
6835
- const executionProviders = (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.deviceToExecutionProviders)(
6836
- /** @type {import("./utils/devices.js").DeviceType|null} */(device)
6878
+ const selectedDevice = /** @type {import("./utils/devices.js").DeviceType} */(
6879
+ device ?? (_env_js__WEBPACK_IMPORTED_MODULE_12__.apis.IS_NODE_ENV ? 'cpu' : 'wasm')
6837
6880
  );
6881
+ const executionProviders = (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.deviceToExecutionProviders)(selectedDevice);
6838
6882
 
6839
6883
  // If options.dtype is specified, we use it to choose the suffix for the model file.
6840
6884
  // Otherwise, we use the default dtype for the device.
@@ -6843,19 +6887,21 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6843
6887
  if (dtype && dtype.hasOwnProperty(fileName)) {
6844
6888
  dtype = dtype[fileName];
6845
6889
  } else {
6846
- dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[executionProviders[0]];
6847
- console.warn(`dtype not specified for "${fileName}". Using the default dtype for this device (${dtype}).`);
6890
+ dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp32;
6891
+ console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
6848
6892
  }
6849
6893
  }
6850
6894
 
6851
- if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(dtype)) {
6852
- throw new Error(`Invalid dtype: ${dtype}. Should be one of: ${Object.keys(_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES).join(', ')}`);
6853
- } else if (dtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp16 && device === 'webgpu' && !(await (0,_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.isWebGpuFp16Supported)())) {
6854
- throw new Error(`The device (${device}) does not support fp16.`);
6895
+ const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
6896
+
6897
+ if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
6898
+ throw new Error(`Invalid dtype: ${selectedDtype}. Should be one of: ${Object.keys(_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES).join(', ')}`);
6899
+ } else if (selectedDtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp16 && selectedDevice === 'webgpu' && !(await (0,_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.isWebGpuFp16Supported)())) {
6900
+ throw new Error(`The device (${selectedDevice}) does not support fp16.`);
6855
6901
  }
6856
6902
 
6857
6903
  // Construct the model file name
6858
- const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[dtype];
6904
+ const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
6859
6905
  const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
6860
6906
 
6861
6907
  const session_options = { ...options.session_options } ?? {};
@@ -6902,7 +6948,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6902
6948
  session_options.externalData = await Promise.all(externalDataPromises);
6903
6949
  }
6904
6950
 
6905
- if (device === 'webgpu') {
6951
+ if (selectedDevice === 'webgpu') {
6906
6952
  const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(options.config, {
6907
6953
  prefix: 'present',
6908
6954
  });
@@ -25936,13 +25982,22 @@ __webpack_require__.r(__webpack_exports__);
25936
25982
  /* harmony export */ "DEVICE_TYPES": () => (/* binding */ DEVICE_TYPES)
25937
25983
  /* harmony export */ });
25938
25984
 
25985
+ /**
25986
+ * The list of devices supported by Transformers.js
25987
+ */
25939
25988
  const DEVICE_TYPES = Object.freeze({
25940
- cpu: 'cpu', // CPU
25989
+ auto: 'auto', // Auto-detect based on device and environment
25941
25990
  gpu: 'gpu', // Auto-detect GPU
25991
+ cpu: 'cpu', // CPU
25942
25992
  wasm: 'wasm', // WebAssembly
25943
25993
  webgpu: 'webgpu', // WebGPU
25944
25994
  cuda: 'cuda', // CUDA
25945
25995
  dml: 'dml', // DirectML
25996
+
25997
+ webnn: 'webnn', // WebNN (default)
25998
+ 'webnn-npu': 'webnn-npu', // WebNN NPU
25999
+ 'webnn-gpu': 'webnn-gpu', // WebNN GPU
26000
+ 'webnn-cpu': 'webnn-cpu', // WebNN CPU
25946
26001
  });
25947
26002
 
25948
26003
  /**
@@ -26013,12 +26068,8 @@ const DATA_TYPES = Object.freeze({
26013
26068
  /** @typedef {keyof typeof DATA_TYPES} DataType */
26014
26069
 
26015
26070
  const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
26016
- [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.cpu]: DATA_TYPES.q8,
26017
- [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.gpu]: DATA_TYPES.fp32,
26071
+ // NOTE: If not specified, will default to fp32
26018
26072
  [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.wasm]: DATA_TYPES.q8,
26019
- [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.webgpu]: DATA_TYPES.fp32,
26020
- [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.cuda]: DATA_TYPES.fp32,
26021
- [_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.dml]: DATA_TYPES.fp32,
26022
26073
  });
26023
26074
 
26024
26075
  /** @type {Record<DataType, string>} */