@huggingface/transformers 3.0.0-alpha.6 → 3.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +95 -44
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +554 -466
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +13 -13
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +23 -23
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +7 -7
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +95 -44
- package/dist/transformers.mjs.map +1 -1
- package/package.json +4 -4
- package/src/backends/onnx.js +61 -22
- package/src/env.js +5 -1
- package/src/generation/streamers.js +3 -3
- package/src/models.js +13 -10
- package/src/utils/devices.js +10 -1
- package/src/utils/dtypes.js +1 -5
- package/types/backends/onnx.d.ts +4 -3
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/env.d.ts +2 -0
- package/types/env.d.ts.map +1 -1
- package/types/models.d.ts.map +1 -1
- package/types/utils/devices.d.ts +9 -1
- package/types/utils/devices.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +0 -5
- package/types/utils/dtypes.d.ts.map +1 -1
package/README.md
CHANGED
|
@@ -101,7 +101,7 @@ npm i @huggingface/transformers
|
|
|
101
101
|
Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
|
|
102
102
|
```html
|
|
103
103
|
<script type="module">
|
|
104
|
-
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6';
|
|
104
|
+
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.8';
|
|
105
105
|
</script>
|
|
106
106
|
```
|
|
107
107
|
|
|
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
|
|
|
134
134
|
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.6/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
137
|
+
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.8/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
138
138
|
|
|
139
139
|
### Settings
|
|
140
140
|
|
|
Binary file
|
package/dist/transformers.cjs
CHANGED
|
@@ -3779,7 +3779,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3779
3779
|
// Licensed under the MIT License.
|
|
3780
3780
|
// This file is generated by /js/scripts/update-version.ts
|
|
3781
3781
|
// Do not modify file content manually.
|
|
3782
|
-
const version = '1.
|
|
3782
|
+
const version = '1.19.0';
|
|
3783
3783
|
//# sourceMappingURL=version.js.map
|
|
3784
3784
|
|
|
3785
3785
|
/***/ }),
|
|
@@ -3832,11 +3832,34 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3832
3832
|
|
|
3833
3833
|
|
|
3834
3834
|
|
|
3835
|
-
/**
|
|
3836
|
-
|
|
3835
|
+
/**
|
|
3836
|
+
* @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
|
|
3837
|
+
*/
|
|
3838
|
+
|
|
3839
|
+
/** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
|
|
3840
|
+
const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
|
|
3841
|
+
auto: null, // Auto-detect based on device and environment
|
|
3842
|
+
gpu: null, // Auto-detect GPU
|
|
3843
|
+
cpu: 'cpu', // CPU
|
|
3844
|
+
wasm: 'wasm', // WebAssembly
|
|
3845
|
+
webgpu: 'webgpu', // WebGPU
|
|
3846
|
+
cuda: 'cuda', // CUDA
|
|
3847
|
+
dml: 'dml', // DirectML
|
|
3848
|
+
|
|
3849
|
+
webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
|
|
3850
|
+
'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
|
|
3851
|
+
'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
|
|
3852
|
+
'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
|
|
3853
|
+
});
|
|
3854
|
+
|
|
3855
|
+
/**
|
|
3856
|
+
* The list of supported devices, sorted by priority/performance.
|
|
3857
|
+
* @type {import("../utils/devices.js").DeviceType[]}
|
|
3858
|
+
*/
|
|
3859
|
+
const supportedDevices = [];
|
|
3837
3860
|
|
|
3838
|
-
/** @type {
|
|
3839
|
-
let
|
|
3861
|
+
/** @type {ONNXExecutionProviders[]} */
|
|
3862
|
+
let defaultDevices;
|
|
3840
3863
|
let ONNX;
|
|
3841
3864
|
if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
|
|
3842
3865
|
ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
|
|
@@ -3850,26 +3873,33 @@ if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
|
|
|
3850
3873
|
// | CUDA | ❌ | ❌ | ✔️ (CUDA v11.8) | ❌ | ❌ | ❌ |
|
|
3851
3874
|
switch (process.platform) {
|
|
3852
3875
|
case 'win32': // Windows x64 and Windows arm64
|
|
3853
|
-
|
|
3876
|
+
supportedDevices.push('dml');
|
|
3854
3877
|
break;
|
|
3855
3878
|
case 'linux': // Linux x64 and Linux arm64
|
|
3856
3879
|
if (process.arch === 'x64') {
|
|
3857
|
-
|
|
3880
|
+
supportedDevices.push('cuda');
|
|
3858
3881
|
}
|
|
3859
3882
|
break;
|
|
3860
3883
|
case 'darwin': // MacOS x64 and MacOS arm64
|
|
3861
3884
|
break;
|
|
3862
3885
|
}
|
|
3863
3886
|
|
|
3864
|
-
|
|
3865
|
-
|
|
3887
|
+
supportedDevices.push('cpu');
|
|
3888
|
+
defaultDevices = ['cpu'];
|
|
3866
3889
|
} else {
|
|
3867
3890
|
ONNX = /*#__PURE__*/ (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web_webgpu__WEBPACK_IMPORTED_MODULE_2__, 2)));
|
|
3891
|
+
|
|
3892
|
+
if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
|
|
3893
|
+
// TODO: Only push supported providers (depending on available hardware)
|
|
3894
|
+
supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
|
|
3895
|
+
}
|
|
3896
|
+
|
|
3868
3897
|
if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBGPU_AVAILABLE) {
|
|
3869
|
-
|
|
3898
|
+
supportedDevices.push('webgpu');
|
|
3870
3899
|
}
|
|
3871
|
-
|
|
3872
|
-
|
|
3900
|
+
|
|
3901
|
+
supportedDevices.push('wasm');
|
|
3902
|
+
defaultDevices = ['wasm'];
|
|
3873
3903
|
}
|
|
3874
3904
|
|
|
3875
3905
|
// @ts-ignore
|
|
@@ -3877,19 +3907,28 @@ const InferenceSession = ONNX.InferenceSession;
|
|
|
3877
3907
|
|
|
3878
3908
|
/**
|
|
3879
3909
|
* Map a device to the execution providers to use for the given device.
|
|
3880
|
-
* @param {import("../utils/devices.js").DeviceType} [device=null] (Optional) The device to run the inference on.
|
|
3881
|
-
* @returns {
|
|
3882
|
-
*/
|
|
3883
|
-
function deviceToExecutionProviders(device) {
|
|
3884
|
-
//
|
|
3885
|
-
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
|
|
3889
|
-
|
|
3890
|
-
|
|
3910
|
+
* @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
|
|
3911
|
+
* @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
|
|
3912
|
+
*/
|
|
3913
|
+
function deviceToExecutionProviders(device = null) {
|
|
3914
|
+
// Use the default execution providers if the user hasn't specified anything
|
|
3915
|
+
if (!device) return defaultDevices;
|
|
3916
|
+
|
|
3917
|
+
// Handle overloaded cases
|
|
3918
|
+
switch (device) {
|
|
3919
|
+
case "auto":
|
|
3920
|
+
return supportedDevices;
|
|
3921
|
+
case "gpu":
|
|
3922
|
+
return supportedDevices.filter(x =>
|
|
3923
|
+
["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
|
|
3924
|
+
);
|
|
3925
|
+
}
|
|
3926
|
+
|
|
3927
|
+
if (supportedDevices.includes(device)) {
|
|
3928
|
+
return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
|
|
3891
3929
|
}
|
|
3892
|
-
|
|
3930
|
+
|
|
3931
|
+
throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
|
|
3893
3932
|
}
|
|
3894
3933
|
|
|
3895
3934
|
|
|
@@ -4401,13 +4440,14 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4401
4440
|
|
|
4402
4441
|
|
|
4403
4442
|
|
|
4404
|
-
const VERSION = '3.0.0-alpha.6';
|
|
4443
|
+
const VERSION = '3.0.0-alpha.8';
|
|
4405
4444
|
|
|
4406
4445
|
// Check if various APIs are available (depends on environment)
|
|
4407
4446
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
4408
4447
|
const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
|
|
4409
4448
|
const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
|
|
4410
4449
|
const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
|
|
4450
|
+
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
4411
4451
|
|
|
4412
4452
|
const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
|
|
4413
4453
|
const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
|
|
@@ -4430,6 +4470,9 @@ const apis = Object.freeze({
|
|
|
4430
4470
|
/** Whether the WebGPU API is available */
|
|
4431
4471
|
IS_WEBGPU_AVAILABLE,
|
|
4432
4472
|
|
|
4473
|
+
/** Whether the WebNN API is available */
|
|
4474
|
+
IS_WEBNN_AVAILABLE,
|
|
4475
|
+
|
|
4433
4476
|
/** Whether the Node.js process API is available */
|
|
4434
4477
|
IS_PROCESS_AVAILABLE,
|
|
4435
4478
|
|
|
@@ -6153,14 +6196,14 @@ class TextStreamer extends BaseStreamer {
|
|
|
6153
6196
|
throw Error('TextStreamer only supports batch size of 1');
|
|
6154
6197
|
}
|
|
6155
6198
|
|
|
6156
|
-
const tokens = value[0];
|
|
6157
|
-
this.token_callback_function?.(tokens)
|
|
6158
|
-
|
|
6159
6199
|
if (this.skip_prompt && this.next_tokens_are_prompt) {
|
|
6160
6200
|
this.next_tokens_are_prompt = false;
|
|
6161
6201
|
return;
|
|
6162
6202
|
}
|
|
6163
6203
|
|
|
6204
|
+
const tokens = value[0];
|
|
6205
|
+
this.token_callback_function?.(tokens)
|
|
6206
|
+
|
|
6164
6207
|
// Add the new token to the cache and decodes the entire thing.
|
|
6165
6208
|
this.token_cache = (0,_utils_core_js__WEBPACK_IMPORTED_MODULE_0__.mergeArrays)(this.token_cache, tokens);
|
|
6166
6209
|
const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
|
|
@@ -6832,9 +6875,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
6832
6875
|
}
|
|
6833
6876
|
|
|
6834
6877
|
// If the device is not specified, we use the default (supported) execution providers.
|
|
6835
|
-
const
|
|
6836
|
-
|
|
6878
|
+
const selectedDevice = /** @type {import("./utils/devices.js").DeviceType} */(
|
|
6879
|
+
device ?? (_env_js__WEBPACK_IMPORTED_MODULE_12__.apis.IS_NODE_ENV ? 'cpu' : 'wasm')
|
|
6837
6880
|
);
|
|
6881
|
+
const executionProviders = (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.deviceToExecutionProviders)(selectedDevice);
|
|
6838
6882
|
|
|
6839
6883
|
// If options.dtype is specified, we use it to choose the suffix for the model file.
|
|
6840
6884
|
// Otherwise, we use the default dtype for the device.
|
|
@@ -6843,19 +6887,21 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
6843
6887
|
if (dtype && dtype.hasOwnProperty(fileName)) {
|
|
6844
6888
|
dtype = dtype[fileName];
|
|
6845
6889
|
} else {
|
|
6846
|
-
dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[
|
|
6847
|
-
console.warn(`dtype not specified for "${fileName}". Using the default dtype for this device (${
|
|
6890
|
+
dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp32;
|
|
6891
|
+
console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
|
|
6848
6892
|
}
|
|
6849
6893
|
}
|
|
6850
6894
|
|
|
6851
|
-
|
|
6852
|
-
|
|
6853
|
-
|
|
6854
|
-
throw new Error(`
|
|
6895
|
+
const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
|
|
6896
|
+
|
|
6897
|
+
if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
|
|
6898
|
+
throw new Error(`Invalid dtype: ${selectedDtype}. Should be one of: ${Object.keys(_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES).join(', ')}`);
|
|
6899
|
+
} else if (selectedDtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp16 && selectedDevice === 'webgpu' && !(await (0,_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.isWebGpuFp16Supported)())) {
|
|
6900
|
+
throw new Error(`The device (${selectedDevice}) does not support fp16.`);
|
|
6855
6901
|
}
|
|
6856
6902
|
|
|
6857
6903
|
// Construct the model file name
|
|
6858
|
-
const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[
|
|
6904
|
+
const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
|
|
6859
6905
|
const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
|
|
6860
6906
|
|
|
6861
6907
|
const session_options = { ...options.session_options } ?? {};
|
|
@@ -6902,7 +6948,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
6902
6948
|
session_options.externalData = await Promise.all(externalDataPromises);
|
|
6903
6949
|
}
|
|
6904
6950
|
|
|
6905
|
-
if (
|
|
6951
|
+
if (selectedDevice === 'webgpu') {
|
|
6906
6952
|
const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(options.config, {
|
|
6907
6953
|
prefix: 'present',
|
|
6908
6954
|
});
|
|
@@ -25936,13 +25982,22 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
25936
25982
|
/* harmony export */ "DEVICE_TYPES": () => (/* binding */ DEVICE_TYPES)
|
|
25937
25983
|
/* harmony export */ });
|
|
25938
25984
|
|
|
25985
|
+
/**
|
|
25986
|
+
* The list of devices supported by Transformers.js
|
|
25987
|
+
*/
|
|
25939
25988
|
const DEVICE_TYPES = Object.freeze({
|
|
25940
|
-
|
|
25989
|
+
auto: 'auto', // Auto-detect based on device and environment
|
|
25941
25990
|
gpu: 'gpu', // Auto-detect GPU
|
|
25991
|
+
cpu: 'cpu', // CPU
|
|
25942
25992
|
wasm: 'wasm', // WebAssembly
|
|
25943
25993
|
webgpu: 'webgpu', // WebGPU
|
|
25944
25994
|
cuda: 'cuda', // CUDA
|
|
25945
25995
|
dml: 'dml', // DirectML
|
|
25996
|
+
|
|
25997
|
+
webnn: 'webnn', // WebNN (default)
|
|
25998
|
+
'webnn-npu': 'webnn-npu', // WebNN NPU
|
|
25999
|
+
'webnn-gpu': 'webnn-gpu', // WebNN GPU
|
|
26000
|
+
'webnn-cpu': 'webnn-cpu', // WebNN CPU
|
|
25946
26001
|
});
|
|
25947
26002
|
|
|
25948
26003
|
/**
|
|
@@ -26013,12 +26068,8 @@ const DATA_TYPES = Object.freeze({
|
|
|
26013
26068
|
/** @typedef {keyof typeof DATA_TYPES} DataType */
|
|
26014
26069
|
|
|
26015
26070
|
const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
|
|
26016
|
-
|
|
26017
|
-
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.gpu]: DATA_TYPES.fp32,
|
|
26071
|
+
// NOTE: If not specified, will default to fp32
|
|
26018
26072
|
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.wasm]: DATA_TYPES.q8,
|
|
26019
|
-
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.webgpu]: DATA_TYPES.fp32,
|
|
26020
|
-
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.cuda]: DATA_TYPES.fp32,
|
|
26021
|
-
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.dml]: DATA_TYPES.fp32,
|
|
26022
26073
|
});
|
|
26023
26074
|
|
|
26024
26075
|
/** @type {Record<DataType, string>} */
|