@huggingface/transformers 4.0.0-next.5 → 4.0.0-next.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/dist/ort-wasm-simd-threaded.jsep.mjs +23 -23
- package/dist/transformers.js +575 -418
- package/dist/transformers.min.js +17 -17
- package/dist/transformers.node.cjs +672 -499
- package/dist/transformers.node.min.cjs +18 -18
- package/dist/transformers.node.min.mjs +18 -18
- package/dist/transformers.node.mjs +651 -491
- package/dist/transformers.web.js +559 -402
- package/dist/transformers.web.min.js +18 -18
- package/package.json +3 -3
- package/src/backends/onnx.js +77 -58
- package/src/backends/utils/cacheWasm.js +22 -43
- package/src/configs.js +17 -5
- package/src/env.js +29 -6
- package/src/models/auto/modeling_auto.js +14 -1
- package/src/models/modeling_utils.js +35 -7
- package/src/models/models.js +5 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +37 -4
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +3 -0
- package/src/models/registry.js +14 -4
- package/src/pipelines/index.js +2 -84
- package/src/pipelines.js +40 -77
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache.js +3 -3
- package/src/utils/hub/{files.js → FileResponse.js} +0 -105
- package/src/utils/hub/utils.js +35 -1
- package/src/utils/hub.js +6 -5
- package/src/utils/image.js +12 -13
- package/src/utils/model_registry/ModelRegistry.js +70 -23
- package/src/utils/model_registry/get_model_files.js +12 -1
- package/src/utils/model_registry/get_pipeline_files.js +15 -24
- package/src/utils/model_registry/is_cached.js +81 -4
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +10 -3
- package/types/env.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/modeling_utils.d.ts +13 -2
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +5 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +4 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +0 -34
- package/types/pipelines/index.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +4 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -38
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +17 -2
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +7 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/model_registry/ModelRegistry.d.ts +66 -6
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts +2 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -1
- package/types/utils/model_registry/is_cached.d.ts +47 -4
- package/types/utils/model_registry/is_cached.d.ts.map +1 -1
- package/types/utils/hub/files.d.ts.map +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/transformers",
|
|
3
|
-
"version": "4.0.0-next.5",
|
|
3
|
+
"version": "4.0.0-next.6",
|
|
4
4
|
"description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
|
|
5
5
|
"main": "./dist/transformers.node.cjs",
|
|
6
6
|
"types": "./types/transformers.d.ts",
|
|
@@ -45,8 +45,8 @@
|
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@huggingface/jinja": "^0.5.5",
|
|
47
47
|
"@huggingface/tokenizers": "^0.1.2",
|
|
48
|
-
"onnxruntime-node": "1.24.
|
|
49
|
-
"onnxruntime-web": "1.25.0-dev.
|
|
48
|
+
"onnxruntime-node": "1.24.3",
|
|
49
|
+
"onnxruntime-web": "1.25.0-dev.20260303-e7e64dc112",
|
|
50
50
|
"sharp": "^0.34.5"
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
package/src/backends/onnx.js
CHANGED
|
@@ -22,7 +22,8 @@ import { env, apis, LogLevel } from '../env.js';
|
|
|
22
22
|
// In either case, we select the default export if it exists, otherwise we use the named export.
|
|
23
23
|
import * as ONNX_NODE from 'onnxruntime-node';
|
|
24
24
|
import * as ONNX_WEB from 'onnxruntime-web/webgpu';
|
|
25
|
-
import {
|
|
25
|
+
import { loadWasmBinary, loadWasmFactory } from './utils/cacheWasm.js';
|
|
26
|
+
import { isBlobURL, toAbsoluteURL } from '../utils/hub/utils.js';
|
|
26
27
|
import { logger } from '../utils/logger.js';
|
|
27
28
|
export { Tensor } from 'onnxruntime-common';
|
|
28
29
|
|
|
@@ -176,8 +177,6 @@ export function deviceToExecutionProviders(device = null) {
|
|
|
176
177
|
throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`);
|
|
177
178
|
}
|
|
178
179
|
|
|
179
|
-
const IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;
|
|
180
|
-
|
|
181
180
|
/**
|
|
182
181
|
* Currently, Transformers.js doesn't support simultaneous loading of sessions in WASM/WebGPU.
|
|
183
182
|
* For this reason, we need to chain the loading calls.
|
|
@@ -204,14 +203,23 @@ async function ensureWasmLoaded() {
|
|
|
204
203
|
return wasmLoadPromise;
|
|
205
204
|
}
|
|
206
205
|
|
|
206
|
+
// Check if we should load the WASM binary
|
|
207
207
|
const shouldUseWasmCache =
|
|
208
208
|
env.useWasmCache &&
|
|
209
209
|
typeof ONNX_ENV?.wasm?.wasmPaths === 'object' &&
|
|
210
210
|
ONNX_ENV?.wasm?.wasmPaths?.wasm &&
|
|
211
211
|
ONNX_ENV?.wasm?.wasmPaths?.mjs;
|
|
212
212
|
|
|
213
|
-
// Check if we should load the WASM binary
|
|
214
213
|
if (!shouldUseWasmCache) {
|
|
214
|
+
// In Deno's web runtime, the WASM factory must be loaded via blob URL so that Node.js detection
|
|
215
|
+
// can be patched out (see loadWasmFactory). Without caching, the factory is imported directly
|
|
216
|
+
// from its URL and Deno would crash trying to use Node.js APIs. useWasmCache defaults to true
|
|
217
|
+
// in this environment, so this only happens if the user explicitly disables it.
|
|
218
|
+
if (apis.IS_DENO_WEB_RUNTIME) {
|
|
219
|
+
throw new Error(
|
|
220
|
+
"env.useWasmCache=false is not supported in Deno's web runtime. Remove the useWasmCache override.",
|
|
221
|
+
);
|
|
222
|
+
}
|
|
215
223
|
wasmLoadPromise = Promise.resolve();
|
|
216
224
|
return wasmLoadPromise;
|
|
217
225
|
}
|
|
@@ -222,7 +230,10 @@ async function ensureWasmLoaded() {
|
|
|
222
230
|
// shouldUseWasmCache checks for wasmPaths.wasm and wasmPaths.mjs
|
|
223
231
|
const urls = /** @type {{ wasm: string, mjs: string }} */ (ONNX_ENV.wasm.wasmPaths);
|
|
224
232
|
|
|
225
|
-
// Load
|
|
233
|
+
// Load both in parallel; the .mjs blob URL is only kept if wasmBinary succeeded.
|
|
234
|
+
// ORT only sets locateFile when wasmBinary is provided (onnxruntime PR https://github.com/microsoft/onnxruntime/pull/27411), which
|
|
235
|
+
// prevents new URL(fileName, import.meta.url) from failing inside a blob URL factory.
|
|
236
|
+
let wasmBinaryLoaded = false;
|
|
226
237
|
await Promise.all([
|
|
227
238
|
// Load and cache the WASM binary
|
|
228
239
|
urls.wasm && !isBlobURL(urls.wasm)
|
|
@@ -231,6 +242,7 @@ async function ensureWasmLoaded() {
|
|
|
231
242
|
const wasmBinary = await loadWasmBinary(toAbsoluteURL(urls.wasm));
|
|
232
243
|
if (wasmBinary) {
|
|
233
244
|
ONNX_ENV.wasm.wasmBinary = wasmBinary;
|
|
245
|
+
wasmBinaryLoaded = true;
|
|
234
246
|
}
|
|
235
247
|
} catch (err) {
|
|
236
248
|
logger.warn('Failed to pre-load WASM binary:', err);
|
|
@@ -238,7 +250,7 @@ async function ensureWasmLoaded() {
|
|
|
238
250
|
})()
|
|
239
251
|
: Promise.resolve(),
|
|
240
252
|
|
|
241
|
-
// Load and cache the WASM factory
|
|
253
|
+
// Load and cache the WASM factory as a blob URL
|
|
242
254
|
urls.mjs && !isBlobURL(urls.mjs)
|
|
243
255
|
? (async () => {
|
|
244
256
|
try {
|
|
@@ -253,6 +265,12 @@ async function ensureWasmLoaded() {
|
|
|
253
265
|
})()
|
|
254
266
|
: Promise.resolve(),
|
|
255
267
|
]);
|
|
268
|
+
|
|
269
|
+
// If wasmBinary failed to load, revert wasmPaths.mjs to the original URL (factory can only be loaded from blob if ONNX_ENV.wasm.wasmBinary is set. @see ORT PR #27411)
|
|
270
|
+
if (!wasmBinaryLoaded) {
|
|
271
|
+
// @ts-ignore
|
|
272
|
+
ONNX_ENV.wasm.wasmPaths.mjs = urls.mjs;
|
|
273
|
+
}
|
|
256
274
|
})();
|
|
257
275
|
|
|
258
276
|
return wasmLoadPromise;
|
|
@@ -274,7 +292,7 @@ export async function createInferenceSession(buffer_or_path, session_options, se
|
|
|
274
292
|
logSeverityLevel,
|
|
275
293
|
...session_options,
|
|
276
294
|
});
|
|
277
|
-
const session = await (IS_WEB_ENV ? (webInitChain = webInitChain.then(load)) : load());
|
|
295
|
+
const session = await (apis.IS_WEB_ENV ? (webInitChain = webInitChain.then(load)) : load());
|
|
278
296
|
session.config = session_config;
|
|
279
297
|
return session;
|
|
280
298
|
}
|
|
@@ -294,8 +312,7 @@ let webInferenceChain = Promise.resolve();
|
|
|
294
312
|
*/
|
|
295
313
|
export async function runInferenceSession(session, ortFeed) {
|
|
296
314
|
const run = () => session.run(ortFeed);
|
|
297
|
-
|
|
298
|
-
return output;
|
|
315
|
+
return apis.IS_WEB_ENV ? (webInferenceChain = webInferenceChain.then(run)) : run();
|
|
299
316
|
}
|
|
300
317
|
|
|
301
318
|
/**
|
|
@@ -306,41 +323,8 @@ export async function runInferenceSession(session, ortFeed) {
|
|
|
306
323
|
export function isONNXTensor(x) {
|
|
307
324
|
return x instanceof ONNX.Tensor;
|
|
308
325
|
}
|
|
309
|
-
|
|
310
326
|
/** @type {import('onnxruntime-common').Env} */
|
|
311
327
|
const ONNX_ENV = ONNX?.env;
|
|
312
|
-
if (ONNX_ENV?.wasm) {
|
|
313
|
-
// Initialize wasm backend with suitable default settings.
|
|
314
|
-
|
|
315
|
-
// (Optional) Set path to wasm files. This will override the default path search behavior of onnxruntime-web.
|
|
316
|
-
// By default, we only do this if we are not in a service worker and the wasmPaths are not already set.
|
|
317
|
-
if (
|
|
318
|
-
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
319
|
-
!(typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope) &&
|
|
320
|
-
ONNX_ENV.versions?.web &&
|
|
321
|
-
!ONNX_ENV.wasm.wasmPaths
|
|
322
|
-
) {
|
|
323
|
-
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
324
|
-
|
|
325
|
-
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI
|
|
326
|
-
? {
|
|
327
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
328
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`,
|
|
329
|
-
}
|
|
330
|
-
: {
|
|
331
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
332
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`,
|
|
333
|
-
};
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
// Users may wish to proxy the WASM backend to prevent the UI from freezing,
|
|
337
|
-
// However, this is not necessary when using WebGPU, so we default to false.
|
|
338
|
-
ONNX_ENV.wasm.proxy = false;
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
if (ONNX_ENV?.webgpu) {
|
|
342
|
-
ONNX_ENV.webgpu.powerPreference = 'high-performance';
|
|
343
|
-
}
|
|
344
328
|
|
|
345
329
|
/**
|
|
346
330
|
* Check if ONNX's WASM backend is being proxied.
|
|
@@ -351,21 +335,56 @@ export function isONNXProxy() {
|
|
|
351
335
|
return ONNX_ENV?.wasm?.proxy;
|
|
352
336
|
}
|
|
353
337
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
338
|
+
if (ONNX_ENV) {
|
|
339
|
+
if (ONNX_ENV.wasm) {
|
|
340
|
+
// Initialize wasm backend with suitable default settings.
|
|
341
|
+
|
|
342
|
+
// (Optional) Set path to wasm files. This will override the default path search behavior of onnxruntime-web.
|
|
343
|
+
// By default, we only do this if we are not in a service worker and the wasmPaths are not already set.
|
|
344
|
+
if (
|
|
345
|
+
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
346
|
+
!(typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope) &&
|
|
347
|
+
ONNX_ENV.versions?.web &&
|
|
348
|
+
!ONNX_ENV.wasm.wasmPaths
|
|
349
|
+
) {
|
|
350
|
+
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
351
|
+
|
|
352
|
+
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI
|
|
353
|
+
? {
|
|
354
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
355
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`,
|
|
356
|
+
}
|
|
357
|
+
: {
|
|
358
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
359
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`,
|
|
360
|
+
};
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// Users may wish to proxy the WASM backend to prevent the UI from freezing,
|
|
364
|
+
// However, this is not necessary when using WebGPU, so we default to false.
|
|
365
|
+
ONNX_ENV.wasm.proxy = false;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
if (ONNX_ENV.webgpu) {
|
|
369
|
+
ONNX_ENV.webgpu.powerPreference = 'high-performance';
|
|
370
|
+
}
|
|
363
371
|
|
|
364
|
-
|
|
365
|
-
|
|
372
|
+
/**
|
|
373
|
+
* A function to map Transformers.js log levels to ONNX Runtime log severity
|
|
374
|
+
* levels, and set the log level environment variable in ONNX Runtime.
|
|
375
|
+
* @param {number} logLevel The log level to set.
|
|
376
|
+
*/
|
|
377
|
+
function setLogLevel(logLevel) {
|
|
378
|
+
const severityLevel = getOnnxLogSeverityLevel(logLevel);
|
|
379
|
+
ONNX_ENV.logLevel = ONNX_LOG_LEVEL_NAMES[severityLevel];
|
|
380
|
+
}
|
|
366
381
|
|
|
367
|
-
//
|
|
368
|
-
env.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
382
|
+
// Set the initial log level to be the default Transformers.js log level.
|
|
383
|
+
setLogLevel(env.logLevel ?? LogLevel.WARNING);
|
|
384
|
+
|
|
385
|
+
// Expose ONNX environment variables to `env.backends.onnx`
|
|
386
|
+
env.backends.onnx = {
|
|
387
|
+
...ONNX_ENV,
|
|
388
|
+
setLogLevel,
|
|
389
|
+
};
|
|
390
|
+
}
|
package/src/backends/utils/cacheWasm.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
+
import { apis, env } from '../../env.js';
|
|
1
2
|
import { getCache } from '../../utils/cache.js';
|
|
2
|
-
import { isValidUrl } from '../../utils/hub/utils.js';
|
|
3
3
|
import { logger } from '../../utils/logger.js';
|
|
4
|
-
import { env } from '../../env.js';
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
6
|
* Loads and caches a file from the given URL.
|
|
8
7
|
* @param {string} url The URL of the file to load.
|
|
9
|
-
* @returns {Promise<Response|import('../../utils/hub/
|
|
8
|
+
* @returns {Promise<Response|import('../../utils/hub/FileResponse.js').FileResponse|null|string>} The response object, or null if loading failed.
|
|
10
9
|
*/
|
|
11
10
|
async function loadAndCacheFile(url) {
|
|
12
11
|
const fileName = url.split('/').pop();
|
|
@@ -65,58 +64,38 @@ export async function loadWasmBinary(wasmURL) {
|
|
|
65
64
|
}
|
|
66
65
|
|
|
67
66
|
/**
|
|
68
|
-
* Loads and caches the WASM Factory for ONNX Runtime.
|
|
67
|
+
* Loads and caches the WASM Factory (.mjs file) for ONNX Runtime.
|
|
68
|
+
* Creates a blob URL from cached content (when safe) to bridge Cache API with dynamic imports used in ORT.
|
|
69
69
|
* @param {string} libURL The URL of the WASM Factory to load.
|
|
70
|
-
* @returns {Promise<string|null>} The blob URL
|
|
70
|
+
* @returns {Promise<string|null>} The blob URL (if enabled), original URL (if disabled), or null if loading failed.
|
|
71
71
|
*/
|
|
72
72
|
export async function loadWasmFactory(libURL) {
|
|
73
|
+
// We can't use Blob URLs in some environments (Service Workers, Chrome extensions) due to security restrictions on dynamic import() of blob URLs.
|
|
74
|
+
// In such cases, just return the original URL and don't bother caching since dynamic import() won't use the Cache API anyway.
|
|
75
|
+
// See https://github.com/huggingface/transformers.js/issues/1532.
|
|
76
|
+
if (apis.IS_SERVICE_WORKER_ENV || apis.IS_CHROME_AVAILABLE) {
|
|
77
|
+
return libURL;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Fetch from cache or network, then create blob URL
|
|
73
81
|
const response = await loadAndCacheFile(libURL);
|
|
74
82
|
if (!response || typeof response === 'string') return null;
|
|
75
83
|
|
|
76
84
|
try {
|
|
77
85
|
let code = await response.text();
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
86
|
+
|
|
87
|
+
// Handle the case where we are importing the bundled version of the library in Deno (e.g., via CDN or local file),
|
|
88
|
+
// where we need to patch out Node.js detection in the factory. Without this, Deno (which exposes globalThis.process.versions.node)
|
|
89
|
+
// would enter the Node.js branch and try to use Node.js APIs (worker_threads, fs, etc.) that aren't used in the bundled web version.
|
|
90
|
+
// Only needed for the asyncify (single-threaded) variant loaded via blob URL. The module-level pthread auto-start code is unreachable since asyncify never spawns workers.
|
|
91
|
+
// See https://github.com/huggingface/transformers.js/pull/1546/ for more information.
|
|
92
|
+
//
|
|
93
|
+
// NOTE: This does not affect default usage via Deno (i.e., imported via npm: prefix), since we'll be using onnxruntime-node (Native) instead of onnxruntime-web (WASM).
|
|
81
94
|
code = code.replaceAll('globalThis.process?.versions?.node', 'false');
|
|
82
95
|
const blob = new Blob([code], { type: 'text/javascript' });
|
|
83
96
|
return URL.createObjectURL(blob);
|
|
84
97
|
} catch (error) {
|
|
85
|
-
logger.warn('Failed to read WASM
|
|
98
|
+
logger.warn('Failed to read WASM factory:', error);
|
|
86
99
|
return null;
|
|
87
100
|
}
|
|
88
101
|
}
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Checks if the given URL is a blob URL (created via URL.createObjectURL).
|
|
92
|
-
* Blob URLs should not be cached as they are temporary in-memory references.
|
|
93
|
-
* @param {string} url - The URL to check.
|
|
94
|
-
* @returns {boolean} True if the URL is a blob URL, false otherwise.
|
|
95
|
-
*/
|
|
96
|
-
export function isBlobURL(url) {
|
|
97
|
-
return isValidUrl(url, ['blob:']);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* Converts any URL to an absolute URL if needed.
|
|
102
|
-
* If the URL is already absolute (http://, https://, or blob:), returns it unchanged (handled by new URL(...)).
|
|
103
|
-
* Otherwise, resolves it relative to the current page location (browser) or module location (Node/Bun/Deno).
|
|
104
|
-
* @param {string} url - The URL to convert (can be relative or absolute).
|
|
105
|
-
* @returns {string} The absolute URL.
|
|
106
|
-
*/
|
|
107
|
-
export function toAbsoluteURL(url) {
|
|
108
|
-
let baseURL;
|
|
109
|
-
|
|
110
|
-
if (typeof location !== 'undefined' && location.href) {
|
|
111
|
-
// Browser environment: use location.href
|
|
112
|
-
baseURL = location.href;
|
|
113
|
-
} else if (typeof import.meta !== 'undefined' && import.meta.url) {
|
|
114
|
-
// Node.js/Bun/Deno module environment: use import.meta.url
|
|
115
|
-
baseURL = import.meta.url;
|
|
116
|
-
} else {
|
|
117
|
-
// Fallback: if no base is available, return the URL unchanged
|
|
118
|
-
return url;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
return new URL(url, baseURL).href;
|
|
122
|
-
}
|
package/src/configs.js
CHANGED
|
@@ -76,6 +76,7 @@ function getNormalizedConfig(config) {
|
|
|
76
76
|
case 'mistral3':
|
|
77
77
|
case 'qwen2_5_vl':
|
|
78
78
|
case 'qwen3_vl':
|
|
79
|
+
case 'qwen3_vl_moe':
|
|
79
80
|
// @ts-expect-error TS2339
|
|
80
81
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
81
82
|
break;
|
|
@@ -132,9 +133,12 @@ function getNormalizedConfig(config) {
|
|
|
132
133
|
case 'mistral':
|
|
133
134
|
case 'starcoder2':
|
|
134
135
|
case 'qwen2':
|
|
136
|
+
case 'qwen2_moe':
|
|
135
137
|
case 'qwen2_vl':
|
|
136
138
|
case 'qwen2_5_vl_text':
|
|
139
|
+
case 'qwen3_moe':
|
|
137
140
|
case 'qwen3_vl_text':
|
|
141
|
+
case 'qwen3_vl_moe_text':
|
|
138
142
|
case 'phi':
|
|
139
143
|
case 'phi3':
|
|
140
144
|
case 'phi3_v':
|
|
@@ -348,7 +352,7 @@ export function getCacheShapes(config, options) {
|
|
|
348
352
|
}
|
|
349
353
|
}
|
|
350
354
|
return cache_values;
|
|
351
|
-
} else if (['
|
|
355
|
+
} else if (['qwen3_next', 'qwen3_5_text', 'qwen3_5_moe_text', 'olmo_hybrid'].includes(config.model_type)) {
|
|
352
356
|
const pkv_prefix = options?.prefix ?? 'past_key_values';
|
|
353
357
|
const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
|
|
354
358
|
|
|
@@ -365,13 +369,11 @@ export function getCacheShapes(config, options) {
|
|
|
365
369
|
linear_key_head_dim,
|
|
366
370
|
linear_value_head_dim,
|
|
367
371
|
linear_conv_kernel_dim,
|
|
368
|
-
} = /** @type {any} */ (config)
|
|
372
|
+
} = /** @type {any} */ (config);
|
|
369
373
|
|
|
370
374
|
const key_dim = linear_key_head_dim * linear_num_key_heads;
|
|
371
375
|
const value_dim = linear_value_head_dim * linear_num_value_heads;
|
|
372
376
|
|
|
373
|
-
const conv_dim = key_dim * 2 + value_dim;
|
|
374
|
-
|
|
375
377
|
const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
|
|
376
378
|
const batch_size = options?.batch_size ?? 1;
|
|
377
379
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
@@ -380,7 +382,14 @@ export function getCacheShapes(config, options) {
|
|
|
380
382
|
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
|
|
381
383
|
}
|
|
382
384
|
} else if (layer_types[i] === 'linear_attention') {
|
|
383
|
-
|
|
385
|
+
if (config.model_type === 'olmo_hybrid') {
|
|
386
|
+
cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
387
|
+
cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
|
|
388
|
+
cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
389
|
+
} else {
|
|
390
|
+
const conv_dim = key_dim * 2 + value_dim;
|
|
391
|
+
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
|
|
392
|
+
}
|
|
384
393
|
cache_values[`${conv_prefix}_recurrent.${i}`] = [
|
|
385
394
|
batch_size,
|
|
386
395
|
linear_num_value_heads,
|
|
@@ -392,7 +401,10 @@ export function getCacheShapes(config, options) {
|
|
|
392
401
|
}
|
|
393
402
|
}
|
|
394
403
|
return cache_values;
|
|
404
|
+
} else if (['qwen3_5', 'qwen3_5_moe'].includes(config.model_type)) {
|
|
405
|
+
return getCacheShapes(/**@type {any} */ (config).text_config, options);
|
|
395
406
|
}
|
|
407
|
+
|
|
396
408
|
return getKeyValueShapes(config, options);
|
|
397
409
|
}
|
|
398
410
|
|
package/src/env.js
CHANGED
|
@@ -26,11 +26,13 @@ import fs from 'node:fs';
|
|
|
26
26
|
import path from 'node:path';
|
|
27
27
|
import url from 'node:url';
|
|
28
28
|
|
|
29
|
-
const VERSION = '4.0.0-next.5';
|
|
29
|
+
const VERSION = '4.0.0-next.6';
|
|
30
|
+
|
|
31
|
+
const HAS_SELF = typeof self !== 'undefined';
|
|
30
32
|
|
|
31
33
|
const IS_FS_AVAILABLE = !isEmpty(fs);
|
|
32
34
|
const IS_PATH_AVAILABLE = !isEmpty(path);
|
|
33
|
-
const IS_WEB_CACHE_AVAILABLE =
|
|
35
|
+
const IS_WEB_CACHE_AVAILABLE = HAS_SELF && 'caches' in self;
|
|
34
36
|
|
|
35
37
|
// Runtime detection
|
|
36
38
|
const IS_DENO_RUNTIME = typeof globalThis.Deno !== 'undefined';
|
|
@@ -44,14 +46,24 @@ const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node' &&
|
|
|
44
46
|
// Check if various APIs are available (depends on environment)
|
|
45
47
|
const IS_BROWSER_ENV = typeof window !== 'undefined' && typeof window.document !== 'undefined';
|
|
46
48
|
const IS_WEBWORKER_ENV =
|
|
47
|
-
|
|
49
|
+
HAS_SELF &&
|
|
48
50
|
['DedicatedWorkerGlobalScope', 'ServiceWorkerGlobalScope', 'SharedWorkerGlobalScope'].includes(
|
|
49
51
|
self.constructor?.name,
|
|
50
52
|
);
|
|
53
|
+
const IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
|
|
54
|
+
|
|
51
55
|
const IS_WEBGPU_AVAILABLE = IS_NODE_ENV || (typeof navigator !== 'undefined' && 'gpu' in navigator);
|
|
52
56
|
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
53
57
|
const IS_CRYPTO_AVAILABLE = typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function';
|
|
54
58
|
|
|
59
|
+
const IS_CHROME_AVAILABLE =
|
|
60
|
+
// @ts-ignore - chrome may not exist in all environments
|
|
61
|
+
typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined' && typeof chrome.runtime.id === 'string';
|
|
62
|
+
|
|
63
|
+
const IS_SERVICE_WORKER_ENV =
|
|
64
|
+
// @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
|
|
65
|
+
typeof ServiceWorkerGlobalScope !== 'undefined' && HAS_SELF && self instanceof ServiceWorkerGlobalScope;
|
|
66
|
+
|
|
55
67
|
/**
|
|
56
68
|
* Check if the current environment is Safari browser.
|
|
57
69
|
* Works in both browser and web worker contexts.
|
|
@@ -90,6 +102,15 @@ export const apis = Object.freeze({
|
|
|
90
102
|
/** Whether we are running in a web worker environment */
|
|
91
103
|
IS_WEBWORKER_ENV,
|
|
92
104
|
|
|
105
|
+
/** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
|
|
106
|
+
IS_WEB_ENV,
|
|
107
|
+
|
|
108
|
+
/** Whether we are running in a service worker environment */
|
|
109
|
+
IS_SERVICE_WORKER_ENV,
|
|
110
|
+
|
|
111
|
+
/** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
|
|
112
|
+
IS_DENO_WEB_RUNTIME,
|
|
113
|
+
|
|
93
114
|
/** Whether the Cache API is available */
|
|
94
115
|
IS_WEB_CACHE_AVAILABLE,
|
|
95
116
|
|
|
@@ -116,6 +137,9 @@ export const apis = Object.freeze({
|
|
|
116
137
|
|
|
117
138
|
/** Whether the crypto API is available */
|
|
118
139
|
IS_CRYPTO_AVAILABLE,
|
|
140
|
+
|
|
141
|
+
/** Whether the Chrome runtime API is available */
|
|
142
|
+
IS_CHROME_AVAILABLE,
|
|
119
143
|
});
|
|
120
144
|
|
|
121
145
|
const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
@@ -197,9 +221,8 @@ export const LogLevel = Object.freeze({
|
|
|
197
221
|
* @property {boolean} useCustomCache Whether to use a custom cache system (defined by `customCache`), defaults to `false`.
|
|
198
222
|
* @property {import('./utils/cache.js').CacheInterface|null} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
|
|
199
223
|
* implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache.
|
|
200
|
-
* @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries
|
|
201
|
-
*
|
|
202
|
-
* The MJS loader file still requires network access unless you use a Service Worker.
|
|
224
|
+
* @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
|
|
225
|
+
* Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
|
|
203
226
|
* @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
|
|
204
227
|
* @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
|
|
205
228
|
*/
|
package/src/models/auto/modeling_auto.js
CHANGED
|
@@ -62,6 +62,19 @@ class PretrainedMixin {
|
|
|
62
62
|
*/
|
|
63
63
|
static BASE_IF_FAIL = false;
|
|
64
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Check whether this AutoModel class supports a given model type.
|
|
67
|
+
* @param {string} model_type The model type from config (e.g., 'bert', 'whisper').
|
|
68
|
+
* @returns {boolean} Whether this class can handle the given model type.
|
|
69
|
+
*/
|
|
70
|
+
static supports(model_type) {
|
|
71
|
+
if (!this.MODEL_CLASS_MAPPINGS) return false;
|
|
72
|
+
for (const mapping of this.MODEL_CLASS_MAPPINGS) {
|
|
73
|
+
if (mapping.has(model_type)) return true;
|
|
74
|
+
}
|
|
75
|
+
return this.BASE_IF_FAIL;
|
|
76
|
+
}
|
|
77
|
+
|
|
65
78
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
66
79
|
static async from_pretrained(
|
|
67
80
|
pretrained_model_name_or_path,
|
|
@@ -97,7 +110,7 @@ class PretrainedMixin {
|
|
|
97
110
|
if (!this.MODEL_CLASS_MAPPINGS) {
|
|
98
111
|
throw new Error('`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: ' + this.name);
|
|
99
112
|
}
|
|
100
|
-
const model_type = options.config
|
|
113
|
+
const { model_type } = options.config;
|
|
101
114
|
for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
|
|
102
115
|
let modelInfo = MODEL_CLASS_MAPPING.get(model_type);
|
|
103
116
|
if (!modelInfo) {
|
package/src/models/modeling_utils.js
CHANGED
|
@@ -37,6 +37,26 @@ import { pick } from '../utils/core.js';
|
|
|
37
37
|
import { ModelOutput } from './modeling_outputs.js';
|
|
38
38
|
import { logger } from '../utils/logger.js';
|
|
39
39
|
|
|
40
|
+
/**
|
|
41
|
+
* Extract the past sequence length from a past_key_values object.
|
|
42
|
+
* For standard models, all entries are attention KV caches with shape [batch, heads, seq_len, head_dim].
|
|
43
|
+
* For hybrid models (e.g., Qwen3.5 with conv/recurrent + attention layers), the first entry
|
|
44
|
+
* may be a conv or recurrent state whose dims don't encode a sequence length.
|
|
45
|
+
* This function finds a `past_key_values.*` entry (standard attention cache) to determine the true past length.
|
|
46
|
+
*
|
|
47
|
+
* @param {Record<string, Tensor>} past_key_values
|
|
48
|
+
* @returns {number} The past sequence length.
|
|
49
|
+
*/
|
|
50
|
+
export function getPastLength(past_key_values) {
|
|
51
|
+
for (const name in past_key_values) {
|
|
52
|
+
if (name.startsWith('past_key_values.')) {
|
|
53
|
+
return past_key_values[name].dims.at(-2);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
// Fallback for non-hybrid models (all entries are attention KV)
|
|
57
|
+
return Object.values(past_key_values)[0].dims.at(-2);
|
|
58
|
+
}
|
|
59
|
+
|
|
40
60
|
/**
|
|
41
61
|
* Converts an array or Tensor of integers to an int64 Tensor.
|
|
42
62
|
* @param {any[]|Tensor} items The input integers to be converted.
|
|
@@ -1431,7 +1451,7 @@ export async function decoder_forward(self, model_inputs, is_encoder_decoder = f
|
|
|
1431
1451
|
* @param {Tensor} [params.attention_mask=null]
|
|
1432
1452
|
* @param {Tensor} [params.position_ids=null]
|
|
1433
1453
|
* @param {Tensor} [params.inputs_embeds=null]
|
|
1434
|
-
* @param {Tensor} [params.past_key_values=null]
|
|
1454
|
+
* @param {Record<string, Tensor>} [params.past_key_values=null]
|
|
1435
1455
|
* @param {Object} [params.generation_config=null]
|
|
1436
1456
|
* @param {Object} [params.logits_processor=null]
|
|
1437
1457
|
* @returns {Promise<Tensor>} The model's output tensor
|
|
@@ -1485,7 +1505,7 @@ export async function generic_text_to_text_forward(
|
|
|
1485
1505
|
} else if (past_key_values && modality_values && input_ids.dims[1] === 1) {
|
|
1486
1506
|
// This branch handles the cache case.
|
|
1487
1507
|
const target_length = input_ids.dims[1]; // always 1
|
|
1488
|
-
const past_length =
|
|
1508
|
+
const past_length = getPastLength(past_key_values);
|
|
1489
1509
|
|
|
1490
1510
|
attention_mask = cat(
|
|
1491
1511
|
[
|
|
@@ -1499,11 +1519,19 @@ export async function generic_text_to_text_forward(
|
|
|
1499
1519
|
|
|
1500
1520
|
if (!position_ids) {
|
|
1501
1521
|
if (
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1522
|
+
// Handle special case for qwen vl models
|
|
1523
|
+
[
|
|
1524
|
+
'qwen2_vl',
|
|
1525
|
+
'qwen2_5_vl',
|
|
1526
|
+
'qwen2_5_vl_text',
|
|
1527
|
+
'qwen3_vl',
|
|
1528
|
+
'qwen3_vl_text',
|
|
1529
|
+
'qwen3_5',
|
|
1530
|
+
'qwen3_5_text',
|
|
1531
|
+
'qwen3_5_moe',
|
|
1532
|
+
'qwen3_5_moe_text',
|
|
1533
|
+
].includes(self.config.model_type)
|
|
1505
1534
|
) {
|
|
1506
|
-
// Special case for qwen vl models
|
|
1507
1535
|
// @ts-ignore
|
|
1508
1536
|
const { image_grid_thw, video_grid_thw } = kwargs;
|
|
1509
1537
|
[position_ids] = self.get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask);
|
|
@@ -1616,7 +1644,7 @@ export function create_position_ids(model_inputs, past_key_values = null, start_
|
|
|
1616
1644
|
}
|
|
1617
1645
|
|
|
1618
1646
|
export function decoder_prepare_inputs_for_generation(self, input_ids, model_inputs, generation_config) {
|
|
1619
|
-
const past_length = model_inputs.past_key_values ?
|
|
1647
|
+
const past_length = model_inputs.past_key_values ? getPastLength(model_inputs.past_key_values) : 0;
|
|
1620
1648
|
|
|
1621
1649
|
if (!model_inputs.attention_mask) {
|
|
1622
1650
|
// If the attention mask is not provided, we attempt to infer based on provided inputs
|
package/src/models/models.js
CHANGED
|
@@ -108,6 +108,7 @@ export * from './nomic_bert/modeling_nomic_bert.js';
|
|
|
108
108
|
export * from './olmo/modeling_olmo.js';
|
|
109
109
|
export * from './olmo2/modeling_olmo2.js';
|
|
110
110
|
export * from './olmo3/modeling_olmo3.js';
|
|
111
|
+
export * from './olmo_hybrid/modeling_olmo_hybrid.js';
|
|
111
112
|
export * from './openelm/modeling_openelm.js';
|
|
112
113
|
export * from './opt/modeling_opt.js';
|
|
113
114
|
export * from './owlv2/modeling_owlv2.js';
|
|
@@ -122,10 +123,14 @@ export * from './phi3_v/modeling_phi3_v.js';
|
|
|
122
123
|
export * from './pvt/modeling_pvt.js';
|
|
123
124
|
export * from './pyannote/modeling_pyannote.js';
|
|
124
125
|
export * from './qwen2/modeling_qwen2.js';
|
|
126
|
+
export * from './qwen2_moe/modeling_qwen2_moe.js';
|
|
125
127
|
export * from './qwen2_vl/modeling_qwen2_vl.js';
|
|
126
128
|
export * from './qwen2_5_vl/modeling_qwen2_5_vl.js';
|
|
127
129
|
export * from './qwen3/modeling_qwen3.js';
|
|
130
|
+
export * from './qwen3_moe/modeling_qwen3_moe.js';
|
|
131
|
+
export * from './qwen3_next/modeling_qwen3_next.js';
|
|
128
132
|
export * from './qwen3_vl/modeling_qwen3_vl.js';
|
|
133
|
+
export * from './qwen3_vl_moe/modeling_qwen3_vl_moe.js';
|
|
129
134
|
export * from './qwen3_5/modeling_qwen3_5.js';
|
|
130
135
|
export * from './qwen3_5_moe/modeling_qwen3_5_moe.js';
|
|
131
136
|
export * from './resnet/modeling_resnet.js';
|