@huggingface/transformers 4.0.0-next.5 → 4.0.0-next.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. package/README.md +12 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +24 -24
  3. package/dist/transformers.js +2189 -1015
  4. package/dist/transformers.min.js +16 -16
  5. package/dist/transformers.node.cjs +2234 -1029
  6. package/dist/transformers.node.min.cjs +20 -20
  7. package/dist/transformers.node.min.mjs +20 -20
  8. package/dist/transformers.node.mjs +2194 -1017
  9. package/dist/transformers.web.js +2175 -1001
  10. package/dist/transformers.web.min.js +18 -18
  11. package/package.json +4 -4
  12. package/src/backends/onnx.js +77 -58
  13. package/src/backends/utils/cacheWasm.js +22 -43
  14. package/src/cache_utils.js +62 -0
  15. package/src/configs.js +32 -5
  16. package/src/env.js +36 -6
  17. package/src/image_processors_utils.js +3 -3
  18. package/src/models/auto/modeling_auto.js +14 -1
  19. package/src/models/chatterbox/modeling_chatterbox.js +1 -1
  20. package/src/models/detr/image_processing_detr.js +1 -1
  21. package/src/models/feature_extractors.js +2 -0
  22. package/src/models/gemma3n/modeling_gemma3n.js +2 -0
  23. package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
  24. package/src/models/granite_speech/modeling_granite_speech.js +5 -0
  25. package/src/models/granite_speech/processing_granite_speech.js +62 -0
  26. package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
  27. package/src/models/idefics3/modeling_idefics3.js +5 -32
  28. package/src/models/image_processors.js +1 -0
  29. package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
  30. package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
  31. package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
  32. package/src/models/llava/modeling_llava.js +1 -1
  33. package/src/models/mistral3/modeling_mistral3.js +2 -2
  34. package/src/models/modeling_utils.js +234 -292
  35. package/src/models/models.js +9 -0
  36. package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
  37. package/src/models/paligemma/modeling_paligemma.js +2 -25
  38. package/src/models/processors.js +3 -0
  39. package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -1
  40. package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
  41. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +1 -41
  42. package/src/models/qwen2_vl/modeling_qwen2_vl.js +36 -3
  43. package/src/models/qwen3_5/modeling_qwen3_5.js +1 -0
  44. package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +2 -1
  45. package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
  46. package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
  47. package/src/models/qwen3_vl/modeling_qwen3_vl.js +2 -1
  48. package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
  49. package/src/models/registry.js +39 -4
  50. package/src/models/sam/image_processing_sam.js +1 -1
  51. package/src/models/session.js +17 -6
  52. package/src/models/smolvlm/modeling_smolvlm.js +7 -0
  53. package/src/models/ultravox/modeling_ultravox.js +1 -3
  54. package/src/models/voxtral/modeling_voxtral.js +3 -0
  55. package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
  56. package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
  57. package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
  58. package/src/models/whisper/feature_extraction_whisper.js +2 -12
  59. package/src/pipelines/index.js +2 -84
  60. package/src/pipelines.js +40 -77
  61. package/src/transformers.js +2 -0
  62. package/src/utils/audio.js +18 -2
  63. package/src/utils/cache/CrossOriginStorageCache.js +251 -0
  64. package/src/utils/cache/FileCache.js +128 -0
  65. package/src/utils/cache/cross-origin-storage.d.ts +38 -0
  66. package/src/utils/cache.js +8 -3
  67. package/src/utils/hub/{files.js → FileResponse.js} +0 -105
  68. package/src/utils/hub/utils.js +35 -1
  69. package/src/utils/hub.js +6 -5
  70. package/src/utils/image.js +12 -13
  71. package/src/utils/lru_cache.js +67 -0
  72. package/src/utils/memoize_promise.js +45 -0
  73. package/src/utils/model_registry/ModelRegistry.js +70 -23
  74. package/src/utils/model_registry/get_file_metadata.js +14 -2
  75. package/src/utils/model_registry/get_model_files.js +63 -78
  76. package/src/utils/model_registry/get_pipeline_files.js +15 -24
  77. package/src/utils/model_registry/is_cached.js +81 -4
  78. package/src/utils/tensor.js +18 -2
  79. package/types/backends/onnx.d.ts.map +1 -1
  80. package/types/backends/utils/cacheWasm.d.ts +3 -17
  81. package/types/backends/utils/cacheWasm.d.ts.map +1 -1
  82. package/types/cache_utils.d.ts +29 -0
  83. package/types/cache_utils.d.ts.map +1 -0
  84. package/types/configs.d.ts.map +1 -1
  85. package/types/env.d.ts +18 -3
  86. package/types/env.d.ts.map +1 -1
  87. package/types/image_processors_utils.d.ts +17 -1
  88. package/types/image_processors_utils.d.ts.map +1 -1
  89. package/types/models/auto/modeling_auto.d.ts +6 -0
  90. package/types/models/auto/modeling_auto.d.ts.map +1 -1
  91. package/types/models/detr/image_processing_detr.d.ts +1 -1
  92. package/types/models/feature_extractors.d.ts +2 -0
  93. package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
  94. package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
  95. package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
  96. package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
  97. package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
  98. package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
  99. package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
  100. package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
  101. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
  102. package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
  103. package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
  104. package/types/models/image_processors.d.ts +1 -0
  105. package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
  106. package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
  107. package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
  108. package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
  109. package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
  110. package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
  111. package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
  112. package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
  113. package/types/models/modeling_utils.d.ts +44 -24
  114. package/types/models/modeling_utils.d.ts.map +1 -1
  115. package/types/models/models.d.ts +9 -0
  116. package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
  117. package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
  118. package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
  119. package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
  120. package/types/models/processors.d.ts +3 -0
  121. package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +3 -0
  122. package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -1
  123. package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
  124. package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
  125. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
  126. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +2 -0
  127. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
  128. package/types/models/qwen3_5/modeling_qwen3_5.d.ts +2 -0
  129. package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -1
  130. package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +3 -0
  131. package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -1
  132. package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
  133. package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
  134. package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
  135. package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
  136. package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +3 -0
  137. package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -1
  138. package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
  139. package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
  140. package/types/models/registry.d.ts +2 -1
  141. package/types/models/registry.d.ts.map +1 -1
  142. package/types/models/sam/image_processing_sam.d.ts +1 -1
  143. package/types/models/session.d.ts +3 -2
  144. package/types/models/session.d.ts.map +1 -1
  145. package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
  146. package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
  147. package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
  148. package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
  149. package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
  150. package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
  151. package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
  152. package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
  153. package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
  154. package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
  155. package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
  156. package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
  157. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  158. package/types/pipelines/index.d.ts +0 -34
  159. package/types/pipelines/index.d.ts.map +1 -1
  160. package/types/pipelines.d.ts.map +1 -1
  161. package/types/transformers.d.ts +1 -0
  162. package/types/transformers.d.ts.map +1 -1
  163. package/types/utils/audio.d.ts +5 -2
  164. package/types/utils/audio.d.ts.map +1 -1
  165. package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
  166. package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
  167. package/types/utils/cache/FileCache.d.ts +39 -0
  168. package/types/utils/cache/FileCache.d.ts.map +1 -0
  169. package/types/utils/cache.d.ts +4 -4
  170. package/types/utils/cache.d.ts.map +1 -1
  171. package/types/utils/dtypes.d.ts +1 -1
  172. package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -38
  173. package/types/utils/hub/FileResponse.d.ts.map +1 -0
  174. package/types/utils/hub/utils.d.ts +17 -2
  175. package/types/utils/hub/utils.d.ts.map +1 -1
  176. package/types/utils/hub.d.ts +7 -7
  177. package/types/utils/hub.d.ts.map +1 -1
  178. package/types/utils/image.d.ts +1 -1
  179. package/types/utils/image.d.ts.map +1 -1
  180. package/types/utils/lru_cache.d.ts +38 -0
  181. package/types/utils/lru_cache.d.ts.map +1 -0
  182. package/types/utils/memoize_promise.d.ts +14 -0
  183. package/types/utils/memoize_promise.d.ts.map +1 -0
  184. package/types/utils/model_registry/ModelRegistry.d.ts +66 -6
  185. package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
  186. package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -1
  187. package/types/utils/model_registry/get_model_files.d.ts +1 -0
  188. package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
  189. package/types/utils/model_registry/get_pipeline_files.d.ts +2 -1
  190. package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -1
  191. package/types/utils/model_registry/is_cached.d.ts +47 -4
  192. package/types/utils/model_registry/is_cached.d.ts.map +1 -1
  193. package/types/utils/tensor.d.ts.map +1 -1
  194. package/src/utils/data-structures.js +0 -572
  195. package/types/utils/data-structures.d.ts +0 -294
  196. package/types/utils/data-structures.d.ts.map +0 -1
  197. package/types/utils/hub/files.d.ts.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "4.0.0-next.5",
3
+ "version": "4.0.0-next.7",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./dist/transformers.node.cjs",
6
6
  "types": "./types/transformers.d.ts",
@@ -43,10 +43,10 @@
43
43
  },
44
44
  "homepage": "https://github.com/huggingface/transformers.js#readme",
45
45
  "dependencies": {
46
- "@huggingface/jinja": "^0.5.5",
46
+ "@huggingface/jinja": "^0.5.6",
47
47
  "@huggingface/tokenizers": "^0.1.2",
48
- "onnxruntime-node": "1.24.2",
49
- "onnxruntime-web": "1.25.0-dev.20260228-6e72d31970",
48
+ "onnxruntime-node": "1.24.3",
49
+ "onnxruntime-web": "1.25.0-dev.20260307-d626b568e0",
50
50
  "sharp": "^0.34.5"
51
51
  },
52
52
  "devDependencies": {
package/src/backends/onnx.js CHANGED
@@ -22,7 +22,8 @@ import { env, apis, LogLevel } from '../env.js';
22
22
  // In either case, we select the default export if it exists, otherwise we use the named export.
23
23
  import * as ONNX_NODE from 'onnxruntime-node';
24
24
  import * as ONNX_WEB from 'onnxruntime-web/webgpu';
25
- import { isBlobURL, loadWasmBinary, loadWasmFactory, toAbsoluteURL } from './utils/cacheWasm.js';
25
+ import { loadWasmBinary, loadWasmFactory } from './utils/cacheWasm.js';
26
+ import { isBlobURL, toAbsoluteURL } from '../utils/hub/utils.js';
26
27
  import { logger } from '../utils/logger.js';
27
28
  export { Tensor } from 'onnxruntime-common';
28
29
 
@@ -176,8 +177,6 @@ export function deviceToExecutionProviders(device = null) {
176
177
  throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`);
177
178
  }
178
179
 
179
- const IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;
180
-
181
180
  /**
182
181
  * Currently, Transformers.js doesn't support simultaneous loading of sessions in WASM/WebGPU.
183
182
  * For this reason, we need to chain the loading calls.
@@ -204,14 +203,23 @@ async function ensureWasmLoaded() {
204
203
  return wasmLoadPromise;
205
204
  }
206
205
 
206
+ // Check if we should load the WASM binary
207
207
  const shouldUseWasmCache =
208
208
  env.useWasmCache &&
209
209
  typeof ONNX_ENV?.wasm?.wasmPaths === 'object' &&
210
210
  ONNX_ENV?.wasm?.wasmPaths?.wasm &&
211
211
  ONNX_ENV?.wasm?.wasmPaths?.mjs;
212
212
 
213
- // Check if we should load the WASM binary
214
213
  if (!shouldUseWasmCache) {
214
+ // In Deno's web runtime, the WASM factory must be loaded via blob URL so that Node.js detection
215
+ // can be patched out (see loadWasmFactory). Without caching, the factory is imported directly
216
+ // from its URL and Deno would crash trying to use Node.js APIs. useWasmCache defaults to true
217
+ // in this environment, so this only happens if the user explicitly disables it.
218
+ if (apis.IS_DENO_WEB_RUNTIME) {
219
+ throw new Error(
220
+ "env.useWasmCache=false is not supported in Deno's web runtime. Remove the useWasmCache override.",
221
+ );
222
+ }
215
223
  wasmLoadPromise = Promise.resolve();
216
224
  return wasmLoadPromise;
217
225
  }
@@ -222,7 +230,10 @@ async function ensureWasmLoaded() {
222
230
  // shouldUseWasmCache checks for wasmPaths.wasm and wasmPaths.mjs
223
231
  const urls = /** @type {{ wasm: string, mjs: string }} */ (ONNX_ENV.wasm.wasmPaths);
224
232
 
225
- // Load and cache both the WASM binary and factory
233
+ // Load both in parallel; the .mjs blob URL is only kept if wasmBinary succeeded.
234
+ // ORT only sets locateFile when wasmBinary is provided (onnxruntime PR https://github.com/microsoft/onnxruntime/pull/27411), which
235
+ // prevents new URL(fileName, import.meta.url) from failing inside a blob URL factory.
236
+ let wasmBinaryLoaded = false;
226
237
  await Promise.all([
227
238
  // Load and cache the WASM binary
228
239
  urls.wasm && !isBlobURL(urls.wasm)
@@ -231,6 +242,7 @@ async function ensureWasmLoaded() {
231
242
  const wasmBinary = await loadWasmBinary(toAbsoluteURL(urls.wasm));
232
243
  if (wasmBinary) {
233
244
  ONNX_ENV.wasm.wasmBinary = wasmBinary;
245
+ wasmBinaryLoaded = true;
234
246
  }
235
247
  } catch (err) {
236
248
  logger.warn('Failed to pre-load WASM binary:', err);
@@ -238,7 +250,7 @@ async function ensureWasmLoaded() {
238
250
  })()
239
251
  : Promise.resolve(),
240
252
 
241
- // Load and cache the WASM factory
253
+ // Load and cache the WASM factory as a blob URL
242
254
  urls.mjs && !isBlobURL(urls.mjs)
243
255
  ? (async () => {
244
256
  try {
@@ -253,6 +265,12 @@ async function ensureWasmLoaded() {
253
265
  })()
254
266
  : Promise.resolve(),
255
267
  ]);
268
+
269
+ // If wasmBinary failed to load, revert wasmPaths.mjs to the original URL (factory can only be loaded from blob if ONNX_ENV.wasm.wasmBinary is set. @see ORT PR #27411)
270
+ if (!wasmBinaryLoaded) {
271
+ // @ts-ignore
272
+ ONNX_ENV.wasm.wasmPaths.mjs = urls.mjs;
273
+ }
256
274
  })();
257
275
 
258
276
  return wasmLoadPromise;
@@ -274,7 +292,7 @@ export async function createInferenceSession(buffer_or_path, session_options, se
274
292
  logSeverityLevel,
275
293
  ...session_options,
276
294
  });
277
- const session = await (IS_WEB_ENV ? (webInitChain = webInitChain.then(load)) : load());
295
+ const session = await (apis.IS_WEB_ENV ? (webInitChain = webInitChain.then(load)) : load());
278
296
  session.config = session_config;
279
297
  return session;
280
298
  }
@@ -294,8 +312,7 @@ let webInferenceChain = Promise.resolve();
294
312
  */
295
313
  export async function runInferenceSession(session, ortFeed) {
296
314
  const run = () => session.run(ortFeed);
297
- const output = await (IS_WEB_ENV ? (webInferenceChain = webInferenceChain.then(run)) : run());
298
- return output;
315
+ return apis.IS_WEB_ENV ? (webInferenceChain = webInferenceChain.then(run)) : run();
299
316
  }
300
317
 
301
318
  /**
@@ -306,41 +323,8 @@ export async function runInferenceSession(session, ortFeed) {
306
323
  export function isONNXTensor(x) {
307
324
  return x instanceof ONNX.Tensor;
308
325
  }
309
-
310
326
  /** @type {import('onnxruntime-common').Env} */
311
327
  const ONNX_ENV = ONNX?.env;
312
- if (ONNX_ENV?.wasm) {
313
- // Initialize wasm backend with suitable default settings.
314
-
315
- // (Optional) Set path to wasm files. This will override the default path search behavior of onnxruntime-web.
316
- // By default, we only do this if we are not in a service worker and the wasmPaths are not already set.
317
- if (
318
- // @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
319
- !(typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope) &&
320
- ONNX_ENV.versions?.web &&
321
- !ONNX_ENV.wasm.wasmPaths
322
- ) {
323
- const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
324
-
325
- ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI
326
- ? {
327
- mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
328
- wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`,
329
- }
330
- : {
331
- mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
332
- wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`,
333
- };
334
- }
335
-
336
- // Users may wish to proxy the WASM backend to prevent the UI from freezing,
337
- // However, this is not necessary when using WebGPU, so we default to false.
338
- ONNX_ENV.wasm.proxy = false;
339
- }
340
-
341
- if (ONNX_ENV?.webgpu) {
342
- ONNX_ENV.webgpu.powerPreference = 'high-performance';
343
- }
344
328
 
345
329
  /**
346
330
  * Check if ONNX's WASM backend is being proxied.
@@ -351,21 +335,56 @@ export function isONNXProxy() {
351
335
  return ONNX_ENV?.wasm?.proxy;
352
336
  }
353
337
 
354
- /**
355
- * A function to map Transformers.js log levels to ONNX Runtime log severity
356
- * levels, and set the log level environment variable in ONNX Runtime.
357
- * @param {number} logLevel The log level to set.
358
- */
359
- function setLogLevel(logLevel) {
360
- const severityLevel = getOnnxLogSeverityLevel(logLevel);
361
- ONNX_ENV.logLevel = ONNX_LOG_LEVEL_NAMES[severityLevel];
362
- }
338
+ if (ONNX_ENV) {
339
+ if (ONNX_ENV.wasm) {
340
+ // Initialize wasm backend with suitable default settings.
341
+
342
+ // (Optional) Set path to wasm files. This will override the default path search behavior of onnxruntime-web.
343
+ // By default, we only do this if we are not in a service worker and the wasmPaths are not already set.
344
+ if (
345
+ // @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
346
+ !(typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope) &&
347
+ ONNX_ENV.versions?.web &&
348
+ !ONNX_ENV.wasm.wasmPaths
349
+ ) {
350
+ const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
351
+
352
+ ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI
353
+ ? {
354
+ mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
355
+ wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`,
356
+ }
357
+ : {
358
+ mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
359
+ wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`,
360
+ };
361
+ }
362
+
363
+ // Users may wish to proxy the WASM backend to prevent the UI from freezing,
364
+ // However, this is not necessary when using WebGPU, so we default to false.
365
+ ONNX_ENV.wasm.proxy = false;
366
+ }
367
+
368
+ if (ONNX_ENV.webgpu) {
369
+ ONNX_ENV.webgpu.powerPreference = 'high-performance';
370
+ }
363
371
 
364
- // Set the initial log level to be the default Transformers.js log level.
365
- setLogLevel(env.logLevel ?? LogLevel.WARNING);
372
+ /**
373
+ * A function to map Transformers.js log levels to ONNX Runtime log severity
374
+ * levels, and set the log level environment variable in ONNX Runtime.
375
+ * @param {number} logLevel The log level to set.
376
+ */
377
+ function setLogLevel(logLevel) {
378
+ const severityLevel = getOnnxLogSeverityLevel(logLevel);
379
+ ONNX_ENV.logLevel = ONNX_LOG_LEVEL_NAMES[severityLevel];
380
+ }
366
381
 
367
- // Expose ONNX environment variables to `env.backends.onnx`
368
- env.backends.onnx = {
369
- ...ONNX_ENV,
370
- setLogLevel,
371
- };
382
+ // Set the initial log level to be the default Transformers.js log level.
383
+ setLogLevel(env.logLevel ?? LogLevel.WARNING);
384
+
385
+ // Expose ONNX environment variables to `env.backends.onnx`
386
+ env.backends.onnx = {
387
+ ...ONNX_ENV,
388
+ setLogLevel,
389
+ };
390
+ }
package/src/backends/utils/cacheWasm.js CHANGED
@@ -1,12 +1,11 @@
1
+ import { apis, env } from '../../env.js';
1
2
  import { getCache } from '../../utils/cache.js';
2
- import { isValidUrl } from '../../utils/hub/utils.js';
3
3
  import { logger } from '../../utils/logger.js';
4
- import { env } from '../../env.js';
5
4
 
6
5
  /**
7
6
  * Loads and caches a file from the given URL.
8
7
  * @param {string} url The URL of the file to load.
9
- * @returns {Promise<Response|import('../../utils/hub/files.js').FileResponse|null|string>} The response object, or null if loading failed.
8
+ * @returns {Promise<Response|import('../../utils/hub/FileResponse.js').FileResponse|null|string>} The response object, or null if loading failed.
10
9
  */
11
10
  async function loadAndCacheFile(url) {
12
11
  const fileName = url.split('/').pop();
@@ -65,58 +64,38 @@ export async function loadWasmBinary(wasmURL) {
65
64
  }
66
65
 
67
66
  /**
68
- * Loads and caches the WASM Factory for ONNX Runtime.
67
+ * Loads and caches the WASM Factory (.mjs file) for ONNX Runtime.
68
+ * Creates a blob URL from cached content (when safe) to bridge Cache API with dynamic imports used in ORT.
69
69
  * @param {string} libURL The URL of the WASM Factory to load.
70
- * @returns {Promise<string|null>} The blob URL of the WASM Factory, or null if loading failed.
70
+ * @returns {Promise<string|null>} The blob URL (if enabled), original URL (if disabled), or null if loading failed.
71
71
  */
72
72
  export async function loadWasmFactory(libURL) {
73
+ // We can't use Blob URLs in some environments (Service Workers, Chrome extensions) due to security restrictions on dynamic import() of blob URLs.
74
+ // In such cases, just return the original URL and don't bother caching since dynamic import() won't use the Cache API anyway.
75
+ // See https://github.com/huggingface/transformers.js/issues/1532.
76
+ if (apis.IS_SERVICE_WORKER_ENV || apis.IS_CHROME_AVAILABLE) {
77
+ return libURL;
78
+ }
79
+
80
+ // Fetch from cache or network, then create blob URL
73
81
  const response = await loadAndCacheFile(libURL);
74
82
  if (!response || typeof response === 'string') return null;
75
83
 
76
84
  try {
77
85
  let code = await response.text();
78
- // Fix relative paths when loading factory from blob, overwrite import.meta.url with actual baseURL
79
- const baseUrl = libURL.split('/').slice(0, -1).join('/');
80
- code = code.replaceAll('import.meta.url', `"${baseUrl}"`);
86
+
87
+ // Handle the case where we are importing the bundled version of the library in Deno (e.g., via CDN or local file),
88
+ // where we need to patch out Node.js detection in the factory. Without this, Deno (which exposes globalThis.process.versions.node)
89
+ // would enter the Node.js branch and try to use Node.js APIs (worker_threads, fs, etc.) that aren't used in the bundled web version.
90
+ // Only needed for the asyncify (single-threaded) variant loaded via blob URL. The module-level pthread auto-start code is unreachable since asyncify never spawns workers.
91
+ // See https://github.com/huggingface/transformers.js/pull/1546/ for more information.
92
+ //
93
+ // NOTE: This does not affect default usage via Deno (i.e., imported via npm: prefix), since we'll be using onnxruntime-node (Native) instead of onnxruntime-web (WASM).
81
94
  code = code.replaceAll('globalThis.process?.versions?.node', 'false');
82
95
  const blob = new Blob([code], { type: 'text/javascript' });
83
96
  return URL.createObjectURL(blob);
84
97
  } catch (error) {
85
- logger.warn('Failed to read WASM binary:', error);
98
+ logger.warn('Failed to read WASM factory:', error);
86
99
  return null;
87
100
  }
88
101
  }
89
-
90
- /**
91
- * Checks if the given URL is a blob URL (created via URL.createObjectURL).
92
- * Blob URLs should not be cached as they are temporary in-memory references.
93
- * @param {string} url - The URL to check.
94
- * @returns {boolean} True if the URL is a blob URL, false otherwise.
95
- */
96
- export function isBlobURL(url) {
97
- return isValidUrl(url, ['blob:']);
98
- }
99
-
100
- /**
101
- * Converts any URL to an absolute URL if needed.
102
- * If the URL is already absolute (http://, https://, or blob:), returns it unchanged (handled by new URL(...)).
103
- * Otherwise, resolves it relative to the current page location (browser) or module location (Node/Bun/Deno).
104
- * @param {string} url - The URL to convert (can be relative or absolute).
105
- * @returns {string} The absolute URL.
106
- */
107
- export function toAbsoluteURL(url) {
108
- let baseURL;
109
-
110
- if (typeof location !== 'undefined' && location.href) {
111
- // Browser environment: use location.href
112
- baseURL = location.href;
113
- } else if (typeof import.meta !== 'undefined' && import.meta.url) {
114
- // Node.js/Bun/Deno module environment: use import.meta.url
115
- baseURL = import.meta.url;
116
- } else {
117
- // Fallback: if no base is available, return the URL unchanged
118
- return url;
119
- }
120
-
121
- return new URL(url, baseURL).href;
122
- }
package/src/cache_utils.js ADDED
@@ -0,0 +1,62 @@
1
+ import { Tensor } from './utils/tensor.js';
2
+
3
+ /**
4
+ * A cache class that stores past key values as named tensors.
5
+ */
6
+ class _DynamicCache {
7
+ /**
8
+ * Create a DynamicCache, optionally pre-populated with entries.
9
+ * @param {Record<string, Tensor>} [entries] Initial name→Tensor mappings.
10
+ */
11
+ constructor(entries) {
12
+ if (!entries) return;
13
+ for (const key in entries) {
14
+ if (key in this) {
15
+ throw new TypeError(`Key "${key}" conflicts with an existing property on DynamicCache`);
16
+ }
17
+ const value = entries[key];
18
+ if (!(value instanceof Tensor)) {
19
+ throw new TypeError(`Expected a Tensor for key "${key}", got ${typeof value}`);
20
+ }
21
+ this[key] = value;
22
+ }
23
+ }
24
+
25
+ /**
26
+ * Get the cached sequence length. This requires at least one attention cache entry to be present.
27
+ * @returns {number} The past sequence length.
28
+ */
29
+ get_seq_length() {
30
+ /** @type {Record<string, Tensor>} */
31
+ const self = /** @type {any} */ (this);
32
+ for (const name in self) {
33
+ if (name.startsWith('past_key_values.')) {
34
+ return self[name].dims.at(-2);
35
+ }
36
+ }
37
+ throw new Error('Unable to determine sequence length from the cache.');
38
+ }
39
+
40
+ /**
41
+ * Dispose all contained tensors whose data resides on the GPU.
42
+ * Returns a promise that resolves when all disposals are complete.
43
+ * @returns {Promise<void>} Promise that resolves when all GPU tensors are disposed.
44
+ */
45
+ async dispose() {
46
+ const promises = [];
47
+ for (const t of /** @type {Tensor[]} */ (Object.values(this))) {
48
+ if (t.location === 'gpu-buffer') {
49
+ promises.push(t.dispose());
50
+ }
51
+ }
52
+ await Promise.all(promises);
53
+ }
54
+ }
55
+
56
+ /**
57
+ * @typedef {_DynamicCache & Record<string, Tensor>} DynamicCache
58
+ */
59
+
60
+ export const DynamicCache = /** @type {new (entries?: Record<string, Tensor>) => DynamicCache} */ (
61
+ /** @type {unknown} */ (_DynamicCache)
62
+ );
package/src/configs.js CHANGED
@@ -68,14 +68,18 @@ function getNormalizedConfig(config) {
68
68
  case 'florence2':
69
69
  case 'llava_onevision':
70
70
  case 'idefics3':
71
+ case 'granite_speech':
71
72
  case 'ultravox':
72
73
  case 'voxtral':
74
+ case 'voxtral_realtime':
73
75
  case 'smolvlm':
74
76
  case 'gemma3n':
77
+ case 'lfm2_vl':
75
78
  case 'chatterbox':
76
79
  case 'mistral3':
77
80
  case 'qwen2_5_vl':
78
81
  case 'qwen3_vl':
82
+ case 'qwen3_vl_moe':
79
83
  // @ts-expect-error TS2339
80
84
  init_normalized_config = getNormalizedConfig(config.text_config);
81
85
  break;
@@ -130,11 +134,17 @@ function getNormalizedConfig(config) {
130
134
  case 'cohere':
131
135
  case 'cohere2':
132
136
  case 'mistral':
137
+ case 'voxtral_realtime_text':
138
+ case 'voxtral_realtime_encoder':
133
139
  case 'starcoder2':
134
140
  case 'qwen2':
141
+ case 'qwen2_moe':
135
142
  case 'qwen2_vl':
143
+ case 'qwen2_vl_text':
136
144
  case 'qwen2_5_vl_text':
145
+ case 'qwen3_moe':
137
146
  case 'qwen3_vl_text':
147
+ case 'qwen3_vl_moe_text':
138
148
  case 'phi':
139
149
  case 'phi3':
140
150
  case 'phi3_v':
@@ -289,6 +299,9 @@ function getNormalizedConfig(config) {
289
299
  * @returns {Record<string, number[]>}
290
300
  */
291
301
  export function getCacheShapes(config, options) {
302
+ if (!(config instanceof PretrainedConfig)) {
303
+ config = new PretrainedConfig(config);
304
+ }
292
305
  if (['lfm2', 'lfm2_moe'].includes(config.model_type)) {
293
306
  const pkv_prefix = options?.prefix ?? 'past_key_values';
294
307
  const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
@@ -348,7 +361,7 @@ export function getCacheShapes(config, options) {
348
361
  }
349
362
  }
350
363
  return cache_values;
351
- } else if (['qwen3_5', 'qwen3_5_moe'].includes(config.model_type)) {
364
+ } else if (['qwen3_next', 'qwen3_5_text', 'qwen3_5_moe_text', 'olmo_hybrid'].includes(config.model_type)) {
352
365
  const pkv_prefix = options?.prefix ?? 'past_key_values';
353
366
  const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
354
367
 
@@ -365,13 +378,11 @@ export function getCacheShapes(config, options) {
365
378
  linear_key_head_dim,
366
379
  linear_value_head_dim,
367
380
  linear_conv_kernel_dim,
368
- } = /** @type {any} */ (config).text_config;
381
+ } = /** @type {any} */ (config);
369
382
 
370
383
  const key_dim = linear_key_head_dim * linear_num_key_heads;
371
384
  const value_dim = linear_value_head_dim * linear_num_value_heads;
372
385
 
373
- const conv_dim = key_dim * 2 + value_dim;
374
-
375
386
  const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
376
387
  const batch_size = options?.batch_size ?? 1;
377
388
  for (let i = 0; i < layer_types.length; ++i) {
@@ -380,7 +391,14 @@ export function getCacheShapes(config, options) {
380
391
  cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
381
392
  }
382
393
  } else if (layer_types[i] === 'linear_attention') {
383
- cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
394
+ if (config.model_type === 'olmo_hybrid') {
395
+ cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
396
+ cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
397
+ cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
398
+ } else {
399
+ const conv_dim = key_dim * 2 + value_dim;
400
+ cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
401
+ }
384
402
  cache_values[`${conv_prefix}_recurrent.${i}`] = [
385
403
  batch_size,
386
404
  linear_num_value_heads,
@@ -392,7 +410,16 @@ export function getCacheShapes(config, options) {
392
410
  }
393
411
  }
394
412
  return cache_values;
413
+ } else if (['lfm2_vl', 'qwen3_5', 'qwen3_5_moe', 'voxtral_realtime'].includes(config.model_type)) {
414
+ let subConfig;
415
+ if (config.model_type === 'voxtral_realtime' && options?.session_name === 'audio_encoder') {
416
+ subConfig = /** @type {any} */ (config).audio_config;
417
+ } else {
418
+ subConfig = /** @type {any} */ (config).text_config;
419
+ }
420
+ return getCacheShapes(subConfig, options);
395
421
  }
422
+
396
423
  return getKeyValueShapes(config, options);
397
424
  }
398
425
 
package/src/env.js CHANGED
@@ -26,11 +26,13 @@ import fs from 'node:fs';
26
26
  import path from 'node:path';
27
27
  import url from 'node:url';
28
28
 
29
- const VERSION = '4.0.0-next.5';
29
+ const VERSION = '4.0.0-next.7';
30
+
31
+ const HAS_SELF = typeof self !== 'undefined';
30
32
 
31
33
  const IS_FS_AVAILABLE = !isEmpty(fs);
32
34
  const IS_PATH_AVAILABLE = !isEmpty(path);
33
- const IS_WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
35
+ const IS_WEB_CACHE_AVAILABLE = HAS_SELF && 'caches' in self;
34
36
 
35
37
  // Runtime detection
36
38
  const IS_DENO_RUNTIME = typeof globalThis.Deno !== 'undefined';
@@ -44,14 +46,24 @@ const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node' &&
44
46
  // Check if various APIs are available (depends on environment)
45
47
  const IS_BROWSER_ENV = typeof window !== 'undefined' && typeof window.document !== 'undefined';
46
48
  const IS_WEBWORKER_ENV =
47
- typeof self !== 'undefined' &&
49
+ HAS_SELF &&
48
50
  ['DedicatedWorkerGlobalScope', 'ServiceWorkerGlobalScope', 'SharedWorkerGlobalScope'].includes(
49
51
  self.constructor?.name,
50
52
  );
53
+ const IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
54
+
51
55
  const IS_WEBGPU_AVAILABLE = IS_NODE_ENV || (typeof navigator !== 'undefined' && 'gpu' in navigator);
52
56
  const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
53
57
  const IS_CRYPTO_AVAILABLE = typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function';
54
58
 
59
+ const IS_CHROME_AVAILABLE =
60
+ // @ts-ignore - chrome may not exist in all environments
61
+ typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined' && typeof chrome.runtime.id === 'string';
62
+
63
+ const IS_SERVICE_WORKER_ENV =
64
+ // @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
65
+ typeof ServiceWorkerGlobalScope !== 'undefined' && HAS_SELF && self instanceof ServiceWorkerGlobalScope;
66
+
55
67
  /**
56
68
  * Check if the current environment is Safari browser.
57
69
  * Works in both browser and web worker contexts.
@@ -90,6 +102,15 @@ export const apis = Object.freeze({
90
102
  /** Whether we are running in a web worker environment */
91
103
  IS_WEBWORKER_ENV,
92
104
 
105
+ /** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
106
+ IS_WEB_ENV,
107
+
108
+ /** Whether we are running in a service worker environment */
109
+ IS_SERVICE_WORKER_ENV,
110
+
111
+ /** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
112
+ IS_DENO_WEB_RUNTIME,
113
+
93
114
  /** Whether the Cache API is available */
94
115
  IS_WEB_CACHE_AVAILABLE,
95
116
 
@@ -116,6 +137,9 @@ export const apis = Object.freeze({
116
137
 
117
138
  /** Whether the crypto API is available */
118
139
  IS_CRYPTO_AVAILABLE,
140
+
141
+ /** Whether the Chrome runtime API is available */
142
+ IS_CHROME_AVAILABLE,
119
143
  });
120
144
 
121
145
  const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
@@ -197,10 +221,14 @@ export const LogLevel = Object.freeze({
197
221
  * @property {boolean} useCustomCache Whether to use a custom cache system (defined by `customCache`), defaults to `false`.
198
222
  * @property {import('./utils/cache.js').CacheInterface|null} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
199
223
  * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache.
200
- * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries for ONNX Runtime. Defaults to `true` when cache is available.
201
- * This can improve performance by avoiding repeated downloads of WASM files. Note: Only the WASM binary is cached.
202
- * The MJS loader file still requires network access unless you use a Service Worker.
224
+ * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
225
+ * Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
203
226
  * @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
227
+ * @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
228
+ * across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
229
+ * Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
230
+ * The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
231
+ * removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
204
232
  * @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
205
233
  */
206
234
 
@@ -247,6 +275,8 @@ export const env = {
247
275
  useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
248
276
  cacheKey: 'transformers-cache',
249
277
 
278
+ experimental_useCrossOriginStorage: false,
279
+
250
280
  /////////////////// Custom fetch /////////////////////
251
281
  fetch: DEFAULT_FETCH,
252
282
 
@@ -14,7 +14,7 @@ import { logger } from './utils/logger.js';
14
14
  */
15
15
 
16
16
  /**
17
- * @typedef {object} ImageProcessorResult
17
+ * @typedef {Object} ImageProcessorResult
18
18
  * @property {Tensor} pixel_values The pixel values of the batched preprocessed images.
19
19
  * @property {HeightWidth[]} original_sizes Array of two-dimensional tuples like [[480, 640]].
20
20
  * @property {HeightWidth[]} reshaped_input_sizes Array of two-dimensional tuples like [[1000, 1330]].
@@ -407,7 +407,7 @@ function compute_segments(
407
407
  * @returns {[number, number]} The new height and width of the image.
408
408
  * @throws {Error} If the height or width is smaller than the factor.
409
409
  */
410
- function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
410
+ export function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
411
411
  if (height < factor || width < factor) {
412
412
  throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
413
413
  } else if (Math.max(height, width) / Math.min(height, width) > 200) {
@@ -886,7 +886,7 @@ export class ImageProcessor extends Callable {
886
886
  }
887
887
 
888
888
  /**
889
- * @typedef {object} PreprocessedImage
889
+ * @typedef {Object} PreprocessedImage
890
890
  * @property {HeightWidth} original_size The original size of the image.
891
891
  * @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
892
892
  * @property {Tensor} pixel_values The pixel values of the preprocessed image.
@@ -62,6 +62,19 @@ class PretrainedMixin {
62
62
  */
63
63
  static BASE_IF_FAIL = false;
64
64
 
65
+ /**
66
+ * Check whether this AutoModel class supports a given model type.
67
+ * @param {string} model_type The model type from config (e.g., 'bert', 'whisper').
68
+ * @returns {boolean} Whether this class can handle the given model type.
69
+ */
70
+ static supports(model_type) {
71
+ if (!this.MODEL_CLASS_MAPPINGS) return false;
72
+ for (const mapping of this.MODEL_CLASS_MAPPINGS) {
73
+ if (mapping.has(model_type)) return true;
74
+ }
75
+ return this.BASE_IF_FAIL;
76
+ }
77
+
65
78
  /** @type {typeof PreTrainedModel.from_pretrained} */
66
79
  static async from_pretrained(
67
80
  pretrained_model_name_or_path,
@@ -97,7 +110,7 @@ class PretrainedMixin {
97
110
  if (!this.MODEL_CLASS_MAPPINGS) {
98
111
  throw new Error('`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: ' + this.name);
99
112
  }
100
- const model_type = options.config.model_type;
113
+ const { model_type } = options.config;
101
114
  for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
102
115
  let modelInfo = MODEL_CLASS_MAPPING.get(model_type);
103
116
  if (!modelInfo) {
@@ -105,7 +105,7 @@ export class ChatterboxModel extends ChatterboxPreTrainedModel {
105
105
  if (!past_key_values || target_length !== 1) {
106
106
  throw new Error('Incorrect state encountered during generation.');
107
107
  }
108
- const past_length = Object.values(past_key_values)[0].dims.at(-2);
108
+ const past_length = past_key_values.get_seq_length();
109
109
  attention_mask = ones([inputs_embeds.dims[0], past_length + target_length]);
110
110
  }
111
111
  }