@huggingface/transformers 4.0.0-next.1 → 4.0.0-next.10

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (364) hide show
  1. package/README.md +32 -6
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +31 -31
  3. package/dist/transformers.js +6050 -3202
  4. package/dist/transformers.min.js +23 -21
  5. package/dist/transformers.node.cjs +6119 -3174
  6. package/dist/transformers.node.min.cjs +25 -23
  7. package/dist/transformers.node.min.mjs +25 -23
  8. package/dist/transformers.node.mjs +6034 -3168
  9. package/dist/transformers.web.js +4255 -1381
  10. package/dist/transformers.web.min.js +23 -19
  11. package/package.json +5 -5
  12. package/src/backends/onnx.js +128 -53
  13. package/src/backends/utils/cacheWasm.js +28 -46
  14. package/src/cache_utils.js +62 -0
  15. package/src/configs.js +123 -23
  16. package/src/env.js +100 -11
  17. package/src/generation/logits_sampler.js +3 -15
  18. package/src/generation/parameters.js +1 -1
  19. package/src/generation/streamers.js +21 -0
  20. package/src/image_processors_utils.js +29 -23
  21. package/src/models/afmoe/modeling_afmoe.js +5 -0
  22. package/src/models/auto/image_processing_auto.js +2 -1
  23. package/src/models/auto/modeling_auto.js +16 -2
  24. package/src/models/auto/tokenization_auto.js +2 -1
  25. package/src/models/chatterbox/modeling_chatterbox.js +1 -1
  26. package/src/models/chmv2/image_processing_chmv2.js +3 -0
  27. package/src/models/chmv2/modeling_chmv2.js +4 -0
  28. package/src/models/clap/feature_extraction_clap.js +2 -1
  29. package/src/models/cohere2/modeling_cohere2.js +5 -0
  30. package/src/models/cohere_asr/feature_extraction_cohere_asr.js +117 -0
  31. package/src/models/cohere_asr/modeling_cohere_asr.js +11 -0
  32. package/src/models/cohere_asr/processing_cohere_asr.js +55 -0
  33. package/src/models/cohere_asr/tokenization_cohere_asr.js +3 -0
  34. package/src/models/deepseek_v3/modeling_deepseek_v3.js +5 -0
  35. package/src/models/detr/image_processing_detr.js +1 -1
  36. package/src/models/eurobert/modeling_eurobert.js +41 -0
  37. package/src/models/feature_extractors.js +3 -0
  38. package/src/models/gemma3/image_processing_gemma3.js +3 -0
  39. package/src/models/gemma3/modeling_gemma3.js +4 -1
  40. package/src/models/gemma3/processing_gemma3.js +45 -0
  41. package/src/models/gemma3n/modeling_gemma3n.js +2 -0
  42. package/src/models/glm46v/image_processing_glm46v.js +12 -0
  43. package/src/models/glm46v/processing_glm46v.js +5 -0
  44. package/src/models/glm_moe_dsa/modeling_glm_moe_dsa.js +5 -0
  45. package/src/models/glm_ocr/modeling_glm_ocr.js +78 -0
  46. package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
  47. package/src/models/granite_speech/modeling_granite_speech.js +5 -0
  48. package/src/models/granite_speech/processing_granite_speech.js +62 -0
  49. package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
  50. package/src/models/idefics3/modeling_idefics3.js +5 -32
  51. package/src/models/image_processors.js +4 -0
  52. package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
  53. package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
  54. package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
  55. package/src/models/lighton_ocr/modeling_lighton_ocr.js +3 -0
  56. package/src/models/llava/modeling_llava.js +1 -1
  57. package/src/models/marian/tokenization_marian.js +3 -2
  58. package/src/models/mistral3/modeling_mistral3.js +2 -2
  59. package/src/models/mistral4/modeling_mistral4.js +5 -0
  60. package/src/models/modeling_utils.js +283 -300
  61. package/src/models/models.js +26 -1
  62. package/src/models/nemotron_h/modeling_nemotron_h.js +5 -0
  63. package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
  64. package/src/models/paligemma/modeling_paligemma.js +2 -25
  65. package/src/models/paligemma/processing_paligemma.js +3 -2
  66. package/src/models/processors.js +8 -0
  67. package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +9 -0
  68. package/src/models/qwen2_5_vl/processing_qwen2_5_vl.js +3 -0
  69. package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
  70. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +15 -1
  71. package/src/models/qwen2_vl/modeling_qwen2_vl.js +240 -143
  72. package/src/models/qwen2_vl/processing_qwen2_vl.js +5 -4
  73. package/src/models/qwen3_5/modeling_qwen3_5.js +4 -0
  74. package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +4 -0
  75. package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
  76. package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
  77. package/src/models/qwen3_vl/modeling_qwen3_vl.js +4 -0
  78. package/src/models/qwen3_vl/processing_qwen3_vl.js +3 -0
  79. package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
  80. package/src/models/registry.js +61 -5
  81. package/src/models/sam/image_processing_sam.js +1 -1
  82. package/src/models/session.js +33 -56
  83. package/src/models/smolvlm/modeling_smolvlm.js +7 -0
  84. package/src/models/solar_open/modeling_solar_open.js +5 -0
  85. package/src/models/tokenizers.js +1 -0
  86. package/src/models/ultravox/modeling_ultravox.js +1 -3
  87. package/src/models/voxtral/modeling_voxtral.js +3 -0
  88. package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
  89. package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
  90. package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
  91. package/src/models/whisper/feature_extraction_whisper.js +4 -13
  92. package/src/models/whisper/modeling_whisper.js +6 -5
  93. package/src/models/xlm/tokenization_xlm.js +2 -1
  94. package/src/pipelines/automatic-speech-recognition.js +47 -3
  95. package/src/pipelines/document-question-answering.js +1 -1
  96. package/src/pipelines/image-to-text.js +2 -2
  97. package/src/pipelines/index.js +313 -0
  98. package/src/pipelines/summarization.js +1 -1
  99. package/src/pipelines/text-generation.js +5 -1
  100. package/src/pipelines/text-to-audio.js +4 -2
  101. package/src/pipelines/text2text-generation.js +1 -1
  102. package/src/pipelines/translation.js +1 -1
  103. package/src/pipelines/zero-shot-classification.js +3 -2
  104. package/src/pipelines.js +140 -428
  105. package/src/tokenization_utils.js +42 -21
  106. package/src/transformers.js +10 -1
  107. package/src/utils/audio.js +20 -3
  108. package/src/utils/cache/CrossOriginStorageCache.js +251 -0
  109. package/src/utils/cache/FileCache.js +128 -0
  110. package/src/utils/cache/cross-origin-storage.d.ts +38 -0
  111. package/src/utils/cache.js +12 -4
  112. package/src/utils/core.js +23 -1
  113. package/src/utils/devices.js +22 -0
  114. package/src/utils/dtypes.js +55 -0
  115. package/src/utils/hub/{files.js → FileResponse.js} +0 -90
  116. package/src/utils/hub/utils.js +45 -5
  117. package/src/utils/hub.js +67 -23
  118. package/src/utils/image.js +14 -14
  119. package/src/utils/logger.js +67 -0
  120. package/src/utils/lru_cache.js +67 -0
  121. package/src/utils/memoize_promise.js +45 -0
  122. package/src/utils/model-loader.js +35 -17
  123. package/src/utils/model_registry/ModelRegistry.js +382 -0
  124. package/src/utils/model_registry/clear_cache.js +128 -0
  125. package/src/utils/model_registry/get_available_dtypes.js +68 -0
  126. package/src/utils/model_registry/get_file_metadata.js +162 -0
  127. package/src/utils/model_registry/get_files.js +42 -0
  128. package/src/utils/model_registry/get_model_files.js +114 -0
  129. package/src/utils/model_registry/get_pipeline_files.js +44 -0
  130. package/src/utils/model_registry/get_processor_files.js +20 -0
  131. package/src/utils/model_registry/get_tokenizer_files.js +21 -0
  132. package/src/utils/model_registry/is_cached.js +169 -0
  133. package/src/utils/model_registry/resolve_model_type.js +66 -0
  134. package/src/utils/random.js +225 -0
  135. package/src/utils/tensor.js +26 -23
  136. package/src/utils/video.js +2 -2
  137. package/types/backends/onnx.d.ts.map +1 -1
  138. package/types/backends/utils/cacheWasm.d.ts +3 -17
  139. package/types/backends/utils/cacheWasm.d.ts.map +1 -1
  140. package/types/cache_utils.d.ts +29 -0
  141. package/types/cache_utils.d.ts.map +1 -0
  142. package/types/configs.d.ts.map +1 -1
  143. package/types/env.d.ts +60 -27
  144. package/types/env.d.ts.map +1 -1
  145. package/types/generation/logits_sampler.d.ts +2 -2
  146. package/types/generation/logits_sampler.d.ts.map +1 -1
  147. package/types/generation/parameters.d.ts +1 -1
  148. package/types/generation/parameters.d.ts.map +1 -1
  149. package/types/generation/streamers.d.ts +1 -0
  150. package/types/generation/streamers.d.ts.map +1 -1
  151. package/types/image_processors_utils.d.ts +18 -1
  152. package/types/image_processors_utils.d.ts.map +1 -1
  153. package/types/models/afmoe/modeling_afmoe.d.ts +8 -0
  154. package/types/models/afmoe/modeling_afmoe.d.ts.map +1 -0
  155. package/types/models/{ast/modeling_ast.d.ts → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts} +1 -1
  156. package/types/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts.map +1 -0
  157. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  158. package/types/models/auto/modeling_auto.d.ts +6 -0
  159. package/types/models/auto/modeling_auto.d.ts.map +1 -1
  160. package/types/models/auto/tokenization_auto.d.ts.map +1 -1
  161. package/types/models/chmv2/image_processing_chmv2.d.ts +4 -0
  162. package/types/models/chmv2/image_processing_chmv2.d.ts.map +1 -0
  163. package/types/models/chmv2/modeling_chmv2.d.ts +6 -0
  164. package/types/models/chmv2/modeling_chmv2.d.ts.map +1 -0
  165. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
  166. package/types/models/cohere2/modeling_cohere2.d.ts +8 -0
  167. package/types/models/cohere2/modeling_cohere2.d.ts.map +1 -0
  168. package/types/models/cohere_asr/feature_extraction_cohere_asr.d.ts +25 -0
  169. package/types/models/cohere_asr/feature_extraction_cohere_asr.d.ts.map +1 -0
  170. package/types/models/cohere_asr/modeling_cohere_asr.d.ts +9 -0
  171. package/types/models/cohere_asr/modeling_cohere_asr.d.ts.map +1 -0
  172. package/types/models/cohere_asr/processing_cohere_asr.d.ts +27 -0
  173. package/types/models/cohere_asr/processing_cohere_asr.d.ts.map +1 -0
  174. package/types/models/cohere_asr/tokenization_cohere_asr.d.ts +4 -0
  175. package/types/models/cohere_asr/tokenization_cohere_asr.d.ts.map +1 -0
  176. package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts +8 -0
  177. package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts.map +1 -0
  178. package/types/models/detr/image_processing_detr.d.ts +1 -1
  179. package/types/models/eurobert/modeling_eurobert.d.ts +36 -0
  180. package/types/models/eurobert/modeling_eurobert.d.ts.map +1 -0
  181. package/types/models/feature_extractors.d.ts +3 -0
  182. package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
  183. package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
  184. package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
  185. package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
  186. package/types/models/gemma3/processing_gemma3.d.ts +20 -0
  187. package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
  188. package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
  189. package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
  190. package/types/models/glm46v/image_processing_glm46v.d.ts +4 -0
  191. package/types/models/glm46v/image_processing_glm46v.d.ts.map +1 -0
  192. package/types/models/glm46v/processing_glm46v.d.ts +4 -0
  193. package/types/models/glm46v/processing_glm46v.d.ts.map +1 -0
  194. package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts +8 -0
  195. package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts.map +1 -0
  196. package/types/models/glm_ocr/modeling_glm_ocr.d.ts +26 -0
  197. package/types/models/glm_ocr/modeling_glm_ocr.d.ts.map +1 -0
  198. package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
  199. package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
  200. package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
  201. package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
  202. package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
  203. package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
  204. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
  205. package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
  206. package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
  207. package/types/models/image_processors.d.ts +4 -0
  208. package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
  209. package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
  210. package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
  211. package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
  212. package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
  213. package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
  214. package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts +4 -0
  215. package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts.map +1 -0
  216. package/types/models/marian/tokenization_marian.d.ts.map +1 -1
  217. package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
  218. package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
  219. package/types/models/mistral4/modeling_mistral4.d.ts +8 -0
  220. package/types/models/mistral4/modeling_mistral4.d.ts.map +1 -0
  221. package/types/models/modeling_utils.d.ts +46 -27
  222. package/types/models/modeling_utils.d.ts.map +1 -1
  223. package/types/models/models.d.ts +26 -1
  224. package/types/models/nemotron_h/modeling_nemotron_h.d.ts +8 -0
  225. package/types/models/nemotron_h/modeling_nemotron_h.d.ts.map +1 -0
  226. package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
  227. package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
  228. package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
  229. package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
  230. package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
  231. package/types/models/processors.d.ts +8 -0
  232. package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +7 -0
  233. package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -0
  234. package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts +4 -0
  235. package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts.map +1 -0
  236. package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
  237. package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
  238. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +3 -0
  239. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
  240. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +44 -6
  241. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
  242. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +1 -0
  243. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  244. package/types/models/qwen3_5/modeling_qwen3_5.d.ts +6 -0
  245. package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -0
  246. package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +7 -0
  247. package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -0
  248. package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
  249. package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
  250. package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
  251. package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
  252. package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +7 -0
  253. package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -0
  254. package/types/models/qwen3_vl/processing_qwen3_vl.d.ts +4 -0
  255. package/types/models/qwen3_vl/processing_qwen3_vl.d.ts.map +1 -0
  256. package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
  257. package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
  258. package/types/models/registry.d.ts +2 -1
  259. package/types/models/registry.d.ts.map +1 -1
  260. package/types/models/sam/image_processing_sam.d.ts +1 -1
  261. package/types/models/session.d.ts +3 -2
  262. package/types/models/session.d.ts.map +1 -1
  263. package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
  264. package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
  265. package/types/models/solar_open/modeling_solar_open.d.ts +8 -0
  266. package/types/models/solar_open/modeling_solar_open.d.ts.map +1 -0
  267. package/types/models/tokenizers.d.ts +1 -0
  268. package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
  269. package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
  270. package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
  271. package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
  272. package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
  273. package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
  274. package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
  275. package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
  276. package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
  277. package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
  278. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  279. package/types/models/whisper/modeling_whisper.d.ts.map +1 -1
  280. package/types/models/xlm/tokenization_xlm.d.ts.map +1 -1
  281. package/types/pipelines/automatic-speech-recognition.d.ts +7 -2
  282. package/types/pipelines/automatic-speech-recognition.d.ts.map +1 -1
  283. package/types/pipelines/document-question-answering.d.ts +2 -2
  284. package/types/pipelines/document-question-answering.d.ts.map +1 -1
  285. package/types/pipelines/image-to-text.d.ts +4 -4
  286. package/types/pipelines/image-to-text.d.ts.map +1 -1
  287. package/types/pipelines/index.d.ts +265 -0
  288. package/types/pipelines/index.d.ts.map +1 -0
  289. package/types/pipelines/summarization.d.ts +2 -2
  290. package/types/pipelines/summarization.d.ts.map +1 -1
  291. package/types/pipelines/text-generation.d.ts +7 -3
  292. package/types/pipelines/text-generation.d.ts.map +1 -1
  293. package/types/pipelines/text-to-audio.d.ts.map +1 -1
  294. package/types/pipelines/text2text-generation.d.ts +3 -3
  295. package/types/pipelines/text2text-generation.d.ts.map +1 -1
  296. package/types/pipelines/translation.d.ts +2 -2
  297. package/types/pipelines/translation.d.ts.map +1 -1
  298. package/types/pipelines/zero-shot-classification.d.ts.map +1 -1
  299. package/types/pipelines.d.ts +51 -291
  300. package/types/pipelines.d.ts.map +1 -1
  301. package/types/tokenization_utils.d.ts +44 -26
  302. package/types/tokenization_utils.d.ts.map +1 -1
  303. package/types/transformers.d.ts +7 -1
  304. package/types/transformers.d.ts.map +1 -1
  305. package/types/utils/audio.d.ts +5 -2
  306. package/types/utils/audio.d.ts.map +1 -1
  307. package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
  308. package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
  309. package/types/utils/cache/FileCache.d.ts +39 -0
  310. package/types/utils/cache/FileCache.d.ts.map +1 -0
  311. package/types/utils/cache.d.ts +10 -4
  312. package/types/utils/cache.d.ts.map +1 -1
  313. package/types/utils/core.d.ts +59 -2
  314. package/types/utils/core.d.ts.map +1 -1
  315. package/types/utils/devices.d.ts +15 -0
  316. package/types/utils/devices.d.ts.map +1 -1
  317. package/types/utils/dtypes.d.ts +17 -1
  318. package/types/utils/dtypes.d.ts.map +1 -1
  319. package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -32
  320. package/types/utils/hub/FileResponse.d.ts.map +1 -0
  321. package/types/utils/hub/utils.d.ts +19 -3
  322. package/types/utils/hub/utils.d.ts.map +1 -1
  323. package/types/utils/hub.d.ts +36 -7
  324. package/types/utils/hub.d.ts.map +1 -1
  325. package/types/utils/image.d.ts +1 -1
  326. package/types/utils/logger.d.ts +28 -0
  327. package/types/utils/logger.d.ts.map +1 -0
  328. package/types/utils/lru_cache.d.ts +38 -0
  329. package/types/utils/lru_cache.d.ts.map +1 -0
  330. package/types/utils/memoize_promise.d.ts +14 -0
  331. package/types/utils/memoize_promise.d.ts.map +1 -0
  332. package/types/utils/model-loader.d.ts +15 -0
  333. package/types/utils/model-loader.d.ts.map +1 -1
  334. package/types/utils/model_registry/ModelRegistry.d.ts +298 -0
  335. package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -0
  336. package/types/utils/model_registry/clear_cache.d.ts +74 -0
  337. package/types/utils/model_registry/clear_cache.d.ts.map +1 -0
  338. package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
  339. package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
  340. package/types/utils/model_registry/get_file_metadata.d.ts +20 -0
  341. package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -0
  342. package/types/utils/model_registry/get_files.d.ts +23 -0
  343. package/types/utils/model_registry/get_files.d.ts.map +1 -0
  344. package/types/utils/model_registry/get_model_files.d.ts +48 -0
  345. package/types/utils/model_registry/get_model_files.d.ts.map +1 -0
  346. package/types/utils/model_registry/get_pipeline_files.d.ts +22 -0
  347. package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -0
  348. package/types/utils/model_registry/get_processor_files.d.ts +9 -0
  349. package/types/utils/model_registry/get_processor_files.d.ts.map +1 -0
  350. package/types/utils/model_registry/get_tokenizer_files.d.ts +9 -0
  351. package/types/utils/model_registry/get_tokenizer_files.d.ts.map +1 -0
  352. package/types/utils/model_registry/is_cached.d.ts +105 -0
  353. package/types/utils/model_registry/is_cached.d.ts.map +1 -0
  354. package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
  355. package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
  356. package/types/utils/random.d.ts +86 -0
  357. package/types/utils/random.d.ts.map +1 -0
  358. package/types/utils/tensor.d.ts.map +1 -1
  359. package/src/utils/data-structures.js +0 -572
  360. package/types/models/ast/modeling_ast.d.ts.map +0 -1
  361. package/types/utils/data-structures.d.ts +0 -294
  362. package/types/utils/data-structures.d.ts.map +0 -1
  363. package/types/utils/hub/files.d.ts.map +0 -1
  364. /package/src/models/{ast/modeling_ast.js → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js} +0 -0
package/src/configs.js CHANGED
@@ -68,12 +68,20 @@ function getNormalizedConfig(config) {
68
68
  case 'florence2':
69
69
  case 'llava_onevision':
70
70
  case 'idefics3':
71
+ case 'granite_speech':
71
72
  case 'ultravox':
72
73
  case 'voxtral':
74
+ case 'voxtral_realtime':
73
75
  case 'smolvlm':
74
76
  case 'gemma3n':
77
+ case 'lfm2_vl':
75
78
  case 'chatterbox':
79
+ case 'lighton_ocr':
80
+ case 'glm_ocr':
76
81
  case 'mistral3':
82
+ case 'qwen2_5_vl':
83
+ case 'qwen3_vl':
84
+ case 'qwen3_vl_moe':
77
85
  // @ts-expect-error TS2339
78
86
  init_normalized_config = getNormalizedConfig(config.text_config);
79
87
  break;
@@ -115,6 +123,7 @@ function getNormalizedConfig(config) {
115
123
  case 'nanochat':
116
124
  case 'apertus':
117
125
  case 'arcee':
126
+ case 'afmoe':
118
127
  case 'lfm2':
119
128
  case 'lfm2_moe':
120
129
  case 'smollm3':
@@ -125,10 +134,19 @@ function getNormalizedConfig(config) {
125
134
  case 'granite':
126
135
  case 'granitemoehybrid':
127
136
  case 'cohere':
137
+ case 'cohere2':
128
138
  case 'mistral':
139
+ case 'voxtral_realtime_text':
140
+ case 'voxtral_realtime_encoder':
129
141
  case 'starcoder2':
130
142
  case 'qwen2':
143
+ case 'qwen2_moe':
131
144
  case 'qwen2_vl':
145
+ case 'qwen2_vl_text':
146
+ case 'qwen2_5_vl_text':
147
+ case 'qwen3_moe':
148
+ case 'qwen3_vl_text':
149
+ case 'qwen3_vl_moe_text':
132
150
  case 'phi':
133
151
  case 'phi3':
134
152
  case 'phi3_v':
@@ -140,6 +158,8 @@ function getNormalizedConfig(config) {
140
158
  mapping['dim_kv'] = 'head_dim';
141
159
  break;
142
160
  case 'qwen3':
161
+ case 'solar_open':
162
+ case 'glm_ocr_text':
143
163
  case 'gemma':
144
164
  case 'gemma2':
145
165
  case 'vaultgemma':
@@ -150,6 +170,7 @@ function getNormalizedConfig(config) {
150
170
  case 'ernie4_5':
151
171
  case 'hunyuan_v1_dense':
152
172
  case 'falcon_h1':
173
+ case 'nemotron_h':
153
174
  case 'ministral':
154
175
  case 'ministral3':
155
176
  mapping['num_heads'] = 'num_key_value_heads';
@@ -184,6 +205,9 @@ function getNormalizedConfig(config) {
184
205
  mapping['num_attention_heads'] = 'num_attention_heads';
185
206
  break;
186
207
  case 'youtu':
208
+ case 'deepseek_v3':
209
+ case 'glm_moe_dsa':
210
+ case 'mistral4':
187
211
  mapping['num_heads'] = 'num_key_value_heads';
188
212
  mapping['num_layers'] = 'num_hidden_layers';
189
213
  mapping['dim_kv'] = 'qk_head_dim';
@@ -242,6 +266,24 @@ function getNormalizedConfig(config) {
242
266
  mapping['num_encoder_heads'] = 'encoder_num_key_value_heads';
243
267
  mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
244
268
  break;
269
+ case 'cohere_asr':
270
+ mapping['num_decoder_layers'] = 'num_hidden_layers';
271
+ mapping['num_decoder_heads'] = 'num_key_value_heads';
272
+ mapping['decoder_hidden_size'] = 'hidden_size';
273
+ mapping['decoder_dim_kv'] = 'head_dim';
274
+ const {
275
+ num_hidden_layers: num_encoder_layers,
276
+ num_attention_heads: num_encoder_heads,
277
+ hidden_size: encoder_hidden_size,
278
+ } = /** @type {any} */ (config).encoder_config;
279
+ init_normalized_config = {
280
+ num_encoder_layers,
281
+ num_encoder_heads,
282
+ encoder_hidden_size,
283
+ // @ts-expect-error TS2339
284
+ encoder_dim_kv: config.head_dim,
285
+ };
286
+ break;
245
287
  case 'vision-encoder-decoder':
246
288
  // @ts-expect-error TS2339
247
289
  const decoderConfig = getNormalizedConfig(config.decoder);
@@ -283,17 +325,20 @@ function getNormalizedConfig(config) {
283
325
  * @returns {Record<string, number[]>}
284
326
  */
285
327
  export function getCacheShapes(config, options) {
328
+ if (!(config instanceof PretrainedConfig)) {
329
+ config = new PretrainedConfig(config);
330
+ }
331
+
332
+ const batch_size = options?.batch_size ?? 1;
286
333
  if (['lfm2', 'lfm2_moe'].includes(config.model_type)) {
287
334
  const pkv_prefix = options?.prefix ?? 'past_key_values';
288
335
  const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
289
336
 
290
- // Custom caching mechanism for LFM2
291
337
  /** @type {Record<string, number[]>} */
292
338
  const cache_values = {};
293
- // @ts-expect-error TS2339
294
- const { layer_types, num_attention_heads, num_key_value_heads, hidden_size, conv_L_cache } = config;
339
+ const { layer_types, num_attention_heads, num_key_value_heads, hidden_size, conv_L_cache } =
340
+ /** @type {any} */ (config);
295
341
  const head_dim = hidden_size / num_attention_heads;
296
- const batch_size = options?.batch_size ?? 1;
297
342
  for (let i = 0; i < layer_types.length; ++i) {
298
343
  if (layer_types[i] === 'full_attention') {
299
344
  for (const kv of ['key', 'value']) {
@@ -306,44 +351,99 @@ export function getCacheShapes(config, options) {
306
351
  }
307
352
  }
308
353
  return cache_values;
309
- } else if (['granitemoehybrid', 'falcon_h1'].includes(config.model_type)) {
354
+ } else if (['granitemoehybrid', 'falcon_h1', 'nemotron_h'].includes(config.model_type)) {
310
355
  const pkv_prefix = options?.prefix ?? 'past_key_values';
311
356
  const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
312
357
 
358
+ const c = /** @type {any} */ (config);
359
+
360
+ // Normalize config field names across model types
361
+ const layer_types = c.layer_types ?? c.layers_block_type;
362
+ const num_layers = c.num_hidden_layers ?? layer_types?.length;
363
+ const num_key_value_heads = c.num_key_value_heads;
364
+ const head_dim = c.head_dim ?? c.hidden_size / c.num_attention_heads;
365
+ const mamba_n_heads = c.mamba_n_heads ?? c.mamba_num_heads;
366
+ const mamba_d_head = c.mamba_d_head ?? c.mamba_head_dim;
367
+ const mamba_d_state = c.mamba_d_state ?? c.ssm_state_size;
368
+ const mamba_n_groups = c.mamba_n_groups ?? c.n_groups;
369
+ const mamba_d_conv = c.mamba_d_conv ?? c.conv_kernel;
370
+ const mamba_d_ssm =
371
+ c.mamba_d_ssm ?? (c.mamba_expand ? c.mamba_expand * c.hidden_size : mamba_n_heads * mamba_d_head);
372
+ const conv_d_inner = mamba_d_ssm + 2 * mamba_n_groups * mamba_d_state;
373
+
313
374
  /** @type {Record<string, number[]>} */
314
375
  const cache_values = {};
315
376
 
377
+ for (let i = 0; i < num_layers; ++i) {
378
+ if (!layer_types || layer_types[i] === 'mamba') {
379
+ cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
380
+ cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
381
+ }
382
+ if (!layer_types || layer_types[i] === 'attention') {
383
+ for (const kv of ['key', 'value']) {
384
+ cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
385
+ }
386
+ }
387
+ }
388
+ return cache_values;
389
+ } else if (['qwen3_next', 'qwen3_5_text', 'qwen3_5_moe_text', 'olmo_hybrid'].includes(config.model_type)) {
390
+ const pkv_prefix = options?.prefix ?? 'past_key_values';
391
+ const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
392
+
393
+ /** @type {Record<string, number[]>} */
394
+ const cache_values = {};
316
395
  const {
396
+ head_dim,
317
397
  layer_types,
318
- num_hidden_layers,
319
398
  num_attention_heads,
320
399
  num_key_value_heads,
321
400
  hidden_size,
322
- mamba_d_conv,
323
- mamba_n_heads,
324
- mamba_d_head,
325
- mamba_d_state,
326
- mamba_n_groups,
327
- mamba_expand,
328
- mamba_d_ssm,
401
+ linear_num_value_heads,
402
+ linear_num_key_heads,
403
+ linear_key_head_dim,
404
+ linear_value_head_dim,
405
+ linear_conv_kernel_dim,
329
406
  } = /** @type {any} */ (config);
330
- const head_dim = hidden_size / num_attention_heads;
331
- const batch_size = options?.batch_size ?? 1;
332
407
 
333
- const conv_d_inner = (mamba_d_ssm ?? mamba_expand * hidden_size) + 2 * mamba_n_groups * mamba_d_state;
334
- for (let i = 0; i < num_hidden_layers; ++i) {
335
- if (!layer_types || layer_types[i] === 'mamba') {
336
- cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
337
- cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
338
- }
339
- if (!layer_types || layer_types[i] === 'attention') {
408
+ const key_dim = linear_key_head_dim * linear_num_key_heads;
409
+ const value_dim = linear_value_head_dim * linear_num_value_heads;
410
+
411
+ const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
412
+ for (let i = 0; i < layer_types.length; ++i) {
413
+ if (layer_types[i] === 'full_attention') {
340
414
  for (const kv of ['key', 'value']) {
341
- cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
415
+ cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
342
416
  }
417
+ } else if (layer_types[i] === 'linear_attention') {
418
+ if (config.model_type === 'olmo_hybrid') {
419
+ cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
420
+ cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
421
+ cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
422
+ } else {
423
+ const conv_dim = key_dim * 2 + value_dim;
424
+ cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
425
+ }
426
+ cache_values[`${conv_prefix}_recurrent.${i}`] = [
427
+ batch_size,
428
+ linear_num_value_heads,
429
+ linear_key_head_dim,
430
+ linear_value_head_dim,
431
+ ];
432
+ } else {
433
+ throw new Error(`Unsupported layer type: ${layer_types[i]}`);
343
434
  }
344
435
  }
345
436
  return cache_values;
437
+ } else if (['lfm2_vl', 'qwen3_5', 'qwen3_5_moe', 'voxtral_realtime'].includes(config.model_type)) {
438
+ let subConfig;
439
+ if (config.model_type === 'voxtral_realtime' && options?.session_name === 'audio_encoder') {
440
+ subConfig = /** @type {any} */ (config).audio_config;
441
+ } else {
442
+ subConfig = /** @type {any} */ (config).text_config;
443
+ }
444
+ return getCacheShapes(subConfig, options);
346
445
  }
446
+
347
447
  return getKeyValueShapes(config, options);
348
448
  }
349
449
 
package/src/env.js CHANGED
@@ -26,27 +26,43 @@ import fs from 'node:fs';
26
26
  import path from 'node:path';
27
27
  import url from 'node:url';
28
28
 
29
- const VERSION = '4.0.0-next.1';
29
+ const VERSION = '4.0.0-next.10';
30
+
31
+ const HAS_SELF = typeof self !== 'undefined';
30
32
 
31
- const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
32
- const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
33
33
  const IS_FS_AVAILABLE = !isEmpty(fs);
34
34
  const IS_PATH_AVAILABLE = !isEmpty(path);
35
+ const IS_WEB_CACHE_AVAILABLE = HAS_SELF && 'caches' in self;
35
36
 
36
37
  // Runtime detection
37
38
  const IS_DENO_RUNTIME = typeof globalThis.Deno !== 'undefined';
38
39
  const IS_BUN_RUNTIME = typeof globalThis.Bun !== 'undefined';
39
40
 
41
+ const IS_DENO_WEB_RUNTIME = IS_DENO_RUNTIME && IS_WEB_CACHE_AVAILABLE && !IS_FS_AVAILABLE;
42
+
43
+ const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
44
+ const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node' && !IS_DENO_WEB_RUNTIME;
45
+
40
46
  // Check if various APIs are available (depends on environment)
41
47
  const IS_BROWSER_ENV = typeof window !== 'undefined' && typeof window.document !== 'undefined';
42
48
  const IS_WEBWORKER_ENV =
43
- typeof self !== 'undefined' &&
49
+ HAS_SELF &&
44
50
  ['DedicatedWorkerGlobalScope', 'ServiceWorkerGlobalScope', 'SharedWorkerGlobalScope'].includes(
45
51
  self.constructor?.name,
46
52
  );
47
- const IS_WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
53
+ const IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
54
+
48
55
  const IS_WEBGPU_AVAILABLE = IS_NODE_ENV || (typeof navigator !== 'undefined' && 'gpu' in navigator);
49
56
  const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
57
+ const IS_CRYPTO_AVAILABLE = typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function';
58
+
59
+ const IS_CHROME_AVAILABLE =
60
+ // @ts-ignore - chrome may not exist in all environments
61
+ typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined' && typeof chrome.runtime.id === 'string';
62
+
63
+ const IS_SERVICE_WORKER_ENV =
64
+ // @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
65
+ typeof ServiceWorkerGlobalScope !== 'undefined' && HAS_SELF && self instanceof ServiceWorkerGlobalScope;
50
66
 
51
67
  /**
52
68
  * Check if the current environment is Safari browser.
@@ -86,6 +102,15 @@ export const apis = Object.freeze({
86
102
  /** Whether we are running in a web worker environment */
87
103
  IS_WEBWORKER_ENV,
88
104
 
105
+ /** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
106
+ IS_WEB_ENV,
107
+
108
+ /** Whether we are running in a service worker environment */
109
+ IS_SERVICE_WORKER_ENV,
110
+
111
+ /** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
112
+ IS_DENO_WEB_RUNTIME,
113
+
89
114
  /** Whether the Cache API is available */
90
115
  IS_WEB_CACHE_AVAILABLE,
91
116
 
@@ -109,6 +134,12 @@ export const apis = Object.freeze({
109
134
 
110
135
  /** Whether the path API is available */
111
136
  IS_PATH_AVAILABLE,
137
+
138
+ /** Whether the crypto API is available */
139
+ IS_CRYPTO_AVAILABLE,
140
+
141
+ /** Whether the Chrome runtime API is available */
142
+ IS_CHROME_AVAILABLE,
112
143
  });
113
144
 
114
145
  const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
@@ -134,12 +165,48 @@ const DEFAULT_CACHE_DIR = RUNNING_LOCALLY ? path.join(dirname__, '/.cache/') : n
134
165
  const DEFAULT_LOCAL_MODEL_PATH = '/models/';
135
166
  const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH) : DEFAULT_LOCAL_MODEL_PATH;
136
167
 
168
+ // Ensure default fetch is called with the correct receiver in browser environments.
169
+ const DEFAULT_FETCH = typeof globalThis.fetch === 'function' ? globalThis.fetch.bind(globalThis) : undefined;
170
+
171
+ /**
172
+ * Log levels for controlling output verbosity.
173
+ *
174
+ * Each level is represented by a number, where lower numbers are more verbose: a given level shows its own messages plus those of all higher-numbered (more severe) levels.
175
+ * Use these values to set `env.logLevel`.
176
+ *
177
+ * @example
178
+ * import { env, LogLevel } from '@huggingface/transformers';
179
+ *
180
+ * // Set log level to show only errors
181
+ * env.logLevel = LogLevel.ERROR;
182
+ *
183
+ * // Set log level to show errors, warnings, and info
184
+ * env.logLevel = LogLevel.INFO;
185
+ *
186
+ * // Disable all logging
187
+ * env.logLevel = LogLevel.NONE;
188
+ *
189
+ */
190
+ export const LogLevel = Object.freeze({
191
+ /** All messages including debug output (value: 10) */
192
+ DEBUG: 10,
193
+ /** Errors, warnings, and info messages (value: 20) */
194
+ INFO: 20,
195
+ /** Errors and warnings (value: 30) */
196
+ WARNING: 30,
197
+ /** Only error messages (value: 40) */
198
+ ERROR: 40,
199
+ /** No logging output (value: 50) */
200
+ NONE: 50,
201
+ });
202
+
137
203
  /**
138
204
  * Global variable exposed to users to control execution. This provides users with a simple way to configure Transformers.js.
139
205
  * @typedef {Object} TransformersEnvironment
140
206
  * @property {string} version This version of Transformers.js.
141
- * @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
207
+ * @property {{onnx: Partial<import('onnxruntime-common').Env> & { setLogLevel?: (logLevel: number) => void }}} backends Expose environment variables of different backends,
142
208
  * allowing users to set these variables if they want to.
209
+ * @property {number} logLevel The logging level. Use LogLevel enum values. Defaults to LogLevel.WARNING.
143
210
  * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
144
211
  * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
145
212
  * @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub.
@@ -154,12 +221,18 @@ const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODE
154
221
  * @property {boolean} useCustomCache Whether to use a custom cache system (defined by `customCache`), defaults to `false`.
155
222
  * @property {import('./utils/cache.js').CacheInterface|null} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
156
223
  * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache.
157
- * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries for ONNX Runtime. Defaults to `true` when cache is available.
158
- * This can improve performance by avoiding repeated downloads of WASM files. Note: Only the WASM binary is cached.
159
- * The MJS loader file still requires network access unless you use a Service Worker.
224
+ * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
225
+ * Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
160
226
  * @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
227
+ * @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
228
+ * across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
229
+ * Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
230
+ * The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
231
+ * removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
232
+ * @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
161
233
  */
162
234
 
235
+ let logLevel = LogLevel.WARNING; // Default log level
163
236
  /** @type {TransformersEnvironment} */
164
237
  export const env = {
165
238
  version: VERSION,
@@ -171,17 +244,27 @@ export const env = {
171
244
  onnx: {},
172
245
  },
173
246
 
247
+ /////////////////// Logging settings ///////////////////
248
+ get logLevel() {
249
+ return logLevel;
250
+ },
251
+ set logLevel(level) {
252
+ logLevel = level;
253
+
254
+ // invoke hook to set ONNX Runtime log level when Transformers.js log level changes
255
+ env.backends.onnx?.setLogLevel?.(level);
256
+ },
174
257
  /////////////////// Model settings ///////////////////
175
258
  allowRemoteModels: true,
176
259
  remoteHost: 'https://huggingface.co/',
177
260
  remotePathTemplate: '{model}/resolve/{revision}/',
178
261
 
179
- allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
262
+ allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME), // Default to true for non-web environments, false for web environments
180
263
  localModelPath: localModelPath,
181
264
  useFS: IS_FS_AVAILABLE,
182
265
 
183
266
  /////////////////// Cache settings ///////////////////
184
- useBrowserCache: IS_WEB_CACHE_AVAILABLE && !IS_DENO_RUNTIME,
267
+ useBrowserCache: IS_WEB_CACHE_AVAILABLE,
185
268
 
186
269
  useFSCache: IS_FS_AVAILABLE,
187
270
  cacheDir: DEFAULT_CACHE_DIR,
@@ -191,6 +274,12 @@ export const env = {
191
274
 
192
275
  useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
193
276
  cacheKey: 'transformers-cache',
277
+
278
+ experimental_useCrossOriginStorage: false,
279
+
280
+ /////////////////// Custom fetch /////////////////////
281
+ fetch: DEFAULT_FETCH,
282
+
194
283
  //////////////////////////////////////////////////////
195
284
  };
196
285
 
@@ -6,6 +6,7 @@ import { Callable } from '../utils/generic.js';
6
6
  import { Tensor, topk } from '../utils/tensor.js';
7
7
 
8
8
  import { max, softmax } from '../utils/maths.js';
9
+ import { _weightedIndex } from '../utils/random.js';
9
10
  import { GenerationConfig } from '../generation/configuration_utils.js';
10
11
 
11
12
  /**
@@ -64,24 +65,11 @@ export class LogitsSampler extends Callable {
64
65
 
65
66
  /**
66
67
  * Selects an item randomly based on the specified probabilities.
67
- * @param {import("../transformers.js").DataArray} probabilities An array of probabilities to use for selection.
68
+ * @param {Float32Array} probabilities An array of probabilities to use for selection.
68
69
  * @returns {number} The index of the selected item.
69
70
  */
70
71
  randomSelect(probabilities) {
71
- // Return index of chosen item
72
- let sumProbabilities = 0;
73
- for (let i = 0; i < probabilities.length; ++i) {
74
- sumProbabilities += probabilities[i];
75
- }
76
-
77
- let r = Math.random() * sumProbabilities;
78
- for (let i = 0; i < probabilities.length; ++i) {
79
- r -= probabilities[i];
80
- if (r <= 0) {
81
- return i;
82
- }
83
- }
84
- return 0; // return first (most probable) as a fallback
72
+ return _weightedIndex(probabilities);
85
73
  }
86
74
 
87
75
  /**
@@ -21,7 +21,7 @@
21
21
  * Custom logits processors that complement the default logits processors built from arguments and
22
22
  * generation config. If a logit processor is passed that is already created with the arguments or a
23
23
  * generation config an error is thrown. This feature is intended for advanced users.
24
- * @property {import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
24
+ * @property {import('./stopping_criteria.js').StoppingCriteria|import('./stopping_criteria.js').StoppingCriteria[]|import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
25
25
  * Custom stopping criteria that complements the default stopping criteria built from arguments and a
26
26
  * generation config. If a stopping criteria is passed that is already created with the arguments or a
27
27
  * generation config an error is thrown. This feature is intended for advanced users.
@@ -70,6 +70,9 @@ export class TextStreamer extends BaseStreamer {
70
70
  this.token_cache = [];
71
71
  this.print_len = 0;
72
72
  this.next_tokens_are_prompt = true;
73
+
74
+ // Track special token IDs for special handling during streaming.
75
+ this.special_ids = new Set(this.tokenizer.all_special_ids.map(BigInt));
73
76
  }
74
77
 
75
78
  /**
@@ -90,6 +93,24 @@ export class TextStreamer extends BaseStreamer {
90
93
  const tokens = value[0];
91
94
  this.token_callback_function?.(tokens);
92
95
 
96
+ // Handle special tokens: flush any existing text, then print or skip them
97
+ if (tokens.length === 1 && this.special_ids.has(tokens[0])) {
98
+ if (this.decode_kwargs.skip_special_tokens) return;
99
+
100
+ // Flush any existing cached text first
101
+ if (this.token_cache.length > 0) {
102
+ const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
103
+ const printable_text = text.slice(this.print_len);
104
+ this.on_finalized_text(printable_text, false);
105
+ this.token_cache = [];
106
+ this.print_len = 0;
107
+ }
108
+ // Print the special token immediately
109
+ const special_text = this.tokenizer.decode(tokens, this.decode_kwargs);
110
+ this.on_finalized_text(special_text, false);
111
+ return;
112
+ }
113
+
93
114
  // Add the new token to the cache and decode the entire thing.
94
115
  this.token_cache = mergeArrays(this.token_cache, tokens);
95
116
  const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
@@ -5,6 +5,7 @@ import { RawImage } from './utils/image.js';
5
5
  import { calculateReflectOffset } from './utils/core.js';
6
6
  import { getModelJSON } from './utils/hub.js';
7
7
  import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
8
+ import { logger } from './utils/logger.js';
8
9
 
9
10
  /**
10
11
  * Named tuple to indicate the order we are using is (height x width),
@@ -13,7 +14,7 @@ import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
13
14
  */
14
15
 
15
16
  /**
16
- * @typedef {object} ImageProcessorResult
17
+ * @typedef {Object} ImageProcessorResult
17
18
  * @property {Tensor} pixel_values The pixel values of the batched preprocessed images.
18
19
  * @property {HeightWidth[]} original_sizes Array of two-dimensional tuples like [[480, 640]].
19
20
  * @property {HeightWidth[]} reshaped_input_sizes Array of two-dimensional tuples like [[1000, 1330]].
@@ -403,13 +404,24 @@ function compute_segments(
403
404
  * @param {number} [factor=28] The factor to use for resizing.
404
405
  * @param {number} [min_pixels=56*56] The minimum number of pixels.
405
406
  * @param {number} [max_pixels=14*14*4*1280] The maximum number of pixels.
406
- * @returns {[number, number]} The new height and width of the image.
407
+ * @param {number} [temporal_factor=1] The temporal factor to include in the pixel budget (e.g. temporal_patch_size for video/3D models).
408
+ * @returns {[number, number]} The new width and height of the image.
407
409
  * @throws {Error} If the absolute aspect ratio (longer side / shorter side) is larger than 200.
408
410
  */
409
- function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
411
+ export function smart_resize(
412
+ height,
413
+ width,
414
+ factor = 28,
415
+ min_pixels = 56 * 56,
416
+ max_pixels = 14 * 14 * 4 * 1280,
417
+ temporal_factor = 1,
418
+ ) {
410
419
  if (height < factor || width < factor) {
411
- throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
412
- } else if (Math.max(height, width) / Math.min(height, width) > 200) {
420
+ const scale = Math.max(factor / height, factor / width);
421
+ height = Math.round(height * scale);
422
+ width = Math.round(width * scale);
423
+ }
424
+ if (Math.max(height, width) / Math.min(height, width) > 200) {
413
425
  throw new Error(
414
426
  `absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`,
415
427
  );
@@ -418,17 +430,17 @@ function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixe
418
430
  let h_bar = Math.round(height / factor) * factor;
419
431
  let w_bar = Math.round(width / factor) * factor;
420
432
 
421
- if (h_bar * w_bar > max_pixels) {
422
- const beta = Math.sqrt((height * width) / max_pixels);
423
- h_bar = Math.floor(height / beta / factor) * factor;
424
- w_bar = Math.floor(width / beta / factor) * factor;
425
- } else if (h_bar * w_bar < min_pixels) {
426
- const beta = Math.sqrt(min_pixels / (height * width));
433
+ if (temporal_factor * h_bar * w_bar > max_pixels) {
434
+ const beta = Math.sqrt((temporal_factor * height * width) / max_pixels);
435
+ h_bar = Math.max(factor, Math.floor(height / beta / factor) * factor);
436
+ w_bar = Math.max(factor, Math.floor(width / beta / factor) * factor);
437
+ } else if (temporal_factor * h_bar * w_bar < min_pixels) {
438
+ const beta = Math.sqrt(min_pixels / (temporal_factor * height * width));
427
439
  h_bar = Math.ceil((height * beta) / factor) * factor;
428
440
  w_bar = Math.ceil((width * beta) / factor) * factor;
429
441
  }
430
442
 
431
- return [h_bar, w_bar];
443
+ return [w_bar, h_bar];
432
444
  }
433
445
 
434
446
  /**
@@ -450,7 +462,7 @@ export function post_process_panoptic_segmentation(
450
462
  target_sizes = null,
451
463
  ) {
452
464
  if (label_ids_to_fuse === null) {
453
- console.warn('`label_ids_to_fuse` unset. No instance will be fused.');
465
+ logger.warn('`label_ids_to_fuse` unset. No instance will be fused.');
454
466
  label_ids_to_fuse = new Set();
455
467
  }
456
468
 
@@ -592,6 +604,7 @@ export class ImageProcessor extends Callable {
592
604
  if (
593
605
  this.do_pad &&
594
606
  !this.pad_size &&
607
+ !this.size_divisibility &&
595
608
  this.size &&
596
609
  this.size.width !== undefined &&
597
610
  this.size.height !== undefined
@@ -864,11 +877,6 @@ export class ImageProcessor extends Callable {
864
877
  return [newWidth, newHeight];
865
878
  } else if (this.size_divisibility !== undefined) {
866
879
  return enforce_size_divisibility([srcWidth, srcHeight], this.size_divisibility);
867
- } else if (this.min_pixels !== undefined && this.max_pixels !== undefined) {
868
- // Custom resize logic for Qwen2-VL models
869
- // @ts-expect-error TS2339
870
- const factor = this.config.patch_size * this.config.merge_size;
871
- return smart_resize(srcHeight, srcWidth, factor, this.min_pixels, this.max_pixels);
872
880
  } else {
873
881
  throw new Error(
874
882
  `Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(size)}`,
@@ -890,7 +898,7 @@ export class ImageProcessor extends Callable {
890
898
  }
891
899
 
892
900
  /**
893
- * @typedef {object} PreprocessedImage
901
+ * @typedef {Object} PreprocessedImage
894
902
  * @property {HeightWidth} original_size The original size of the image.
895
903
  * @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
896
904
  * @property {Tensor} pixel_values The pixel values of the preprocessed image.
@@ -1000,10 +1008,8 @@ export class ImageProcessor extends Callable {
1000
1008
  const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
1001
1009
  [pixelData, imgDims] = padded; // Update pixel data and image dimensions
1002
1010
  } else if (this.size_divisibility) {
1003
- const [paddedWidth, paddedHeight] = enforce_size_divisibility(
1004
- [imgDims[1], imgDims[0]],
1005
- this.size_divisibility,
1006
- );
1011
+ const paddedWidth = Math.ceil(imgDims[1] / this.size_divisibility) * this.size_divisibility;
1012
+ const paddedHeight = Math.ceil(imgDims[0] / this.size_divisibility) * this.size_divisibility;
1007
1013
  [pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
1008
1014
  }
1009
1015
  }
@@ -0,0 +1,5 @@
1
+ import { PreTrainedModel } from '../modeling_utils.js';
2
+
3
+ export class AfmoePreTrainedModel extends PreTrainedModel {}
4
+ export class AfmoeModel extends AfmoePreTrainedModel {}
5
+ export class AfmoeForCausalLM extends AfmoePreTrainedModel {}
@@ -2,6 +2,7 @@ import { getModelJSON } from '../../utils/hub.js';
2
2
  import { ImageProcessor } from '../../image_processors_utils.js';
3
3
  import * as AllImageProcessors from '../image_processors.js';
4
4
  import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
5
+ import { logger } from '../../utils/logger.js';
5
6
 
6
7
  export class AutoImageProcessor {
7
8
  /** @type {typeof ImageProcessor.from_pretrained} */
@@ -20,7 +21,7 @@ export class AutoImageProcessor {
20
21
  if (!image_processor_class) {
21
22
  if (key !== undefined) {
22
23
  // Only log a warning if the class is not found and the key is set.
23
- console.warn(
24
+ logger.warn(
24
25
  `Image processor type '${key}' not found, assuming base ImageProcessor. Please report this at ${GITHUB_ISSUE_URL}.`,
25
26
  );
26
27
  }