@huggingface/transformers 4.0.0-next.1 → 4.0.0-next.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -6
- package/dist/ort-wasm-simd-threaded.jsep.mjs +31 -31
- package/dist/transformers.js +6050 -3202
- package/dist/transformers.min.js +23 -21
- package/dist/transformers.node.cjs +6119 -3174
- package/dist/transformers.node.min.cjs +25 -23
- package/dist/transformers.node.min.mjs +25 -23
- package/dist/transformers.node.mjs +6034 -3168
- package/dist/transformers.web.js +4255 -1381
- package/dist/transformers.web.min.js +23 -19
- package/package.json +5 -5
- package/src/backends/onnx.js +128 -53
- package/src/backends/utils/cacheWasm.js +28 -46
- package/src/cache_utils.js +62 -0
- package/src/configs.js +123 -23
- package/src/env.js +100 -11
- package/src/generation/logits_sampler.js +3 -15
- package/src/generation/parameters.js +1 -1
- package/src/generation/streamers.js +21 -0
- package/src/image_processors_utils.js +29 -23
- package/src/models/afmoe/modeling_afmoe.js +5 -0
- package/src/models/auto/image_processing_auto.js +2 -1
- package/src/models/auto/modeling_auto.js +16 -2
- package/src/models/auto/tokenization_auto.js +2 -1
- package/src/models/chatterbox/modeling_chatterbox.js +1 -1
- package/src/models/chmv2/image_processing_chmv2.js +3 -0
- package/src/models/chmv2/modeling_chmv2.js +4 -0
- package/src/models/clap/feature_extraction_clap.js +2 -1
- package/src/models/cohere2/modeling_cohere2.js +5 -0
- package/src/models/cohere_asr/feature_extraction_cohere_asr.js +117 -0
- package/src/models/cohere_asr/modeling_cohere_asr.js +11 -0
- package/src/models/cohere_asr/processing_cohere_asr.js +55 -0
- package/src/models/cohere_asr/tokenization_cohere_asr.js +3 -0
- package/src/models/deepseek_v3/modeling_deepseek_v3.js +5 -0
- package/src/models/detr/image_processing_detr.js +1 -1
- package/src/models/eurobert/modeling_eurobert.js +41 -0
- package/src/models/feature_extractors.js +3 -0
- package/src/models/gemma3/image_processing_gemma3.js +3 -0
- package/src/models/gemma3/modeling_gemma3.js +4 -1
- package/src/models/gemma3/processing_gemma3.js +45 -0
- package/src/models/gemma3n/modeling_gemma3n.js +2 -0
- package/src/models/glm46v/image_processing_glm46v.js +12 -0
- package/src/models/glm46v/processing_glm46v.js +5 -0
- package/src/models/glm_moe_dsa/modeling_glm_moe_dsa.js +5 -0
- package/src/models/glm_ocr/modeling_glm_ocr.js +78 -0
- package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
- package/src/models/granite_speech/modeling_granite_speech.js +5 -0
- package/src/models/granite_speech/processing_granite_speech.js +62 -0
- package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
- package/src/models/idefics3/modeling_idefics3.js +5 -32
- package/src/models/image_processors.js +4 -0
- package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
- package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
- package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
- package/src/models/lighton_ocr/modeling_lighton_ocr.js +3 -0
- package/src/models/llava/modeling_llava.js +1 -1
- package/src/models/marian/tokenization_marian.js +3 -2
- package/src/models/mistral3/modeling_mistral3.js +2 -2
- package/src/models/mistral4/modeling_mistral4.js +5 -0
- package/src/models/modeling_utils.js +283 -300
- package/src/models/models.js +26 -1
- package/src/models/nemotron_h/modeling_nemotron_h.js +5 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/paligemma/modeling_paligemma.js +2 -25
- package/src/models/paligemma/processing_paligemma.js +3 -2
- package/src/models/processors.js +8 -0
- package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +9 -0
- package/src/models/qwen2_5_vl/processing_qwen2_5_vl.js +3 -0
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +15 -1
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +240 -143
- package/src/models/qwen2_vl/processing_qwen2_vl.js +5 -4
- package/src/models/qwen3_5/modeling_qwen3_5.js +4 -0
- package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +4 -0
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl/modeling_qwen3_vl.js +4 -0
- package/src/models/qwen3_vl/processing_qwen3_vl.js +3 -0
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
- package/src/models/registry.js +61 -5
- package/src/models/sam/image_processing_sam.js +1 -1
- package/src/models/session.js +33 -56
- package/src/models/smolvlm/modeling_smolvlm.js +7 -0
- package/src/models/solar_open/modeling_solar_open.js +5 -0
- package/src/models/tokenizers.js +1 -0
- package/src/models/ultravox/modeling_ultravox.js +1 -3
- package/src/models/voxtral/modeling_voxtral.js +3 -0
- package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
- package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
- package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
- package/src/models/whisper/feature_extraction_whisper.js +4 -13
- package/src/models/whisper/modeling_whisper.js +6 -5
- package/src/models/xlm/tokenization_xlm.js +2 -1
- package/src/pipelines/automatic-speech-recognition.js +47 -3
- package/src/pipelines/document-question-answering.js +1 -1
- package/src/pipelines/image-to-text.js +2 -2
- package/src/pipelines/index.js +313 -0
- package/src/pipelines/summarization.js +1 -1
- package/src/pipelines/text-generation.js +5 -1
- package/src/pipelines/text-to-audio.js +4 -2
- package/src/pipelines/text2text-generation.js +1 -1
- package/src/pipelines/translation.js +1 -1
- package/src/pipelines/zero-shot-classification.js +3 -2
- package/src/pipelines.js +140 -428
- package/src/tokenization_utils.js +42 -21
- package/src/transformers.js +10 -1
- package/src/utils/audio.js +20 -3
- package/src/utils/cache/CrossOriginStorageCache.js +251 -0
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache/cross-origin-storage.d.ts +38 -0
- package/src/utils/cache.js +12 -4
- package/src/utils/core.js +23 -1
- package/src/utils/devices.js +22 -0
- package/src/utils/dtypes.js +55 -0
- package/src/utils/hub/{files.js → FileResponse.js} +0 -90
- package/src/utils/hub/utils.js +45 -5
- package/src/utils/hub.js +67 -23
- package/src/utils/image.js +14 -14
- package/src/utils/logger.js +67 -0
- package/src/utils/lru_cache.js +67 -0
- package/src/utils/memoize_promise.js +45 -0
- package/src/utils/model-loader.js +35 -17
- package/src/utils/model_registry/ModelRegistry.js +382 -0
- package/src/utils/model_registry/clear_cache.js +128 -0
- package/src/utils/model_registry/get_available_dtypes.js +68 -0
- package/src/utils/model_registry/get_file_metadata.js +162 -0
- package/src/utils/model_registry/get_files.js +42 -0
- package/src/utils/model_registry/get_model_files.js +114 -0
- package/src/utils/model_registry/get_pipeline_files.js +44 -0
- package/src/utils/model_registry/get_processor_files.js +20 -0
- package/src/utils/model_registry/get_tokenizer_files.js +21 -0
- package/src/utils/model_registry/is_cached.js +169 -0
- package/src/utils/model_registry/resolve_model_type.js +66 -0
- package/src/utils/random.js +225 -0
- package/src/utils/tensor.js +26 -23
- package/src/utils/video.js +2 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/cache_utils.d.ts +29 -0
- package/types/cache_utils.d.ts.map +1 -0
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +60 -27
- package/types/env.d.ts.map +1 -1
- package/types/generation/logits_sampler.d.ts +2 -2
- package/types/generation/logits_sampler.d.ts.map +1 -1
- package/types/generation/parameters.d.ts +1 -1
- package/types/generation/parameters.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +1 -0
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts +18 -1
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/afmoe/modeling_afmoe.d.ts +8 -0
- package/types/models/afmoe/modeling_afmoe.d.ts.map +1 -0
- package/types/models/{ast/modeling_ast.d.ts → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts} +1 -1
- package/types/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts.map +1 -0
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/auto/tokenization_auto.d.ts.map +1 -1
- package/types/models/chmv2/image_processing_chmv2.d.ts +4 -0
- package/types/models/chmv2/image_processing_chmv2.d.ts.map +1 -0
- package/types/models/chmv2/modeling_chmv2.d.ts +6 -0
- package/types/models/chmv2/modeling_chmv2.d.ts.map +1 -0
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
- package/types/models/cohere2/modeling_cohere2.d.ts +8 -0
- package/types/models/cohere2/modeling_cohere2.d.ts.map +1 -0
- package/types/models/cohere_asr/feature_extraction_cohere_asr.d.ts +25 -0
- package/types/models/cohere_asr/feature_extraction_cohere_asr.d.ts.map +1 -0
- package/types/models/cohere_asr/modeling_cohere_asr.d.ts +9 -0
- package/types/models/cohere_asr/modeling_cohere_asr.d.ts.map +1 -0
- package/types/models/cohere_asr/processing_cohere_asr.d.ts +27 -0
- package/types/models/cohere_asr/processing_cohere_asr.d.ts.map +1 -0
- package/types/models/cohere_asr/tokenization_cohere_asr.d.ts +4 -0
- package/types/models/cohere_asr/tokenization_cohere_asr.d.ts.map +1 -0
- package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts +8 -0
- package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts.map +1 -0
- package/types/models/detr/image_processing_detr.d.ts +1 -1
- package/types/models/eurobert/modeling_eurobert.d.ts +36 -0
- package/types/models/eurobert/modeling_eurobert.d.ts.map +1 -0
- package/types/models/feature_extractors.d.ts +3 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
- package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
- package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
- package/types/models/gemma3/processing_gemma3.d.ts +20 -0
- package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
- package/types/models/glm46v/image_processing_glm46v.d.ts +4 -0
- package/types/models/glm46v/image_processing_glm46v.d.ts.map +1 -0
- package/types/models/glm46v/processing_glm46v.d.ts +4 -0
- package/types/models/glm46v/processing_glm46v.d.ts.map +1 -0
- package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts +8 -0
- package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts.map +1 -0
- package/types/models/glm_ocr/modeling_glm_ocr.d.ts +26 -0
- package/types/models/glm_ocr/modeling_glm_ocr.d.ts.map +1 -0
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
- package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
- package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +4 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts +4 -0
- package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts.map +1 -0
- package/types/models/marian/tokenization_marian.d.ts.map +1 -1
- package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
- package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
- package/types/models/mistral4/modeling_mistral4.d.ts +8 -0
- package/types/models/mistral4/modeling_mistral4.d.ts.map +1 -0
- package/types/models/modeling_utils.d.ts +46 -27
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +26 -1
- package/types/models/nemotron_h/modeling_nemotron_h.d.ts +8 -0
- package/types/models/nemotron_h/modeling_nemotron_h.d.ts.map +1 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
- package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
- package/types/models/processors.d.ts +8 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +7 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -0
- package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts +4 -0
- package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts.map +1 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +3 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +44 -6
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +1 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts +6 -0
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +7 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +7 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -0
- package/types/models/qwen3_vl/processing_qwen3_vl.d.ts +4 -0
- package/types/models/qwen3_vl/processing_qwen3_vl.d.ts.map +1 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts +1 -1
- package/types/models/session.d.ts +3 -2
- package/types/models/session.d.ts.map +1 -1
- package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
- package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
- package/types/models/solar_open/modeling_solar_open.d.ts +8 -0
- package/types/models/solar_open/modeling_solar_open.d.ts.map +1 -0
- package/types/models/tokenizers.d.ts +1 -0
- package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
- package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
- package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
- package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/models/whisper/modeling_whisper.d.ts.map +1 -1
- package/types/models/xlm/tokenization_xlm.d.ts.map +1 -1
- package/types/pipelines/automatic-speech-recognition.d.ts +7 -2
- package/types/pipelines/automatic-speech-recognition.d.ts.map +1 -1
- package/types/pipelines/document-question-answering.d.ts +2 -2
- package/types/pipelines/document-question-answering.d.ts.map +1 -1
- package/types/pipelines/image-to-text.d.ts +4 -4
- package/types/pipelines/image-to-text.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +265 -0
- package/types/pipelines/index.d.ts.map +1 -0
- package/types/pipelines/summarization.d.ts +2 -2
- package/types/pipelines/summarization.d.ts.map +1 -1
- package/types/pipelines/text-generation.d.ts +7 -3
- package/types/pipelines/text-generation.d.ts.map +1 -1
- package/types/pipelines/text-to-audio.d.ts.map +1 -1
- package/types/pipelines/text2text-generation.d.ts +3 -3
- package/types/pipelines/text2text-generation.d.ts.map +1 -1
- package/types/pipelines/translation.d.ts +2 -2
- package/types/pipelines/translation.d.ts.map +1 -1
- package/types/pipelines/zero-shot-classification.d.ts.map +1 -1
- package/types/pipelines.d.ts +51 -291
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenization_utils.d.ts +44 -26
- package/types/tokenization_utils.d.ts.map +1 -1
- package/types/transformers.d.ts +7 -1
- package/types/transformers.d.ts.map +1 -1
- package/types/utils/audio.d.ts +5 -2
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
- package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +10 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/core.d.ts +59 -2
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/devices.d.ts +15 -0
- package/types/utils/devices.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +17 -1
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -32
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +19 -3
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +36 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +1 -1
- package/types/utils/logger.d.ts +28 -0
- package/types/utils/logger.d.ts.map +1 -0
- package/types/utils/lru_cache.d.ts +38 -0
- package/types/utils/lru_cache.d.ts.map +1 -0
- package/types/utils/memoize_promise.d.ts +14 -0
- package/types/utils/memoize_promise.d.ts.map +1 -0
- package/types/utils/model-loader.d.ts +15 -0
- package/types/utils/model-loader.d.ts.map +1 -1
- package/types/utils/model_registry/ModelRegistry.d.ts +298 -0
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -0
- package/types/utils/model_registry/clear_cache.d.ts +74 -0
- package/types/utils/model_registry/clear_cache.d.ts.map +1 -0
- package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
- package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
- package/types/utils/model_registry/get_file_metadata.d.ts +20 -0
- package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -0
- package/types/utils/model_registry/get_files.d.ts +23 -0
- package/types/utils/model_registry/get_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_model_files.d.ts +48 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_pipeline_files.d.ts +22 -0
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_processor_files.d.ts +9 -0
- package/types/utils/model_registry/get_processor_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_tokenizer_files.d.ts +9 -0
- package/types/utils/model_registry/get_tokenizer_files.d.ts.map +1 -0
- package/types/utils/model_registry/is_cached.d.ts +105 -0
- package/types/utils/model_registry/is_cached.d.ts.map +1 -0
- package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
- package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
- package/types/utils/random.d.ts +86 -0
- package/types/utils/random.d.ts.map +1 -0
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/utils/data-structures.js +0 -572
- package/types/models/ast/modeling_ast.d.ts.map +0 -1
- package/types/utils/data-structures.d.ts +0 -294
- package/types/utils/data-structures.d.ts.map +0 -1
- package/types/utils/hub/files.d.ts.map +0 -1
- /package/src/models/{ast/modeling_ast.js → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js} +0 -0
package/src/configs.js
CHANGED
|
@@ -68,12 +68,20 @@ function getNormalizedConfig(config) {
|
|
|
68
68
|
case 'florence2':
|
|
69
69
|
case 'llava_onevision':
|
|
70
70
|
case 'idefics3':
|
|
71
|
+
case 'granite_speech':
|
|
71
72
|
case 'ultravox':
|
|
72
73
|
case 'voxtral':
|
|
74
|
+
case 'voxtral_realtime':
|
|
73
75
|
case 'smolvlm':
|
|
74
76
|
case 'gemma3n':
|
|
77
|
+
case 'lfm2_vl':
|
|
75
78
|
case 'chatterbox':
|
|
79
|
+
case 'lighton_ocr':
|
|
80
|
+
case 'glm_ocr':
|
|
76
81
|
case 'mistral3':
|
|
82
|
+
case 'qwen2_5_vl':
|
|
83
|
+
case 'qwen3_vl':
|
|
84
|
+
case 'qwen3_vl_moe':
|
|
77
85
|
// @ts-expect-error TS2339
|
|
78
86
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
79
87
|
break;
|
|
@@ -115,6 +123,7 @@ function getNormalizedConfig(config) {
|
|
|
115
123
|
case 'nanochat':
|
|
116
124
|
case 'apertus':
|
|
117
125
|
case 'arcee':
|
|
126
|
+
case 'afmoe':
|
|
118
127
|
case 'lfm2':
|
|
119
128
|
case 'lfm2_moe':
|
|
120
129
|
case 'smollm3':
|
|
@@ -125,10 +134,19 @@ function getNormalizedConfig(config) {
|
|
|
125
134
|
case 'granite':
|
|
126
135
|
case 'granitemoehybrid':
|
|
127
136
|
case 'cohere':
|
|
137
|
+
case 'cohere2':
|
|
128
138
|
case 'mistral':
|
|
139
|
+
case 'voxtral_realtime_text':
|
|
140
|
+
case 'voxtral_realtime_encoder':
|
|
129
141
|
case 'starcoder2':
|
|
130
142
|
case 'qwen2':
|
|
143
|
+
case 'qwen2_moe':
|
|
131
144
|
case 'qwen2_vl':
|
|
145
|
+
case 'qwen2_vl_text':
|
|
146
|
+
case 'qwen2_5_vl_text':
|
|
147
|
+
case 'qwen3_moe':
|
|
148
|
+
case 'qwen3_vl_text':
|
|
149
|
+
case 'qwen3_vl_moe_text':
|
|
132
150
|
case 'phi':
|
|
133
151
|
case 'phi3':
|
|
134
152
|
case 'phi3_v':
|
|
@@ -140,6 +158,8 @@ function getNormalizedConfig(config) {
|
|
|
140
158
|
mapping['dim_kv'] = 'head_dim';
|
|
141
159
|
break;
|
|
142
160
|
case 'qwen3':
|
|
161
|
+
case 'solar_open':
|
|
162
|
+
case 'glm_ocr_text':
|
|
143
163
|
case 'gemma':
|
|
144
164
|
case 'gemma2':
|
|
145
165
|
case 'vaultgemma':
|
|
@@ -150,6 +170,7 @@ function getNormalizedConfig(config) {
|
|
|
150
170
|
case 'ernie4_5':
|
|
151
171
|
case 'hunyuan_v1_dense':
|
|
152
172
|
case 'falcon_h1':
|
|
173
|
+
case 'nemotron_h':
|
|
153
174
|
case 'ministral':
|
|
154
175
|
case 'ministral3':
|
|
155
176
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
@@ -184,6 +205,9 @@ function getNormalizedConfig(config) {
|
|
|
184
205
|
mapping['num_attention_heads'] = 'num_attention_heads';
|
|
185
206
|
break;
|
|
186
207
|
case 'youtu':
|
|
208
|
+
case 'deepseek_v3':
|
|
209
|
+
case 'glm_moe_dsa':
|
|
210
|
+
case 'mistral4':
|
|
187
211
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
188
212
|
mapping['num_layers'] = 'num_hidden_layers';
|
|
189
213
|
mapping['dim_kv'] = 'qk_head_dim';
|
|
@@ -242,6 +266,24 @@ function getNormalizedConfig(config) {
|
|
|
242
266
|
mapping['num_encoder_heads'] = 'encoder_num_key_value_heads';
|
|
243
267
|
mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
|
|
244
268
|
break;
|
|
269
|
+
case 'cohere_asr':
|
|
270
|
+
mapping['num_decoder_layers'] = 'num_hidden_layers';
|
|
271
|
+
mapping['num_decoder_heads'] = 'num_key_value_heads';
|
|
272
|
+
mapping['decoder_hidden_size'] = 'hidden_size';
|
|
273
|
+
mapping['decoder_dim_kv'] = 'head_dim';
|
|
274
|
+
const {
|
|
275
|
+
num_hidden_layers: num_encoder_layers,
|
|
276
|
+
num_attention_heads: num_encoder_heads,
|
|
277
|
+
hidden_size: encoder_hidden_size,
|
|
278
|
+
} = /** @type {any} */ (config).encoder_config;
|
|
279
|
+
init_normalized_config = {
|
|
280
|
+
num_encoder_layers,
|
|
281
|
+
num_encoder_heads,
|
|
282
|
+
encoder_hidden_size,
|
|
283
|
+
// @ts-expect-error TS2339
|
|
284
|
+
encoder_dim_kv: config.head_dim,
|
|
285
|
+
};
|
|
286
|
+
break;
|
|
245
287
|
case 'vision-encoder-decoder':
|
|
246
288
|
// @ts-expect-error TS2339
|
|
247
289
|
const decoderConfig = getNormalizedConfig(config.decoder);
|
|
@@ -283,17 +325,20 @@ function getNormalizedConfig(config) {
|
|
|
283
325
|
* @returns {Record<string, number[]>}
|
|
284
326
|
*/
|
|
285
327
|
export function getCacheShapes(config, options) {
|
|
328
|
+
if (!(config instanceof PretrainedConfig)) {
|
|
329
|
+
config = new PretrainedConfig(config);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
const batch_size = options?.batch_size ?? 1;
|
|
286
333
|
if (['lfm2', 'lfm2_moe'].includes(config.model_type)) {
|
|
287
334
|
const pkv_prefix = options?.prefix ?? 'past_key_values';
|
|
288
335
|
const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
|
|
289
336
|
|
|
290
|
-
// Custom caching mechanism for LFM2
|
|
291
337
|
/** @type {Record<string, number[]>} */
|
|
292
338
|
const cache_values = {};
|
|
293
|
-
|
|
294
|
-
|
|
339
|
+
const { layer_types, num_attention_heads, num_key_value_heads, hidden_size, conv_L_cache } =
|
|
340
|
+
/** @type {any} */ (config);
|
|
295
341
|
const head_dim = hidden_size / num_attention_heads;
|
|
296
|
-
const batch_size = options?.batch_size ?? 1;
|
|
297
342
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
298
343
|
if (layer_types[i] === 'full_attention') {
|
|
299
344
|
for (const kv of ['key', 'value']) {
|
|
@@ -306,44 +351,99 @@ export function getCacheShapes(config, options) {
|
|
|
306
351
|
}
|
|
307
352
|
}
|
|
308
353
|
return cache_values;
|
|
309
|
-
} else if (['granitemoehybrid', 'falcon_h1'].includes(config.model_type)) {
|
|
354
|
+
} else if (['granitemoehybrid', 'falcon_h1', 'nemotron_h'].includes(config.model_type)) {
|
|
310
355
|
const pkv_prefix = options?.prefix ?? 'past_key_values';
|
|
311
356
|
const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
|
|
312
357
|
|
|
358
|
+
const c = /** @type {any} */ (config);
|
|
359
|
+
|
|
360
|
+
// Normalize config field names across model types
|
|
361
|
+
const layer_types = c.layer_types ?? c.layers_block_type;
|
|
362
|
+
const num_layers = c.num_hidden_layers ?? layer_types?.length;
|
|
363
|
+
const num_key_value_heads = c.num_key_value_heads;
|
|
364
|
+
const head_dim = c.head_dim ?? c.hidden_size / c.num_attention_heads;
|
|
365
|
+
const mamba_n_heads = c.mamba_n_heads ?? c.mamba_num_heads;
|
|
366
|
+
const mamba_d_head = c.mamba_d_head ?? c.mamba_head_dim;
|
|
367
|
+
const mamba_d_state = c.mamba_d_state ?? c.ssm_state_size;
|
|
368
|
+
const mamba_n_groups = c.mamba_n_groups ?? c.n_groups;
|
|
369
|
+
const mamba_d_conv = c.mamba_d_conv ?? c.conv_kernel;
|
|
370
|
+
const mamba_d_ssm =
|
|
371
|
+
c.mamba_d_ssm ?? (c.mamba_expand ? c.mamba_expand * c.hidden_size : mamba_n_heads * mamba_d_head);
|
|
372
|
+
const conv_d_inner = mamba_d_ssm + 2 * mamba_n_groups * mamba_d_state;
|
|
373
|
+
|
|
313
374
|
/** @type {Record<string, number[]>} */
|
|
314
375
|
const cache_values = {};
|
|
315
376
|
|
|
377
|
+
for (let i = 0; i < num_layers; ++i) {
|
|
378
|
+
if (!layer_types || layer_types[i] === 'mamba') {
|
|
379
|
+
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
|
|
380
|
+
cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
|
|
381
|
+
}
|
|
382
|
+
if (!layer_types || layer_types[i] === 'attention') {
|
|
383
|
+
for (const kv of ['key', 'value']) {
|
|
384
|
+
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
return cache_values;
|
|
389
|
+
} else if (['qwen3_next', 'qwen3_5_text', 'qwen3_5_moe_text', 'olmo_hybrid'].includes(config.model_type)) {
|
|
390
|
+
const pkv_prefix = options?.prefix ?? 'past_key_values';
|
|
391
|
+
const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
|
|
392
|
+
|
|
393
|
+
/** @type {Record<string, number[]>} */
|
|
394
|
+
const cache_values = {};
|
|
316
395
|
const {
|
|
396
|
+
head_dim,
|
|
317
397
|
layer_types,
|
|
318
|
-
num_hidden_layers,
|
|
319
398
|
num_attention_heads,
|
|
320
399
|
num_key_value_heads,
|
|
321
400
|
hidden_size,
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
mamba_expand,
|
|
328
|
-
mamba_d_ssm,
|
|
401
|
+
linear_num_value_heads,
|
|
402
|
+
linear_num_key_heads,
|
|
403
|
+
linear_key_head_dim,
|
|
404
|
+
linear_value_head_dim,
|
|
405
|
+
linear_conv_kernel_dim,
|
|
329
406
|
} = /** @type {any} */ (config);
|
|
330
|
-
const head_dim = hidden_size / num_attention_heads;
|
|
331
|
-
const batch_size = options?.batch_size ?? 1;
|
|
332
407
|
|
|
333
|
-
const
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
if (!layer_types || layer_types[i] === 'attention') {
|
|
408
|
+
const key_dim = linear_key_head_dim * linear_num_key_heads;
|
|
409
|
+
const value_dim = linear_value_head_dim * linear_num_value_heads;
|
|
410
|
+
|
|
411
|
+
const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
|
|
412
|
+
for (let i = 0; i < layer_types.length; ++i) {
|
|
413
|
+
if (layer_types[i] === 'full_attention') {
|
|
340
414
|
for (const kv of ['key', 'value']) {
|
|
341
|
-
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
|
|
415
|
+
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
|
|
342
416
|
}
|
|
417
|
+
} else if (layer_types[i] === 'linear_attention') {
|
|
418
|
+
if (config.model_type === 'olmo_hybrid') {
|
|
419
|
+
cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
420
|
+
cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
|
|
421
|
+
cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
422
|
+
} else {
|
|
423
|
+
const conv_dim = key_dim * 2 + value_dim;
|
|
424
|
+
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
|
|
425
|
+
}
|
|
426
|
+
cache_values[`${conv_prefix}_recurrent.${i}`] = [
|
|
427
|
+
batch_size,
|
|
428
|
+
linear_num_value_heads,
|
|
429
|
+
linear_key_head_dim,
|
|
430
|
+
linear_value_head_dim,
|
|
431
|
+
];
|
|
432
|
+
} else {
|
|
433
|
+
throw new Error(`Unsupported layer type: ${layer_types[i]}`);
|
|
343
434
|
}
|
|
344
435
|
}
|
|
345
436
|
return cache_values;
|
|
437
|
+
} else if (['lfm2_vl', 'qwen3_5', 'qwen3_5_moe', 'voxtral_realtime'].includes(config.model_type)) {
|
|
438
|
+
let subConfig;
|
|
439
|
+
if (config.model_type === 'voxtral_realtime' && options?.session_name === 'audio_encoder') {
|
|
440
|
+
subConfig = /** @type {any} */ (config).audio_config;
|
|
441
|
+
} else {
|
|
442
|
+
subConfig = /** @type {any} */ (config).text_config;
|
|
443
|
+
}
|
|
444
|
+
return getCacheShapes(subConfig, options);
|
|
346
445
|
}
|
|
446
|
+
|
|
347
447
|
return getKeyValueShapes(config, options);
|
|
348
448
|
}
|
|
349
449
|
|
package/src/env.js
CHANGED
|
@@ -26,27 +26,43 @@ import fs from 'node:fs';
|
|
|
26
26
|
import path from 'node:path';
|
|
27
27
|
import url from 'node:url';
|
|
28
28
|
|
|
29
|
-
const VERSION = '4.0.0-next.1';
|
|
29
|
+
const VERSION = '4.0.0-next.10';
|
|
30
|
+
|
|
31
|
+
const HAS_SELF = typeof self !== 'undefined';
|
|
30
32
|
|
|
31
|
-
const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
|
|
32
|
-
const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
|
|
33
33
|
const IS_FS_AVAILABLE = !isEmpty(fs);
|
|
34
34
|
const IS_PATH_AVAILABLE = !isEmpty(path);
|
|
35
|
+
const IS_WEB_CACHE_AVAILABLE = HAS_SELF && 'caches' in self;
|
|
35
36
|
|
|
36
37
|
// Runtime detection
|
|
37
38
|
const IS_DENO_RUNTIME = typeof globalThis.Deno !== 'undefined';
|
|
38
39
|
const IS_BUN_RUNTIME = typeof globalThis.Bun !== 'undefined';
|
|
39
40
|
|
|
41
|
+
const IS_DENO_WEB_RUNTIME = IS_DENO_RUNTIME && IS_WEB_CACHE_AVAILABLE && !IS_FS_AVAILABLE;
|
|
42
|
+
|
|
43
|
+
const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
|
|
44
|
+
const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node' && !IS_DENO_WEB_RUNTIME;
|
|
45
|
+
|
|
40
46
|
// Check if various APIs are available (depends on environment)
|
|
41
47
|
const IS_BROWSER_ENV = typeof window !== 'undefined' && typeof window.document !== 'undefined';
|
|
42
48
|
const IS_WEBWORKER_ENV =
|
|
43
|
-
|
|
49
|
+
HAS_SELF &&
|
|
44
50
|
['DedicatedWorkerGlobalScope', 'ServiceWorkerGlobalScope', 'SharedWorkerGlobalScope'].includes(
|
|
45
51
|
self.constructor?.name,
|
|
46
52
|
);
|
|
47
|
-
const
|
|
53
|
+
const IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
|
|
54
|
+
|
|
48
55
|
const IS_WEBGPU_AVAILABLE = IS_NODE_ENV || (typeof navigator !== 'undefined' && 'gpu' in navigator);
|
|
49
56
|
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
57
|
+
const IS_CRYPTO_AVAILABLE = typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function';
|
|
58
|
+
|
|
59
|
+
const IS_CHROME_AVAILABLE =
|
|
60
|
+
// @ts-ignore - chrome may not exist in all environments
|
|
61
|
+
typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined' && typeof chrome.runtime.id === 'string';
|
|
62
|
+
|
|
63
|
+
const IS_SERVICE_WORKER_ENV =
|
|
64
|
+
// @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
|
|
65
|
+
typeof ServiceWorkerGlobalScope !== 'undefined' && HAS_SELF && self instanceof ServiceWorkerGlobalScope;
|
|
50
66
|
|
|
51
67
|
/**
|
|
52
68
|
* Check if the current environment is Safari browser.
|
|
@@ -86,6 +102,15 @@ export const apis = Object.freeze({
|
|
|
86
102
|
/** Whether we are running in a web worker environment */
|
|
87
103
|
IS_WEBWORKER_ENV,
|
|
88
104
|
|
|
105
|
+
/** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
|
|
106
|
+
IS_WEB_ENV,
|
|
107
|
+
|
|
108
|
+
/** Whether we are running in a service worker environment */
|
|
109
|
+
IS_SERVICE_WORKER_ENV,
|
|
110
|
+
|
|
111
|
+
/** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
|
|
112
|
+
IS_DENO_WEB_RUNTIME,
|
|
113
|
+
|
|
89
114
|
/** Whether the Cache API is available */
|
|
90
115
|
IS_WEB_CACHE_AVAILABLE,
|
|
91
116
|
|
|
@@ -109,6 +134,12 @@ export const apis = Object.freeze({
|
|
|
109
134
|
|
|
110
135
|
/** Whether the path API is available */
|
|
111
136
|
IS_PATH_AVAILABLE,
|
|
137
|
+
|
|
138
|
+
/** Whether the crypto API is available */
|
|
139
|
+
IS_CRYPTO_AVAILABLE,
|
|
140
|
+
|
|
141
|
+
/** Whether the Chrome runtime API is available */
|
|
142
|
+
IS_CHROME_AVAILABLE,
|
|
112
143
|
});
|
|
113
144
|
|
|
114
145
|
const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
@@ -134,12 +165,48 @@ const DEFAULT_CACHE_DIR = RUNNING_LOCALLY ? path.join(dirname__, '/.cache/') : n
|
|
|
134
165
|
const DEFAULT_LOCAL_MODEL_PATH = '/models/';
|
|
135
166
|
const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH) : DEFAULT_LOCAL_MODEL_PATH;
|
|
136
167
|
|
|
168
|
+
// Ensure default fetch is called with the correct receiver in browser environments.
|
|
169
|
+
const DEFAULT_FETCH = typeof globalThis.fetch === 'function' ? globalThis.fetch.bind(globalThis) : undefined;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Log levels for controlling output verbosity.
|
|
173
|
+
*
|
|
174
|
+
* Each level is represented by a number. Setting a level shows messages of that severity and above, so lower numbers produce more output.
|
|
175
|
+
* Use these values to set `env.logLevel`.
|
|
176
|
+
*
|
|
177
|
+
* @example
|
|
178
|
+
* import { env, LogLevel } from '@huggingface/transformers';
|
|
179
|
+
*
|
|
180
|
+
* // Set log level to show only errors
|
|
181
|
+
* env.logLevel = LogLevel.ERROR;
|
|
182
|
+
*
|
|
183
|
+
* // Set log level to show errors, warnings, and info
|
|
184
|
+
* env.logLevel = LogLevel.INFO;
|
|
185
|
+
*
|
|
186
|
+
* // Disable all logging
|
|
187
|
+
* env.logLevel = LogLevel.NONE;
|
|
188
|
+
*
|
|
189
|
+
*/
|
|
190
|
+
export const LogLevel = Object.freeze({
|
|
191
|
+
/** All messages including debug output (value: 10) */
|
|
192
|
+
DEBUG: 10,
|
|
193
|
+
/** Errors, warnings, and info messages (value: 20) */
|
|
194
|
+
INFO: 20,
|
|
195
|
+
/** Errors and warnings (value: 30) */
|
|
196
|
+
WARNING: 30,
|
|
197
|
+
/** Only error messages (value: 40) */
|
|
198
|
+
ERROR: 40,
|
|
199
|
+
/** No logging output (value: 50) */
|
|
200
|
+
NONE: 50,
|
|
201
|
+
});
|
|
202
|
+
|
|
137
203
|
/**
|
|
138
204
|
* Global variable made visible to users to control execution. This provides users a simple way to configure Transformers.js.
|
|
139
205
|
* @typedef {Object} TransformersEnvironment
|
|
140
206
|
* @property {string} version This version of Transformers.js.
|
|
141
|
-
* @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
|
|
207
|
+
* @property {{onnx: Partial<import('onnxruntime-common').Env> & { setLogLevel?: (logLevel: number) => void }}} backends Expose environment variables of different backends,
|
|
142
208
|
* allowing users to set these variables if they want to.
|
|
209
|
+
* @property {number} logLevel The logging level. Use LogLevel enum values. Defaults to LogLevel.WARNING.
|
|
143
210
|
* @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
|
|
144
211
|
* If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
|
|
145
212
|
* @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub.
|
|
@@ -154,12 +221,18 @@ const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODE
|
|
|
154
221
|
* @property {boolean} useCustomCache Whether to use a custom cache system (defined by `customCache`), defaults to `false`.
|
|
155
222
|
* @property {import('./utils/cache.js').CacheInterface|null} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
|
|
156
223
|
* implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache.
|
|
157
|
-
* @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries
|
|
158
|
-
*
|
|
159
|
-
* The MJS loader file still requires network access unless you use a Service Worker.
|
|
224
|
+
* @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
|
|
225
|
+
* Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
|
|
160
226
|
* @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
|
|
227
|
+
* @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
|
|
228
|
+
* across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
|
|
229
|
+
* Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
|
|
230
|
+
* The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
|
|
231
|
+
* removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
|
|
232
|
+
* @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
|
|
161
233
|
*/
|
|
162
234
|
|
|
235
|
+
let logLevel = LogLevel.WARNING; // Default log level
|
|
163
236
|
/** @type {TransformersEnvironment} */
|
|
164
237
|
export const env = {
|
|
165
238
|
version: VERSION,
|
|
@@ -171,17 +244,27 @@ export const env = {
|
|
|
171
244
|
onnx: {},
|
|
172
245
|
},
|
|
173
246
|
|
|
247
|
+
/////////////////// Logging settings ///////////////////
|
|
248
|
+
get logLevel() {
|
|
249
|
+
return logLevel;
|
|
250
|
+
},
|
|
251
|
+
set logLevel(level) {
|
|
252
|
+
logLevel = level;
|
|
253
|
+
|
|
254
|
+
// invoke hook to set ONNX Runtime log level when Transformers.js log level changes
|
|
255
|
+
env.backends.onnx?.setLogLevel?.(level);
|
|
256
|
+
},
|
|
174
257
|
/////////////////// Model settings ///////////////////
|
|
175
258
|
allowRemoteModels: true,
|
|
176
259
|
remoteHost: 'https://huggingface.co/',
|
|
177
260
|
remotePathTemplate: '{model}/resolve/{revision}/',
|
|
178
261
|
|
|
179
|
-
allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
|
|
262
|
+
allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME), // Default to true for non-web environments, false for web environments
|
|
180
263
|
localModelPath: localModelPath,
|
|
181
264
|
useFS: IS_FS_AVAILABLE,
|
|
182
265
|
|
|
183
266
|
/////////////////// Cache settings ///////////////////
|
|
184
|
-
useBrowserCache: IS_WEB_CACHE_AVAILABLE
|
|
267
|
+
useBrowserCache: IS_WEB_CACHE_AVAILABLE,
|
|
185
268
|
|
|
186
269
|
useFSCache: IS_FS_AVAILABLE,
|
|
187
270
|
cacheDir: DEFAULT_CACHE_DIR,
|
|
@@ -191,6 +274,12 @@ export const env = {
|
|
|
191
274
|
|
|
192
275
|
useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
|
|
193
276
|
cacheKey: 'transformers-cache',
|
|
277
|
+
|
|
278
|
+
experimental_useCrossOriginStorage: false,
|
|
279
|
+
|
|
280
|
+
/////////////////// Custom fetch /////////////////////
|
|
281
|
+
fetch: DEFAULT_FETCH,
|
|
282
|
+
|
|
194
283
|
//////////////////////////////////////////////////////
|
|
195
284
|
};
|
|
196
285
|
|
|
@@ -6,6 +6,7 @@ import { Callable } from '../utils/generic.js';
|
|
|
6
6
|
import { Tensor, topk } from '../utils/tensor.js';
|
|
7
7
|
|
|
8
8
|
import { max, softmax } from '../utils/maths.js';
|
|
9
|
+
import { _weightedIndex } from '../utils/random.js';
|
|
9
10
|
import { GenerationConfig } from '../generation/configuration_utils.js';
|
|
10
11
|
|
|
11
12
|
/**
|
|
@@ -64,24 +65,11 @@ export class LogitsSampler extends Callable {
|
|
|
64
65
|
|
|
65
66
|
/**
|
|
66
67
|
* Selects an item randomly based on the specified probabilities.
|
|
67
|
-
* @param {
|
|
68
|
+
* @param {Float32Array} probabilities An array of probabilities to use for selection.
|
|
68
69
|
* @returns {number} The index of the selected item.
|
|
69
70
|
*/
|
|
70
71
|
randomSelect(probabilities) {
|
|
71
|
-
|
|
72
|
-
let sumProbabilities = 0;
|
|
73
|
-
for (let i = 0; i < probabilities.length; ++i) {
|
|
74
|
-
sumProbabilities += probabilities[i];
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
let r = Math.random() * sumProbabilities;
|
|
78
|
-
for (let i = 0; i < probabilities.length; ++i) {
|
|
79
|
-
r -= probabilities[i];
|
|
80
|
-
if (r <= 0) {
|
|
81
|
-
return i;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return 0; // return first (most probable) as a fallback
|
|
72
|
+
return _weightedIndex(probabilities);
|
|
85
73
|
}
|
|
86
74
|
|
|
87
75
|
/**
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* Custom logits processors that complement the default logits processors built from arguments and
|
|
22
22
|
* generation config. If a logit processor is passed that is already created with the arguments or a
|
|
23
23
|
* generation config an error is thrown. This feature is intended for advanced users.
|
|
24
|
-
* @property {import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
|
|
24
|
+
* @property {import('./stopping_criteria.js').StoppingCriteria|import('./stopping_criteria.js').StoppingCriteria[]|import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
|
|
25
25
|
* Custom stopping criteria that complements the default stopping criteria built from arguments and a
|
|
26
26
|
* generation config. If a stopping criteria is passed that is already created with the arguments or a
|
|
27
27
|
* generation config an error is thrown. This feature is intended for advanced users.
|
|
@@ -70,6 +70,9 @@ export class TextStreamer extends BaseStreamer {
|
|
|
70
70
|
this.token_cache = [];
|
|
71
71
|
this.print_len = 0;
|
|
72
72
|
this.next_tokens_are_prompt = true;
|
|
73
|
+
|
|
74
|
+
// Track special token IDs for special handling during streaming.
|
|
75
|
+
this.special_ids = new Set(this.tokenizer.all_special_ids.map(BigInt));
|
|
73
76
|
}
|
|
74
77
|
|
|
75
78
|
/**
|
|
@@ -90,6 +93,24 @@ export class TextStreamer extends BaseStreamer {
|
|
|
90
93
|
const tokens = value[0];
|
|
91
94
|
this.token_callback_function?.(tokens);
|
|
92
95
|
|
|
96
|
+
// Handle special tokens: flush any existing text, then print or skip them
|
|
97
|
+
if (tokens.length === 1 && this.special_ids.has(tokens[0])) {
|
|
98
|
+
if (this.decode_kwargs.skip_special_tokens) return;
|
|
99
|
+
|
|
100
|
+
// Flush any existing cached text first
|
|
101
|
+
if (this.token_cache.length > 0) {
|
|
102
|
+
const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
|
|
103
|
+
const printable_text = text.slice(this.print_len);
|
|
104
|
+
this.on_finalized_text(printable_text, false);
|
|
105
|
+
this.token_cache = [];
|
|
106
|
+
this.print_len = 0;
|
|
107
|
+
}
|
|
108
|
+
// Print the special token immediately
|
|
109
|
+
const special_text = this.tokenizer.decode(tokens, this.decode_kwargs);
|
|
110
|
+
this.on_finalized_text(special_text, false);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
|
|
93
114
|
// Add the new token to the cache and decodes the entire thing.
|
|
94
115
|
this.token_cache = mergeArrays(this.token_cache, tokens);
|
|
95
116
|
const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
|
|
@@ -5,6 +5,7 @@ import { RawImage } from './utils/image.js';
|
|
|
5
5
|
import { calculateReflectOffset } from './utils/core.js';
|
|
6
6
|
import { getModelJSON } from './utils/hub.js';
|
|
7
7
|
import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
|
|
8
|
+
import { logger } from './utils/logger.js';
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
11
|
* Named tuple to indicate the order we are using is (height x width),
|
|
@@ -13,7 +14,7 @@ import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
|
|
|
13
14
|
*/
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
|
-
* @typedef {
|
|
17
|
+
* @typedef {Object} ImageProcessorResult
|
|
17
18
|
* @property {Tensor} pixel_values The pixel values of the batched preprocessed images.
|
|
18
19
|
* @property {HeightWidth[]} original_sizes Array of two-dimensional tuples like [[480, 640]].
|
|
19
20
|
* @property {HeightWidth[]} reshaped_input_sizes Array of two-dimensional tuples like [[1000, 1330]].
|
|
@@ -403,13 +404,24 @@ function compute_segments(
|
|
|
403
404
|
* @param {number} [factor=28] The factor to use for resizing.
|
|
404
405
|
* @param {number} [min_pixels=56*56] The minimum number of pixels.
|
|
405
406
|
* @param {number} [max_pixels=14*14*4*1280] The maximum number of pixels.
|
|
406
|
-
* @
|
|
407
|
+
* @param {number} [temporal_factor=1] The temporal factor to include in the pixel budget (e.g. temporal_patch_size for video/3D models).
|
|
408
|
+
* @returns {[number, number]} The new width and height of the image.
|
|
407
409
|
* @throws {Error} If the height or width is smaller than the factor.
|
|
408
410
|
*/
|
|
409
|
-
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
|
|
411
|
+
export function smart_resize(
|
|
412
|
+
height,
|
|
413
|
+
width,
|
|
414
|
+
factor = 28,
|
|
415
|
+
min_pixels = 56 * 56,
|
|
416
|
+
max_pixels = 14 * 14 * 4 * 1280,
|
|
417
|
+
temporal_factor = 1,
|
|
418
|
+
) {
|
|
410
419
|
if (height < factor || width < factor) {
|
|
411
|
-
|
|
412
|
-
|
|
420
|
+
const scale = Math.max(factor / height, factor / width);
|
|
421
|
+
height = Math.round(height * scale);
|
|
422
|
+
width = Math.round(width * scale);
|
|
423
|
+
}
|
|
424
|
+
if (Math.max(height, width) / Math.min(height, width) > 200) {
|
|
413
425
|
throw new Error(
|
|
414
426
|
`absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`,
|
|
415
427
|
);
|
|
@@ -418,17 +430,17 @@ function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixe
|
|
|
418
430
|
let h_bar = Math.round(height / factor) * factor;
|
|
419
431
|
let w_bar = Math.round(width / factor) * factor;
|
|
420
432
|
|
|
421
|
-
if (h_bar * w_bar > max_pixels) {
|
|
422
|
-
const beta = Math.sqrt((height * width) / max_pixels);
|
|
423
|
-
h_bar = Math.floor(height / beta / factor) * factor;
|
|
424
|
-
w_bar = Math.floor(width / beta / factor) * factor;
|
|
425
|
-
} else if (h_bar * w_bar < min_pixels) {
|
|
426
|
-
const beta = Math.sqrt(min_pixels / (height * width));
|
|
433
|
+
if (temporal_factor * h_bar * w_bar > max_pixels) {
|
|
434
|
+
const beta = Math.sqrt((temporal_factor * height * width) / max_pixels);
|
|
435
|
+
h_bar = Math.max(factor, Math.floor(height / beta / factor) * factor);
|
|
436
|
+
w_bar = Math.max(factor, Math.floor(width / beta / factor) * factor);
|
|
437
|
+
} else if (temporal_factor * h_bar * w_bar < min_pixels) {
|
|
438
|
+
const beta = Math.sqrt(min_pixels / (temporal_factor * height * width));
|
|
427
439
|
h_bar = Math.ceil((height * beta) / factor) * factor;
|
|
428
440
|
w_bar = Math.ceil((width * beta) / factor) * factor;
|
|
429
441
|
}
|
|
430
442
|
|
|
431
|
-
return [
|
|
443
|
+
return [w_bar, h_bar];
|
|
432
444
|
}
|
|
433
445
|
|
|
434
446
|
/**
|
|
@@ -450,7 +462,7 @@ export function post_process_panoptic_segmentation(
|
|
|
450
462
|
target_sizes = null,
|
|
451
463
|
) {
|
|
452
464
|
if (label_ids_to_fuse === null) {
|
|
453
|
-
|
|
465
|
+
logger.warn('`label_ids_to_fuse` unset. No instance will be fused.');
|
|
454
466
|
label_ids_to_fuse = new Set();
|
|
455
467
|
}
|
|
456
468
|
|
|
@@ -592,6 +604,7 @@ export class ImageProcessor extends Callable {
|
|
|
592
604
|
if (
|
|
593
605
|
this.do_pad &&
|
|
594
606
|
!this.pad_size &&
|
|
607
|
+
!this.size_divisibility &&
|
|
595
608
|
this.size &&
|
|
596
609
|
this.size.width !== undefined &&
|
|
597
610
|
this.size.height !== undefined
|
|
@@ -864,11 +877,6 @@ export class ImageProcessor extends Callable {
|
|
|
864
877
|
return [newWidth, newHeight];
|
|
865
878
|
} else if (this.size_divisibility !== undefined) {
|
|
866
879
|
return enforce_size_divisibility([srcWidth, srcHeight], this.size_divisibility);
|
|
867
|
-
} else if (this.min_pixels !== undefined && this.max_pixels !== undefined) {
|
|
868
|
-
// Custom resize logic for Qwen2-VL models
|
|
869
|
-
// @ts-expect-error TS2339
|
|
870
|
-
const factor = this.config.patch_size * this.config.merge_size;
|
|
871
|
-
return smart_resize(srcHeight, srcWidth, factor, this.min_pixels, this.max_pixels);
|
|
872
880
|
} else {
|
|
873
881
|
throw new Error(
|
|
874
882
|
`Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(size)}`,
|
|
@@ -890,7 +898,7 @@ export class ImageProcessor extends Callable {
|
|
|
890
898
|
}
|
|
891
899
|
|
|
892
900
|
/**
|
|
893
|
-
* @typedef {
|
|
901
|
+
* @typedef {Object} PreprocessedImage
|
|
894
902
|
* @property {HeightWidth} original_size The original size of the image.
|
|
895
903
|
* @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
|
|
896
904
|
* @property {Tensor} pixel_values The pixel values of the preprocessed image.
|
|
@@ -1000,10 +1008,8 @@ export class ImageProcessor extends Callable {
|
|
|
1000
1008
|
const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
|
|
1001
1009
|
[pixelData, imgDims] = padded; // Update pixel data and image dimensions
|
|
1002
1010
|
} else if (this.size_divisibility) {
|
|
1003
|
-
const
|
|
1004
|
-
|
|
1005
|
-
this.size_divisibility,
|
|
1006
|
-
);
|
|
1011
|
+
const paddedWidth = Math.ceil(imgDims[1] / this.size_divisibility) * this.size_divisibility;
|
|
1012
|
+
const paddedHeight = Math.ceil(imgDims[0] / this.size_divisibility) * this.size_divisibility;
|
|
1007
1013
|
[pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
|
|
1008
1014
|
}
|
|
1009
1015
|
}
|
|
@@ -2,6 +2,7 @@ import { getModelJSON } from '../../utils/hub.js';
|
|
|
2
2
|
import { ImageProcessor } from '../../image_processors_utils.js';
|
|
3
3
|
import * as AllImageProcessors from '../image_processors.js';
|
|
4
4
|
import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
|
|
5
|
+
import { logger } from '../../utils/logger.js';
|
|
5
6
|
|
|
6
7
|
export class AutoImageProcessor {
|
|
7
8
|
/** @type {typeof ImageProcessor.from_pretrained} */
|
|
@@ -20,7 +21,7 @@ export class AutoImageProcessor {
|
|
|
20
21
|
if (!image_processor_class) {
|
|
21
22
|
if (key !== undefined) {
|
|
22
23
|
// Only log a warning if the class is not found and the key is set.
|
|
23
|
-
|
|
24
|
+
logger.warn(
|
|
24
25
|
`Image processor type '${key}' not found, assuming base ImageProcessor. Please report this at ${GITHUB_ISSUE_URL}.`,
|
|
25
26
|
);
|
|
26
27
|
}
|