npm - @huggingface/transformers - Versions diffs - 4.0.0-next.3 → 4.0.0-next.5 - Mend

@huggingface/transformers 4.0.0-next.3 → 4.0.0-next.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168)

package/README.md +10 -4
package/dist/ort-wasm-simd-threaded.jsep.mjs +28 -28
package/dist/transformers.js +3109 -2099
package/dist/transformers.min.js +17 -19
package/dist/transformers.node.cjs +3100 -2060
package/dist/transformers.node.min.cjs +19 -21
package/dist/transformers.node.min.mjs +19 -21
package/dist/transformers.node.mjs +3085 -2060
package/dist/transformers.web.js +1312 -276
package/dist/transformers.web.min.js +15 -15
package/package.json +4 -4
package/src/backends/onnx.js +66 -10
package/src/backends/utils/cacheWasm.js +9 -6
package/src/configs.js +52 -3
package/src/env.js +66 -7
package/src/generation/logits_sampler.js +3 -15
package/src/image_processors_utils.js +2 -6
package/src/models/afmoe/modeling_afmoe.js +5 -0
package/src/models/auto/image_processing_auto.js +2 -1
package/src/models/auto/modeling_auto.js +2 -1
package/src/models/auto/tokenization_auto.js +2 -1
package/src/models/clap/feature_extraction_clap.js +2 -1
package/src/models/cohere2/modeling_cohere2.js +5 -0
package/src/models/marian/tokenization_marian.js +3 -2
package/src/models/modeling_utils.js +14 -4
package/src/models/models.js +6 -0
package/src/models/paligemma/processing_paligemma.js +3 -2
package/src/models/processors.js +2 -0
package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -0
package/src/models/qwen2_5_vl/processing_qwen2_5_vl.js +3 -0
package/src/models/qwen2_vl/image_processing_qwen2_vl.js +54 -0
package/src/models/qwen2_vl/modeling_qwen2_vl.js +8 -2
package/src/models/qwen3_5/modeling_qwen3_5.js +3 -0
package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +3 -0
package/src/models/qwen3_vl/modeling_qwen3_vl.js +3 -0
package/src/models/qwen3_vl/processing_qwen3_vl.js +3 -0
package/src/models/registry.js +9 -1
package/src/models/session.js +16 -50
package/src/models/whisper/feature_extraction_whisper.js +2 -1
package/src/models/whisper/modeling_whisper.js +6 -5
package/src/models/xlm/tokenization_xlm.js +2 -1
package/src/pipelines/automatic-speech-recognition.js +3 -2
package/src/pipelines/index.js +395 -0
package/src/pipelines/text-generation.js +4 -0
package/src/pipelines/text-to-audio.js +4 -2
package/src/pipelines/zero-shot-classification.js +3 -2
package/src/pipelines.js +104 -356
package/src/tokenization_utils.js +42 -21
package/src/transformers.js +8 -1
package/src/utils/audio.js +2 -1
package/src/utils/cache.js +4 -1
package/src/utils/core.js +23 -1
package/src/utils/devices.js +22 -0
package/src/utils/dtypes.js +55 -0
package/src/utils/hub/files.js +17 -2
package/src/utils/hub/utils.js +10 -4
package/src/utils/hub.js +57 -17
package/src/utils/image.js +2 -1
package/src/utils/logger.js +67 -0
package/src/utils/model-loader.js +35 -17
package/src/utils/model_registry/ModelRegistry.js +299 -0
package/src/utils/model_registry/clear_cache.js +128 -0
package/src/utils/model_registry/get_file_metadata.js +149 -0
package/src/utils/model_registry/get_files.js +42 -0
package/src/utils/model_registry/get_model_files.js +182 -0
package/src/utils/model_registry/get_pipeline_files.js +53 -0
package/src/utils/model_registry/get_processor_files.js +20 -0
package/src/utils/model_registry/get_tokenizer_files.js +21 -0
package/src/utils/model_registry/is_cached.js +92 -0
package/src/utils/random.js +225 -0
package/src/utils/tensor.js +8 -21
package/src/utils/video.js +2 -2
package/types/backends/onnx.d.ts.map +1 -1
package/types/backends/utils/cacheWasm.d.ts.map +1 -1
package/types/configs.d.ts.map +1 -1
package/types/env.d.ts +42 -24
package/types/env.d.ts.map +1 -1
package/types/generation/logits_sampler.d.ts +2 -2
package/types/generation/logits_sampler.d.ts.map +1 -1
package/types/image_processors_utils.d.ts.map +1 -1
package/types/models/afmoe/modeling_afmoe.d.ts +8 -0
package/types/models/afmoe/modeling_afmoe.d.ts.map +1 -0
package/types/models/auto/image_processing_auto.d.ts.map +1 -1
package/types/models/auto/modeling_auto.d.ts.map +1 -1
package/types/models/auto/tokenization_auto.d.ts.map +1 -1
package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
package/types/models/cohere2/modeling_cohere2.d.ts +8 -0
package/types/models/cohere2/modeling_cohere2.d.ts.map +1 -0
package/types/models/marian/tokenization_marian.d.ts.map +1 -1
package/types/models/modeling_utils.d.ts.map +1 -1
package/types/models/models.d.ts +6 -0
package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
package/types/models/processors.d.ts +2 -0
package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +4 -0
package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -0
package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts +4 -0
package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts.map +1 -0
package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +3 -0
package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +1 -0
package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
package/types/models/qwen3_5/modeling_qwen3_5.d.ts +4 -0
package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -0
package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +4 -0
package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -0
package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +4 -0
package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -0
package/types/models/qwen3_vl/processing_qwen3_vl.d.ts +4 -0
package/types/models/qwen3_vl/processing_qwen3_vl.d.ts.map +1 -0
package/types/models/registry.d.ts.map +1 -1
package/types/models/session.d.ts.map +1 -1
package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
package/types/models/whisper/modeling_whisper.d.ts.map +1 -1
package/types/models/xlm/tokenization_xlm.d.ts.map +1 -1
package/types/pipelines/automatic-speech-recognition.d.ts.map +1 -1
package/types/pipelines/index.d.ts +299 -0
package/types/pipelines/index.d.ts.map +1 -0
package/types/pipelines/text-generation.d.ts +5 -1
package/types/pipelines/text-generation.d.ts.map +1 -1
package/types/pipelines/text-to-audio.d.ts.map +1 -1
package/types/pipelines/zero-shot-classification.d.ts.map +1 -1
package/types/pipelines.d.ts +50 -291
package/types/pipelines.d.ts.map +1 -1
package/types/tokenization_utils.d.ts +44 -26
package/types/tokenization_utils.d.ts.map +1 -1
package/types/transformers.d.ts +6 -1
package/types/transformers.d.ts.map +1 -1
package/types/utils/audio.d.ts.map +1 -1
package/types/utils/cache.d.ts +6 -0
package/types/utils/cache.d.ts.map +1 -1
package/types/utils/core.d.ts +59 -2
package/types/utils/core.d.ts.map +1 -1
package/types/utils/devices.d.ts +15 -0
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +16 -0
package/types/utils/dtypes.d.ts.map +1 -1
package/types/utils/hub/files.d.ts +6 -0
package/types/utils/hub/files.d.ts.map +1 -1
package/types/utils/hub/utils.d.ts +2 -1
package/types/utils/hub/utils.d.ts.map +1 -1
package/types/utils/hub.d.ts +29 -0
package/types/utils/hub.d.ts.map +1 -1
package/types/utils/image.d.ts.map +1 -1
package/types/utils/logger.d.ts +28 -0
package/types/utils/logger.d.ts.map +1 -0
package/types/utils/model-loader.d.ts +15 -0
package/types/utils/model-loader.d.ts.map +1 -1
package/types/utils/model_registry/ModelRegistry.d.ts +211 -0
package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -0
package/types/utils/model_registry/clear_cache.d.ts +74 -0
package/types/utils/model_registry/clear_cache.d.ts.map +1 -0
package/types/utils/model_registry/get_file_metadata.d.ts +20 -0
package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -0
package/types/utils/model_registry/get_files.d.ts +23 -0
package/types/utils/model_registry/get_files.d.ts.map +1 -0
package/types/utils/model_registry/get_model_files.d.ts +22 -0
package/types/utils/model_registry/get_model_files.d.ts.map +1 -0
package/types/utils/model_registry/get_pipeline_files.d.ts +21 -0
package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -0
package/types/utils/model_registry/get_processor_files.d.ts +9 -0
package/types/utils/model_registry/get_processor_files.d.ts.map +1 -0
package/types/utils/model_registry/get_tokenizer_files.d.ts +9 -0
package/types/utils/model_registry/get_tokenizer_files.d.ts.map +1 -0
package/types/utils/model_registry/is_cached.d.ts +62 -0
package/types/utils/model_registry/is_cached.d.ts.map +1 -0
package/types/utils/random.d.ts +86 -0
package/types/utils/random.d.ts.map +1 -0
package/types/utils/tensor.d.ts.map +1 -1

package/src/pipelines.js CHANGED

@@ -13,362 +13,40 @@
  * @module pipelines
  */
-import { AutoTokenizer } from './models/auto/tokenization_auto.js';
-import { AutoProcessor } from './models/auto/processing_auto.js';
-import {
-    AutoModel,
-    AutoModelForSequenceClassification,
-    AutoModelForAudioClassification,
-    AutoModelForTokenClassification,
-    AutoModelForQuestionAnswering,
-    AutoModelForMaskedLM,
-    AutoModelForSeq2SeqLM,
-    AutoModelForSpeechSeq2Seq,
-    AutoModelForTextToWaveform,
-    AutoModelForTextToSpectrogram,
-    AutoModelForCTC,
-    AutoModelForCausalLM,
-    AutoModelForVision2Seq,
-    AutoModelForImageClassification,
-    AutoModelForImageSegmentation,
-    AutoModelForSemanticSegmentation,
-    AutoModelForUniversalSegmentation,
-    AutoModelForObjectDetection,
-    AutoModelForZeroShotObjectDetection,
-    AutoModelForDocumentQuestionAnswering,
-    AutoModelForImageToImage,
-    AutoModelForDepthEstimation,
-    AutoModelForImageFeatureExtraction,
-} from './models/auto/modeling_auto.js';
 import { dispatchCallback } from './utils/core.js';
+import { logger } from './utils/logger.js';
-import { TextClassificationPipeline } from './pipelines/text-classification.js';
-import { TokenClassificationPipeline } from './pipelines/token-classification.js';
-import { QuestionAnsweringPipeline } from './pipelines/question-answering.js';
-import { FillMaskPipeline } from './pipelines/fill-mask.js';
-import { SummarizationPipeline } from './pipelines/summarization.js';
-import { TranslationPipeline } from './pipelines/translation.js';
-import { Text2TextGenerationPipeline } from './pipelines/text2text-generation.js';
-import { TextGenerationPipeline } from './pipelines/text-generation.js';
-import { ZeroShotClassificationPipeline } from './pipelines/zero-shot-classification.js';
-import { AudioClassificationPipeline } from './pipelines/audio-classification.js';
-import { ZeroShotAudioClassificationPipeline } from './pipelines/zero-shot-audio-classification.js';
-import { AutomaticSpeechRecognitionPipeline } from './pipelines/automatic-speech-recognition.js';
-import { TextToAudioPipeline } from './pipelines/text-to-audio.js';
-import { ImageToTextPipeline } from './pipelines/image-to-text.js';
-import { ImageClassificationPipeline } from './pipelines/image-classification.js';
-import { ImageSegmentationPipeline } from './pipelines/image-segmentation.js';
-import { BackgroundRemovalPipeline } from './pipelines/background-removal.js';
-import { ZeroShotImageClassificationPipeline } from './pipelines/zero-shot-image-classification.js';
-import { ObjectDetectionPipeline } from './pipelines/object-detection.js';
-import { ZeroShotObjectDetectionPipeline } from './pipelines/zero-shot-object-detection.js';
-import { DocumentQuestionAnsweringPipeline } from './pipelines/document-question-answering.js';
-import { ImageToImagePipeline } from './pipelines/image-to-image.js';
-import { DepthEstimationPipeline } from './pipelines/depth-estimation.js';
-import { FeatureExtractionPipeline } from './pipelines/feature-extraction.js';
-import { ImageFeatureExtractionPipeline } from './pipelines/image-feature-extraction.js';
-const SUPPORTED_TASKS = Object.freeze({
-    'text-classification': {
-        tokenizer: AutoTokenizer,
-        pipeline: TextClassificationPipeline,
-        model: AutoModelForSequenceClassification,
-        default: {
-            // TODO: replace with original
-            // "model": "distilbert-base-uncased-finetuned-sst-2-english",
-            model: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
-        },
-        type: 'text',
-    },
-    'token-classification': {
-        tokenizer: AutoTokenizer,
-        pipeline: TokenClassificationPipeline,
-        model: AutoModelForTokenClassification,
-        default: {
-            // TODO: replace with original
-            // "model": "Davlan/bert-base-multilingual-cased-ner-hrl",
-            model: 'Xenova/bert-base-multilingual-cased-ner-hrl',
-        },
-        type: 'text',
-    },
-    'question-answering': {
-        tokenizer: AutoTokenizer,
-        pipeline: QuestionAnsweringPipeline,
-        model: AutoModelForQuestionAnswering,
-        default: {
-            // TODO: replace with original
-            // "model": "distilbert-base-cased-distilled-squad",
-            model: 'Xenova/distilbert-base-cased-distilled-squad',
-        },
-        type: 'text',
-    },
-    'fill-mask': {
-        tokenizer: AutoTokenizer,
-        pipeline: FillMaskPipeline,
-        model: AutoModelForMaskedLM,
-        default: {
-            model: 'onnx-community/ettin-encoder-32m-ONNX',
-            dtype: 'fp32',
-        },
-        type: 'text',
-    },
-    summarization: {
-        tokenizer: AutoTokenizer,
-        pipeline: SummarizationPipeline,
-        model: AutoModelForSeq2SeqLM,
-        default: {
-            // TODO: replace with original
-            // "model": "sshleifer/distilbart-cnn-6-6",
-            model: 'Xenova/distilbart-cnn-6-6',
-        },
-        type: 'text',
-    },
-    translation: {
-        tokenizer: AutoTokenizer,
-        pipeline: TranslationPipeline,
-        model: AutoModelForSeq2SeqLM,
-        default: {
-            // TODO: replace with original
-            // "model": "t5-small",
-            model: 'Xenova/t5-small',
-        },
-        type: 'text',
-    },
-    'text2text-generation': {
-        tokenizer: AutoTokenizer,
-        pipeline: Text2TextGenerationPipeline,
-        model: AutoModelForSeq2SeqLM,
-        default: {
-            // TODO: replace with original
-            // "model": "google/flan-t5-small",
-            model: 'Xenova/flan-t5-small',
-        },
-        type: 'text',
-    },
-    'text-generation': {
-        tokenizer: AutoTokenizer,
-        pipeline: TextGenerationPipeline,
-        model: AutoModelForCausalLM,
-        default: {
-            model: 'onnx-community/Qwen3-0.6B-ONNX',
-            dtype: 'q4',
-        },
-        type: 'text',
-    },
-    'zero-shot-classification': {
-        tokenizer: AutoTokenizer,
-        pipeline: ZeroShotClassificationPipeline,
-        model: AutoModelForSequenceClassification,
-        default: {
-            // TODO: replace with original
-            // "model": "typeform/distilbert-base-uncased-mnli",
-            model: 'Xenova/distilbert-base-uncased-mnli',
-        },
-        type: 'text',
-    },
-    'audio-classification': {
-        pipeline: AudioClassificationPipeline,
-        model: AutoModelForAudioClassification,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "superb/wav2vec2-base-superb-ks",
-            model: 'Xenova/wav2vec2-base-superb-ks',
-        },
-        type: 'audio',
-    },
-    'zero-shot-audio-classification': {
-        tokenizer: AutoTokenizer,
-        pipeline: ZeroShotAudioClassificationPipeline,
-        model: AutoModel,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "laion/clap-htsat-fused",
-            model: 'Xenova/clap-htsat-unfused',
-        },
-        type: 'multimodal',
-    },
-    'automatic-speech-recognition': {
-        tokenizer: AutoTokenizer,
-        pipeline: AutomaticSpeechRecognitionPipeline,
-        model: [AutoModelForSpeechSeq2Seq, AutoModelForCTC],
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "openai/whisper-tiny.en",
-            model: 'Xenova/whisper-tiny.en',
-        },
-        type: 'multimodal',
-    },
-    'text-to-audio': {
-        tokenizer: AutoTokenizer,
-        pipeline: TextToAudioPipeline,
-        model: [AutoModelForTextToWaveform, AutoModelForTextToSpectrogram],
-        processor: [AutoProcessor, /* Some don't use a processor */ null],
-        default: {
-            model: 'onnx-community/Supertonic-TTS-ONNX',
-            dtype: 'fp32',
-        },
-        type: 'text',
-    },
-    'image-to-text': {
-        tokenizer: AutoTokenizer,
-        pipeline: ImageToTextPipeline,
-        model: AutoModelForVision2Seq,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "nlpconnect/vit-gpt2-image-captioning",
-            model: 'Xenova/vit-gpt2-image-captioning',
-        },
-        type: 'multimodal',
-    },
-    'image-classification': {
-        // no tokenizer
-        pipeline: ImageClassificationPipeline,
-        model: AutoModelForImageClassification,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "google/vit-base-patch16-224",
-            model: 'Xenova/vit-base-patch16-224',
-        },
-        type: 'multimodal',
-    },
-    'image-segmentation': {
-        // no tokenizer
-        pipeline: ImageSegmentationPipeline,
-        model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "facebook/detr-resnet-50-panoptic",
-            model: 'Xenova/detr-resnet-50-panoptic',
-        },
-        type: 'multimodal',
-    },
-    'background-removal': {
-        // no tokenizer
-        pipeline: BackgroundRemovalPipeline,
-        model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
-        processor: AutoProcessor,
-        default: {
-            model: 'Xenova/modnet',
-        },
-        type: 'image',
-    },
-    'zero-shot-image-classification': {
-        tokenizer: AutoTokenizer,
-        pipeline: ZeroShotImageClassificationPipeline,
-        model: AutoModel,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "openai/clip-vit-base-patch32",
-            model: 'Xenova/clip-vit-base-patch32',
-        },
-        type: 'multimodal',
-    },
-    'object-detection': {
-        // no tokenizer
-        pipeline: ObjectDetectionPipeline,
-        model: AutoModelForObjectDetection,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "facebook/detr-resnet-50",
-            model: 'Xenova/detr-resnet-50',
-        },
-        type: 'multimodal',
-    },
-    'zero-shot-object-detection': {
-        tokenizer: AutoTokenizer,
-        pipeline: ZeroShotObjectDetectionPipeline,
-        model: AutoModelForZeroShotObjectDetection,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "google/owlvit-base-patch32",
-            model: 'Xenova/owlvit-base-patch32',
-        },
-        type: 'multimodal',
-    },
-    'document-question-answering': {
-        tokenizer: AutoTokenizer,
-        pipeline: DocumentQuestionAnsweringPipeline,
-        model: AutoModelForDocumentQuestionAnswering,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "naver-clova-ix/donut-base-finetuned-docvqa",
-            model: 'Xenova/donut-base-finetuned-docvqa',
-        },
-        type: 'multimodal',
-    },
-    'image-to-image': {
-        // no tokenizer
-        pipeline: ImageToImagePipeline,
-        model: AutoModelForImageToImage,
-        processor: AutoProcessor,
-        default: {
-            // TODO: replace with original
-            // "model": "caidas/swin2SR-classical-sr-x2-64",
-            model: 'Xenova/swin2SR-classical-sr-x2-64',
-        },
-        type: 'image',
-    },
-    'depth-estimation': {
-        // no tokenizer
-        pipeline: DepthEstimationPipeline,
-        model: AutoModelForDepthEstimation,
-        processor: AutoProcessor,
-        default: {
-            model: 'onnx-community/depth-anything-v2-small',
-        },
-        type: 'image',
-    },
-    // This task serves as a useful interface for dealing with sentence-transformers (https://huggingface.co/sentence-transformers).
-    'feature-extraction': {
-        tokenizer: AutoTokenizer,
-        pipeline: FeatureExtractionPipeline,
-        model: AutoModel,
-        default: {
-            model: 'onnx-community/all-MiniLM-L6-v2-ONNX',
-            dtype: 'fp32',
-        },
-        type: 'text',
-    },
-    'image-feature-extraction': {
-        processor: AutoProcessor,
-        pipeline: ImageFeatureExtractionPipeline,
-        model: [AutoModelForImageFeatureExtraction, AutoModel],
-        default: {
-            model: 'onnx-community/dinov3-vits16-pretrain-lvd1689m-ONNX',
-            dtype: 'fp32',
-        },
-        type: 'image',
-    },
-});
-// TODO: Add types for TASK_ALIASES
-const TASK_ALIASES = Object.freeze({
-    'sentiment-analysis': 'text-classification',
-    ner: 'token-classification',
-    // "vqa": "visual-question-answering", // TODO: Add
-    asr: 'automatic-speech-recognition',
-    'text-to-speech': 'text-to-audio',
-    // Add for backwards compatibility
-    embeddings: 'feature-extraction',
-});
+import {
+    SUPPORTED_TASKS,
+    TASK_ALIASES,
+    TextClassificationPipeline,
+    TokenClassificationPipeline,
+    QuestionAnsweringPipeline,
+    FillMaskPipeline,
+    SummarizationPipeline,
+    TranslationPipeline,
+    Text2TextGenerationPipeline,
+    TextGenerationPipeline,
+    ZeroShotClassificationPipeline,
+    AudioClassificationPipeline,
+    ZeroShotAudioClassificationPipeline,
+    AutomaticSpeechRecognitionPipeline,
+    TextToAudioPipeline,
+    ImageToTextPipeline,
+    ImageClassificationPipeline,
+    ImageSegmentationPipeline,
+    BackgroundRemovalPipeline,
+    ZeroShotImageClassificationPipeline,
+    ObjectDetectionPipeline,
+    ZeroShotObjectDetectionPipeline,
+    DocumentQuestionAnsweringPipeline,
+    ImageToImagePipeline,
+    DepthEstimationPipeline,
+    FeatureExtractionPipeline,
+    ImageFeatureExtractionPipeline,
+} from './pipelines/index.js';
+import { get_pipeline_files } from './utils/model_registry/get_pipeline_files.js';
+import { get_file_metadata } from './utils/model_registry/get_file_metadata.js';
 /**
  * @typedef {keyof typeof SUPPORTED_TASKS} TaskType
@@ -443,14 +121,57 @@ export async function pipeline(
     // Use model if specified, otherwise, use default
     if (!model) {
         model = pipelineInfo.default.model;
-        console.log(`No model specified. Using default model: "${model}".`);
+        logger.info(`No model specified. Using default model: "${model}".`);
         if (!dtype && pipelineInfo.default.dtype) {
             dtype = pipelineInfo.default.dtype;
         }
     }
+    /** @type {import('./utils/core.js').FilesLoadingMap} */
+    let files_loading = {};
+    if (progress_callback) {
+        const expected_files = await get_pipeline_files(task, model, {
+            device,
+            dtype,
+        });
+        /** @type {Array<{exists: boolean, size?: number, contentType?: string, fromCache?: boolean}>} */
+        const metadata = await Promise.all(expected_files.map(async (file) => get_file_metadata(model, file)));
+        metadata.forEach((m, i) => {
+            if (m.exists) {
+                files_loading[expected_files[i]] = {
+                    loaded: 0,
+                    total: m.size ?? 0,
+                };
+            }
+        });
+    }
     const pretrainedOptions = {
-        progress_callback,
+        progress_callback: progress_callback
+            ? /** @param {import('./utils/core.js').ProgressInfo} info */
+              (info) => {
+                  if (info.status === 'progress') {
+                      files_loading[info.file] = {
+                          loaded: info.loaded,
+                          total: info.total,
+                      };
+                      const loaded = Object.values(files_loading).reduce((acc, curr) => acc + curr.loaded, 0);
+                      const total = Object.values(files_loading).reduce((acc, curr) => acc + curr.total, 0);
+                      const progress = total > 0 ? (loaded / total) * 100 : 0;
+                      progress_callback({
+                          status: 'progress_total',
+                          name: info.name,
+                          progress,
+                          loaded,
+                          total,
+                          files: structuredClone(files_loading),
+                      });
+                  }
+                  progress_callback(info);
+              }
+            : undefined,
         config,
         cache_dir,
         local_files_only,
@@ -574,3 +295,30 @@ export {
     FeatureExtractionPipeline,
     ImageFeatureExtractionPipeline,
 };
+// Export pipeline output types
+/**
+ * @typedef {import('./pipelines/fill-mask.js').FillMaskOutput} FillMaskOutput
+ * @typedef {import('./pipelines/text-classification.js').TextClassificationOutput} TextClassificationOutput
+ * @typedef {import('./pipelines/token-classification.js').TokenClassificationOutput} TokenClassificationOutput
+ * @typedef {import('./pipelines/question-answering.js').QuestionAnsweringOutput} QuestionAnsweringOutput
+ * @typedef {import('./pipelines/summarization.js').SummarizationOutput} SummarizationOutput
+ * @typedef {import('./pipelines/translation.js').TranslationOutput} TranslationOutput
+ * @typedef {import('./pipelines/text2text-generation.js').Text2TextGenerationOutput} Text2TextGenerationOutput
+ * @typedef {import('./pipelines/text-generation.js').TextGenerationOutput} TextGenerationOutput
+ * @typedef {import('./pipelines/text-generation.js').TextGenerationStringOutput} TextGenerationStringOutput
+ * @typedef {import('./pipelines/text-generation.js').TextGenerationChatOutput} TextGenerationChatOutput
+ * @typedef {import('./pipelines/zero-shot-classification.js').ZeroShotClassificationOutput} ZeroShotClassificationOutput
+ * @typedef {import('./pipelines/audio-classification.js').AudioClassificationOutput} AudioClassificationOutput
+ * @typedef {import('./pipelines/zero-shot-audio-classification.js').ZeroShotAudioClassificationOutput} ZeroShotAudioClassificationOutput
+ * @typedef {import('./pipelines/automatic-speech-recognition.js').AutomaticSpeechRecognitionOutput} AutomaticSpeechRecognitionOutput
+ * @typedef {import('./pipelines/text-to-audio.js').TextToAudioOutput} TextToAudioOutput
+ * @typedef {import('./pipelines/image-classification.js').ImageClassificationOutput} ImageClassificationOutput
+ * @typedef {import('./pipelines/image-segmentation.js').ImageSegmentationOutput} ImageSegmentationOutput
+ * @typedef {import('./pipelines/image-to-text.js').ImageToTextOutput} ImageToTextOutput
+ * @typedef {import('./pipelines/object-detection.js').ObjectDetectionOutput} ObjectDetectionOutput
+ * @typedef {import('./pipelines/zero-shot-object-detection.js').ZeroShotObjectDetectionOutput} ZeroShotObjectDetectionOutput
+ * @typedef {import('./pipelines/zero-shot-image-classification.js').ZeroShotImageClassificationOutput} ZeroShotImageClassificationOutput
+ * @typedef {import('./pipelines/document-question-answering.js').DocumentQuestionAnsweringOutput} DocumentQuestionAnsweringOutput
+ * @typedef {import('./pipelines/depth-estimation.js').DepthEstimationOutput} DepthEstimationOutput
+ */

package/src/tokenization_utils.js CHANGED

@@ -12,6 +12,8 @@ import { isIntegralNumber, mergeArrays } from './utils/core.js';
 import { getModelJSON } from './utils/hub.js';
 import { max } from './utils/maths.js';
 import { Tensor } from './utils/tensor.js';
+import { logger } from './utils/logger.js';
+import { get_tokenizer_files } from './utils/model_registry/get_tokenizer_files.js';
 /**
  * @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedTokenizerOptions
@@ -24,11 +26,10 @@ import { Tensor } from './utils/tensor.js';
  * @returns {Promise<any[]>} A promise that resolves with information about the loaded tokenizer.
  */
 export async function loadTokenizer(pretrained_model_name_or_path, options) {
-    const info = await Promise.all([
-        getModelJSON(pretrained_model_name_or_path, 'tokenizer.json', true, options),
-        getModelJSON(pretrained_model_name_or_path, 'tokenizer_config.json', true, options),
-    ]);
-    return info;
+    const tokenizerFiles = await get_tokenizer_files(pretrained_model_name_or_path);
+    return await Promise.all(
+        tokenizerFiles.map((file) => getModelJSON(pretrained_model_name_or_path, file, true, options)),
+    );
 }
 /**
@@ -64,10 +65,30 @@ const SPECIAL_TOKEN_ATTRIBUTES = [
     // additional_special_tokens (TODO)
 ];
+/**
+ * @typedef {{ type: 'text', text: string, [key: string]: any }} TextContent
+ * @property {'text'} type The type of content (must be 'text').
+ * @property {string} text The text content.
+ */
+/**
+ * @typedef {{ type: 'image', image?: string | import('./utils/image.js').RawImage, [key: string]: any }} ImageContent
+ * @property {'image'} type The type of content (must be 'image').
+ * @property {string | import('./utils/image.js').RawImage} [image] Optional URL or instance of the image.
+ *
+ * Note: This works for SmolVLM. Qwen2VL and Idefics3 have different implementations.
+ */
+/**
+ * @typedef {TextContent | ImageContent | { type: string & {}, [key: string]: any }} MessageContent
+ * Base type for message content. This is a discriminated union that can be extended with additional content types.
+ * Example: `@typedef {TextContent | ImageContent | AudioContent} MessageContent`
+ */
 /**
  * @typedef {Object} Message
- * @property {string} role The role of the message (e.g., "user" or "assistant" or "system").
- * @property {string} content The content of the message.
+ * @property {'user' | 'assistant' | 'system' | (string & {})} role The role of the message.
+ * @property {string | MessageContent[]} content The content of the message. Can be a simple string or an array of content objects.
  */
 /**
@@ -276,10 +297,10 @@ export class PreTrainedTokenizer extends Callable {
      * @param {string|string[]} [options.text_pair=null] Optional second sequence to be encoded. If set, must be the same type as text.
      * @param {boolean|'max_length'} [options.padding=false] Whether to pad the input sequences.
      * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
-     * @param {boolean} [options.truncation=null] Whether to truncate the input sequences.
-     * @param {number} [options.max_length=null] Maximum length of the returned list and optionally padding length.
+     * @param {boolean|null} [options.truncation=null] Whether to truncate the input sequences.
+     * @param {number|null} [options.max_length=null] Maximum length of the returned list and optionally padding length.
      * @param {boolean} [options.return_tensor=true] Whether to return the results as Tensors or arrays.
-     * @param {boolean} [options.return_token_type_ids=null] Whether to return the token type ids.
+     * @param {boolean|null} [options.return_token_type_ids=null] Whether to return the token type ids.
      * @returns {BatchEncoding} Object to be passed to the model.
      */
     _call(
@@ -339,13 +360,13 @@ export class PreTrainedTokenizer extends Callable {
             max_length = this.model_max_length;
         } else if (truncation === null) {
             if (padding === true) {
-                console.warn(
+                logger.warn(
                     '`max_length` is ignored when `padding: true` and there is no truncation strategy. ' +
                         "To pad to max length, use `padding: 'max_length'`.",
                 );
                 max_length = this.model_max_length;
             } else if (padding === false) {
-                console.warn(
+                logger.warn(
                     'Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation: true` to explicitly truncate examples to max length.',
                 );
                 truncation = true;
@@ -455,9 +476,9 @@ export class PreTrainedTokenizer extends Callable {
      *
      * @param {string} text The text to encode.
      * @param {Object} options An optional object containing the following properties:
-     * @param {string} [options.text_pair=null] The optional second text to encode.
+     * @param {string|null} [options.text_pair=null] The optional second text to encode.
      * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
-     * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
+     * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids.
      * @returns {{input_ids: number[], attention_mask: number[], token_type_ids?: number[]}} An object containing the encoded text.
      * @private
      */
@@ -478,7 +499,7 @@ export class PreTrainedTokenizer extends Callable {
      * Converts a string into a sequence of tokens.
      * @param {string} text The sequence to be encoded.
      * @param {Object} options An optional object containing the following properties:
-     * @param {string} [options.pair] A second sequence to be encoded with the first.
+     * @param {string|null} [options.pair] A second sequence to be encoded with the first.
      * @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model.
      * @returns {string[]} The list of tokens.
      */
@@ -491,9 +512,9 @@ export class PreTrainedTokenizer extends Callable {
      *
      * @param {string} text The text to encode.
      * @param {Object} options An optional object containing the following properties:
-     * @param {string} [options.text_pair=null] The optional second text to encode.
+     * @param {string|null} [options.text_pair=null] The optional second text to encode.
      * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
-     * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
+     * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids.
      * @returns {number[]} An array of token IDs representing the encoded text(s).
      */
     encode(text, { text_pair = null, add_special_tokens = true, return_token_type_ids = null } = {}) {
@@ -545,7 +566,7 @@ export class PreTrainedTokenizer extends Callable {
      * @param {number[]|bigint[]} token_ids List of token ids to decode
      * @param {Object} decode_args Optional arguments for decoding
      * @param {boolean} [decode_args.skip_special_tokens=false] Whether to skip special tokens during decoding
-     * @param {boolean} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding.
+     * @param {boolean|null} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding.
      * If null, the value is set to `this.decoder.cleanup` if it exists, falling back to `this.clean_up_tokenization_spaces` if it exists, falling back to `true`.
      * @returns {string} The decoded string
      */
@@ -562,7 +583,7 @@ export class PreTrainedTokenizer extends Callable {
      * template for better generation tracking.
      *
      * @param {Object} options An optional object containing the following properties:
-     * @param {string} [options.chat_template=null]
+     * @param {string|null} [options.chat_template=null]
      * A Jinja template or the name of a template to use for this conversion.
      * It is usually not necessary to pass anything to this argument,
      * as the model's template will be used by default.
@@ -642,7 +663,7 @@ export class PreTrainedTokenizer extends Callable {
      * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys,
      * representing the chat history so far.
      * @param {Object} options An optional object containing the following properties:
-     * @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If
+     * @param {string|null} [options.chat_template=null] A Jinja template to use for this conversion. If
      * this is not passed, the model's chat template will be used instead.
      * @param {Object[]} [options.tools=null]
      * A list of tools (callable functions) that will be accessible to the model. If the template does not
@@ -663,7 +684,7 @@ export class PreTrainedTokenizer extends Callable {
      * @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string.
      * @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false.
      * @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.
-     * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
+     * @param {number|null} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
      * If not specified, the tokenizer's `max_length` attribute will be used as a default.
      * @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
      * @param {boolean} [options.return_dict=true] Whether to return a dictionary with named outputs. Has no effect if tokenize is false.

package/src/transformers.js CHANGED Viewed

@@ -13,7 +13,7 @@
  */
 // Environment variables
-export { env } from './env.js';
+export { env, LogLevel } from './env.js';
 // Pipelines
 export * from './pipelines.js';
@@ -51,12 +51,19 @@ export { load_image, RawImage } from './utils/image.js';
 export { load_video, RawVideo, RawVideoFrame } from './utils/video.js';
 export * from './utils/tensor.js';
 export { softmax, log_softmax, dot, cos_sim } from './utils/maths.js';
+export { random } from './utils/random.js';
+// Cache and file management
+export { ModelRegistry } from './utils/model_registry/ModelRegistry.js';
 // Expose common types used across the library for developers to access
 /**
  * @typedef {import('./utils/hub.js').PretrainedModelOptions} PretrainedModelOptions
  * @typedef {import('./processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions
+ * @typedef {import('./tokenization_utils.js').Message} Message
  * @typedef {import('./tokenization_utils.js').PretrainedTokenizerOptions} PretrainedTokenizerOptions
  * @typedef {import('./utils/dtypes.js').DataType} DataType
  * @typedef {import('./utils/devices.js').DeviceType} DeviceType
+ * @typedef {import('./utils/core.js').ProgressCallback} ProgressCallback
+ * @typedef {import('./utils/core.js').ProgressInfo} ProgressInfo
  */