@huggingface/transformers 4.0.0-next.4 → 4.0.0-next.6
This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- package/README.md +15 -5
- package/dist/ort-wasm-simd-threaded.jsep.mjs +28 -28
- package/dist/transformers.js +3446 -2296
- package/dist/transformers.min.js +17 -19
- package/dist/transformers.node.cjs +3456 -2263
- package/dist/transformers.node.min.cjs +20 -22
- package/dist/transformers.node.min.mjs +20 -22
- package/dist/transformers.node.mjs +3426 -2258
- package/dist/transformers.web.js +1647 -471
- package/dist/transformers.web.min.js +18 -18
- package/package.json +4 -4
- package/src/backends/onnx.js +128 -53
- package/src/backends/utils/cacheWasm.js +28 -46
- package/src/configs.js +63 -3
- package/src/env.js +93 -11
- package/src/generation/logits_sampler.js +3 -15
- package/src/image_processors_utils.js +2 -6
- package/src/models/afmoe/modeling_afmoe.js +5 -0
- package/src/models/auto/image_processing_auto.js +2 -1
- package/src/models/auto/modeling_auto.js +16 -2
- package/src/models/auto/tokenization_auto.js +2 -1
- package/src/models/clap/feature_extraction_clap.js +2 -1
- package/src/models/marian/tokenization_marian.js +3 -2
- package/src/models/modeling_utils.js +45 -7
- package/src/models/models.js +10 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/paligemma/processing_paligemma.js +3 -2
- package/src/models/processors.js +2 -0
- package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -0
- package/src/models/qwen2_5_vl/processing_qwen2_5_vl.js +3 -0
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +54 -0
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +45 -6
- package/src/models/qwen3_5/modeling_qwen3_5.js +3 -0
- package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +3 -0
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl/modeling_qwen3_vl.js +3 -0
- package/src/models/qwen3_vl/processing_qwen3_vl.js +3 -0
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +3 -0
- package/src/models/registry.js +21 -5
- package/src/models/session.js +16 -50
- package/src/models/whisper/feature_extraction_whisper.js +2 -1
- package/src/models/whisper/modeling_whisper.js +6 -5
- package/src/models/xlm/tokenization_xlm.js +2 -1
- package/src/pipelines/automatic-speech-recognition.js +3 -2
- package/src/pipelines/index.js +313 -0
- package/src/pipelines/text-generation.js +4 -0
- package/src/pipelines/text-to-audio.js +4 -2
- package/src/pipelines/zero-shot-classification.js +3 -2
- package/src/pipelines.js +139 -428
- package/src/tokenization_utils.js +42 -21
- package/src/transformers.js +6 -1
- package/src/utils/audio.js +2 -1
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache.js +7 -4
- package/src/utils/core.js +23 -1
- package/src/utils/devices.js +22 -0
- package/src/utils/dtypes.js +55 -0
- package/src/utils/hub/{files.js → FileResponse.js} +0 -90
- package/src/utils/hub/utils.js +45 -5
- package/src/utils/hub.js +63 -22
- package/src/utils/image.js +14 -14
- package/src/utils/logger.js +67 -0
- package/src/utils/model-loader.js +35 -17
- package/src/utils/model_registry/ModelRegistry.js +346 -0
- package/src/utils/model_registry/clear_cache.js +128 -0
- package/src/utils/model_registry/get_file_metadata.js +149 -0
- package/src/utils/model_registry/get_files.js +42 -0
- package/src/utils/model_registry/get_model_files.js +193 -0
- package/src/utils/model_registry/get_pipeline_files.js +44 -0
- package/src/utils/model_registry/get_processor_files.js +20 -0
- package/src/utils/model_registry/get_tokenizer_files.js +21 -0
- package/src/utils/model_registry/is_cached.js +169 -0
- package/src/utils/random.js +225 -0
- package/src/utils/tensor.js +8 -21
- package/src/utils/video.js +2 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +52 -27
- package/types/env.d.ts.map +1 -1
- package/types/generation/logits_sampler.d.ts +2 -2
- package/types/generation/logits_sampler.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/afmoe/modeling_afmoe.d.ts +8 -0
- package/types/models/afmoe/modeling_afmoe.d.ts.map +1 -0
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/auto/tokenization_auto.d.ts.map +1 -1
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
- package/types/models/marian/tokenization_marian.d.ts.map +1 -1
- package/types/models/modeling_utils.d.ts +13 -2
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +10 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
- package/types/models/processors.d.ts +2 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +4 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -0
- package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts +4 -0
- package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts.map +1 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +3 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +1 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts +4 -0
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +4 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +4 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -0
- package/types/models/qwen3_vl/processing_qwen3_vl.d.ts +4 -0
- package/types/models/qwen3_vl/processing_qwen3_vl.d.ts.map +1 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +4 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/session.d.ts.map +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/models/whisper/modeling_whisper.d.ts.map +1 -1
- package/types/models/xlm/tokenization_xlm.d.ts.map +1 -1
- package/types/pipelines/automatic-speech-recognition.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +265 -0
- package/types/pipelines/index.d.ts.map +1 -0
- package/types/pipelines/text-generation.d.ts +5 -1
- package/types/pipelines/text-generation.d.ts.map +1 -1
- package/types/pipelines/text-to-audio.d.ts.map +1 -1
- package/types/pipelines/zero-shot-classification.d.ts.map +1 -1
- package/types/pipelines.d.ts +50 -291
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenization_utils.d.ts +44 -26
- package/types/tokenization_utils.d.ts.map +1 -1
- package/types/transformers.d.ts +4 -1
- package/types/transformers.d.ts.map +1 -1
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +10 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/core.d.ts +59 -2
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/devices.d.ts +15 -0
- package/types/utils/devices.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +16 -0
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -32
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +19 -3
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +36 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/logger.d.ts +28 -0
- package/types/utils/logger.d.ts.map +1 -0
- package/types/utils/model-loader.d.ts +15 -0
- package/types/utils/model-loader.d.ts.map +1 -1
- package/types/utils/model_registry/ModelRegistry.d.ts +271 -0
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -0
- package/types/utils/model_registry/clear_cache.d.ts +74 -0
- package/types/utils/model_registry/clear_cache.d.ts.map +1 -0
- package/types/utils/model_registry/get_file_metadata.d.ts +20 -0
- package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -0
- package/types/utils/model_registry/get_files.d.ts +23 -0
- package/types/utils/model_registry/get_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_model_files.d.ts +22 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_pipeline_files.d.ts +22 -0
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_processor_files.d.ts +9 -0
- package/types/utils/model_registry/get_processor_files.d.ts.map +1 -0
- package/types/utils/model_registry/get_tokenizer_files.d.ts +9 -0
- package/types/utils/model_registry/get_tokenizer_files.d.ts.map +1 -0
- package/types/utils/model_registry/is_cached.d.ts +105 -0
- package/types/utils/model_registry/is_cached.d.ts.map +1 -0
- package/types/utils/random.d.ts +86 -0
- package/types/utils/random.d.ts.map +1 -0
- package/types/utils/tensor.d.ts.map +1 -1
- package/types/utils/hub/files.d.ts.map +0 -1
package/src/pipelines/index.js
@@ -0,0 +1,313 @@
+/**
+ * @file Pipeline task configurations and aliases
+ *
+ * Defines which pipeline class and model class(es) each pipeline task needs.
+ * Tokenizer and processor loading is determined automatically from the model's files.
+ */
+
+import {
+    AutoModel,
+    AutoModelForSequenceClassification,
+    AutoModelForAudioClassification,
+    AutoModelForTokenClassification,
+    AutoModelForQuestionAnswering,
+    AutoModelForMaskedLM,
+    AutoModelForSeq2SeqLM,
+    AutoModelForSpeechSeq2Seq,
+    AutoModelForTextToWaveform,
+    AutoModelForTextToSpectrogram,
+    AutoModelForCTC,
+    AutoModelForCausalLM,
+    AutoModelForVision2Seq,
+    AutoModelForImageClassification,
+    AutoModelForImageSegmentation,
+    AutoModelForSemanticSegmentation,
+    AutoModelForUniversalSegmentation,
+    AutoModelForObjectDetection,
+    AutoModelForZeroShotObjectDetection,
+    AutoModelForDocumentQuestionAnswering,
+    AutoModelForImageToImage,
+    AutoModelForDepthEstimation,
+    AutoModelForImageFeatureExtraction,
+} from '../models/auto/modeling_auto.js';
+
+import { TextClassificationPipeline } from './text-classification.js';
+import { TokenClassificationPipeline } from './token-classification.js';
+import { QuestionAnsweringPipeline } from './question-answering.js';
+import { FillMaskPipeline } from './fill-mask.js';
+import { SummarizationPipeline } from './summarization.js';
+import { TranslationPipeline } from './translation.js';
+import { Text2TextGenerationPipeline } from './text2text-generation.js';
+import { TextGenerationPipeline } from './text-generation.js';
+import { ZeroShotClassificationPipeline } from './zero-shot-classification.js';
+import { AudioClassificationPipeline } from './audio-classification.js';
+import { ZeroShotAudioClassificationPipeline } from './zero-shot-audio-classification.js';
+import { AutomaticSpeechRecognitionPipeline } from './automatic-speech-recognition.js';
+import { TextToAudioPipeline } from './text-to-audio.js';
+import { ImageToTextPipeline } from './image-to-text.js';
+import { ImageClassificationPipeline } from './image-classification.js';
+import { ImageSegmentationPipeline } from './image-segmentation.js';
+import { BackgroundRemovalPipeline } from './background-removal.js';
+import { ZeroShotImageClassificationPipeline } from './zero-shot-image-classification.js';
+import { ObjectDetectionPipeline } from './object-detection.js';
+import { ZeroShotObjectDetectionPipeline } from './zero-shot-object-detection.js';
+import { DocumentQuestionAnsweringPipeline } from './document-question-answering.js';
+import { ImageToImagePipeline } from './image-to-image.js';
+import { DepthEstimationPipeline } from './depth-estimation.js';
+import { FeatureExtractionPipeline } from './feature-extraction.js';
+import { ImageFeatureExtractionPipeline } from './image-feature-extraction.js';
+
+export const SUPPORTED_TASKS = Object.freeze({
+    'text-classification': {
+        pipeline: TextClassificationPipeline,
+        model: AutoModelForSequenceClassification,
+        default: {
+            model: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
+        },
+        type: 'text',
+    },
+    'token-classification': {
+        pipeline: TokenClassificationPipeline,
+        model: AutoModelForTokenClassification,
+        default: {
+            model: 'Xenova/bert-base-multilingual-cased-ner-hrl',
+        },
+        type: 'text',
+    },
+    'question-answering': {
+        pipeline: QuestionAnsweringPipeline,
+        model: AutoModelForQuestionAnswering,
+        default: {
+            model: 'Xenova/distilbert-base-cased-distilled-squad',
+        },
+        type: 'text',
+    },
+    'fill-mask': {
+        pipeline: FillMaskPipeline,
+        model: AutoModelForMaskedLM,
+        default: {
+            model: 'onnx-community/ettin-encoder-32m-ONNX',
+            dtype: 'fp32',
+        },
+        type: 'text',
+    },
+    summarization: {
+        pipeline: SummarizationPipeline,
+        model: AutoModelForSeq2SeqLM,
+        default: {
+            model: 'Xenova/distilbart-cnn-6-6',
+        },
+        type: 'text',
+    },
+    translation: {
+        pipeline: TranslationPipeline,
+        model: AutoModelForSeq2SeqLM,
+        default: {
+            model: 'Xenova/t5-small',
+        },
+        type: 'text',
+    },
+    'text2text-generation': {
+        pipeline: Text2TextGenerationPipeline,
+        model: AutoModelForSeq2SeqLM,
+        default: {
+            model: 'Xenova/flan-t5-small',
+        },
+        type: 'text',
+    },
+    'text-generation': {
+        pipeline: TextGenerationPipeline,
+        model: AutoModelForCausalLM,
+        default: {
+            model: 'onnx-community/Qwen3-0.6B-ONNX',
+            dtype: 'q4',
+        },
+        type: 'text',
+    },
+    'zero-shot-classification': {
+        pipeline: ZeroShotClassificationPipeline,
+        model: AutoModelForSequenceClassification,
+        default: {
+            model: 'Xenova/distilbert-base-uncased-mnli',
+        },
+        type: 'text',
+    },
+    'audio-classification': {
+        pipeline: AudioClassificationPipeline,
+        model: AutoModelForAudioClassification,
+        default: {
+            model: 'Xenova/wav2vec2-base-superb-ks',
+        },
+        type: 'audio',
+    },
+    'zero-shot-audio-classification': {
+        pipeline: ZeroShotAudioClassificationPipeline,
+        model: AutoModel,
+        default: {
+            model: 'Xenova/clap-htsat-unfused',
+        },
+        type: 'multimodal',
+    },
+    'automatic-speech-recognition': {
+        pipeline: AutomaticSpeechRecognitionPipeline,
+        model: [AutoModelForSpeechSeq2Seq, AutoModelForCTC],
+        default: {
+            model: 'Xenova/whisper-tiny.en',
+        },
+        type: 'multimodal',
+    },
+    'text-to-audio': {
+        pipeline: TextToAudioPipeline,
+        model: [AutoModelForTextToWaveform, AutoModelForTextToSpectrogram],
+        default: {
+            model: 'onnx-community/Supertonic-TTS-ONNX',
+            dtype: 'fp32',
+        },
+        type: 'text',
+    },
+    'image-to-text': {
+        pipeline: ImageToTextPipeline,
+        model: AutoModelForVision2Seq,
+        default: {
+            model: 'Xenova/vit-gpt2-image-captioning',
+        },
+        type: 'multimodal',
+    },
+    'image-classification': {
+        pipeline: ImageClassificationPipeline,
+        model: AutoModelForImageClassification,
+        default: {
+            model: 'Xenova/vit-base-patch16-224',
+        },
+        type: 'multimodal',
+    },
+    'image-segmentation': {
+        pipeline: ImageSegmentationPipeline,
+        model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
+        default: {
+            model: 'Xenova/detr-resnet-50-panoptic',
+        },
+        type: 'multimodal',
+    },
+    'background-removal': {
+        pipeline: BackgroundRemovalPipeline,
+        model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
+        default: {
+            model: 'Xenova/modnet',
+        },
+        type: 'image',
+    },
+    'zero-shot-image-classification': {
+        pipeline: ZeroShotImageClassificationPipeline,
+        model: AutoModel,
+        default: {
+            model: 'Xenova/clip-vit-base-patch32',
+        },
+        type: 'multimodal',
+    },
+    'object-detection': {
+        pipeline: ObjectDetectionPipeline,
+        model: AutoModelForObjectDetection,
+        default: {
+            model: 'Xenova/detr-resnet-50',
+        },
+        type: 'multimodal',
+    },
+    'zero-shot-object-detection': {
+        pipeline: ZeroShotObjectDetectionPipeline,
+        model: AutoModelForZeroShotObjectDetection,
+        default: {
+            model: 'Xenova/owlvit-base-patch32',
+        },
+        type: 'multimodal',
+    },
+    'document-question-answering': {
+        pipeline: DocumentQuestionAnsweringPipeline,
+        model: AutoModelForDocumentQuestionAnswering,
+        default: {
+            model: 'Xenova/donut-base-finetuned-docvqa',
+        },
+        type: 'multimodal',
+    },
+    'image-to-image': {
+        pipeline: ImageToImagePipeline,
+        model: AutoModelForImageToImage,
+        default: {
+            model: 'Xenova/swin2SR-classical-sr-x2-64',
+        },
+        type: 'image',
+    },
+    'depth-estimation': {
+        pipeline: DepthEstimationPipeline,
+        model: AutoModelForDepthEstimation,
+        default: {
+            model: 'onnx-community/depth-anything-v2-small',
+        },
+        type: 'image',
+    },
+    'feature-extraction': {
+        pipeline: FeatureExtractionPipeline,
+        model: AutoModel,
+        default: {
+            model: 'onnx-community/all-MiniLM-L6-v2-ONNX',
+            dtype: 'fp32',
+        },
+        type: 'text',
+    },
+    'image-feature-extraction': {
+        pipeline: ImageFeatureExtractionPipeline,
+        model: [AutoModelForImageFeatureExtraction, AutoModel],
+        default: {
+            model: 'onnx-community/dinov3-vits16-pretrain-lvd1689m-ONNX',
+            dtype: 'fp32',
+        },
+        type: 'image',
+    },
+});
+
+// TODO: Add types for TASK_ALIASES
+
+export const TASK_ALIASES = Object.freeze({
+    'sentiment-analysis': 'text-classification',
+    ner: 'token-classification',
+    // "vqa": "visual-question-answering", // TODO: Add
+    asr: 'automatic-speech-recognition',
+    'text-to-speech': 'text-to-audio',
+
+    // Add for backwards compatibility
+    embeddings: 'feature-extraction',
+});
+
+export {
+    TextClassificationPipeline,
+    TokenClassificationPipeline,
+    QuestionAnsweringPipeline,
+    FillMaskPipeline,
+    SummarizationPipeline,
+    TranslationPipeline,
+    Text2TextGenerationPipeline,
+    TextGenerationPipeline,
+    ZeroShotClassificationPipeline,
+    AudioClassificationPipeline,
+    ZeroShotAudioClassificationPipeline,
+    AutomaticSpeechRecognitionPipeline,
+    TextToAudioPipeline,
+    ImageToTextPipeline,
+    ImageClassificationPipeline,
+    ImageSegmentationPipeline,
+    BackgroundRemovalPipeline,
+    ZeroShotImageClassificationPipeline,
+    ObjectDetectionPipeline,
+    ZeroShotObjectDetectionPipeline,
+    DocumentQuestionAnsweringPipeline,
+    ImageToImagePipeline,
+    DepthEstimationPipeline,
+    FeatureExtractionPipeline,
+    ImageFeatureExtractionPipeline,
+};
+
+/**
+ * @typedef {keyof typeof SUPPORTED_TASKS} TaskType
+ * @typedef {keyof typeof TASK_ALIASES} AliasType
+ * @typedef {TaskType | AliasType} PipelineType All possible pipeline types.
+ */
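For reference, the task keys and aliases above are the strings accepted by the `pipeline()` factory, and when no model id is passed the `default` entry (including any `dtype`) is used. The following usage sketch is illustrative and not part of this diff; the fallback behaviour is assumed from the `default` entries above.

```js
import { pipeline } from '@huggingface/transformers';

// 'sentiment-analysis' resolves via TASK_ALIASES to 'text-classification',
// which falls back to the default model listed in SUPPORTED_TASKS.
const classifier = await pipeline('sentiment-analysis');
const result = await classifier('I love Transformers.js!');
// e.g. [{ label: 'POSITIVE', score: 0.99 }]
```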
package/src/pipelines/text-generation.js
@@ -100,6 +100,10 @@ function isChat(x) {
 export class TextGenerationPipeline
     extends /** @type {new (options: TextPipelineConstructorArgs) => TextGenerationPipelineType} */ (Pipeline)
 {
+    /**
+     * @param {string | string[] | import('../tokenization_utils.js').Message[] | import('../tokenization_utils.js').Message[][]} texts
+     * @param {Partial<TextGenerationConfig>} generate_kwargs
+     */
     async _call(texts, generate_kwargs = {}) {
         let isBatched = false;
         let isChatInput = false;
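The added JSDoc documents that `_call` accepts plain strings as well as chat-style `Message[]` input. A minimal sketch of the chat form, using the task's default model from the table above (the snippet itself is illustrative, not part of this diff):

```js
import { pipeline } from '@huggingface/transformers';

// Defaults to onnx-community/Qwen3-0.6B-ONNX with dtype: 'q4' (see SUPPORTED_TASKS above).
const generator = await pipeline('text-generation');

const messages = [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Write a haiku about ONNX.' },
];
const output = await generator(messages, { max_new_tokens: 64 });
```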
package/src/pipelines/text-to-audio.js
@@ -2,8 +2,10 @@ import { Pipeline } from './_base.js';
 
 import { Tensor } from '../utils/tensor.js';
 import { RawAudio } from '../utils/audio.js';
+import { logger } from '../utils/logger.js';
 
 import { AutoModel } from '../models/auto/modeling_auto.js';
+import { env } from '../env.js';
 
 /**
  * @typedef {import('./_base.js').TextAudioPipelineConstructorArgs} TextAudioPipelineConstructorArgs
@@ -92,7 +94,7 @@ export class TextToAudioPipeline
         // Load speaker embeddings as Float32Array from path/URL
         if (typeof speaker_embeddings === 'string' || speaker_embeddings instanceof URL) {
             // Load from URL with fetch
-            speaker_embeddings = new Float32Array(await (await fetch(speaker_embeddings)).arrayBuffer());
+            speaker_embeddings = new Float32Array(await (await env.fetch(speaker_embeddings)).arrayBuffer());
         }
 
         if (speaker_embeddings instanceof Float32Array) {
@@ -194,7 +196,7 @@ export class TextToAudioPipeline
     async _call_text_to_spectrogram(text_inputs, { speaker_embeddings }) {
         // Load vocoder, if not provided
         if (!this.vocoder) {
-            console.log('No vocoder specified, using default HifiGan vocoder.');
+            logger.info('No vocoder specified, using default HifiGan vocoder.');
             this.vocoder = await AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID, { dtype: 'fp32' });
         }
 
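Speaker embeddings are now downloaded through `env.fetch` instead of the global `fetch`, so the request can be routed through a user-supplied function. A minimal sketch, assuming `env.fetch` defaults to the global `fetch` and can be reassigned (this hunk only shows it being read):

```js
import { env, pipeline } from '@huggingface/transformers';

// Hypothetical override: attach an auth header to requests made by the library.
env.fetch = (input, init = {}) =>
    fetch(input, { ...init, headers: { ...init.headers, Authorization: 'Bearer <token>' } });

const tts = await pipeline('text-to-speech'); // alias for 'text-to-audio'
```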
package/src/pipelines/zero-shot-classification.js
@@ -1,6 +1,7 @@
 import { Pipeline } from './_base.js';
 
 import { softmax } from '../utils/maths.js';
+import { logger } from '../utils/logger.js';
 
 /**
  * @typedef {import('./_base.js').TextPipelineConstructorArgs} TextPipelineConstructorArgs
@@ -93,13 +94,13 @@ export class ZeroShotClassificationPipeline
 
         this.entailment_id = this.label2id['entailment'];
         if (this.entailment_id === undefined) {
-            console.warn("Could not find 'entailment' in label2id mapping. Using 2 as entailment_id.");
+            logger.warn("Could not find 'entailment' in label2id mapping. Using 2 as entailment_id.");
             this.entailment_id = 2;
         }
 
         this.contradiction_id = this.label2id['contradiction'] ?? this.label2id['not_entailment'];
         if (this.contradiction_id === undefined) {
-            console.warn("Could not find 'contradiction' in label2id mapping. Using 0 as contradiction_id.");
+            logger.warn("Could not find 'contradiction' in label2id mapping. Using 0 as contradiction_id.");
             this.contradiction_id = 0;
         }
     }